summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorsharkdp <davidpeter@web.de>2020-04-12 17:35:30 +0200
committerDavid Peter <sharkdp@users.noreply.github.com>2020-04-12 19:19:33 +0200
commitf2cef702a02265fef883efba58ef0de5080916f7 (patch)
tree6e216d4b87b49e1d9758d817ac8854ef10edc24e /tests
parent8b3d54ce29b07b53b6dc422a956b39392b04acc3 (diff)
Add script to find slow-to-highlight files
Diffstat (limited to 'tests')
-rw-r--r--tests/scripts/find-slow-to-highlight-files.py101
1 files changed, 101 insertions, 0 deletions
diff --git a/tests/scripts/find-slow-to-highlight-files.py b/tests/scripts/find-slow-to-highlight-files.py
new file mode 100644
index 00000000..be45efa6
--- /dev/null
+++ b/tests/scripts/find-slow-to-highlight-files.py
@@ -0,0 +1,101 @@
+import time
+import os
+import subprocess as sp
+
+
# Threshold speed in chars per second. Files that `bat` highlights more
# slowly than this are reported.
THRESHOLD_SPEED = 20000

# Maximum time (in seconds) we allow `bat` to run on a single file
BAT_TIMEOUT_SEC = 10

# Maximum number of files to measure (passed to `fd --max-results`)
MAX_NUM_FILES = 100

# Root folder for the search. `os.getenv("HOME")` is None when $HOME is not
# set, and None must never end up in a subprocess argument list, so fall back
# to the platform's notion of the home directory.
SEARCH_ROOT = os.getenv("HOME") or os.path.expanduser("~")
+
+
def find_slow_files(startup_time, language, glob_pattern):
    """Report files matching *glob_pattern* that `bat` highlights slowly.

    Uses `fd` to collect up to MAX_NUM_FILES regular files below SEARCH_ROOT,
    runs `bat --color=always` on each of them and prints every file whose
    highlighting speed is below THRESHOLD_SPEED.

    Args:
        startup_time: Duration in seconds that is subtracted from every
            measurement to account for the start-up cost of `bat`.
        language: Language name; only used in the printed header line.
        glob_pattern: Pattern handed to `fd --glob`.

    Raises:
        subprocess.CalledProcessError: If `fd` exits with a non-zero status.
    """
    out = sp.check_output(
        [
            "fd",
            "--hidden",
            "--no-ignore",
            "--type=file",
            "--max-results",
            str(MAX_NUM_FILES),
            "--glob",
            glob_pattern,
            SEARCH_ROOT,
        ]
    )

    # One path per line; the chunk after the final newline is always empty.
    paths = out.split(b"\n")[:-1]
    print(f"Language: {language}, glob pattern: {glob_pattern} ({len(paths)} matches)")

    for path in paths:
        # Paths are raw bytes and are not guaranteed to be valid UTF-8. Decode
        # leniently so that an unusual file name cannot abort the whole scan.
        display_path = path.decode(errors="replace")

        try:
            # Size in bytes, used as an approximation of the character count.
            # Asking the OS directly avoids spawning `wc` for every file and
            # parsing its (platform dependent) output format.
            num_chars = os.path.getsize(path)
        except OSError:
            print(f" Error: could not determine the size of file '{display_path}'.")
            continue

        if num_chars < 500:
            # It is hard to measure the exact speed for short files
            continue

        try:
            # perf_counter() is monotonic, unlike the wall clock.
            start = time.perf_counter()
            sp.check_output(["bat", "--color=always", path], timeout=BAT_TIMEOUT_SEC)
            duration = time.perf_counter() - start - startup_time
        except sp.CalledProcessError:
            print(f" Error while highlighting file '{display_path}'.")
        except sp.TimeoutExpired:
            # A file this small should have finished within the timeout even
            # at the threshold speed, so the timeout points to a real problem.
            if num_chars < THRESHOLD_SPEED * BAT_TIMEOUT_SEC:
                print(f" Error: bat timed out on file '{display_path}'.")
            else:
                print(
                    f" Warning: bat timed out on file '{display_path}' (but the file is large)."
                )
        else:
            if duration <= 0:
                # The run was not measurably slower than a bare start-up of
                # `bat`; no meaningful speed can be derived.
                continue

            highlighting_speed = num_chars / duration

            if highlighting_speed < THRESHOLD_SPEED:
                print(f" {highlighting_speed:10.0f} chars/s: {display_path}")
+
+
def measure_bat_startup_speed():
    """Return the shortest observed run time of `bat` on a tiny input.

    `bat --color=always --language=py` is run 20 times with the bytes
    ``test`` on stdin. The minimum duration (in seconds) over all runs is
    returned.

    Returns:
        The smallest measured duration in seconds, as a float.
    """

    def time_single_run():
        # perf_counter() is monotonic, unlike the wall clock.
        start = time.perf_counter()
        # stdout is captured (and discarded) rather than inherited.
        sp.run(
            ["bat", "--color=always", "--language=py"],
            input=b"test",
            stdout=sp.PIPE,
        )
        return time.perf_counter() - start

    # min() also handles a measured duration of exactly 0.0 correctly; a
    # truthiness check on the running minimum would treat it as "not set yet".
    return min(time_single_run() for _ in range(20))
+
+
def main():
    """Measure the start-up time of `bat`, then scan all languages it lists.

    Every line of `bat --list-languages` is read as ``<language>:<ext>,...``.
    For each entry, files are searched by the entry itself and, unless it
    starts with a dot, also by the glob ``*.<entry>``.
    """
    print("Measuring 'bat' startup speed ... ", flush=True, end="")
    startup_time = measure_bat_startup_speed()
    print(f"{startup_time * 1000:.1f} ms")

    output = sp.check_output(["bat", "--list-languages"]).decode()

    for line in output.strip().split("\n"):
        # Split on the last colon, so a colon inside a language name cannot
        # break the unpacking. Lines without any colon are skipped.
        language, separator, extensions = line.rpartition(":")
        if not separator:
            continue

        for ext in extensions.split(","):
            ext = ext.strip()
            if not ext:
                # An empty entry would only produce the useless globs "" and "*."
                continue

            # The entry as given is used as a glob on its own ...
            find_slow_files(startup_time, language, ext)
            # ... and, unless it starts with a dot, also as a file extension.
            if not ext.startswith("."):
                find_slow_files(startup_time, language, f"*.{ext}")


if __name__ == "__main__":
    main()