nixos/test-driver: use a variety of different Tesseract settings for OCR

When performing OCR, some of the Tesseract settings perform better than others on a variety of different workloads, but they mostly take ~negligible incremental time to run compared to the overhead of running the ImageMagick filters. After this commit, we try using all three of the current Tesseract models (classic, LSTM, and classic+LSTM) to generate output text. This fixes chromium-90's tests at release-20.09, and should make cases where you're looking for *specific* text better, with the tradeoff of running Tesseract multiple times. To make it sensible to cherrypick this into release-20.09, this doesn't change the existing API surface for the test driver. In particular, get_screen_text continues to have the existing behaviour.
author: Luke Granger-Brown <git@lukegb.com> 2021-04-23 15:02:36 +0000
committer: Luke Granger-Brown <git@lukegb.com> 2021-04-23 18:42:35 +0000
commit: 4de343cccfd6591ab798e03600d8b3626db8efbd (patch)
tree: e50e66fdbac073c8a0953fe2270498b4b9456e74 /nixos/lib/test-driver
parent: 3ac9de55b08fa8b20f8e56dfe0016d51834eb079 (diff)
1 files changed, 48 insertions, 31 deletions
diff --git a/nixos/lib/test-driver/test-driver.py b/nixos/lib/test-driver/test-driver.py
index 96b75a49928f..7800a49e4107 100644
--- a/nixos/lib/test-driver/test-driver.py
+++ b/nixos/lib/test-driver/test-driver.py
@@ -1,7 +1,7 @@
 #! /somewhere/python3
 from contextlib import contextmanager, _GeneratorContextManager
 from queue import Queue, Empty
-from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List
+from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List, Iterable
 from xml.sax.saxutils import XMLGenerator
 import queue
 import io
@@ -205,6 +205,37 @@ class Logger:
         self.xml.endElement("nest")
 
 
+def _perform_ocr_on_screenshot(
+    screenshot_path: str, model_ids: Iterable[int]
+) -> List[str]:
+    if shutil.which("tesseract") is None:
+        raise Exception("OCR requested but enableOCR is false")
+
+    magick_args = (
+        "-filter Catrom -density 72 -resample 300 "
+        + "-contrast -normalize -despeckle -type grayscale "
+        + "-sharpen 1 -posterize 3 -negate -gamma 100 "
+        + "-blur 1x65535"
+    )
+
+    tess_args = f"-c debug_file=/dev/null --psm 11"
+
+    cmd = f"convert {magick_args} {screenshot_path} tiff:{screenshot_path}.tiff"
+    ret = subprocess.run(cmd, shell=True, capture_output=True)
+    if ret.returncode != 0:
+        raise Exception(f"TIFF conversion failed with exit code {ret.returncode}")
+
+    model_results = []
+    for model_id in model_ids:
+        cmd = f"tesseract {screenshot_path}.tiff - {tess_args} --oem {model_id}"
+        ret = subprocess.run(cmd, shell=True, capture_output=True)
+        if ret.returncode != 0:
+            raise Exception(f"OCR failed with exit code {ret.returncode}")
+        model_results.append(ret.stdout.decode("utf-8"))
+
+    return model_results
+
+
 class Machine:
     def __init__(self, args: Dict[str, Any]) -> None:
         if "name" in args:
@@ -637,43 +668,29 @@ class Machine:
         """Debugging: Dump the contents of the TTY<n>"""
         self.execute("fold -w 80 /dev/vcs{} | systemd-cat".format(tty))
 
-    def get_screen_text(self) -> str:
-        if shutil.which("tesseract") is None:
-            raise Exception("get_screen_text used but enableOCR is false")
-
-        magick_args = (
-            "-filter Catrom -density 72 -resample 300 "
-            + "-contrast -normalize -despeckle -type grayscale "
-            + "-sharpen 1 -posterize 3 -negate -gamma 100 "
-            + "-blur 1x65535"
-        )
-
-        tess_args = "-c debug_file=/dev/null --psm 11 --oem 2"
+    def _get_screen_text_variants(self, model_ids: Iterable[int]) -> List[str]:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            screenshot_path = os.path.join(tmpdir, "ppm")
+            self.send_monitor_command(f"screendump {screenshot_path}")
+            return _perform_ocr_on_screenshot(screenshot_path, model_ids)
 
-        with self.nested("performing optical character recognition"):
-            with tempfile.NamedTemporaryFile() as tmpin:
-                self.send_monitor_command("screendump {}".format(tmpin.name))
-
-                cmd = "convert {} {} tiff:- | tesseract - - {}".format(
-                    magick_args, tmpin.name, tess_args
-                )
-                ret = subprocess.run(cmd, shell=True, capture_output=True)
-                if ret.returncode != 0:
-                    raise Exception(
-                        "OCR failed with exit code {}".format(ret.returncode)
-                    )
+    def get_screen_text_variants(self) -> List[str]:
+        return self._get_screen_text_variants([0, 1, 2])
 
-                return ret.stdout.decode("utf-8")
+    def get_screen_text(self) -> str:
+        return self._get_screen_text_variants([2])[0]
 
     def wait_for_text(self, regex: str) -> None:
         def screen_matches(last: bool) -> bool:
-            text = self.get_screen_text()
-            matches = re.search(regex, text) is not None
+            variants = self.get_screen_text_variants()
+            for text in variants:
+                if re.search(regex, text) is not None:
+                    return True
 
-            if last and not matches:
-                self.log("Last OCR attempt failed. Text was: {}".format(text))
+            if last:
+                self.log("Last OCR attempt failed. Text was: {}".format(variants))
 
-            return matches
+            return False
 
         with self.nested("waiting for {} to appear on screen".format(regex)):
             retry(screen_matches)
author	Luke Granger-Brown <git@lukegb.com>	2021-04-23 15:02:36 +0000
committer	Luke Granger-Brown <git@lukegb.com>	2021-04-23 18:42:35 +0000
commit	4de343cccfd6591ab798e03600d8b3626db8efbd (patch)
tree	e50e66fdbac073c8a0953fe2270498b4b9456e74 /nixos/lib/test-driver
parent	3ac9de55b08fa8b20f8e56dfe0016d51834eb079 (diff)