summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- nixos/lib/testing.nix 2
-rw-r--r-- pkgs/applications/graphics/tesseract/default.nix 28
2 files changed, 28 insertions, 2 deletions
diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix
index 8539fef0a19d..d8b5df004df8 100644
--- a/nixos/lib/testing.nix
+++ b/nixos/lib/testing.nix
@@ -93,7 +93,7 @@ rec {
vms = map (m: m.config.system.build.vm) (lib.attrValues nodes);
- ocrProg = tesseract;
+ ocrProg = tesseract.override { enableLanguages = [ "eng" ]; };
# Generate convenience wrappers for running the test driver
# interactively with the specified network, and for starting the
diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix
index 1f1da9a389f2..b5e1707c4fe4 100644
--- a/pkgs/applications/graphics/tesseract/default.nix
+++ b/pkgs/applications/graphics/tesseract/default.nix
@@ -1,5 +1,8 @@
{ stdenv, fetchFromGitHub, pkgconfig, leptonica, libpng, libtiff
, icu, pango, opencl-headers
+
+# List of languages to enable, or `null' for all available languages
+, enableLanguages ? null
}:
stdenv.mkDerivation rec {
@@ -25,7 +28,30 @@ stdenv.mkDerivation rec {
LIBLEPT_HEADERSDIR = "${leptonica}/include";
- postInstall = "cp -Rt \"$out/share/tessdata\" \"$tessdata/\"*";
+ # Copy the .traineddata files of the languages listed in enableLanguages (or
+ # all of them if it is null) into `$out/share/tessdata', then check that the
+ # number of files found matches the expected number of languages.
+ postInstall = let
+ mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}";
+ mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg;
+ findLangArgs = if enableLanguages != null
+ then "\\( ${mkFindArgs enableLanguages} \\)"
+ else "-iname '*.traineddata'";
+ in ''
+ numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \
+ ${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)"
+
+ ${if enableLanguages != null then ''
+ expected=${toString (builtins.length enableLanguages)}
+ '' else ''
+ expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)"
+ ''}
+
+ if [ "$numLangs" -ne "$expected" ]; then
+ echo "Expected $expected languages, but $numLangs" \
+ "were copied to \`$out/share/tessdata'" >&2
+ exit 1
+ fi
+ '';
meta = {
description = "OCR engine";