ocrmypdf: init at 8.2.3

author: Robert Djubek <envy1988@gmail.com> 2019-03-14 03:13:25 +0000
committer: Robert Djubek <envy1988@gmail.com> 2019-08-14 04:45:09 +0000
commit: 352239e24a7da18f4eb22993cd05e8535d6b01a5 (patch)
tree: a8b26520a91e23a27a5851bb170aafa4ab1d3243 /pkgs/tools/text
parent: a141544cb5877dbb35959cb04cf48aa89ef45ef1 (diff)
1 files changed, 103 insertions, 0 deletions
diff --git a/pkgs/tools/text/ocrmypdf/default.nix b/pkgs/tools/text/ocrmypdf/default.nix
new file mode 100644
index 000000000000..514f3f675399
--- /dev/null
+++ b/pkgs/tools/text/ocrmypdf/default.nix
@@ -0,0 +1,103 @@
+{ fetchFromGitHub
+, ghostscript
+, img2pdf
+, jbig2enc
+, leptonica
+, pngquant
+, python3 # NOTE(review): not referenced anywhere in this expression -- confirm it is needed
+, python3Packages
+, qpdf
+, stdenv
+, tesseract4
+, unpaper
+}:
+# Builds OCRmyPDF from its GitHub sources as a Python application via buildPythonApplication.
+let
+  inherit (python3Packages) buildPythonApplication;
+
+  runtimeDeps = with python3Packages; [ # only pillow comes from python3Packages; the rest are function arguments
+    ghostscript
+    jbig2enc
+    leptonica
+    pngquant
+    qpdf
+    tesseract4
+    unpaper
+    pillow
+  ]; # NOTE(review): only used by checkInputs below; nothing here puts these tools on the installed program's PATH -- confirm
+
+in buildPythonApplication rec {
+  pname = "ocrmypdf";
+  version = "8.2.3";
+  disabled = ! python3Packages.isPy3k; # disabled whenever python3Packages.isPy3k is false
+
+  src = fetchFromGitHub {
+    owner = "jbarlow83";
+    repo = "OCRmyPDF";
+    rev = "v${version}"; # "v" followed by the version attribute above
+    sha256 = "1ldlyhxkav34y9d7g2kx3d4p26c2b82vnwi0ywnfynb16sav36d5";
+  };
+
+  nativeBuildInputs = with python3Packages; [
+    pytestrunner
+    setuptools
+    setuptools-scm-git-archive
+    setuptools_scm
+  ];
+
+  propagatedBuildInputs = with python3Packages; [
+    cffi
+    chardet
+    img2pdf # resolves to the img2pdf function argument; `with` does not shadow it with python3Packages.img2pdf
+    pdfminer
+    pikepdf
+    reportlab
+    ruffus
+  ];
+
+  checkInputs = with python3Packages; [
+    hocr-tools
+    pypdf2
+    pytest
+    pytest-helpers-namespace
+    pytest_xdist
+    pytestcov
+    pytestrunner # also listed in nativeBuildInputs
+    python-xmp-toolkit
+    setuptools # also listed in nativeBuildInputs
+  ] ++ runtimeDeps;
+
+  # Swap the find_library('lept') lookup in leptonica.py for an absolute path to liblept under leptonica's library directory.
+  postPatch = ''
+    substituteInPlace src/ocrmypdf/leptonica.py \
+      --replace "ffi.dlopen(find_library('lept'))" \
+      'ffi.dlopen("${stdenv.lib.makeLibraryPath [leptonica]}/liblept${stdenv.hostPlatform.extensions.sharedLibrary}")'
+  '';
+
+  # The tests take potentially 20+ minutes, depending on machine
+  doCheck = false; # with this off, the checkPhase below is not run during the build
+
+  # The tests deselected below fail; the cause may be upstream or the packaging.
+  # Upstream develops on macOS and pins test dependencies significantly newer than
+  # nixpkgs has. The program still works, to the extent I've used it. -- Kiwi
+  # NOTE(review): -k is single-quoted, so its backslash-newlines reach pytest literally; confirm it parses.
+  checkPhase = ''
+    export HOME=$TMPDIR
+    pytest -k 'not test_force_ocr_on_pdf_with_no_images \
+    and not test_tesseract_crash \
+    and not test_tesseract_crash_autorotate \
+    and not test_ghostscript_pdfa_failure \
+    and not test_gs_render_failure \
+    and not test_gs_raster_failure \
+    and not test_bad_utf8 \
+    and not test_old_unpaper'
+  '';
+
+  meta = with stdenv.lib; {
+    homepage = "https://github.com/jbarlow83/OCRmyPDF";
+    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
+    license = licenses.gpl3;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.kiwi ];
+  };
+}
author	Robert Djubek <envy1988@gmail.com>	2019-03-14 03:13:25 +0000
committer	Robert Djubek <envy1988@gmail.com>	2019-08-14 04:45:09 +0000
commit	352239e24a7da18f4eb22993cd05e8535d6b01a5 (patch)
tree	a8b26520a91e23a27a5851bb170aafa4ab1d3243 /pkgs/tools/text
parent	a141544cb5877dbb35959cb04cf48aa89ef45ef1 (diff)