summary | refs | log | tree | commit | diff | stats
path: root/pkgs/tools/text
diff options
context:
space:
mode:
author: Frederik Rietdijk <fridh@fridh.nl> 2019-08-18 12:53:44 +0200
committer: Frederik Rietdijk <fridh@fridh.nl> 2019-08-18 12:53:44 +0200
commit: f65aa21bb2b0a45ac671e8c9d656b1dfc88cda33 (patch)
tree: 98bb48130a5c464418af19f314236e112ad20c55 /pkgs/tools/text
parent: b8a79ac677bbc0f7b662bcc0c4fcb9df34c9f5f3 (diff)
parent: ffbb4d26f9f93681576815be82142d14f2aa8234 (diff)
Merge master into staging-next
Diffstat (limited to 'pkgs/tools/text')
-rw-r--r-- pkgs/tools/text/ocrmypdf/default.nix 103
1 files changed, 103 insertions, 0 deletions
diff --git a/pkgs/tools/text/ocrmypdf/default.nix b/pkgs/tools/text/ocrmypdf/default.nix
new file mode 100644
index 000000000000..514f3f675399
--- /dev/null
+++ b/pkgs/tools/text/ocrmypdf/default.nix
@@ -0,0 +1,103 @@
+{ fetchFromGitHub
+, ghostscript
+, img2pdf
+, jbig2enc
+, leptonica
+, pngquant
+, python3
+, python3Packages
+, qpdf
+, stdenv
+, tesseract4
+, unpaper
+}:
+
+let
+ # Non-Python tools: put on the wrapper's PATH below and handed to the tests.
+ runtimeDeps = [
+ ghostscript
+ jbig2enc
+ leptonica
+ pngquant
+ qpdf
+ tesseract4
+ unpaper
+ ];
+
+in python3Packages.buildPythonApplication rec {
+ pname = "ocrmypdf";
+ version = "8.2.3";
+ disabled = ! python3Packages.isPy3k;
+
+ src = fetchFromGitHub {
+ owner = "jbarlow83";
+ repo = "OCRmyPDF";
+ rev = "v${version}";
+ sha256 = "1ldlyhxkav34y9d7g2kx3d4p26c2b82vnwi0ywnfynb16sav36d5";
+ };
+
+ nativeBuildInputs = with python3Packages; [
+ pytestrunner
+ setuptools
+ setuptools-scm-git-archive
+ setuptools_scm
+ ];
+
+ propagatedBuildInputs = with python3Packages; [
+ cffi
+ chardet
+ img2pdf
+ pdfminer
+ pikepdf
+ pillow
+ reportlab
+ ruffus
+ ];
+
+ checkInputs = with python3Packages; [
+ hocr-tools
+ pypdf2
+ pytest
+ pytest-helpers-namespace
+ pytest_xdist
+ pytestcov
+ python-xmp-toolkit
+ ] ++ runtimeDeps;
+
+ postPatch = ''
+ substituteInPlace src/ocrmypdf/leptonica.py \
+ --replace "ffi.dlopen(find_library('lept'))" \
+ 'ffi.dlopen("${stdenv.lib.makeLibraryPath [leptonica]}/liblept${stdenv.hostPlatform.extensions.sharedLibrary}")'
+ '';
+
+ # checkInputs only exist while tests run (and doCheck is off), so the installed
+ # wrapper must be given the external tools on PATH explicitly.
+ makeWrapperArgs = [ "--prefix PATH : ${stdenv.lib.makeBinPath runtimeDeps}" ];
+
+ # The tests take potentially 20+ minutes, depending on machine
+ doCheck = false;
+
+ # These tests fail and it might be upstream problem... or packaging. :)
+ # development is happening on macos and the pinned test versions are
+ # significantly newer than nixpkgs has. Program still works...
+ # (to the extent I've used it) -- Kiwi
+ checkPhase = ''
+ export HOME=$TMPDIR
+ pytest -k 'not test_force_ocr_on_pdf_with_no_images \
+ and not test_tesseract_crash \
+ and not test_tesseract_crash_autorotate \
+ and not test_ghostscript_pdfa_failure \
+ and not test_gs_render_failure \
+ and not test_gs_raster_failure \
+ and not test_bad_utf8 \
+ and not test_old_unpaper'
+ '';
+
+ meta = with stdenv.lib; {
+ homepage = "https://github.com/jbarlow83/OCRmyPDF";
+ description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
+ license = licenses.gpl3;
+ platforms = platforms.linux;
+ maintainers = [ maintainers.kiwi ];
+ };
+}