summaryrefslogtreecommitdiffstats
path: root/pkgs/tools/text
diff options
context:
space:
mode:
authorRobert Djubek <envy1988@gmail.com>2019-03-14 03:13:25 +0000
committerRobert Djubek <envy1988@gmail.com>2019-08-14 04:45:09 +0000
commit352239e24a7da18f4eb22993cd05e8535d6b01a5 (patch)
treea8b26520a91e23a27a5851bb170aafa4ab1d3243 /pkgs/tools/text
parenta141544cb5877dbb35959cb04cf48aa89ef45ef1 (diff)
ocrmypdf: init at 8.2.3
Diffstat (limited to 'pkgs/tools/text')
-rw-r--r--pkgs/tools/text/ocrmypdf/default.nix103
1 files changed, 103 insertions, 0 deletions
diff --git a/pkgs/tools/text/ocrmypdf/default.nix b/pkgs/tools/text/ocrmypdf/default.nix
new file mode 100644
index 000000000000..514f3f675399
--- /dev/null
+++ b/pkgs/tools/text/ocrmypdf/default.nix
@@ -0,0 +1,103 @@
+{ fetchFromGitHub
+, ghostscript
+, img2pdf
+, jbig2enc
+, leptonica
+, pngquant
+, python3
+, python3Packages
+, qpdf
+, stdenv
+, tesseract4
+, unpaper
+}:
+
+let
+  inherit (python3Packages) buildPythonApplication;
+
+  # External programs OCRmyPDF runs; also put on the wrapper's PATH below.
+  runtimePrograms = [ ghostscript jbig2enc pngquant qpdf tesseract4 unpaper ];
+
+  # Non-Python-test inputs of the check phase; only used in checkInputs.
+  runtimeDeps = runtimePrograms ++ [ leptonica python3Packages.pillow ];
+
+in buildPythonApplication rec {
+  pname = "ocrmypdf";
+  version = "8.2.3";
+  disabled = ! python3Packages.isPy3k;
+
+  src = fetchFromGitHub {
+    owner = "jbarlow83";
+    repo = "OCRmyPDF";
+    rev = "v${version}";
+    sha256 = "1ldlyhxkav34y9d7g2kx3d4p26c2b82vnwi0ywnfynb16sav36d5";
+  };
+
+  nativeBuildInputs = with python3Packages; [
+    pytestrunner
+    setuptools
+    setuptools-scm-git-archive
+    setuptools_scm
+  ];
+
+  propagatedBuildInputs = with python3Packages; [
+    cffi
+    chardet
+    img2pdf
+    pdfminer
+    pikepdf
+    reportlab
+    ruffus
+  ];
+
+  checkInputs = with python3Packages; [
+    hocr-tools
+    pypdf2
+    pytest
+    pytest-helpers-namespace
+    pytest_xdist
+    pytestcov
+    pytestrunner
+    python-xmp-toolkit
+    setuptools
+  ] ++ runtimeDeps;
+
+  # Replace the find_library('lept') lookup with the absolute path of the
+  # leptonica shared library from the package input above.
+  postPatch = ''
+    substituteInPlace src/ocrmypdf/leptonica.py \
+      --replace "ffi.dlopen(find_library('lept'))" \
+      'ffi.dlopen("${stdenv.lib.makeLibraryPath [leptonica]}/liblept${stdenv.hostPlatform.extensions.sharedLibrary}")'
+  '';
+
+  # checkInputs do not reach the installed program, so prefix the wrapper's
+  # PATH with the external tools instead of relying on the user's environment.
+  makeWrapperArgs = [ "--prefix PATH : ${stdenv.lib.makeBinPath runtimePrograms}" ];
+
+  # The tests take potentially 20+ minutes, depending on machine
+  doCheck = false;
+
+  # The deselected tests fail, possibly due to upstream or to packaging:
+  # upstream develops on macOS and pins newer test dependencies than nixpkgs
+  # ships. The program itself works as far as it has been used. -- Kiwi
+  # Double quotes let the shell join the continued lines into one -k expression.
+  checkPhase = ''
+    export HOME=$TMPDIR
+    pytest -k "not test_force_ocr_on_pdf_with_no_images \
+      and not test_tesseract_crash \
+      and not test_tesseract_crash_autorotate \
+      and not test_ghostscript_pdfa_failure \
+      and not test_gs_render_failure \
+      and not test_gs_raster_failure \
+      and not test_bad_utf8 \
+      and not test_old_unpaper"
+  '';
+
+  meta = with stdenv.lib; {
+    homepage = "https://github.com/jbarlow83/OCRmyPDF";
+    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
+    license = licenses.gpl3;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.kiwi ];
+  };
+}