summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Fabian Affolter <fabian@affolter-engineering.ch> 2024-05-12 09:11:48 +0200
committer GitHub <noreply@github.com> 2024-05-12 09:11:48 +0200
commit 17838a4fc851b7beec69fd02643da2737c5414b7 (patch)
tree df0e50a32c50f85ade042fed25af8c680a8a6aba
parent f18b5695e5e97f5d1aff9ce843ec9c03e82f0367 (diff)
parent 60bc6308c8a5114bdba5f1c1d7e5cef8e22595ad (diff)
Merge pull request #310924 from fabaff/trafilatura-bump
python312Packages.courlan: 1.0.0 -> 1.1.0, python311Packages.trafilatura: 1.8.1 -> 1.9.0
-rw-r--r-- pkgs/development/python-modules/courlan/default.nix 56
-rw-r--r-- pkgs/development/python-modules/trafilatura/default.nix 72
2 files changed, 63 insertions, 65 deletions
diff --git a/pkgs/development/python-modules/courlan/default.nix b/pkgs/development/python-modules/courlan/default.nix
index bd73aad1977d..fd2cfc01b6e8 100644
--- a/pkgs/development/python-modules/courlan/default.nix
+++ b/pkgs/development/python-modules/courlan/default.nix
@@ -1,56 +1,58 @@
-{ lib
-, buildPythonPackage
-, fetchPypi
-, langcodes
-, pytestCheckHook
-, tld
-, urllib3
-, pythonOlder
+{
+ lib,
+ babel,
+ buildPythonPackage,
+ fetchPypi,
+ langcodes,
+ pytestCheckHook,
+ pythonOlder,
+ setuptools,
+ tld,
+ urllib3,
}:
buildPythonPackage rec {
pname = "courlan";
- version = "1.0.0";
- format = "setuptools";
+ version = "1.1.0";
+ pyproject = true;
disabled = pythonOlder "3.6";
src = fetchPypi {
inherit pname version;
- hash = "sha256-PDVRHDZSXLL5Qc1nCbejp0LtlfC55WyX7sDBb9wDUYM=";
+ hash = "sha256-1wZoQzTxi+StofvVfyaArfADZkj22ECFL3pIItOt/Y0=";
};
- propagatedBuildInputs = [
+ # Tests try to write to /tmp directly. use $TMPDIR instead.
+ postPatch = ''
+ substituteInPlace tests/unit_tests.py \
+ --replace-fail "\"courlan --help\"" "\"$out/bin/courlan --help\"" \
+ --replace-fail "courlan_bin = \"courlan\"" "courlan_bin = \"$out/bin/courlan\"" \
+ --replace-fail "/tmp" "$TMPDIR"
+ '';
+
+ build-system = [ setuptools ];
+
+ dependencies = [
+ babel
langcodes
tld
urllib3
];
- nativeCheckInputs = [
- pytestCheckHook
- ];
+ nativeCheckInputs = [ pytestCheckHook ];
# disable tests that require an internet connection
- disabledTests = [
- "test_urlcheck"
- ];
-
- # tests try to write to /tmp directly. use $TMPDIR instead.
- postPatch = ''
- substituteInPlace tests/unit_tests.py \
- --replace "\"courlan --help\"" "\"$out/bin/courlan --help\"" \
- --replace "courlan_bin = \"courlan\"" "courlan_bin = \"$out/bin/courlan\"" \
- --replace "/tmp" "$TMPDIR"
- '';
+ disabledTests = [ "test_urlcheck" ];
pythonImportsCheck = [ "courlan" ];
meta = with lib; {
description = "Clean, filter and sample URLs to optimize data collection";
- mainProgram = "courlan";
homepage = "https://github.com/adbar/courlan";
changelog = "https://github.com/adbar/courlan/blob/v${version}/HISTORY.md";
license = licenses.asl20;
maintainers = with maintainers; [ jokatzke ];
+ mainProgram = "courlan";
};
}
diff --git a/pkgs/development/python-modules/trafilatura/default.nix b/pkgs/development/python-modules/trafilatura/default.nix
index 35212c343443..a8183bf3f841 100644
--- a/pkgs/development/python-modules/trafilatura/default.nix
+++ b/pkgs/development/python-modules/trafilatura/default.nix
@@ -1,35 +1,43 @@
-{ lib
-, buildPythonPackage
-, fetchPypi
-, pytestCheckHook
-, pythonOlder
-, certifi
-, charset-normalizer
-, courlan
-, htmldate
-, justext
-, lxml
-, urllib3
-, setuptools
+{
+ lib,
+ buildPythonPackage,
+ certifi,
+ charset-normalizer,
+ courlan,
+ fetchPypi,
+ htmldate,
+ justext,
+ lxml,
+ pytestCheckHook,
+ pythonOlder,
+ setuptools,
+ urllib3,
}:
buildPythonPackage rec {
pname = "trafilatura";
- version = "1.8.1";
+ version = "1.9.0";
pyproject = true;
disabled = pythonOlder "3.9";
src = fetchPypi {
inherit pname version;
- hash = "sha256-a4eN/b1cXftV0Pgwfyt9wVrDRYBU90hh/5ihcvXjhyA=";
+ hash = "sha256-5oM9KauKE+2FOTfXyR5oaLxi774QIUrCsQZDbdI9FBI=";
};
- nativeBuildInputs = [
- setuptools
- ];
+ # Patch out gui cli because it is not supported in this packaging and
+ # nixify path to the trafilatura binary in the test suite
+ postPatch = ''
+ substituteInPlace setup.py \
+ --replace-fail '"trafilatura_gui=trafilatura.gui:main",' ""
+ substituteInPlace tests/cli_tests.py \
+ --replace-fail "trafilatura_bin = 'trafilatura'" "trafilatura_bin = '$out/bin/trafilatura'"
+ '';
+
+ build-system = [ setuptools ];
- propagatedBuildInputs = [
+ dependencies = [
certifi
charset-normalizer
courlan
@@ -39,34 +47,22 @@ buildPythonPackage rec {
urllib3
];
- nativeCheckInputs = [
- pytestCheckHook
- ];
+ nativeCheckInputs = [ pytestCheckHook ];
disabledTests = [
# Disable tests that require an internet connection
+ "test_cli_pipeline"
+ "test_crawl_page"
"test_download"
"test_fetch"
- "test_redirection"
"test_meta_redirections"
- "test_crawl_page"
- "test_whole"
"test_probing"
- "test_cli_pipeline"
+ "test_queue"
+ "test_redirection"
+ "test_whole"
];
- # patch out gui cli because it is not supported in this packaging
- # nixify path to the trafilatura binary in the test suite
- postPatch = ''
- substituteInPlace setup.py \
- --replace-fail '"trafilatura_gui=trafilatura.gui:main",' ""
- substituteInPlace tests/cli_tests.py \
- --replace-fail "trafilatura_bin = 'trafilatura'" "trafilatura_bin = '$out/bin/trafilatura'"
- '';
-
- pythonImportsCheck = [
- "trafilatura"
- ];
+ pythonImportsCheck = [ "trafilatura" ];
meta = with lib; {
description = "Python package and command-line tool designed to gather text on the Web";