summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Karl Lorey <git@karllorey.com> 2022-06-13 20:49:45 +0200
committer Karl Lorey <git@karllorey.com> 2022-06-13 20:49:45 +0200
commit d506295757c2136e98a3ea8d7988cb25644d7c7f (patch)
tree 048451a6d00a88ee31948d3450b968a662dd4b82
parent 88d7101a2fe3ae008181d0f53c011bb990f29a8c (diff)
Apply black and other styling
-rwxr-xr-x docs/conf.py 55
-rw-r--r-- mlscraper/samples.py 4
-rw-r--r-- mlscraper/scrapers.py 4
-rw-r--r-- mlscraper/selectors.py 8
-rw-r--r-- mlscraper/training.py 13
-rw-r--r-- mlscraper/util.py 14
-rw-r--r-- setup.py 5
-rw-r--r-- tests/static/multi/defined-with-icons.html 2
-rw-r--r-- tests/static/multi/inline-result-page.html 2
-rw-r--r-- tests/static/multi/multivalued-results.html 2
-rw-r--r-- tests/static/multi/single-result-page.html 2
-rw-r--r-- tests/static/single/basic/train.html 2
-rw-r--r-- tests/test_samples.py 9
-rw-r--r-- tests/test_scrapers.py 9
-rw-r--r-- tests/test_selectors.py 9
-rw-r--r-- tests/test_training.py 1
-rw-r--r-- tests/test_util.py 13
17 files changed, 78 insertions, 76 deletions
diff --git a/docs/conf.py b/docs/conf.py
index d3d5556..5396add 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -11,7 +11,6 @@
#
# All configuration values have a default; values that are commented out
# serve to show the default.
-
# If extensions (or modules to document with autodoc) are in another
# directory, add these directories to sys.path here. If the directory is
# relative to the documentation root, use os.path.abspath to make it
@@ -19,7 +18,8 @@
#
import os
import sys
-sys.path.insert(0, os.path.abspath('..'))
+
+sys.path.insert(0, os.path.abspath(".."))
import mlscraper
@@ -31,22 +31,22 @@ import mlscraper
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
+extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = 'mlscraper'
+project = "mlscraper"
copyright = "2020, Karl Lorey"
author = "Karl Lorey"
@@ -69,10 +69,10 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
@@ -83,7 +83,7 @@ todo_include_todos = False
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'alabaster'
+html_theme = "alabaster"
# Theme options are theme-specific and customize the look and feel of a
# theme further. For a list of options available for each theme, see the
@@ -94,13 +94,13 @@ html_theme = 'alabaster'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
# -- Options for HTMLHelp output ---------------------------------------
# Output file base name for HTML help builder.
-htmlhelp_basename = 'mlscraperdoc'
+htmlhelp_basename = "mlscraperdoc"
# -- Options for LaTeX output ------------------------------------------
@@ -109,15 +109,12 @@ latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
-
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
-
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
-
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
@@ -127,9 +124,7 @@ latex_elements = {
# (source start file, target name, title, author, documentclass
# [howto, manual, or own class]).
latex_documents = [
- (master_doc, 'mlscraper.tex',
- 'mlscraper Documentation',
- 'Karl Lorey', 'manual'),
+ (master_doc, "mlscraper.tex", "mlscraper Documentation", "Karl Lorey", "manual"),
]
@@ -137,11 +132,7 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'mlscraper',
- 'mlscraper Documentation',
- [author], 1)
-]
+man_pages = [(master_doc, "mlscraper", "mlscraper Documentation", [author], 1)]
# -- Options for Texinfo output ----------------------------------------
@@ -150,13 +141,13 @@ man_pages = [
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- (master_doc, 'mlscraper',
- 'mlscraper Documentation',
- author,
- 'mlscraper',
- 'One line description of project.',
- 'Miscellaneous'),
+ (
+ master_doc,
+ "mlscraper",
+ "mlscraper Documentation",
+ author,
+ "mlscraper",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
]
-
-
-
diff --git a/mlscraper/samples.py b/mlscraper/samples.py
index 876ea44..1821966 100644
--- a/mlscraper/samples.py
+++ b/mlscraper/samples.py
@@ -1,7 +1,9 @@
import typing
from itertools import product
-from mlscraper.util import DictMatch, ListMatch, Page
+from mlscraper.util import DictMatch
+from mlscraper.util import ListMatch
+from mlscraper.util import Page
class ItemStructureException(Exception):
diff --git a/mlscraper/scrapers.py b/mlscraper/scrapers.py
index 06f534d..75700c1 100644
--- a/mlscraper/scrapers.py
+++ b/mlscraper/scrapers.py
@@ -1,6 +1,8 @@
import typing
-from mlscraper.util import Extractor, Node, Selector
+from mlscraper.util import Extractor
+from mlscraper.util import Node
+from mlscraper.util import Selector
class Scraper:
diff --git a/mlscraper/selectors.py b/mlscraper/selectors.py
index 3e3ae82..dfbf5b1 100644
--- a/mlscraper/selectors.py
+++ b/mlscraper/selectors.py
@@ -2,10 +2,12 @@ import logging
import typing
from itertools import product
-from more_itertools import flatten
-
from mlscraper.samples import Sample
-from mlscraper.util import Matcher, Node, Page, Selector
+from mlscraper.util import Matcher
+from mlscraper.util import Node
+from mlscraper.util import Page
+from mlscraper.util import Selector
+from more_itertools import flatten
class CssRuleSelector(Selector):
diff --git a/mlscraper/training.py b/mlscraper/training.py
index f361613..d2872ae 100644
--- a/mlscraper/training.py
+++ b/mlscraper/training.py
@@ -2,9 +2,16 @@ import logging
import typing
from itertools import product
-from mlscraper.samples import DictItem, Item, ListItem, Sample, ValueItem
-from mlscraper.scrapers import DictScraper, ListScraper, ValueScraper
-from mlscraper.selectors import generate_selector_for_nodes, make_matcher_for_samples
+from mlscraper.samples import DictItem
+from mlscraper.samples import Item
+from mlscraper.samples import ListItem
+from mlscraper.samples import Sample
+from mlscraper.samples import ValueItem
+from mlscraper.scrapers import DictScraper
+from mlscraper.scrapers import ListScraper
+from mlscraper.scrapers import ValueScraper
+from mlscraper.selectors import generate_selector_for_nodes
+from mlscraper.selectors import make_matcher_for_samples
from mlscraper.util import Node
diff --git a/mlscraper/util.py b/mlscraper/util.py
index 67a04cd..4019667 100644
--- a/mlscraper/util.py
+++ b/mlscraper/util.py
@@ -1,8 +1,10 @@
import logging
import typing
-from itertools import combinations, product
+from itertools import combinations
+from itertools import product
-from bs4 import BeautifulSoup, Tag
+from bs4 import BeautifulSoup
+from bs4 import Tag
from more_itertools import powerset
PARENT_NODE_COUNT_MAX = 2
@@ -162,7 +164,7 @@ class DictMatch(Match):
def get_span(self) -> int:
root = self.get_root()
return sum(
- [get_relative_depth(m.get_root(), root) for m in self.match_by_key.values()]
+ get_relative_depth(m.get_root(), root) for m in self.match_by_key.values()
)
@@ -183,7 +185,7 @@ class ListMatch(Match):
def get_span(self) -> int:
return sum(
- [get_relative_depth(m.get_root(), self.get_root()) for m in self.matches]
+ get_relative_depth(m.get_root(), self.get_root()) for m in self.matches
)
@@ -313,9 +315,7 @@ def generate_node_selector(node):
# use classes
css_classes = node.attrs.get("class", [])
for css_class_combo in powerset_max_length(css_classes, CSS_CLASS_COMBINATIONS_MAX):
- css_clases_str = "".join(
- [".{}".format(css_class) for css_class in css_class_combo]
- )
+ css_clases_str = "".join([f".{css_class}" for css_class in css_class_combo])
css_selector = node.name + css_clases_str
yield css_selector
diff --git a/setup.py b/setup.py
index 42a1166..1ff2c43 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-
"""The setup script."""
-
-from setuptools import setup, find_packages
+from setuptools import find_packages
+from setuptools import setup
with open("README.rst") as readme_file:
readme = readme_file.read()
diff --git a/tests/static/multi/defined-with-icons.html b/tests/static/multi/defined-with-icons.html
index 56879a4..ad2a4d9 100644
--- a/tests/static/multi/defined-with-icons.html
+++ b/tests/static/multi/defined-with-icons.html
@@ -23,4 +23,4 @@
<i class="fa fa-vcard"></i> All the kids give their email, but Frank leaves it blank
</div>
</body>
-</html>
\ No newline at end of file
+</html>
diff --git a/tests/static/multi/inline-result-page.html b/tests/static/multi/inline-result-page.html
index af1392b..281910a 100644
--- a/tests/static/multi/inline-result-page.html
+++ b/tests/static/multi/inline-result-page.html
@@ -22,4 +22,4 @@
<a href="https://google.de">Go here!</a>
</body>
-</html>
\ No newline at end of file
+</html>
diff --git a/tests/static/multi/multivalued-results.html b/tests/static/multi/multivalued-results.html
index fe3ea28..7b14f53 100644
--- a/tests/static/multi/multivalued-results.html
+++ b/tests/static/multi/multivalued-results.html
@@ -26,4 +26,4 @@
</div>
</div>
</body>
-</html>
\ No newline at end of file
+</html>
diff --git a/tests/static/multi/single-result-page.html b/tests/static/multi/single-result-page.html
index 268c556..a9ffcfb 100644
--- a/tests/static/multi/single-result-page.html
+++ b/tests/static/multi/single-result-page.html
@@ -22,4 +22,4 @@
</div>
</div>
</body>
-</html>
\ No newline at end of file
+</html>
diff --git a/tests/static/single/basic/train.html b/tests/static/single/basic/train.html
index e134a7b..e46ffbd 100644
--- a/tests/static/single/basic/train.html
+++ b/tests/static/single/basic/train.html
@@ -5,4 +5,4 @@
<div class="person-description">Cool-looking guy</div>
</div>
</body>
-</html>
\ No newline at end of file
+</html>
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 4903ba3..7023082 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1,7 +1,10 @@
import pytest
-
-from mlscraper.samples import ItemStructureException, Sample, make_training_set
-from mlscraper.util import DictMatch, ListMatch, Page
+from mlscraper.samples import ItemStructureException
+from mlscraper.samples import make_training_set
+from mlscraper.samples import Sample
+from mlscraper.util import DictMatch
+from mlscraper.util import ListMatch
+from mlscraper.util import Page
class TestTrainingSet:
diff --git a/tests/test_scrapers.py b/tests/test_scrapers.py
index 31da7e9..e341041 100644
--- a/tests/test_scrapers.py
+++ b/tests/test_scrapers.py
@@ -1,9 +1,12 @@
import pytest
-
from mlscraper.samples import Sample
-from mlscraper.scrapers import DictScraper, ListScraper, ValueScraper
+from mlscraper.scrapers import DictScraper
+from mlscraper.scrapers import ListScraper
+from mlscraper.scrapers import ValueScraper
from mlscraper.selectors import CssRuleSelector
-from mlscraper.util import AttributeValueExtractor, Page, TextValueExtractor
+from mlscraper.util import AttributeValueExtractor
+from mlscraper.util import Page
+from mlscraper.util import TextValueExtractor
@pytest.fixture
diff --git a/tests/test_selectors.py b/tests/test_selectors.py
index 952d57d..ac6510a 100644
--- a/tests/test_selectors.py
+++ b/tests/test_selectors.py
@@ -1,11 +1,8 @@
import pytest
-
from mlscraper.samples import Sample
-from mlscraper.selectors import (
- generate_matchers_for_samples,
- generate_selector_for_nodes,
- make_matcher_for_samples,
-)
+from mlscraper.selectors import generate_matchers_for_samples
+from mlscraper.selectors import generate_selector_for_nodes
+from mlscraper.selectors import make_matcher_for_samples
from mlscraper.util import Page
diff --git a/tests/test_training.py b/tests/test_training.py
index b90d8a0..ab310eb 100644
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -1,5 +1,4 @@
import pytest
-
from mlscraper.samples import make_training_set
from mlscraper.training import train_scraper
from mlscraper.util import Page
diff --git a/tests/test_util.py b/tests/test_util.py
index c8b378d..056764c 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -1,12 +1,9 @@
from bs4 import BeautifulSoup
-
-from mlscraper.util import (
- AttributeValueExtractor,
- Node,
- Page,
- _get_root_of_nodes,
- get_attribute_extractor,
-)
+from mlscraper.util import _get_root_of_nodes
+from mlscraper.util import AttributeValueExtractor
+from mlscraper.util import get_attribute_extractor
+from mlscraper.util import Node
+from mlscraper.util import Page
class TestPage: