diff options
author | Karl Lorey <git@karllorey.com> | 2022-06-13 20:49:45 +0200 |
---|---|---|
committer | Karl Lorey <git@karllorey.com> | 2022-06-13 20:49:45 +0200 |
commit | d506295757c2136e98a3ea8d7988cb25644d7c7f (patch) | |
tree | 048451a6d00a88ee31948d3450b968a662dd4b82 | |
parent | 88d7101a2fe3ae008181d0f53c011bb990f29a8c (diff) |
Apply black and other styling
-rwxr-xr-x | docs/conf.py | 55 | ||||
-rw-r--r-- | mlscraper/samples.py | 4 | ||||
-rw-r--r-- | mlscraper/scrapers.py | 4 | ||||
-rw-r--r-- | mlscraper/selectors.py | 8 | ||||
-rw-r--r-- | mlscraper/training.py | 13 | ||||
-rw-r--r-- | mlscraper/util.py | 14 | ||||
-rw-r--r-- | setup.py | 5 | ||||
-rw-r--r-- | tests/static/multi/defined-with-icons.html | 2 | ||||
-rw-r--r-- | tests/static/multi/inline-result-page.html | 2 | ||||
-rw-r--r-- | tests/static/multi/multivalued-results.html | 2 | ||||
-rw-r--r-- | tests/static/multi/single-result-page.html | 2 | ||||
-rw-r--r-- | tests/static/single/basic/train.html | 2 | ||||
-rw-r--r-- | tests/test_samples.py | 9 | ||||
-rw-r--r-- | tests/test_scrapers.py | 9 | ||||
-rw-r--r-- | tests/test_selectors.py | 9 | ||||
-rw-r--r-- | tests/test_training.py | 1 | ||||
-rw-r--r-- | tests/test_util.py | 13 |
17 files changed, 78 insertions, 76 deletions
diff --git a/docs/conf.py b/docs/conf.py index d3d5556..5396add 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,7 +11,6 @@ # # All configuration values have a default; values that are commented out # serve to show the default. - # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory is # relative to the documentation root, use os.path.abspath to make it @@ -19,7 +18,8 @@ # import os import sys -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) import mlscraper @@ -31,22 +31,22 @@ import mlscraper # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'mlscraper' +project = "mlscraper" copyright = "2020, Karl Lorey" author = "Karl Lorey" @@ -69,10 +69,10 @@ language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -83,7 +83,7 @@ todo_include_todos = False # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the @@ -94,13 +94,13 @@ html_theme = 'alabaster' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # -- Options for HTMLHelp output --------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'mlscraperdoc' +htmlhelp_basename = "mlscraperdoc" # -- Options for LaTeX output ------------------------------------------ @@ -109,15 +109,12 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -127,9 +124,7 @@ latex_elements = { # (source start file, target name, title, author, documentclass # [howto, manual, or own class]). latex_documents = [ - (master_doc, 'mlscraper.tex', - 'mlscraper Documentation', - 'Karl Lorey', 'manual'), + (master_doc, "mlscraper.tex", "mlscraper Documentation", "Karl Lorey", "manual"), ] @@ -137,11 +132,7 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'mlscraper', - 'mlscraper Documentation', - [author], 1) -] +man_pages = [(master_doc, "mlscraper", "mlscraper Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------- @@ -150,13 +141,13 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'mlscraper', - 'mlscraper Documentation', - author, - 'mlscraper', - 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "mlscraper", + "mlscraper Documentation", + author, + "mlscraper", + "One line description of project.", + "Miscellaneous", + ), ] - - - diff --git a/mlscraper/samples.py b/mlscraper/samples.py index 876ea44..1821966 100644 --- a/mlscraper/samples.py +++ b/mlscraper/samples.py @@ -1,7 +1,9 @@ import typing from itertools import product -from mlscraper.util import DictMatch, ListMatch, Page +from mlscraper.util import DictMatch +from mlscraper.util import ListMatch +from mlscraper.util import Page class ItemStructureException(Exception): diff --git a/mlscraper/scrapers.py b/mlscraper/scrapers.py index 06f534d..75700c1 100644 --- a/mlscraper/scrapers.py +++ b/mlscraper/scrapers.py @@ -1,6 +1,8 @@ import typing -from mlscraper.util import Extractor, Node, Selector +from mlscraper.util import Extractor +from mlscraper.util import Node +from mlscraper.util import Selector class Scraper: diff --git a/mlscraper/selectors.py b/mlscraper/selectors.py index 3e3ae82..dfbf5b1 100644 --- a/mlscraper/selectors.py +++ b/mlscraper/selectors.py @@ -2,10 +2,12 @@ import logging import typing from itertools import product -from more_itertools import flatten - from mlscraper.samples import Sample -from mlscraper.util import Matcher, Node, Page, Selector +from mlscraper.util import Matcher +from mlscraper.util import Node +from mlscraper.util import Page +from mlscraper.util import Selector +from more_itertools import flatten class CssRuleSelector(Selector): diff --git a/mlscraper/training.py b/mlscraper/training.py index f361613..d2872ae 100644 --- a/mlscraper/training.py +++ b/mlscraper/training.py @@ -2,9 +2,16 @@ import logging import typing from itertools import product -from mlscraper.samples import DictItem, Item, ListItem, Sample, ValueItem -from mlscraper.scrapers import DictScraper, ListScraper, ValueScraper -from mlscraper.selectors import generate_selector_for_nodes, make_matcher_for_samples +from mlscraper.samples import DictItem +from mlscraper.samples import Item +from mlscraper.samples import ListItem +from mlscraper.samples import Sample +from mlscraper.samples import ValueItem +from mlscraper.scrapers import DictScraper +from mlscraper.scrapers import ListScraper +from mlscraper.scrapers import ValueScraper +from mlscraper.selectors import generate_selector_for_nodes +from mlscraper.selectors import make_matcher_for_samples from mlscraper.util import Node diff --git a/mlscraper/util.py b/mlscraper/util.py index 67a04cd..4019667 100644 --- a/mlscraper/util.py +++ b/mlscraper/util.py @@ -1,8 +1,10 @@ import logging import typing -from itertools import combinations, product +from itertools import combinations +from itertools import product -from bs4 import BeautifulSoup, Tag +from bs4 import BeautifulSoup +from bs4 import Tag from more_itertools import powerset PARENT_NODE_COUNT_MAX = 2 @@ -162,7 +164,7 @@ class DictMatch(Match): def get_span(self) -> int: root = self.get_root() return sum( - [get_relative_depth(m.get_root(), root) for m in self.match_by_key.values()] + get_relative_depth(m.get_root(), root) for m in self.match_by_key.values() ) @@ -183,7 +185,7 @@ class ListMatch(Match): def get_span(self) -> int: return sum( - [get_relative_depth(m.get_root(), self.get_root()) for m in self.matches] + get_relative_depth(m.get_root(), self.get_root()) for m in self.matches ) @@ -313,9 +315,7 @@ def generate_node_selector(node): # use classes css_classes = node.attrs.get("class", []) for css_class_combo in powerset_max_length(css_classes, CSS_CLASS_COMBINATIONS_MAX): - css_clases_str = "".join( - [".{}".format(css_class) for css_class in css_class_combo] - ) + css_clases_str = "".join([f".{css_class}" for css_class in css_class_combo]) css_selector = node.name + css_clases_str yield css_selector @@ -1,8 +1,7 @@ #!/usr/bin/env python - """The setup script.""" - -from setuptools import setup, find_packages +from setuptools import find_packages +from setuptools import setup with open("README.rst") as readme_file: readme = readme_file.read() diff --git a/tests/static/multi/defined-with-icons.html b/tests/static/multi/defined-with-icons.html index 56879a4..ad2a4d9 100644 --- a/tests/static/multi/defined-with-icons.html +++ b/tests/static/multi/defined-with-icons.html @@ -23,4 +23,4 @@ <i class="fa fa-vcard"></i> All the kids give their email, but Frank leaves it blank </div> </body> -</html>
\ No newline at end of file +</html> diff --git a/tests/static/multi/inline-result-page.html b/tests/static/multi/inline-result-page.html index af1392b..281910a 100644 --- a/tests/static/multi/inline-result-page.html +++ b/tests/static/multi/inline-result-page.html @@ -22,4 +22,4 @@ <a href="https://google.de">Go here!</a> </body> -</html>
\ No newline at end of file +</html> diff --git a/tests/static/multi/multivalued-results.html b/tests/static/multi/multivalued-results.html index fe3ea28..7b14f53 100644 --- a/tests/static/multi/multivalued-results.html +++ b/tests/static/multi/multivalued-results.html @@ -26,4 +26,4 @@ </div> </div> </body> -</html>
\ No newline at end of file +</html> diff --git a/tests/static/multi/single-result-page.html b/tests/static/multi/single-result-page.html index 268c556..a9ffcfb 100644 --- a/tests/static/multi/single-result-page.html +++ b/tests/static/multi/single-result-page.html @@ -22,4 +22,4 @@ </div> </div> </body> -</html>
\ No newline at end of file +</html> diff --git a/tests/static/single/basic/train.html b/tests/static/single/basic/train.html index e134a7b..e46ffbd 100644 --- a/tests/static/single/basic/train.html +++ b/tests/static/single/basic/train.html @@ -5,4 +5,4 @@ <div class="person-description">Cool-looking guy</div> </div> </body> -</html>
\ No newline at end of file +</html> diff --git a/tests/test_samples.py b/tests/test_samples.py index 4903ba3..7023082 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1,7 +1,10 @@ import pytest - -from mlscraper.samples import ItemStructureException, Sample, make_training_set -from mlscraper.util import DictMatch, ListMatch, Page +from mlscraper.samples import ItemStructureException +from mlscraper.samples import make_training_set +from mlscraper.samples import Sample +from mlscraper.util import DictMatch +from mlscraper.util import ListMatch +from mlscraper.util import Page class TestTrainingSet: diff --git a/tests/test_scrapers.py b/tests/test_scrapers.py index 31da7e9..e341041 100644 --- a/tests/test_scrapers.py +++ b/tests/test_scrapers.py @@ -1,9 +1,12 @@ import pytest - from mlscraper.samples import Sample -from mlscraper.scrapers import DictScraper, ListScraper, ValueScraper +from mlscraper.scrapers import DictScraper +from mlscraper.scrapers import ListScraper +from mlscraper.scrapers import ValueScraper from mlscraper.selectors import CssRuleSelector -from mlscraper.util import AttributeValueExtractor, Page, TextValueExtractor +from mlscraper.util import AttributeValueExtractor +from mlscraper.util import Page +from mlscraper.util import TextValueExtractor @pytest.fixture diff --git a/tests/test_selectors.py b/tests/test_selectors.py index 952d57d..ac6510a 100644 --- a/tests/test_selectors.py +++ b/tests/test_selectors.py @@ -1,11 +1,8 @@ import pytest - from mlscraper.samples import Sample -from mlscraper.selectors import ( - generate_matchers_for_samples, - generate_selector_for_nodes, - make_matcher_for_samples, -) +from mlscraper.selectors import generate_matchers_for_samples +from mlscraper.selectors import generate_selector_for_nodes +from mlscraper.selectors import make_matcher_for_samples from mlscraper.util import Page diff --git a/tests/test_training.py b/tests/test_training.py index b90d8a0..ab310eb 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -1,5 +1,4 @@ import pytest - from mlscraper.samples import make_training_set from mlscraper.training import train_scraper from mlscraper.util import Page diff --git a/tests/test_util.py b/tests/test_util.py index c8b378d..056764c 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,12 +1,9 @@ from bs4 import BeautifulSoup - -from mlscraper.util import ( - AttributeValueExtractor, - Node, - Page, - _get_root_of_nodes, - get_attribute_extractor, -) +from mlscraper.util import _get_root_of_nodes +from mlscraper.util import AttributeValueExtractor +from mlscraper.util import get_attribute_extractor +from mlscraper.util import Node +from mlscraper.util import Page class TestPage: |