summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Karl Lorey <git@karllorey.com> 2022-06-15 17:53:57 +0200
committer Karl Lorey <git@karllorey.com> 2022-06-15 17:53:57 +0200
commit 19bc07c8b780b23dc200644f048d5c0a74725d9a (patch)
tree 486c1e3f4c269486b0074c9d263da1aa36efe6a4
parent 7d7a07ea8baf7ee2af2a93b41ac42ee73d16fbda (diff)
Loop through all possible selectors when training ListScraper
-rw-r--r-- mlscraper/training.py 3
-rw-r--r-- tests/test_training.py 2
2 files changed, 2 insertions, 3 deletions
diff --git a/mlscraper/training.py b/mlscraper/training.py
index 38b15d9..fc80770 100644
--- a/mlscraper/training.py
+++ b/mlscraper/training.py
@@ -103,8 +103,7 @@ def train_scraper_for_matches(matches, roots):
# todo can be one of the parents
match_roots = [m.root for m in matches]
logging.info(f"{match_roots=}")
- selector = first(generate_selector_for_nodes(match_roots, roots))
- if selector:
+ for selector in generate_selector_for_nodes(match_roots, roots):
# for all the item_matches, create a tuple
# that contains the item_match and the new root
matches_and_roots = [
diff --git a/tests/test_training.py b/tests/test_training.py
index 49288a3..3e48756 100644
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -16,7 +16,7 @@ def test_train_scraper_simple_list():
train_scraper(training_set)
-@pytest.mark.skip("fucking fails")
+@pytest.mark.skip("does not work yet")
def test_train_scraper(stackoverflow_samples):
training_set = TrainingSet()
for s in stackoverflow_samples: