summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Karl Lorey <git@karllorey.com>, 2022-06-15 18:01:51 +0200
committer: Karl Lorey <git@karllorey.com>, 2022-06-15 18:01:51 +0200
commit: 26e96a8e4e306bf350dc2f6d6b379d9509d18198 (patch)
tree: ed91d1938f1a01216ca92544c12de81c89edaea7
parent: 19bc07c8b780b23dc200644f048d5c0a74725d9a (diff)
Revert previous commit and add comment why
-rw-r--r-- mlscraper/training.py | 9
1 file changed, 8 insertions, 1 deletion
diff --git a/mlscraper/training.py b/mlscraper/training.py
index fc80770..8512682 100644
--- a/mlscraper/training.py
+++ b/mlscraper/training.py
@@ -103,7 +103,12 @@ def train_scraper_for_matches(matches, roots):
# todo can be one of the parents
match_roots = [m.root for m in matches]
logging.info(f"{match_roots=}")
- for selector in generate_selector_for_nodes(match_roots, roots):
+
+ # first selector is fine as it matches perfectly
+ # no need to try other selectors
+ # -> item_scraper would be the same
+ selector = first(generate_selector_for_nodes(match_roots, roots))
+ if selector:
# for all the item_matches, create a tuple
# that contains the item_match and the new root
matches_and_roots = [
@@ -116,5 +121,7 @@ def train_scraper_for_matches(matches, roots):
list(item_matches), list(list_roots)
)
return ListScraper(selector, item_scraper)
+ else:
+ raise NoScraperFoundException()
else:
raise RuntimeError(f"type not matched: {found_type}")