summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Karl Lorey <git@karllorey.com> 2020-09-28 18:55:03 +0200
committer Karl Lorey <git@karllorey.com> 2020-09-28 18:55:03 +0200
commit 3bf2729bc93c088afcd4707978989f6f5a72dd77 (patch)
tree 797581d8a3561d901ba1bf8571308c2a686549d3
parent 6ab0b458236a7eb673f525498d850f8039f137c0 (diff)
Fix readme code sample
-rw-r--r-- README.rst 8
1 files changed, 5 insertions, 3 deletions
diff --git a/README.rst b/README.rst
index 50709c4..c59b897 100644
--- a/README.rst
+++ b/README.rst
@@ -45,21 +45,23 @@ After you've defined the data you want to scrape, mlscraper will:
.. code:: python
+ import requests
+
from mlscraper import RuleBasedSingleItemScraper
from mlscraper.training import SingleItemPageSample
# the items found on the training page
- targets = [
+ targets = {
"https://test.com/article/1": {"title": "One great result!", "description": "Some description"},
"https://test.com/article/2": {"title": "Another great result!", "description": "Another description"},
"https://test.com/article/3": {"title": "Result to be found", "description": "Description to crawl"},
- ]
+ }
# fetch html and create samples
samples = [SingleItemPageSample(requests.get(url).content, targets[url]) for url in targets]
# training the scraper with the items
- scraper = MultiItemScraper.build([sample])
+ scraper = RuleBasedSingleItemScraper.build(samples)
# apply the learned rules and extract new item automatically
result = scraper.scrape(requests.get('https://test.com/article/4'))