diff options
author | Karl Lorey <git@karllorey.com> | 2022-06-22 09:26:27 +0200 |
---|---|---|
committer | Karl Lorey <git@karllorey.com> | 2022-06-22 09:26:27 +0200 |
commit | a0cb536c34d1b5e126882779832634b3875bcad1 (patch) | |
tree | 2fb417b5816b95e9ba1449169d9dd48cc662bb3b | |
parent | 8590a22ceb2e09cf108bd17f5b91a17f34d55fd3 (diff) |
Add code to README
-rw-r--r-- | README.rst | 31 |
1 files changed, 30 insertions, 1 deletions
@@ -56,10 +56,39 @@ You can also install the latest (unstable) development version of mlscraper via :code:`pip install git+https://github.com/lorey/mlscraper#egg=mlscraper`, e.g. to check new features or to see if a bug has been fixed already. Please note that until the 1.0 release :code:`pip install mlscraper` will return an outdated 0.* version. -Check the examples_ directory for usage examples until further documentation arrives. .. _examples: examples/ +To get started with a simple scraper, check out a basic sample below. + +.. code-block:: python + import requests + from mlscraper.html import Page + from mlscraper.samples import Sample, TrainingSet + from mlscraper.training import train_scraper + + # fetch the page to train + einstein_url = 'http://quotes.toscrape.com/author/Albert-Einstein/' + resp = requests.get(einstein_url) + assert resp.status_code == 200 + + # create a sample for Albert Einstein + training_set = TrainingSet() + page = Page(resp.content) + sample = Sample(page, {'name': 'Albert Einstein', 'born': 'March 14, 1879'}) + training_set.add_sample(sample) + + # train the scraper with the created training set + scraper = train_scraper(training_set) + + # scrape another page + resp = requests.get('http://quotes.toscrape.com/author/J-K-Rowling') + result = scraper.get(Page(resp.content)) + print(result) + # returns {'name': 'J.K. Rowling', 'born': 'July 31, 1965'} + +Check the examples_ directory for usage examples until further documentation arrives. + ----------- Development ----------- |