diff options
author | Alejandro Gallo <aamsgallo@gmail.com> | 2019-01-19 23:32:09 +0100 |
---|---|---|
committer | Alejandro Gallo <aamsgallo@gmail.com> | 2019-01-19 23:32:09 +0100 |
commit | 611719105f39127a4384ab89173c10425b155b38 (patch) | |
tree | 6d37486f67a2ac6e5a38fcfdbf148070b8573d2e | |
parent | 74863f8bafad8b3b677b652d57ad2694478393c4 (diff) |
Implement downloader for hal.fr
-rw-r--r-- | papis/downloaders/hal.py | 47 | ||||
-rw-r--r-- | papis/downloaders/utils.py | 2 |
2 files changed, 48 insertions, 1 deletions
```diff
diff --git a/papis/downloaders/hal.py b/papis/downloaders/hal.py
new file mode 100644
index 00000000..c1f42d57
--- /dev/null
+++ b/papis/downloaders/hal.py
@@ -0,0 +1,47 @@
+import re
+import papis.downloaders.base
+import bs4
+
+
+class Downloader(papis.downloaders.base.Downloader):
+
+    def __init__(self, url):
+        papis.downloaders.base.Downloader.__init__(self, url, name="hal")
+        self.expected_document_extension = 'pdf'
+
+    @classmethod
+    def match(cls, url):
+        if re.match(r".*hal.*\.fr.*", url):
+            return Downloader(url)
+        else:
+            return False
+
+    def get_identifier(self):
+        """
+        >>> d = Downloader("http://www.hal.fr/2014TOU30305")
+        >>> d.get_identifier()
+        '2014TOU30305'
+        >>> d = Downloader("http://www.hal.fr/2014TOU30305.bib/?asdf=2")
+        >>> d.get_identifier()
+        '2014TOU30305'
+        """
+        m = re.match(r".*hal.fr/([^/?.&]+).*", self.url)
+        return m.group(1) if m is not None else None
+
+    def get_document_url(self):
+        """
+        >>> d = Downloader("https://hal.archives-ouvertes.fr/jpa-00205888?asf=")
+        >>> d.get_document_url()
+        'https://hal.archives-ouvertes.fr/jpa-00205888/document'
+        """
+        url = re.sub(r'\?.*', '', self.get_url()) + '/document'
+        return url
+
+    def get_bibtex_url(self):
+        """
+        >>> d = Downloader("https://hal.archives-ouvertes.fr/jpa-00205888?asf=")
+        >>> d.get_bibtex_url()
+        'https://hal.archives-ouvertes.fr/jpa-00205888/bibtex'
+        """
+        url = re.sub(r'\?.*', '', self.get_url()) + '/bibtex'
+        return url
diff --git a/papis/downloaders/utils.py b/papis/downloaders/utils.py
index 502b6232..2b6fdb66 100644
--- a/papis/downloaders/utils.py
+++ b/papis/downloaders/utils.py
@@ -10,7 +10,7 @@ logger = logging.getLogger("downloader")
 def get_available_downloaders():
     names = [
         "aps", "acs", "arxiv", "ieee", "scitationaip", "annualreviews",
-        "iopscience", "libgen", "get", "thesesfr", "frontiersin",
+        "iopscience", "libgen", "get", "thesesfr", "hal", "frontiersin",
         "worldscientific",
     ]
     downloaders = []
```