summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alejandro Gallo <aamsgallo@gmail.com> 2019-01-19 23:32:09 +0100
committer Alejandro Gallo <aamsgallo@gmail.com> 2019-01-19 23:32:09 +0100
commit 611719105f39127a4384ab89173c10425b155b38 (patch)
tree 6d37486f67a2ac6e5a38fcfdbf148070b8573d2e
parent 74863f8bafad8b3b677b652d57ad2694478393c4 (diff)
Implement downloader for hal.fr
-rw-r--r-- papis/downloaders/hal.py 47
-rw-r--r-- papis/downloaders/utils.py 2
2 files changed, 48 insertions, 1 deletion
diff --git a/papis/downloaders/hal.py b/papis/downloaders/hal.py
new file mode 100644
index 00000000..c1f42d57
--- /dev/null
+++ b/papis/downloaders/hal.py
@@ -0,0 +1,47 @@
+import re
+import papis.downloaders.base
+import bs4
+
+
+class Downloader(papis.downloaders.base.Downloader):
+
+ def __init__(self, url):
+ papis.downloaders.base.Downloader.__init__(self, url, name="hal")
+ self.expected_document_extension = 'pdf'
+
+ @classmethod
+ def match(cls, url):
+ if re.match(r".*hal.*\.fr.*", url):
+ return Downloader(url)
+ else:
+ return False
+
+ def get_identifier(self):
+ """
+ >>> d = Downloader("http://www.hal.fr/2014TOU30305")
+ >>> d.get_identifier()
+ '2014TOU30305'
+ >>> d = Downloader("http://www.hal.fr/2014TOU30305.bib/?asdf=2")
+ >>> d.get_identifier()
+ '2014TOU30305'
+ """
+ m = re.match(r".*hal.fr/([^/?.&]+).*", self.url)
+ return m.group(1) if m is not None else None
+
+ def get_document_url(self):
+ """
+ >>> d = Downloader("https://hal.archives-ouvertes.fr/jpa-00205888?asf=")
+ >>> d.get_document_url()
+ 'https://hal.archives-ouvertes.fr/jpa-00205888/document'
+ """
+ url = re.sub(r'\?.*', '', self.get_url()) + '/document'
+ return url
+
+ def get_bibtex_url(self):
+ """
+ >>> d = Downloader("https://hal.archives-ouvertes.fr/jpa-00205888?asf=")
+ >>> d.get_bibtex_url()
+ 'https://hal.archives-ouvertes.fr/jpa-00205888/bibtex'
+ """
+ url = re.sub(r'\?.*', '', self.get_url()) + '/bibtex'
+ return url
diff --git a/papis/downloaders/utils.py b/papis/downloaders/utils.py
index 502b6232..2b6fdb66 100644
--- a/papis/downloaders/utils.py
+++ b/papis/downloaders/utils.py
@@ -10,7 +10,7 @@ logger = logging.getLogger("downloader")
def get_available_downloaders():
names = [
"aps", "acs", "arxiv", "ieee", "scitationaip", "annualreviews",
- "iopscience", "libgen", "get", "thesesfr", "frontiersin",
+ "iopscience", "libgen", "get", "thesesfr", "hal", "frontiersin",
"worldscientific",
]
downloaders = []