From 66010479022c6603e7feedbf868312b27bfa1838 Mon Sep 17 00:00:00 2001 From: beaufilx <Est1tiam2y@24> Date: Fri, 31 Jan 2025 18:10:56 +0100 Subject: [PATCH 1/4] =?UTF-8?q?AdsQuery=20externalis=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ptf/external/ads.py | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/ptf/external/ads.py diff --git a/src/ptf/external/ads.py b/src/ptf/external/ads.py new file mode 100644 index 000000000..5421fa207 --- /dev/null +++ b/src/ptf/external/ads.py @@ -0,0 +1,78 @@ +from requests_cache import CachedSession +from requests_cache import FileCache + +import json + +from datetime import timedelta +from django.conf import settings + +session = CachedSession( + backend=FileCache( + getattr(settings, "REQUESTS_CACHE_LOCATION", None) or "/tmp/ptf_requests_cache", + decode_content=False, + ), + match_headers={ + "Authorization": "Bearer " + settings.ADS_TOKEN, + "User-Agent": getattr(settings, "REQUESTS_USER_AGENT", None) or "Mathdoc/1.0.0", + "From": getattr(settings, "REQUESTS_EMAIL", None) or "accueil@listes.mathdoc.fr", + }, + expire_after=timedelta(days=30), +) + + +class AdsQuery: + def __init__(self): + self._start = 0 + self._count = 0 + self._present = {} + + def query_referencing(self, pub: str): + while self._count >= self._start: + data = self._query(pub) + self._load(data) + return self._present + + def _load(self, _results): + for result in _results: + if 'doi' in result: + for doi in result["doi"]: + if doi[:7] == '10.5802': + self._present[doi] = AdsArticle(doi, result['title'][0], result['pub']) + else: + print(result) + + def _query(self, pub: str): + url = "https://api.adsabs.harvard.edu/v1/search/query" + query_param = { + "fl": "doi,title,pub", + "rows": "200", + "start": self._start, + "q": f"pub:\"{pub}\"", + } + response = session.get(url, params=query_param, headers={"Authorization": "Bearer " + settings.ADS_TOKEN}) + response.raise_for_status() + data = json.loads(response.text) + self._count = data["response"]["numFound"] + self._start += 200 + return data["response"]["docs"] + + +class AdsArticle: + _doi: str + _pub: str + _title: str + + def __init__(self, doi, title, pub): + self._title = title + self._doi = doi + self._pub = pub + + def doi(self) -> str: + return self._doi + + def title(self) -> str: + return self._title + + def pub(self) -> str: + return self._pub + -- GitLab From 7b08a6ec369c7de5ba4d9626567b8de577a39689 Mon Sep 17 00:00:00 2001 From: beaufilx <Est1tiam2y@24> Date: Thu, 6 Feb 2025 09:24:10 +0100 Subject: [PATCH 2/4] Recherche par ISSN --- src/ptf/external/ads.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ptf/external/ads.py b/src/ptf/external/ads.py index 5421fa207..6e0af2b83 100644 --- a/src/ptf/external/ads.py +++ b/src/ptf/external/ads.py @@ -2,10 +2,11 @@ from requests_cache import CachedSession from requests_cache import FileCache import json - from datetime import timedelta from django.conf import settings +from ptf.model_data import ArticleData + session = CachedSession( backend=FileCache( getattr(settings, "REQUESTS_CACHE_LOCATION", None) or "/tmp/ptf_requests_cache", @@ -37,17 +38,17 @@ class AdsQuery: if 'doi' in result: for doi in result["doi"]: if doi[:7] == '10.5802': - self._present[doi] = AdsArticle(doi, result['title'][0], result['pub']) + self._present[doi] = ArticleData(doi=doi, title_html=result['title'][0], pub=result['pub']) else: print(result) - def _query(self, pub: str): + def _query(self, issn: str): url = "https://api.adsabs.harvard.edu/v1/search/query" query_param = { "fl": "doi,title,pub", "rows": "200", "start": self._start, - "q": f"pub:\"{pub}\"", + "q": f"issn:\"{issn}\"", } response = session.get(url, params=query_param, headers={"Authorization": "Bearer " + settings.ADS_TOKEN}) response.raise_for_status() -- GitLab From 79357e971f6cbdecc51b56252672d2a47ead407d Mon Sep 17 00:00:00 2001 From: beaufilx <Est1tiam2y@24> Date: Mon, 10 Feb 2025 10:35:10 +0100 Subject: [PATCH 3/4] =?UTF-8?q?Acces=20=C3=A0=20ADS=5FTOKEN?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ptf/external/ads.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ptf/external/ads.py b/src/ptf/external/ads.py index 6e0af2b83..c4aeba0f7 100644 --- a/src/ptf/external/ads.py +++ b/src/ptf/external/ads.py @@ -43,6 +43,7 @@ class AdsQuery: print(result) def _query(self, issn: str): + ads_token = getattr(settings, "ADS_TOKEN", None) url = "https://api.adsabs.harvard.edu/v1/search/query" query_param = { "fl": "doi,title,pub", @@ -50,7 +51,7 @@ class AdsQuery: "start": self._start, "q": f"issn:\"{issn}\"", } - response = session.get(url, params=query_param, headers={"Authorization": "Bearer " + settings.ADS_TOKEN}) + response = session.get(url, params=query_param, headers={"Authorization": "Bearer " + ads_token}) response.raise_for_status() data = json.loads(response.text) self._count = data["response"]["numFound"] -- GitLab From b48158aa405a96c0037d55534270fdfcd2254141 Mon Sep 17 00:00:00 2001 From: beaufilx <Est1tiam2y@24> Date: Tue, 11 Feb 2025 10:35:36 +0100 Subject: [PATCH 4/4] =?UTF-8?q?Acces=20=C3=A0=20ADS=5FTOKEN?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ptf/external/ads.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ptf/external/ads.py b/src/ptf/external/ads.py index c4aeba0f7..574260da3 100644 --- a/src/ptf/external/ads.py +++ b/src/ptf/external/ads.py @@ -13,7 +13,7 @@ session = CachedSession( decode_content=False, ), match_headers={ - "Authorization": "Bearer " + settings.ADS_TOKEN, + "Authorization": "Bearer " + getattr(settings, "ADS_TOKEN", ""), "User-Agent": getattr(settings, "REQUESTS_USER_AGENT", None) or "Mathdoc/1.0.0", "From": getattr(settings, "REQUESTS_EMAIL", None) or "accueil@listes.mathdoc.fr", }, -- GitLab