From a4214f649545fdd1cd7f34cf58a9c63400b45f08 Mon Sep 17 00:00:00 2001 From: Pablo Garcia Campos <pablo.garcia-campos@univ-grenoble-alpes.fr> Date: Thu, 13 Feb 2025 11:50:20 +0100 Subject: [PATCH 1/2] Refactor service file --- resif_datacite_cli/service.py | 292 ++++++++++++++++++---------------- 1 file changed, 151 insertions(+), 141 deletions(-) diff --git a/resif_datacite_cli/service.py b/resif_datacite_cli/service.py index b22197d..025ef90 100644 --- a/resif_datacite_cli/service.py +++ b/resif_datacite_cli/service.py @@ -68,7 +68,7 @@ class ServiceDoi(object): success_step = self.validate(os.path.join(path, path_item), stop_on_error, offline) success_all = success_all and success_step if stop_on_error and not success_all: - return success_all + break return success_all # FILE @@ -76,75 +76,17 @@ class ServiceDoi(object): logger.info("Checking '%s'..." % path) # Offline validation steps - ########################### - - # Load the file and check the structure - try: - doi = DoiFactory.factory(self._reader, path) - logger.success("The XML structure is valid") - except ReaderErrorDocumentInvalid as e: - logger.error("The XML structure is invalid") - logger.error(e) - logger.warning("The file '%s' should NOT be uploaded!" % path) - return False - except ReaderFactoryError as e: - logger.error("Unable to read file") - logger.debug(e) - return False - - # Check the identifier - try: - doi.validate() - logger.success('The DOI is valid') - except DoiErrorIdentifierInvalid as e: - logger.error(e) + _valid, doi = self._get_validated_doi_obj_from_file(path) + if not _valid: logger.warning("The file '%s' should NOT be uploaded!" % path) return False logger.success("The file '%s' is ready to upload!" 
% path) # Online validation steps - ########################### if not offline: - - # Check if identifier is already registered - try: - location = self.client.doi_get(doi.identifier) - logger.success("The DOI is already registered on DataCite") - - # Check the landing page - logger.debug('Registered landing page: %s' % location) - if location == doi.location(self.base_url): - logger.success("The registered landing page URL is valid") - else: - logger.debug("Generated landing page: %s" % doi.location(self.base_url)) - logger.warning("The registered landing page URL is invalid") - except DataCiteNotFoundError as e: - logger.warning("The DOI '%s' is not registered yet on DataCite" % doi.identifier) - except DataCiteRequestError as e: - logger.error("Datacite Error: %s" % e) - except DataCiteServerError as e: - logger.error("Datacite Server Error") - logger.debug(e) - except HttpError as e: - logger.error("Datacite Connection Error") - logger.debug(e) - - # Check the metadata - try: - xml_metadata = self.client.metadata_get(doi.identifier) - logger.success("The DOI metadata are available on DataCite") - logger.debug('Metadata: %s' % xml_metadata) - except DataCiteNotFoundError as e: - logger.warning("No metadata found for DOI '%s' on DataCite" % doi.identifier) - except DataCiteRequestError as e: - logger.error("Datacite Error: %s" % e) - except DataCiteServerError as e: - logger.error("Datacite Server Error") - logger.debug(e) - except HttpError as e: - logger.error("Datacite Connection Error") - logger.debug(e) + self._check_if_online_and_landing_page(doi) + self._get_xml_metadata(doi.identifier) # All is fine return True @@ -174,7 +116,7 @@ class ServiceDoi(object): success_step = self.upload(os.path.join(path, path_item), stop_on_error) success_all = success_all and success_step if stop_on_error and not success_all: - return success_all + break return success_all # FILE @@ -182,26 +124,8 @@ class ServiceDoi(object): logger.info("Checking '%s'..." 
% path) # Load the file and check the structure - try: - doi = DoiFactory.factory(self._reader, path) - logger.success("The XML structure is valid") - except ReaderErrorDocumentInvalid as e: - logger.error("The XML structure is invalid") - logger.error(e) - logger.warning("The file '%s' has NOT been uploaded on DataCite" % path) - return False - except ReaderFactoryError as e: - logger.error("Unable to read the file '%s'" % path) - logger.debug(e) - logger.warning("The file '%s' has NOT been uploaded on DataCite" % path) - return False - - # Check the identifier - try: - doi.validate() - logger.success("The DOI is valid") - except DoiErrorIdentifierInvalid as e: - logger.error(e) + _valid, doi = self._get_validated_doi_obj_from_file(path=path) + if not _valid: logger.warning("The file '%s' has NOT been uploaded on DataCite" % path) return False @@ -209,29 +133,8 @@ class ServiceDoi(object): logger.info("Uploading '%s'..." % path) # Submit to DataCite - try: - # Publish metadata for the DOI - identifier = doi.identifier - logger.debug("Pushing the XML metadata file for '%s' on DataCite" % identifier) - self.client.metadata_post(doi.content) - logger.success("The file '%s' has been pushed to DataCite" % path) - - # Register/Update the DOI - location = doi.location(self.base_url) - logger.debug("Registering the DOI '%s' on DataCite with landing page URL: %s" % (identifier, location)) - self.client.doi_post(identifier, location) - logger.success("The DOI '%s' has been registered on DataCite and linked to '%s'" % (identifier, location)) - - except DataCiteRequestError as e: - logger.error('Datacite Error: %s' % e) - return False - except DataCiteServerError as e: - logger.error('Datacite Server Error') - logger.debug(e) - return False - except HttpError as e: - logger.error('Datacite Connection Error') - logger.debug(e) + valid = self._publish_metadata(doi=doi) + if not valid: return False # All is fine @@ -260,62 +163,169 @@ class ServiceDoi(object): 
logger.info("Downloading DOI '%s' from DataCite..." % identifier) # Check if identifier is already registered - try: - location = self.client.doi_get(identifier) - logger.success("The landing page of DOI '%s' registered on DataCite is '%s'" % (identifier, location)) - except DataCiteNotFoundError as e: - logger.warning("The DOI '%s' is not registered on DataCite" % identifier) + location = self._get_landing_page(identifier=identifier) + if location is None: return False - except DataCiteRequestError as e: - logger.error('Datacite Error: %s' % e) + + xml_metadata = self._get_xml_metadata(identifier) + if xml_metadata is None: return False + + output_path = self._build_output_path(output_path, suffix) + + logger.debug("Output path: %s" % output_path) + with open(output_path, "w") as f: + f.write(xml_metadata) + + logger.success("The metadata of DOI '%s' available on DataCite have been downloaded as '%s'" % (identifier, output_path)) + + + # All is fine + return True + + # region Util methods + + def _publish_metadata(self, doi : DoiAbstract) -> bool: + """ + Publish the metadata of the doi object to Datacite website. 
+ + :param doi: DOI metadata object to save + :return: whether the publication was successful + """ + try: + # Publish metadata for the DOI + identifier = doi.identifier + logger.debug("Pushing the XML metadata file for '%s' on DataCite" % identifier) + self.client.metadata_post(doi.content) + logger.success("The metadata has been pushed to DataCite") + + # Register/Update the DOI + location = doi.location(self.base_url) + logger.debug("Registering the DOI '%s' on DataCite with landing page URL: %s" % (identifier, location)) + self.client.doi_post(identifier, location) + logger.success("The DOI '%s' has been registered on DataCite and linked to '%s'" % (identifier, location)) + return True + except DataCiteRequestError as e: + logger.error("Datacite Error: %s" % e) except DataCiteServerError as e: - logger.error('Datacite Server Error') + logger.error("Datacite Server Error") logger.debug(e) - return False except HttpError as e: - logger.error('Datacite Connection Error') + logger.error("Datacite Connection Error") logger.debug(e) - return False + return False + + def _get_validated_doi_obj_from_file(self, path : str) -> (bool, DoiAbstract | None): + """ + Tries to parse the xml file into a DOI object, then validates it and returns a couple of (valid, DOI). - # Check the metadata + :param path: The path of XML file to read and parse + + :return: a couple (valid, DOI object). If not valid, DOI object is None. 
+ """ + try: + doi = DoiFactory.factory(self._reader, path) + logger.success("The XML structure is valid") + doi.validate() + logger.success("The DOI is valid") + return True, doi + except DoiErrorIdentifierInvalid as e: + logger.error(e) + except ReaderErrorDocumentInvalid as e: + logger.error("The XML structure is invalid") + logger.error(e) + return False, None + except ReaderFactoryError as e: + logger.error("Unable to read file") + logger.debug(e) + return False, None + + def _get_xml_metadata(self, identifier : str ) -> str | None : + """ + It fetches the xml metadata from the Datacite website corresponding to the given identifier and returns it. + If it was not possible, None is returned instead. + + :param identifier: identifier of the resource. + :return: xml metadata or None if the resource metadata could not be fetched. + """ try: xml_metadata = self.client.metadata_get(identifier) + logger.success("The DOI metadata are available on DataCite") logger.debug("Metadata: %s" % xml_metadata) + return xml_metadata + except DataCiteNotFoundError as e: + logger.warning("No metadata found for DOI '%s' on DataCite" % identifier) + except DataCiteRequestError as e: + logger.error("Datacite Error: %s" % e) + except DataCiteServerError as e: + logger.error("Datacite Server Error") + logger.debug(e) + except HttpError as e: + logger.error("Datacite Connection Error") + logger.debug(e) + return None - if os.path.isdir(output_path): - dir_path = os.path.join(output_path, self.prefix) - - if "/" in suffix: - tokens = suffix.split("/") - dir_path = os.path.join(dir_path, *tokens[:-1]) - file_name = "%s.xml" % tokens[-1] - else: - file_name = "%s.xml" % suffix + def _check_if_online_and_landing_page(self, doi : DoiAbstract) -> None: + """ + Checks if the identifier exists in the Datacite website and then checks if the landing page of the + DOI object (local, from XML file) corresponds to the landing page saved in the online Datacite metadata. 
+ - os.makedirs(dir_path, exist_ok=True) - output_path = os.path.join(dir_path, file_name) + :param doi: DOI object parsed from XML. + """ + location = self._get_landing_page(doi.identifier) + logger.debug("Registered landing page: %s" % location) + if location == doi.location(self.base_url): + logger.success("The registered landing page URL is valid") + else: + logger.debug("Generated landing page: %s" % doi.location(self.base_url)) + logger.warning("The registered landing page URL is invalid") - logger.debug("Output path: %s" % output_path) - with open(output_path, 'w') as f: - f.write(xml_metadata) + def _get_landing_page(self, identifier : str) -> str | None: + """ + Fetches the landing page location registered on the Datacite website for the given identifier. + Datacite and HTTP errors raised by the lookup are logged instead of being propagated to the caller. - logger.success("The metadata of DOI '%s' available on DataCite have been downloaded as '%s'" % (identifier, output_path)) + :param identifier: identifier of the DOI to look up. + :return: the location of the landing page of the Datacite metadata or None if it could not be fetched. 
+ """ + try: + location = self.client.doi_get(identifier) + logger.success("The DOI is already registered on DataCite") + return location except DataCiteNotFoundError as e: - logger.warning("No metadata found for DOI '%s' on DataCite" % identifier) - return False + logger.warning("The DOI '%s' is not registered yet on DataCite" % identifier) except DataCiteRequestError as e: logger.error("Datacite Error: %s" % e) - return False except DataCiteServerError as e: logger.error("Datacite Server Error") logger.debug(e) - return False except HttpError as e: logger.error("Datacite Connection Error") logger.debug(e) - return False + return None - # All is fine - return True + def _build_output_path(self, output_path : str, suffix: str) -> str: + """ + From the output path and the suffix it creates the full path of the file and creates the needed folders. + + :param output_path: output path given by the user + :param suffix: doi identifier suffix + + :return: customized output path for the XML file + """ + if os.path.isdir(output_path): + dir_path = os.path.join(output_path, self.prefix) if self.prefix else output_path + + if '/' in suffix: + tokens = suffix.split("/") + dir_path = os.path.join(dir_path, *tokens[:-1]) + file_name = "%s.xml" % tokens[-1] + else: + file_name = "%s.xml" % suffix + + os.makedirs(dir_path, exist_ok=True) + output_path = os.path.join(dir_path, file_name) + return output_path + + # endregion Util methods -- GitLab From 1d6d4710d0a5d40edb088f3075e5857eb0a80f9a Mon Sep 17 00:00:00 2001 From: Pablo Garcia Campos <pablo.garcia-campos@univ-grenoble-alpes.fr> Date: Tue, 11 Mar 2025 10:16:13 +0100 Subject: [PATCH 2/2] Fix forgotten import --- resif_datacite_cli/service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/resif_datacite_cli/service.py b/resif_datacite_cli/service.py index 025ef90..998501f 100644 --- a/resif_datacite_cli/service.py +++ b/resif_datacite_cli/service.py @@ -5,6 +5,7 @@ import logging from datacite import DataCiteMDSClient from 
datacite.errors import DataCiteNotFoundError, DataCiteRequestError, DataCiteServerError, HttpError +from .models import DoiAbstract from .models.factory import DoiFactory from .models.errors import DoiErrorIdentifierInvalid from .readers.factory import ReaderFactory -- GitLab