From a4214f649545fdd1cd7f34cf58a9c63400b45f08 Mon Sep 17 00:00:00 2001
From: Pablo Garcia Campos <pablo.garcia-campos@univ-grenoble-alpes.fr>
Date: Thu, 13 Feb 2025 11:50:20 +0100
Subject: [PATCH 1/2] Refactor service file

---
 resif_datacite_cli/service.py | 292 ++++++++++++++++++----------------
 1 file changed, 151 insertions(+), 141 deletions(-)

diff --git a/resif_datacite_cli/service.py b/resif_datacite_cli/service.py
index b22197d..025ef90 100644
--- a/resif_datacite_cli/service.py
+++ b/resif_datacite_cli/service.py
@@ -68,7 +68,7 @@ class ServiceDoi(object):
                 success_step = self.validate(os.path.join(path, path_item), stop_on_error, offline)
                 success_all = success_all and success_step
                 if stop_on_error and not success_all:
-                    return success_all
+                    break
             return success_all
 
         # FILE
@@ -76,75 +76,17 @@ class ServiceDoi(object):
             logger.info("Checking '%s'..." % path)
 
             # Offline validation steps
-            ###########################
-
-            # Load the file and check the structure
-            try:
-                doi = DoiFactory.factory(self._reader, path)
-                logger.success("The XML structure is valid")
-            except ReaderErrorDocumentInvalid as e:
-                logger.error("The XML structure is invalid")
-                logger.error(e)
-                logger.warning("The file '%s' should NOT be uploaded!" % path)
-                return False
-            except ReaderFactoryError as e:
-                logger.error("Unable to read file")
-                logger.debug(e)
-                return False
-
-            # Check the identifier
-            try:
-                doi.validate()
-                logger.success('The DOI is valid')
-            except DoiErrorIdentifierInvalid as e:
-                logger.error(e)
+            _valid, doi = self._get_validated_doi_obj_from_file(path)
+            if not _valid:
                 logger.warning("The file '%s' should NOT be uploaded!" % path)
                 return False
 
             logger.success("The file '%s' is ready to upload!" % path)
 
             # Online validation steps
-            ###########################
             if not offline:
-
-                # Check if identifier is already registered
-                try:
-                    location = self.client.doi_get(doi.identifier)
-                    logger.success("The DOI is already registered on DataCite")
-
-                    # Check the landing page
-                    logger.debug('Registered landing page: %s' % location)
-                    if location == doi.location(self.base_url):
-                        logger.success("The registered landing page URL is valid")
-                    else:
-                        logger.debug("Generated landing page: %s" % doi.location(self.base_url))
-                        logger.warning("The registered landing page URL is invalid")
-                except DataCiteNotFoundError as e:
-                    logger.warning("The DOI '%s' is not registered yet on DataCite" % doi.identifier)
-                except DataCiteRequestError as e:
-                    logger.error("Datacite Error: %s" % e)
-                except DataCiteServerError as e:
-                    logger.error("Datacite Server Error")
-                    logger.debug(e)
-                except HttpError as e:
-                    logger.error("Datacite Connection Error")
-                    logger.debug(e)
-
-                # Check the metadata
-                try:
-                    xml_metadata = self.client.metadata_get(doi.identifier)
-                    logger.success("The DOI metadata are available on DataCite")
-                    logger.debug('Metadata: %s' % xml_metadata)
-                except DataCiteNotFoundError as e:
-                    logger.warning("No metadata found for DOI '%s' on DataCite" % doi.identifier)
-                except DataCiteRequestError as e:
-                    logger.error("Datacite Error: %s" % e)
-                except DataCiteServerError as e:
-                    logger.error("Datacite Server Error")
-                    logger.debug(e)
-                except HttpError as e:
-                    logger.error("Datacite Connection Error")
-                    logger.debug(e)
+                self._check_if_online_and_landing_page(doi)
+                self._get_xml_metadata(doi.identifier)
 
             # All is fine
             return True
@@ -174,7 +116,7 @@ class ServiceDoi(object):
                 success_step = self.upload(os.path.join(path, path_item), stop_on_error)
                 success_all = success_all and success_step
                 if stop_on_error and not success_all:
-                    return success_all
+                    break
             return success_all
 
         # FILE
@@ -182,26 +124,8 @@ class ServiceDoi(object):
             logger.info("Checking '%s'..." % path)
 
             # Load the file and check the structure
-            try:
-                doi = DoiFactory.factory(self._reader, path)
-                logger.success("The XML structure is valid")
-            except ReaderErrorDocumentInvalid as e:
-                logger.error("The XML structure is invalid")
-                logger.error(e)
-                logger.warning("The file '%s' has NOT been uploaded on DataCite" % path)
-                return False
-            except ReaderFactoryError as e:
-                logger.error("Unable to read the file '%s'" % path)
-                logger.debug(e)
-                logger.warning("The file '%s' has NOT been uploaded on DataCite" % path)
-                return False
-
-            # Check the identifier
-            try:
-                doi.validate()
-                logger.success("The DOI is valid")
-            except DoiErrorIdentifierInvalid as e:
-                logger.error(e)
+            _valid, doi = self._get_validated_doi_obj_from_file(path=path)
+            if not _valid:
                 logger.warning("The file '%s' has NOT been uploaded on DataCite" % path)
                 return False
 
@@ -209,29 +133,8 @@ class ServiceDoi(object):
             logger.info("Uploading '%s'..." % path)
 
             # Submit to DataCite
-            try:
-                # Publish metadata for the DOI
-                identifier = doi.identifier
-                logger.debug("Pushing the XML metadata file for '%s' on DataCite" % identifier)
-                self.client.metadata_post(doi.content)
-                logger.success("The file '%s' has been pushed to DataCite" % path)
-
-                # Register/Update the DOI
-                location = doi.location(self.base_url)
-                logger.debug("Registering the DOI '%s' on DataCite with landing page URL: %s" % (identifier, location))
-                self.client.doi_post(identifier, location)
-                logger.success("The DOI '%s' has been registered on DataCite and linked to '%s'" % (identifier, location))
-
-            except DataCiteRequestError as e:
-                logger.error('Datacite Error: %s' % e)
-                return False
-            except DataCiteServerError as e:
-                logger.error('Datacite Server Error')
-                logger.debug(e)
-                return False
-            except HttpError as e:
-                logger.error('Datacite Connection Error')
-                logger.debug(e)
+            valid = self._publish_metadata(doi=doi)
+            if not valid:
                 return False
 
             # All is fine
@@ -260,62 +163,169 @@ class ServiceDoi(object):
         logger.info("Downloading DOI '%s' from DataCite..." % identifier)
 
         # Check if identifier is already registered
-        try:
-            location = self.client.doi_get(identifier)
-            logger.success("The landing page of DOI '%s' registered on DataCite is '%s'" % (identifier, location))
-        except DataCiteNotFoundError as e:
-            logger.warning("The DOI '%s' is not registered on DataCite" % identifier)
+        location = self._get_landing_page(identifier=identifier)
+        if location is None:
             return False
-        except DataCiteRequestError as e:
-            logger.error('Datacite Error: %s' % e)
+
+        xml_metadata = self._get_xml_metadata(identifier)
+        if xml_metadata is None:
             return False
+
+        output_path = self._build_output_path(output_path, suffix)
+
+        logger.debug("Output path: %s" % output_path)
+        with open(output_path, "w") as f:
+            f.write(xml_metadata)
+
+        logger.success("The metadata of DOI '%s' available on DataCite have been downloaded as '%s'" % (identifier, output_path))
+
+
+        # All is fine
+        return True
+
+    # region Util methods
+
+    def _publish_metadata(self, doi : DoiAbstract) -> bool:
+        """
+        Push the XML metadata of the doi object to Datacite, then register/update the DOI with its landing page.
+
+        :param doi: DOI object providing the identifier, the XML content and the landing page location
+        :return: True when both calls succeed; False when a Datacite or HTTP error was caught and logged
+        """
+        try:
+            # Publish metadata for the DOI
+            identifier = doi.identifier
+            logger.debug("Pushing the XML metadata file for '%s' on DataCite" % identifier)
+            self.client.metadata_post(doi.content)
+            logger.success("The metadata has been pushed to DataCite")
+
+            # Register/Update the DOI
+            location = doi.location(self.base_url)
+            logger.debug("Registering the DOI '%s' on DataCite with landing page URL: %s" % (identifier, location))
+            self.client.doi_post(identifier, location)
+            logger.success("The DOI '%s' has been registered on DataCite and linked to '%s'" % (identifier, location))
+            return True
+        except DataCiteRequestError as e:
+            logger.error("Datacite Error: %s" % e)
         except DataCiteServerError as e:
-            logger.error('Datacite Server Error')
+            logger.error("Datacite Server Error")
             logger.debug(e)
-            return False
         except HttpError as e:
-            logger.error('Datacite Connection Error')
+            logger.error("Datacite Connection Error")
             logger.debug(e)
-            return False
+        return False
+
+    def _get_validated_doi_obj_from_file(self, path : str) -> tuple[bool, DoiAbstract | None]:
+        """
+        Tries to parse the xml file into a DOI object, then validates it and returns a couple of (valid, DOI).
 
-        # Check the metadata
+        :param path: The path of XML file to read and parse
+
+        :return: a couple (valid, DOI object). If not valid, the error is logged and DOI object is None.
+        """
+        try:
+            doi = DoiFactory.factory(self._reader, path)
+            logger.success("The XML structure is valid")
+            doi.validate()
+            logger.success("The DOI is valid")
+            return True, doi
+        except DoiErrorIdentifierInvalid as e:
+            logger.error(e)
+        except ReaderErrorDocumentInvalid as e:
+            logger.error("The XML structure is invalid")
+            logger.error(e)
+        except ReaderFactoryError as e:
+            logger.error("Unable to read file")
+            logger.debug(e)
+        # Every handled failure (invalid identifier included) must yield a 2-tuple: callers unpack the result.
+        return False, None
+
+    def _get_xml_metadata(self, identifier : str ) -> str | None :
+        """
+        Fetch the xml metadata registered on the Datacite website for the given identifier and return it.
+        If it is not found or a request/server/connection error occurs, the problem is logged and None is returned.
+
+        :param identifier: identifier of the resource.
+        :return: xml metadata or None if the resource metadata could not be fetched.
+        """
         try:
             xml_metadata = self.client.metadata_get(identifier)
+            logger.success("The DOI metadata are available on DataCite")
             logger.debug("Metadata: %s" % xml_metadata)
+            return xml_metadata
+        except DataCiteNotFoundError as e:
+            logger.warning("No metadata found for DOI '%s' on DataCite" % identifier)
+        except DataCiteRequestError as e:
+            logger.error("Datacite Error: %s" % e)
+        except DataCiteServerError as e:
+            logger.error("Datacite Server Error")
+            logger.debug(e)
+        except HttpError as e:
+            logger.error("Datacite Connection Error")
+            logger.debug(e)
+        return None
 
-            if os.path.isdir(output_path):
-                dir_path = os.path.join(output_path, self.prefix)
-
-                if "/" in suffix:
-                    tokens = suffix.split("/")
-                    dir_path = os.path.join(dir_path, *tokens[:-1])
-                    file_name = "%s.xml" % tokens[-1]
-                else:
-                    file_name = "%s.xml" % suffix
+    def _check_if_online_and_landing_page(self, doi : DoiAbstract) -> None:
+        """Check that the DOI is registered on Datacite and that its registered landing page matches the local one.
 
-                os.makedirs(dir_path, exist_ok=True)
-                output_path = os.path.join(dir_path, file_name)
+        :param doi: DOI object parsed from XML; the comparison is skipped when no registered page could be fetched.
+        """
+        location = self._get_landing_page(doi.identifier)
+        if location is None:
+            return  # Not registered or lookup failed (already logged): there is nothing to compare.
+        logger.debug("Registered landing page: %s" % location)
+        if location == doi.location(self.base_url):
+            logger.success("The registered landing page URL is valid")
+        else:
+            logger.debug("Generated landing page: %s" % doi.location(self.base_url))
+            logger.warning("The registered landing page URL is invalid")
 
-            logger.debug("Output path: %s" % output_path)
-            with open(output_path, 'w') as f:
-                f.write(xml_metadata)
+    def _get_landing_page(self, identifier : str) -> str | None:
+        """
+        Fetch the landing page location registered on the Datacite website for the given identifier.
+        A DOI that is not registered, or a request/server/connection error, is logged and reported as None.
 
-            logger.success("The metadata of DOI '%s' available on DataCite have been downloaded as '%s'" % (identifier, output_path))
+        :param identifier: identifier of the DOI to look up.
 
+        :return: the location of the landing page of the Datacite metadata or None if it could not be fetched.
+        """
+        try:
+            location = self.client.doi_get(identifier)
+            logger.success("The DOI is already registered on DataCite")
+            return location
         except DataCiteNotFoundError as e:
-            logger.warning("No metadata found for DOI '%s' on DataCite" % identifier)
-            return False
+            logger.warning("The DOI '%s' is not registered yet on DataCite" % identifier)
         except DataCiteRequestError as e:
             logger.error("Datacite Error: %s" % e)
-            return False
         except DataCiteServerError as e:
             logger.error("Datacite Server Error")
             logger.debug(e)
-            return False
         except HttpError as e:
             logger.error("Datacite Connection Error")
             logger.debug(e)
-            return False
+        return None
 
-        # All is fine
-        return True
+    def _build_output_path(self, output_path : str, suffix: str) -> str:
+        """
+        Resolve the path of the XML file to write, creating the intermediate folders when needed.
+        An output path that is not an existing directory is returned untouched.
+
+        :param output_path: output path given by the user
+        :param suffix: doi identifier suffix; its "/"-separated leading parts become sub-folders
+
+        :return: customized output path for the XML file
+        """
+        if not os.path.isdir(output_path):
+            return output_path
+
+        # The last component of the suffix names the file, the preceding ones are nested folders.
+        *sub_folders, leaf = suffix.split("/")
+        xml_name = "%s.xml" % leaf
+
+        root_dir = os.path.join(output_path, self.prefix) if self.prefix else output_path
+        target_dir = os.path.join(root_dir, *sub_folders)
+
+        os.makedirs(target_dir, exist_ok=True)
+        return os.path.join(target_dir, xml_name)
+
+    # endregion Util methods
-- 
GitLab


From 1d6d4710d0a5d40edb088f3075e5857eb0a80f9a Mon Sep 17 00:00:00 2001
From: Pablo Garcia Campos <pablo.garcia-campos@univ-grenoble-alpes.fr>
Date: Tue, 11 Mar 2025 10:16:13 +0100
Subject: [PATCH 2/2] Fix forgotten import

---
 resif_datacite_cli/service.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/resif_datacite_cli/service.py b/resif_datacite_cli/service.py
index 025ef90..998501f 100644
--- a/resif_datacite_cli/service.py
+++ b/resif_datacite_cli/service.py
@@ -5,6 +5,7 @@ import logging
 from datacite import DataCiteMDSClient
 from datacite.errors import DataCiteNotFoundError, DataCiteRequestError, DataCiteServerError, HttpError
 
+from .models import DoiAbstract
 from .models.factory import DoiFactory
 from .models.errors import DoiErrorIdentifierInvalid
 from .readers.factory import ReaderFactory
-- 
GitLab