diff --git a/src/ptf/tex.py b/src/ptf/tex.py
index 6a000ea913681429c0d405c51f718b8d4c9c1c30..156bad19a5298ec241b81ca579de42ca0be97cb3 100644
--- a/src/ptf/tex.py
+++ b/src/ptf/tex.py
@@ -490,7 +490,7 @@ def compile_tex(lines, article, update=False):
     to_path = os.path.join(
         settings.MERSENNE_TEST_DATA_FOLDER, relative_folder, article.pid + ".pdf"
     )
-    if settings.MERSENNE_CREATE_FRONTPAGE:
+    if getattr(settings, "MERSENNE_CREATE_FRONTPAGE", True):
         utils.linearize_pdf(cedram_pdf_location, to_path)
 
     return to_path
diff --git a/src/ptf/utils.py b/src/ptf/utils.py
index 88129fc15674f2313f3ec300866b46fe6fdb1dc4..f5357d5dee759647617e182c720ede7ddc194750 100644
--- a/src/ptf/utils.py
+++ b/src/ptf/utils.py
@@ -65,11 +65,7 @@ def execute_cmd(cmd, force_execute=False):
     @param cmd: str which represents shell command
     @return: output of the command
     """
-    if settings.MERSENNE_CREATE_FRONTPAGE or force_execute:
-        result = subprocess.check_output(cmd, shell=True)
-        return result
-
-    else:
+    if not getattr(settings, "MERSENNE_CREATE_FRONTPAGE", True) and not force_execute:
         # pour debug
         returnStatus = 0
         output = cmd
@@ -77,6 +73,9 @@ def execute_cmd(cmd, force_execute=False):
             file_.write(f"cmd : {cmd}\n")
         return returnStatus, output
 
+    result = subprocess.check_output(cmd, shell=True)
+    return result
+
 
 def get_file_content_in_utf8(filename):
     """
@@ -91,35 +90,35 @@ def get_file_content_in_utf8(filename):
 
 def pdf_to_text(pdf_filename, force_execute=False):
     # Extract full text from the PDF
-    if settings.MERSENNE_CREATE_FRONTPAGE or force_execute:
-        txt_filename = os.path.join(settings.MERSENNE_TMP_FOLDER, "fulltext.txt")
+    if not getattr(settings, "MERSENNE_CREATE_FRONTPAGE", True) and not force_execute:
+        return ""
 
-        try:
-            os.makedirs(settings.MERSENNE_TMP_FOLDER, exist_ok=True)
+    txt_filename = os.path.join(settings.MERSENNE_TMP_FOLDER, "fulltext.txt")
+
+    try:
+        os.makedirs(settings.MERSENNE_TMP_FOLDER, exist_ok=True)
 
-            cmd_str = "pdftotext -raw -nopgbrk -enc UTF-8 " + pdf_filename + " " + txt_filename
-            execute_cmd(cmd_str, force_execute=force_execute)
+        cmd_str = "pdftotext -raw -nopgbrk -enc UTF-8 " + pdf_filename + " " + txt_filename
+        execute_cmd(cmd_str, force_execute=force_execute)
 
-            # Check if the output file has been created
-            if not os.path.isfile(txt_filename) or os.path.getsize(txt_filename) == 0:
-                raise RuntimeError(
-                    "The PDF file was not converted by pdftotext (output file is empty)."
-                )
+        # Check if the output file has been created
+        if not os.path.isfile(txt_filename) or os.path.getsize(txt_filename) == 0:
+            raise RuntimeError(
+                "The PDF file was not converted by pdftotext (output file is empty)."
+            )
 
-            body = get_file_content_in_utf8(txt_filename)
-            # strip control characters
-            body = "".join(ch for ch in body if unicodedata.category(ch)[0] != "C")
+        body = get_file_content_in_utf8(txt_filename)
+        # strip control characters
+        body = "".join(ch for ch in body if unicodedata.category(ch)[0] != "C")
 
-            return body
-        except Exception as e:
-            raise RuntimeError(f"An error occurred while processing the PDF: {str(e)}")
+        return body
+    except Exception as e:
+        raise RuntimeError(f"An error occurred while processing the PDF: {str(e)}") from e
 
-        finally:
-            # Ensure the txt file is deleted
-            if os.path.isfile(txt_filename):
-                os.remove(txt_filename)
-    else:
-        return ""
+    finally:
+        # Ensure the txt file is deleted
+        if os.path.isfile(txt_filename):
+            os.remove(txt_filename)
 
 
 def linearize_pdf(from_path, to_path):
diff --git a/src/upload/utils.py b/src/upload/utils.py
index c32112fbfa5904304f66801de93a2f03be731ee4..df34a9c53282fcf395209e4a1d1d845f7c7fe612 100644
--- a/src/upload/utils.py
+++ b/src/upload/utils.py
@@ -28,12 +28,5 @@ def compute_article_for_pcj(
     lines = tex.create_tex_for_pcj(article)
     pdf_filename = tex.compile_tex(lines, article)
     # Extract full text from the PDF
-    if settings.MERSENNE_CREATE_FRONTPAGE or force_execute:
-        body = utils.pdf_to_text(pdf_filename)
-        return article, body
-
-    # for debug
-    else:
-        body = utils.pdf_to_text(pdf_filename)
-
-    return article, body
+    body = utils.pdf_to_text(pdf_filename)
+    return article, body