diff --git a/src/ptf/cmds/xml/ckeditor/ckeditor_parser.py b/src/ptf/cmds/xml/ckeditor/ckeditor_parser.py index 3e783a41ea0e96256e5f0c1472b98aa0dbfc9fe9..5f5d0764b4c6bbcf71cf856f7ea089bc85751ada 100644 --- a/src/ptf/cmds/xml/ckeditor/ckeditor_parser.py +++ b/src/ptf/cmds/xml/ckeditor/ckeditor_parser.py @@ -32,6 +32,7 @@ from ptf.cmds.xml.xml_utils import escape, get_xml_from_node, normalize, replace from ptf.display import resolver from ptf.utils import convert_index_to_list_type + # from ptf.utils import create_interlink_for_citation @@ -51,7 +52,7 @@ class CkeditorParser: resolve_entities=True, ) html_value = kwargs["html_value"].replace("\n\n", "") - body = f"<body>{replace_html_entities(html_value)}</body>" + body = f"<fake_body>{replace_html_entities(html_value)}</fake_body>" tree = etree.fromstring(body.encode("utf-8"), parser=parser) else: tree = kwargs["tree"] @@ -130,10 +131,18 @@ class CkeditorParser: xml_text += inner_jats_xml_text xml_text += "</list>" - # # JATS requires <list> to be inside <p> - # parent = node.getparent() - # if parent is None or parent.tag != "p": - # xml_text = f"<p>{xml_text}</p>" + # JATS requires <list> to be inside some specific elements + parent = node.getparent() + if parent is not None and parent.tag not in ["answer", "app", "app-group", "bio", "body", "boxed-text", + "chem-struct", "disp-quote", + "explanation", "fig", "glossary", "legend", "license-p", + "list-item", "named-content", + "notes", "option", "p", "question", "question-preamble", + "ref-list", "sec", "styled-content", + "supplementary-material", + "table-wrap", "td", "th"]: + xml_text = f"<p>{xml_text}</p>" + html_text = f"<{node.tag}{ol_attributes}>{inner_html_text}</{node.tag}>" tex_text = f"<{node.tag}{ol_attributes}>{inner_tex_text}</{node.tag}>" diff --git a/src/ptf/cmds/xml/jats/jats_parser.py b/src/ptf/cmds/xml/jats/jats_parser.py index b24b0f7212b0175d8bd45a5bbe172dd06b17bcde..e71a1b54a76db714e9e06a778a4f46f7734a4386 100644 --- a/src/ptf/cmds/xml/jats/jats_parser.py +++ b/src/ptf/cmds/xml/jats/jats_parser.py @@ -657,6 +657,19 @@ class JatsBase(XmlParserBase): def parse_node_with_p(self, node, **kwargs): tex, html = self.parse_inner_node(node, **kwargs) + insert_p = True + # JATS forces <list> to be inside <p> (or other elements) + # In HTML, you would get an extra unwanted <p> to display a list + # Ignore the <p> if it only embeds a <list> + if node.text is None: + children_count = 0 + for child in node: + if child.tag == "list": + insert_p = False + children_count += 1 + if not insert_p and children_count == 1: + return tex, html + if not self.for_tex_file: tex = f"<p>{tex}</p>" diff --git a/src/ptf/tests/test_ckeditor.py b/src/ptf/tests/test_ckeditor.py index 0a1648ea3e40ab97247515362fd5d20fd6978b0e..5484b1cf458c1ded5f120e7194f230d0f62d97ab 100644 --- a/src/ptf/tests/test_ckeditor.py +++ b/src/ptf/tests/test_ckeditor.py @@ -7,7 +7,7 @@ def test_ckeditor_parser(): result = parser.value_xml assert ( result - == 'Te<st\xa0<inline-formula><alternatives><tex-math>$x = {-b \\pm \\sqrt{b^2-4ac} \\over 2a}$</tex-math></alternatives></inline-formula> done<list list-type="simple"><list-item><p>Item</p></list-item></list><list list-type="number"><list-item><p>Item 1<break/>New line</p></list-item><list-item><p>\xa0</p></list-item></list>' + == 'Te<st\xa0<inline-formula><alternatives><tex-math>$x = {-b \\pm \\sqrt{b^2-4ac} \\over 2a}$</tex-math></alternatives></inline-formula> done<p><list list-type="simple"><list-item><p>Item</p></list-item></list></p><p><list list-type="number"><list-item><p>Item 1<break/>New line</p></list-item><list-item><p>\xa0</p></list-item></list></p>' ) html_value = r"<strong>Item</strong><em>text</em><sub>i</sub><sup>2</sup><a>title</a>"