Skip to content

Commit 8a7a874

Browse files
authored
fix: adds XML declarations to serialized read along documents (#277)
* fix: adds XML declarations to serialized read along documents
* test: added test to validate XML declaration is returned by the API
1 parent f074bf0 commit 8a7a874

File tree

5 files changed

+16
-13
lines changed

5 files changed

+16
-13
lines changed

readalongs/api.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class like pathlib.Path. Warning: don't just use "/some/path/config.json"
6767
DEFAULT_TITLE,
6868
create_web_component_html,
6969
)
70-
from readalongs.text.util import parse_xml
70+
from readalongs.text.util import parse_xml, xml_to_string
7171
from readalongs.util import JoinerCallbackForClick, get_langs_deferred
7272

7373

@@ -263,12 +263,7 @@ def convert_prealigned_text_to_readalong(
263263
sentence_xml.text += token.text
264264

265265
xml = add_ids(xml)
266-
xml_text = etree.tostring(
267-
xml,
268-
encoding="utf-8",
269-
xml_declaration=True,
270-
).decode("utf8")
271-
266+
xml_text = xml_to_string(xml)
272267
return xml_text + "\n"
273268

274269

readalongs/text/make_package.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def encode_from_path(path: Union[str, os.PathLike]) -> str:
126126
)
127127
continue
128128
img.attrib["url"] = f"data:{mime[0]};base64,{img_b64}"
129-
path_bytes = etree.tostring(root)
129+
path_bytes = etree.tostring(root, encoding="utf-8", xml_declaration=True)
130130
b64 = str(b64encode(path_bytes), encoding="utf8")
131131
mime = guess_type(path)
132132
if str(path).endswith(

readalongs/text/util.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,10 @@ def save_xml(output_path, xml):
159159
write_xml(fout, xml)
160160

161161

162+
def xml_to_string(xml) -> str:
163+
return etree.tostring(xml, encoding="utf-8", xml_declaration=True).decode()
164+
165+
162166
def save_xml_zip(zip_path, output_path, xml):
163167
ensure_dirs(zip_path)
164168
with zipfile.ZipFile(zip_path, "a", compression=zipfile.ZIP_DEFLATED) as fout_zip:

readalongs/web_api.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
from readalongs.text.convert_xml import TimeLimitException, convert_xml
5353
from readalongs.text.make_dict import make_dict_list
5454
from readalongs.text.tokenize_xml import tokenize_xml
55-
from readalongs.text.util import parse_xml
55+
from readalongs.text.util import parse_xml, xml_to_string
5656
from readalongs.util import get_langs
5757

5858
# Heroku drops requests that take more than 30s total to respond, so give g2p a 25s budget
@@ -287,15 +287,15 @@ async def assemble(
287287
response = AssembleResponse(
288288
lexicon=dict_data,
289289
text_ids=text_input,
290-
processed_ras=etree.tostring(g2ped, encoding="utf8").decode(),
290+
processed_ras=xml_to_string(g2ped),
291291
log=captured_logs.getvalue(),
292292
)
293293

294294
if request.debug:
295295
response.input_request = request
296-
response.parsed = etree.tostring(parsed, encoding="utf8")
297-
response.tokenized = etree.tostring(tokenized, encoding="utf8")
298-
response.g2ped = etree.tostring(g2ped, encoding="utf8")
296+
response.parsed = xml_to_string(parsed)
297+
response.tokenized = xml_to_string(tokenized)
298+
response.g2ped = xml_to_string(g2ped)
299299
return response
300300

301301

test/test_web_api.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python
22

3+
import json
34
import os
45
import re
56
from contextlib import redirect_stderr
@@ -51,7 +52,10 @@ def test_assemble_from_plain_text(self):
5152
}
5253
with redirect_stderr(StringIO()):
5354
response = self.API_CLIENT.post("/api/v1/assemble", json=request)
55+
56+
resp_dict = json.loads(response.content.decode("utf-8"))
5457
self.assertEqual(response.status_code, 200)
58+
self.assertTrue(resp_dict["processed_ras"].find("<?xml") >= 0)
5559

5660
def test_bad_path(self):
5761
# Test a request to a path that doesn't exist

0 commit comments

Comments
 (0)