Skip to content

Commit 30544b8

Browse files
authored
Fix Unicode encoding issue on Windows (#11)
* Fix Unicode encoding issue on Windows: add UTF-8 encoding to the text diff output and enforce file encoding checks.
1 parent 48c978b commit 30544b8

File tree

6 files changed

+8
-4
lines changed

6 files changed

+8
-4
lines changed

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,14 @@ addopts = [
6363
]
6464

6565
[tool.ruff.lint]
66+
preview = true
6667
extend-select = [
6768
"I", # Sort imports
6869
"ANN", # Enforce type annotations
6970
"PT", # Common style issues or inconsistencies with pytest-based tests
70-
]
71+
"PTH", # Use PathLib
72+
"PLW1514", # Must specify file encoding
73+
]
7174

7275
[tool.ruff.lint.isort]
7376
combine-as-imports = true # Combines "as" imports on the same line

src/diffpdf/hash_check.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
def compute_file_hash(filepath: Path) -> str:
66
sha256 = hashlib.sha256()
7-
with open(filepath, "rb") as f:
7+
with Path(filepath).open("rb") as f:
88
for chunk in iter(lambda: f.read(8192), b""):
99
sha256.update(chunk)
1010
return sha256.hexdigest()

src/diffpdf/text_check.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def check_text_content(ref: Path, actual: Path, output_dir: Path | None) -> bool
4646
if output_dir is not None:
4747
output_dir.mkdir(parents=True, exist_ok=True)
4848
diff_file = output_dir / f"{ref.stem}_vs_{actual.stem}_text_diff.txt"
49-
diff_file.write_text(diff_text)
49+
diff_file.write_text(diff_text, encoding="utf-8")
5050

5151
logger.error(f"Text content mismatch:\n {diff_text}")
5252
return False

tests/assets/fail/unicode-A.pdf

22.2 KB
Binary file not shown.

tests/assets/fail/unicode-B.pdf

22.2 KB
Binary file not shown.

tests/test_api.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
("fail/1-letter-diff-A.pdf", "fail/1-letter-diff-B.pdf", False),
2020
("fail/major-color-diff-A.pdf", "fail/major-color-diff-B.pdf", False),
2121
("fail/page-count-diff-A.pdf", "fail/page-count-diff-B.pdf", False),
22+
("fail/unicode-A.pdf", "fail/unicode-B.pdf", False),
2223
],
2324
)
2425
def test_api(ref_pdf_rel, actual_pdf_rel, should_pass):
@@ -39,4 +40,4 @@ def test_text_diff_output(tmp_path):
3940
assert result is False
4041
diff_file = tmp_path / "1-letter-diff-A_vs_1-letter-diff-B_text_diff.txt"
4142
assert diff_file.exists()
42-
assert diff_file.read_text()
43+
assert diff_file.read_text(encoding="utf-8")

0 commit comments

Comments
 (0)