Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions src/filler.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,41 @@
from pdfrw import PdfReader, PdfWriter
from src.llm import LLM
from datetime import datetime
from pathlib import Path
from uuid import uuid4


class Filler:
def __init__(self):
pass

@staticmethod
def _make_output_path(pdf_form: str) -> str:
"""
Generate a unique output path for a filled PDF.

Previously this used datetime.now().strftime("%Y%m%d_%H%M%S"), which has
only 1-second resolution. Two concurrent requests for the same template
within the same second would produce identical paths, causing the second
write to silently overwrite the first and corrupting the first submission's
stored record.

uuid4().hex provides 128 bits of randomness — collision probability is
effectively zero (2^-128) regardless of concurrency or timing.

Output is written to an `outputs/` subdirectory next to the template so
that generated files are cleanly separated from source templates.
"""
template_path = Path(pdf_form)
output_dir = template_path.parent / "outputs"
output_dir.mkdir(parents=True, exist_ok=True)
return str(output_dir / f"{template_path.stem}_{uuid4().hex}_filled.pdf")

def fill_form(self, pdf_form: str, llm: LLM):
"""
Fill a PDF form with values from user_input using LLM.
Fields are filled in the visual order (top-to-bottom, left-to-right).
"""
output_pdf = (
pdf_form[:-4]
+ "_"
+ datetime.now().strftime("%Y%m%d_%H%M%S")
+ "_filled.pdf"
)
output_pdf = self._make_output_path(pdf_form)

# Generate dictionary of answers from your original function
t2j = llm.main_loop()
Expand Down
50 changes: 50 additions & 0 deletions tests/test_filler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
Tests for src/filler.py — specifically the output path generation logic.
"""
import os
from src.filler import Filler


def test_make_output_path_is_unique():
"""
Every call to _make_output_path() must return a distinct path.

Previously, paths were generated with a 1-second timestamp, meaning two
concurrent requests within the same second would produce the same path and
the second write would silently overwrite the first (race condition).

With uuid4().hex the collision probability is 2^-128 — effectively zero.
"""
paths = {Filler._make_output_path("src/inputs/template.pdf") for _ in range(500)}
assert len(paths) == 500, (
"Expected 500 unique output paths but got collisions. "
"The race condition fix may have been reverted."
)


def test_make_output_path_lands_in_outputs_subdir():
"""
Output PDFs must be written into an 'outputs/' subdirectory,
not alongside the source template.
"""
path = Filler._make_output_path("src/inputs/template.pdf")
parts = path.replace("\\", "/").split("/")
assert "outputs" in parts, (
f"Output path '{path}' does not contain an 'outputs/' directory. "
"Generated files must be separated from source templates."
)


def test_make_output_path_extension_is_pdf():
"""Output path must end with .pdf."""
path = Filler._make_output_path("src/inputs/template.pdf")
assert path.endswith(".pdf"), f"Expected .pdf extension, got: {path}"


def test_make_output_path_preserves_stem():
"""Output filename must include the original template stem for traceability."""
path = Filler._make_output_path("src/inputs/my_form.pdf")
filename = os.path.basename(path)
assert filename.startswith("my_form_"), (
f"Expected filename to start with 'my_form_', got: {filename}"
)