Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions src/file_manipulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,19 @@ def __init__(self):

def create_template(self, pdf_path: str):
    """
    Create an editable PDF template from ``pdf_path`` in the OS temp directory.

    The template is produced by commonforms' ``prepare_form`` and written to a
    file freshly created with ``tempfile.mkstemp``. Only the stem of
    ``pdf_path`` is used in the template's file name, so directory components
    of the input never influence where the template is written.

    Args:
        pdf_path: Path of the source PDF to convert.

    Returns:
        Path of the generated template file, as a string.

    Raises:
        Exception: whatever ``prepare_form`` raises is re-raised unchanged,
            after the temporary file has been removed.
    """
    # Imported locally so the method does not depend on module-level imports.
    import os
    import tempfile
    from pathlib import Path

    safe_original_name = Path(pdf_path).stem  # "file" from "../../../file.pdf"

    fd, temp_path = tempfile.mkstemp(
        suffix="_template.pdf", prefix=f"{safe_original_name}_"
    )
    # mkstemp hands back an open descriptor; close it so prepare_form can
    # write to the file by path.
    os.close(fd)

    try:
        prepare_form(pdf_path, temp_path)
    except Exception:
        # Do not leave an orphaned temp file behind when conversion fails.
        Path(temp_path).unlink(missing_ok=True)
        raise
    return temp_path

def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
"""
Expand Down
21 changes: 13 additions & 8 deletions src/filler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ def __init__(self):

def fill_form(self, pdf_form: str, llm: LLM):
"""
Fill a PDF form with values from user_input using LLM.
Fields are filled in the visual order (top-to-bottom, left-to-right).
Fill a PDF form with values from user_input using LLM safely.
"""
output_pdf = (
pdf_form[:-4]
+ "_"
+ datetime.now().strftime("%Y%m%d_%H%M%S")
+ "_filled.pdf"
)
from pathlib import Path

# 1. Sanitize the path to block traversal attacks
safe_name = Path(pdf_form).stem

# 2. Ensure a secure output directory exists
output_dir = Path("outputs")
output_dir.mkdir(parents=True, exist_ok=True)

# 3. Safely construct the final path
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_pdf = str(output_dir / f"{safe_name}_{timestamp}_filled.pdf")

# Generate dictionary of answers from your original function
t2j = llm.main_loop()
Expand Down
2 changes: 1 addition & 1 deletion src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def build_prompt(self, current_field):

def main_loop(self):
# self.type_check_all()
for field in self._target_fields.keys():
for field in self._target_fields:
prompt = self.build_prompt(field)
# print(prompt)
# ollama_url = "http://localhost:11434/api/generate"
Expand Down
5 changes: 3 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from commonforms import prepare_form
from pypdf import PdfReader
from controller import Controller
from typing import Union

def input_fields(num_fields: int):
fields = []
Expand Down Expand Up @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
if __name__ == "__main__":
file = "./src/inputs/file.pdf"
user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
prepared_pdf = "temp_outfile.pdf"
prepare_form(file, prepared_pdf)

Expand All @@ -80,4 +81,4 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
num_fields = 0

controller = Controller()
controller.fill_form(user_input, fields, file)
controller.fill_form(user_input, descriptive_fields, file)