diff --git a/src/filler.py b/src/filler.py index e31e535..3502566 100644 --- a/src/filler.py +++ b/src/filler.py @@ -1,7 +1,7 @@ from pdfrw import PdfReader, PdfWriter from src.llm import LLM from datetime import datetime - +import re class Filler: def __init__(self): @@ -12,19 +12,36 @@ def fill_form(self, pdf_form: str, llm: LLM): Fill a PDF form with values from user_input using LLM. Fields are filled in the visual order (top-to-bottom, left-to-right). """ - output_pdf = ( - pdf_form[:-4] - + "_" - + datetime.now().strftime("%Y%m%d_%H%M%S") - + "_filled.pdf" - ) - # Generate dictionary of answers from your original function t2j = llm.main_loop() textbox_answers = t2j.get_data() # This is a dictionary - + + target_keys = ["Employee's name", "Name", "Full Name", "Patient Name"] + extracted_name = None + + for key in target_keys: + if key in textbox_answers and textbox_answers[key]: + val = textbox_answers[key] + # If the LLM returned a list (plural values), grab the first item + extracted_name = str(val[0]) if isinstance(val, list) else str(val) + break + + # 3. Create the output filename + if extracted_name and extracted_name != "-1": + # Clean illegal characters and spaces + safe_name = re.sub(r'[^a-zA-Z0-9_\-]', '_', extracted_name.replace(' ', '_')) + safe_name = re.sub(r'_+', '_', safe_name).strip('_') + output_pdf = f"{pdf_form[:-4]}_{safe_name}.pdf" + else: + # Fallback to the original timestamp method if no name is found + output_pdf = ( + pdf_form[:-4] + + "_" + + datetime.now().strftime("%Y%m%d_%H%M%S") + + "_filled.pdf" + ) + answers_list = list(textbox_answers.values()) - # Read PDF pdf = PdfReader(pdf_form) diff --git a/src/llm.py b/src/llm.py index 70937f9..3ed6761 100644 --- a/src/llm.py +++ b/src/llm.py @@ -46,7 +46,7 @@ def build_prompt(self, current_field): def main_loop(self): # self.type_check_all() - for field in self._target_fields.keys(): + for field in self._target_fields: prompt = self.build_prompt(field) # print(prompt) # ollama_url = "http://localhost:11434/api/generate" diff --git a/src/main.py b/src/main.py index 5bb632b..54f6b0a 100644 --- a/src/main.py +++ b/src/main.py @@ -3,6 +3,7 @@ from commonforms import prepare_form from pypdf import PdfReader from controller import Controller +from typing import Union def input_fields(num_fields: int): fields = [] @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio if __name__ == "__main__": file = "./src/inputs/file.pdf" user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is , and the date is 01/02/2005" - fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] + descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"] prepared_pdf = "temp_outfile.pdf" prepare_form(file, prepared_pdf) @@ -80,4 +81,4 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio num_fields = 0 controller = Controller() - controller.fill_form(user_input, fields, file) + controller.fill_form(user_input, descriptive_fields, file)