Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions src/filler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pdfrw import PdfReader, PdfWriter
from src.llm import LLM
from datetime import datetime

import re

class Filler:
def __init__(self):
Expand All @@ -12,19 +12,36 @@ def fill_form(self, pdf_form: str, llm: LLM):
Fill a PDF form with values from user_input using LLM.
Fields are filled in the visual order (top-to-bottom, left-to-right).
"""
output_pdf = (
pdf_form[:-4]
+ "_"
+ datetime.now().strftime("%Y%m%d_%H%M%S")
+ "_filled.pdf"
)

# Run the LLM extraction loop and collect its answers as a dictionary
t2j = llm.main_loop()
textbox_answers = t2j.get_data() # This is a dictionary


target_keys = ["Employee's name", "Name", "Full Name", "Patient Name"]
extracted_name = None

for key in target_keys:
if key in textbox_answers and textbox_answers[key]:
val = textbox_answers[key]
# If the LLM returned a list (plural values), grab the first item
extracted_name = str(val[0]) if isinstance(val, list) else str(val)
break

# Create the output filename, preferring the extracted name when one was found
if extracted_name and extracted_name != "-1":
# Clean illegal characters and spaces
safe_name = re.sub(r'[^a-zA-Z0-9_\-]', '_', extracted_name.replace(' ', '_'))
safe_name = re.sub(r'_+', '_', safe_name).strip('_')
output_pdf = f"{pdf_form[:-4]}_{safe_name}.pdf"
else:
# Fall back to the original timestamp method if no name is found (or the extracted value is "-1")
output_pdf = (
pdf_form[:-4]
+ "_"
+ datetime.now().strftime("%Y%m%d_%H%M%S")
+ "_filled.pdf"
)

answers_list = list(textbox_answers.values())

# Read PDF
pdf = PdfReader(pdf_form)

Expand Down
2 changes: 1 addition & 1 deletion src/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def build_prompt(self, current_field):

def main_loop(self):
# self.type_check_all()
for field in self._target_fields.keys():
for field in self._target_fields:
prompt = self.build_prompt(field)
# print(prompt)
# ollama_url = "http://localhost:11434/api/generate"
Expand Down
5 changes: 3 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from commonforms import prepare_form
from pypdf import PdfReader
from controller import Controller
from typing import Union

def input_fields(num_fields: int):
fields = []
Expand Down Expand Up @@ -68,7 +69,7 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
if __name__ == "__main__":
file = "./src/inputs/file.pdf"
user_input = "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is <Mamañema>, and the date is 01/02/2005"
fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
descriptive_fields = ["Employee's name", "Employee's job title", "Employee's department supervisor", "Employee's phone number", "Employee's email", "Signature", "Date"]
prepared_pdf = "temp_outfile.pdf"
prepare_form(file, prepared_pdf)

Expand All @@ -80,4 +81,4 @@ def run_pdf_fill_process(user_input: str, definitions: list, pdf_form_path: Unio
num_fields = 0

controller = Controller()
controller.fill_form(user_input, fields, file)
controller.fill_form(user_input, descriptive_fields, file)