fireform-core · Cubix33 · Mar 3, 2026 · Mar 3, 2026
diff --git a/src/file_manipulator.py b/src/file_manipulator.py
@@ -2,7 +2,7 @@
 from src.filler import Filler
 from src.llm import LLM
 from commonforms import prepare_form
-
+from src.privacy import PrivacyManager
 
 class FileManipulator:
     def __init__(self):
@@ -31,9 +31,23 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
 
         print("[3] Starting extraction and PDF filling process...")
         try:
+            # --- PRIVACY INTERCEPTION START ---
+            privacy = PrivacyManager()
+            safe_input = privacy.tokenize(user_input)
+
             self.llm._target_fields = fields
-            self.llm._transcript_text = user_input
-            output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm)
+            self.llm._transcript_text = safe_input
+
+            # Execute LLM here
+            self.llm.main_loop()
+            tokenized_dict = self.llm.get_data()
+
+            # Unmask data back to real values
+            real_data_dict = privacy.detokenize(tokenized_dict)
+            # --- PRIVACY INTERCEPTION END ---
+
+            # Pass the unmasked dictionary to the filler
+            output_name = self.filler.fill_form(pdf_form=pdf_form_path, manual_data=real_data_dict)
 
             print("\n----------------------------------")
             print("✅ Process Complete.")
@@ -43,5 +57,4 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
 
         except Exception as e:
             print(f"An error occurred during PDF generation: {e}")
-            # Re-raise the exception so the frontend can handle it
             raise e
diff --git a/src/filler.py b/src/filler.py
@@ -7,7 +7,7 @@ class Filler:
     def __init__(self):
         pass
 
-    def fill_form(self, pdf_form: str, llm: LLM):
+    def fill_form(self, pdf_form: str, manual_data: dict): # Changed parameter
         """
         Fill a PDF form with values from user_input using LLM.
         Fields are filled in the visual order (top-to-bottom, left-to-right).
@@ -19,11 +19,8 @@ def fill_form(self, pdf_form: str, llm: LLM):
             + "_filled.pdf"
         )
 
-        # Generate dictionary of answers from your original function
-        t2j = llm.main_loop()
-        textbox_answers = t2j.get_data()  # This is a dictionary
-
-        answers_list = list(textbox_answers.values())
+        # Generate list from the real_data dictionary passed in
+        answers_list = list(manual_data.values())
 
         # Read PDF
         pdf = PdfReader(pdf_form)

diff --git a/src/privacy.py b/src/privacy.py
@@ -0,0 +1,71 @@
+import re
+import uuid
+import json
+
+class PrivacyManager:
+    """Mask e-mail addresses and phone numbers before text is sent to the LLM.
+
+    ``tokenize`` swaps every detected value for an opaque ``TOKEN_<LABEL>_<HEX>``
+    placeholder and records the mapping on the instance; ``detokenize``
+    restores the original values. The mapping lives on the instance, so
+    ``detokenize`` must be called on the same instance that tokenized the text.
+    """
+
+    def __init__(self):
+        # token -> original value, consumed by detokenize().
+        self._pii_map = {}
+        # (label, original value) -> token, so a repeated value reuses its token.
+        self._tokens_by_value = {}
+        # Simple regex for emails and phone numbers
+        self.patterns = {
+            "EMAIL": r'[\w\.-]+@[\w\.-]+\.\w+',
+            "PHONE": r'\b(?:\+?\d{1,3}[- ]?)?\(?\d{3}\)?[- ]?\d{3}[- ]?\d{4}\b'
+        }
+
+    def _token_for(self, label: str, value: str) -> str:
+        """Return the placeholder for ``value``, creating one on first sight."""
+        key = (label, value)
+        token = self._tokens_by_value.get(key)
+        if token is None:
+            token = f"TOKEN_{label}_{uuid.uuid4().hex[:6].upper()}"
+            # Six hex digits can collide; never let two values share a token.
+            while token in self._pii_map:
+                token = f"TOKEN_{label}_{uuid.uuid4().hex[:6].upper()}"
+            self._pii_map[token] = value
+            self._tokens_by_value[key] = token
+        return token
+
+    def tokenize(self, text: str) -> str:
+        """Return ``text`` with every pattern match replaced by a placeholder.
+
+        Patterns are applied in the order of ``self.patterns``. Each match is
+        substituted at its own position, so a value that is a substring of
+        another match (``bob@x.com`` inside ``jim.bob@x.com``) cannot corrupt it.
+        Empty or ``None`` input is returned unchanged.
+        """
+        if not text:
+            return text
+        tokenized_text = text
+        for label, pattern in self.patterns.items():
+            tokenized_text = re.sub(
+                pattern,
+                lambda match, label=label: self._token_for(label, match.group(0)),
+                tokenized_text,
+            )
+        return tokenized_text
+
+    def detokenize(self, tokenized_data: dict) -> dict:
+        """Return a copy of ``tokenized_data`` with placeholders restored.
+
+        The data is round-tripped through JSON so that placeholders are replaced
+        wherever they occur (keys, values, nested containers).
+
+        Raises:
+            TypeError: if ``tokenized_data`` is not JSON-serializable.
+        """
+        dumped = json.dumps(tokenized_data)
+        for token, original_value in self._pii_map.items():
+            # Insert the value JSON-escaped (minus the surrounding quotes) so a
+            # quote or backslash in it cannot break the serialized document.
+            dumped = dumped.replace(token, json.dumps(original_value)[1:-1])
+        return json.loads(dumped)