1 | | -# backend\ai_search.py |
| 1 | +# File Version: 1.3.0 |
| 2 | +# /backend/ai_search.py |
| 3 | + |
| 4 | +# Copyright (c) 2025 Ali Kazemi |
| 5 | +# Licensed under MPL 2.0 |
| 6 | +# This file is part of a derivative work and must retain this notice. |
2 | 7 | |
3 | 8 | """ |
4 | 9 | # Precision File Search |
11 | 16 | This module uses Large Language Models (LLMs) via the LangChain library to create |
12 | 17 | a multi-step, intelligent search pipeline. It goes beyond simple keyword matching |
13 | 18 | by first understanding the user's intent and then executing the most appropriate |
14 | | -search strategy. |
| 19 | +search strategy. This version assumes the use of a multilingual embedding model, |
| 20 | +so it passes the user's query directly to the search pipeline without translation. |
15 | 21 | |
16 | 22 | The core orchestration logic is in `run_ai_search`, which performs the following: |
17 | 23 | 1. **Intent Routing (`route_user_query`):** First, an LLM determines if the user |
@@ -359,37 +365,38 @@ async def run_file_search_pipeline( |
359 | 365 | |
360 | 366 | return summarize_results_with_llm(query, raw_results, strategy, temperature, max_tokens) |
361 | 367 | |
362 | | -# 9. MAIN ORCHESTRATOR ########################################################################################## |
| 368 | +# 9. MAIN ORCHESTRATOR (SIMPLIFIED) ############################################################################### |
363 | 369 | async def run_ai_search( |
364 | 370 | query: str, temperature: float, max_tokens: int, k_fetch_initial: Optional[int], |
365 | 371 | vector_score_threshold: Optional[float], vector_top_n: Optional[int], |
366 | 372 | enable_reranker: Optional[bool], rerank_top_n: Optional[int], rerank_score_threshold: Optional[float] |
367 | 373 | ) -> Dict[str, Any]: |
368 | 374 | """ |
369 | | - The main orchestration function. It routes the user query to the appropriate handler. |
| 375 | + The main orchestration function. It takes the user's query directly and routes it. |
| 376 | + This architecture relies on a multilingual embedding model for non-English queries. |
370 | 377 | """ |
371 | 378 | try: |
372 | 379 | clean_query = query.strip() |
| 380 | + |
373 | 381 | if clean_query.upper().startswith("PFS:"): |
374 | 382 | logger.info("Forcing Knowledge Base search due to 'PFS:' prefix.") |
375 | 383 | actual_question = clean_query[4:].strip() |
376 | 384 | if not actual_question: |
377 | | - logger.warning("User provided 'PFS:' prefix with no question.") |
378 | 385 | return { |
379 | 386 | "summary": "### Knowledge Base Search\n\nYou used the `PFS:` prefix, which is for searching the application's built-in documentation. Please provide a question after the prefix.\n\n**For example:** `PFS: how does the reranker work?`", |
380 | 387 | "relevant_files": [] |
381 | 388 | } |
382 | 389 | return answer_from_knowledge_base(actual_question, temperature, max_tokens) |
383 | 390 | |
384 | | - routing_result = route_user_query(query) |
| 391 | + routing_result = route_user_query(clean_query) |
385 | 392 | intent = routing_result.get("intent") |
386 | 393 | |
387 | 394 | if intent == "app_knowledge_query": |
388 | | - return answer_from_knowledge_base(query, temperature, max_tokens) |
| 395 | + return answer_from_knowledge_base(clean_query, temperature, max_tokens) |
389 | 396 | |
390 | 397 | elif intent == "file_search_query": |
391 | 398 | return await run_file_search_pipeline( |
392 | | - query, temperature, max_tokens, k_fetch_initial, vector_score_threshold, |
| 399 | + clean_query, temperature, max_tokens, k_fetch_initial, vector_score_threshold, |
393 | 400 | vector_top_n, enable_reranker, rerank_top_n, rerank_score_threshold |
394 | 401 | ) |
395 | 402 | else: |
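
For orientation, here is a minimal caller sketch for the simplified orchestrator shown in the diff. The import path (`backend.ai_search`) and the concrete parameter values are illustrative assumptions; only the `run_ai_search` signature, the `PFS:` prefix behaviour, and the `summary` key of the returned dict come from the diff itself.

```python
# Hypothetical usage sketch -- module path and parameter values are assumptions,
# not part of the diff above.
import asyncio

from backend.ai_search import run_ai_search  # assumed import path

async def main() -> None:
    # A "PFS:" prefix forces a Knowledge Base lookup; any other query is
    # routed by intent to the knowledge base or the file-search pipeline.
    result = await run_ai_search(
        query="PFS: how does the reranker work?",
        temperature=0.2,
        max_tokens=1024,
        k_fetch_initial=None,            # None values are assumed to fall back to pipeline defaults
        vector_score_threshold=None,
        vector_top_n=None,
        enable_reranker=None,
        rerank_top_n=None,
        rerank_score_threshold=None,
    )
    print(result["summary"])

if __name__ == "__main__":
    asyncio.run(main())
```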