Skip to content

Commit d4401e1

Browse files
Merge pull request #17 from PFS-AI/DEVELOPMENT
#10--Non-English-Semantic-Search-Support
2 parents b09094b + 6238c47 commit d4401e1

File tree

9 files changed

+72
-32
lines changed

9 files changed

+72
-32
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ This guide provides instructions for both regular users (via a simple installer)
153153

154154
The easiest way to install Precision File Search is to download the latest official installer for Windows.
155155

156-
[![Latest Release](https://img.shields.io/badge/Download-V1.0.16-blueviolet?style=for-the-badge)](https://github.com/PFS-AI/PFS/releases/latest)
156+
[![Latest Release](https://img.shields.io/badge/Download-V1.1.0-blueviolet?style=for-the-badge)](https://github.com/PFS-AI/PFS/releases/latest)
157157

158158
1. Click the button above to go to the latest release page.
159159
2. Under the **Assets** section, download the `PFS-SETUP_vX.X.X.exe` file.

backend/ai_search.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
# backend\ai_search.py
1+
# File Version: 1.3.0
2+
# /backend/ai_search.py
3+
4+
# Copyright (c) 2025 Ali Kazemi
5+
# Licensed under MPL 2.0
6+
# This file is part of a derivative work and must retain this notice.
27

38
"""
49
# Precision File Search
@@ -11,7 +16,8 @@
1116
This module uses Large Language Models (LLMs) via the LangChain library to create
1217
a multi-step, intelligent search pipeline. It goes beyond simple keyword matching
1318
by first understanding the user's intent and then executing the most appropriate
14-
search strategy.
19+
search strategy. This version assumes the use of a multilingual embedding model,
20+
so it passes the user's query directly to the search pipeline without translation.
1521
1622
The core orchestration logic is in `run_ai_search`, which performs the following:
1723
1. **Intent Routing (`route_user_query`):** First, an LLM determines if the user
@@ -359,37 +365,38 @@ async def run_file_search_pipeline(
359365

360366
return summarize_results_with_llm(query, raw_results, strategy, temperature, max_tokens)
361367

362-
# 9. MAIN ORCHESTRATOR ##########################################################################################
368+
# 9. MAIN ORCHESTRATOR (SIMPLIFIED) ###############################################################################
363369
async def run_ai_search(
364370
query: str, temperature: float, max_tokens: int, k_fetch_initial: Optional[int],
365371
vector_score_threshold: Optional[float], vector_top_n: Optional[int],
366372
enable_reranker: Optional[bool], rerank_top_n: Optional[int], rerank_score_threshold: Optional[float]
367373
) -> Dict[str, Any]:
368374
"""
369-
The main orchestration function. It routes the user query to the appropriate handler.
375+
The main orchestration function. It takes the user's query directly and routes it.
376+
This architecture relies on a multilingual embedding model for non-English queries.
370377
"""
371378
try:
372379
clean_query = query.strip()
380+
373381
if clean_query.upper().startswith("PFS:"):
374382
logger.info("Forcing Knowledge Base search due to 'PFS:' prefix.")
375383
actual_question = clean_query[4:].strip()
376384
if not actual_question:
377-
logger.warning("User provided 'PFS:' prefix with no question.")
378385
return {
379386
"summary": "### Knowledge Base Search\n\nYou used the `PFS:` prefix, which is for searching the application's built-in documentation. Please provide a question after the prefix.\n\n**For example:** `PFS: how does the reranker work?`",
380387
"relevant_files": []
381388
}
382389
return answer_from_knowledge_base(actual_question, temperature, max_tokens)
383390

384-
routing_result = route_user_query(query)
391+
routing_result = route_user_query(clean_query)
385392
intent = routing_result.get("intent")
386393

387394
if intent == "app_knowledge_query":
388-
return answer_from_knowledge_base(query, temperature, max_tokens)
395+
return answer_from_knowledge_base(clean_query, temperature, max_tokens)
389396

390397
elif intent == "file_search_query":
391398
return await run_file_search_pipeline(
392-
query, temperature, max_tokens, k_fetch_initial, vector_score_threshold,
399+
clean_query, temperature, max_tokens, k_fetch_initial, vector_score_threshold,
393400
vector_top_n, enable_reranker, rerank_top_n, rerank_score_threshold
394401
)
395402
else:

backend/config_manager.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# File Version: 1.1.0
1+
# File Version: 1.4.0
22
# /backend/config_manager.py
33

44
# Copyright (c) 2025 Ali Kazemi
@@ -75,8 +75,6 @@ def get_user_data_dir() -> str:
7575
DATA_FOLDER = get_user_data_dir()
7676
CONFIG_DB = os.path.join(DATA_FOLDER, "app_config.db")
7777

78-
# Block Version: 1.1.0
79-
# Define a dedicated folder for storing downloaded AI models.
8078
MODELS_FOLDER = os.path.join(DATA_FOLDER, "models")
8179
os.makedirs(MODELS_FOLDER, exist_ok=True)
8280

@@ -116,8 +114,9 @@ def get_user_data_dir() -> str:
116114
"rerank_top_n": 10,
117115
"rerank_score_threshold": 0.5
118116
},
117+
# Block Version: 1.1.0
119118
"embedding_model": {
120-
"model_name": "",
119+
"model_name": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
121120
"device": "auto"
122121
},
123122
"reranker_model": {

backend/routes.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# backend\routes.py
1+
# File Version: 1.2.0
2+
# /backend/routes.py
23

3-
"""
4-
# Precision File Search
54
# Copyright (c) 2025 Ali Kazemi
65
# Licensed under MPL 2.0
76
# This file is part of a derivative work and must retain this notice.
87

8+
"""
99
Defines the API layer for the backend using FastAPI.
1010
1111
This module sets up all the HTTP and WebSocket endpoints that the frontend

main.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# File Version: 1.2.0
1+
# File Version: 1.4.0
22
# /main.py
33

44
# Copyright (c) 2025 Ali Kazemi
@@ -49,7 +49,7 @@
4949
from backend.logging_config import setup_logging, LOG_LEVELS, DEFAULT_LOG_LEVEL
5050

5151
load_dotenv()
52-
logger = None
52+
logger = None # Will be initialized after setup_logging
5353

5454
# 2. APPLICATION INITIALIZATION HELPERS #########################################################################
5555
def init_db():
@@ -86,9 +86,12 @@ def initialize_ai_models():
8686

8787
logger.info("Verifying AI model configuration...")
8888
try:
89+
# This import triggers the model loading logic in semantic_search.py and rag_pipeline.py
8990
from backend import semantic_search
9091

92+
# Accessing the variables ensures they are initialized if configured
9193
if semantic_search.EMBEDDINGS is None:
94+
# This is expected on first run. It's only an error if a model name IS configured.
9295
if semantic_search.EMBEDDING_CONFIG.get("model_name"):
9396
raise RuntimeError("Embedding model is configured but failed to initialize.")
9497
else:
@@ -105,7 +108,6 @@ def initialize_ai_models():
105108
except Exception as e:
106109
logger.critical(f"A critical error occurred during AI model initialization: {e}", exc_info=True)
107110

108-
# Block Version: 1.2.0
109111
def warm_up_unstructured():
110112
"""
111113
Warms up the 'unstructured' library by running a trivial partition.
@@ -154,7 +156,7 @@ async def lifespan(app: FastAPI):
154156
app = FastAPI(
155157
title="Precision File Search (PFS)",
156158
description="A local file search and classification application with an advanced RAG retrieval engine.",
157-
version="1.0.0",
159+
version="1.2.0",
158160
lifespan=lifespan
159161
)
160162
app.mount("/static", StaticFiles(directory="static"), name="static")
@@ -214,7 +216,6 @@ async def get_license():
214216
unstructured_warmup_thread = threading.Thread(target=warm_up_unstructured, daemon=True)
215217
unstructured_warmup_thread.start()
216218

217-
218219
startup_event = threading.Event()
219220
app.state.startup_event = startup_event
220221
config = uvicorn.Config(app, host="127.0.0.1", port=9090, log_config=None)

pyproject.toml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# File Version: 1.3.0
2+
# /pyproject.toml
3+
14
# ==============================================================================
25
# Precision File Search (PFS) - pyproject.toml
36
# ==============================================================================
@@ -14,7 +17,7 @@ build-backend = "setuptools.build_meta"
1417
[project]
1518
# --- Core Project Metadata ---
1619
name = "precision-file-search"
17-
version = "1.0.0"
20+
version = "1.3.0"
1821
authors = [
1922
{ name = "Ali Kazemi", email = "[email protected]" },
2023
]
@@ -41,8 +44,6 @@ keywords = ["search", "rag", "ai", "fastapi", "llm", "semantic-search", "classif
4144

4245

4346
# --- Project Dependencies ---
44-
# These are the packages required to run the application. No versions are
45-
# specified, so the latest versions will be installed by default.
4647
dependencies = [
4748
# Web Framework & Server
4849
"fastapi",
@@ -77,17 +78,17 @@ dependencies = [
7778

7879
# Vector Database Client
7980
"qdrant-client",
80-
81+
8182
# Other utilities that might be required by dependencies
8283
"hf_xet",
8384
"dill",
8485
]
8586

8687
[project.urls]
87-
Homepage = "https://github.com/Eng-AliKazemi/PFS"
88-
"Bug Tracker" = "https://github.com/Eng-AliKazemi/PFS/issues"
88+
Homepage = "https://github.com/PFS-AI/PFS"
89+
"Bug Tracker" = "https://github.com/PFS-AI/PFS/issues"
8990

9091
# --- Tool Specific Configuration ---
9192
[tool.setuptools.packages.find]
9293
where = ["."]
93-
include = ["backend*"]
94+
include = ["backend*"]

static/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ <h2 class="modal-title"><i class="fas fa-info-circle"></i> <span data-i18n-key="
480480
<a href="https://pfs-ai.github.io/PFS/" target="_blank" rel="noopener noreferrer"><i class="fab fa-github"></i> GitHub</a> |
481481
<a href="/license" target="_blank"><i class="fas fa-file-contract"></i> License</a> |
482482
<span class="footer-item"><i class="fas fa-calendar-alt"></i> 2025</span> |
483-
<span class="footer-item"><i class="fas fa-code-branch"></i> V1.0.16</span>
483+
<span class="footer-item"><i class="fas fa-code-branch"></i> V1.1.0</span>
484484
</div>
485485

486486
<!-- 11. MODAL DIALOGS ####################################################################################### -->

static/style.css

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
/* static\style.css */
1+
/* File Version: 1.3.0 */
2+
/* /static/style.css */
23

34
/* # Precision File Search
45
# Copyright (c) 2025 Ali Kazemi
@@ -394,3 +395,25 @@ input[type="range"]::-moz-range-thumb { width: 20px; height: 20px; background: v
394395
[dir="rtl"] .checkbox-field { gap: 0 1rem; }
395396
[dir="rtl"] .result-path { margin-right: 0; margin-left: 1rem; }
396397
[dir="rtl"] .history-delete-btn { margin-left: 0; margin-right: auto; }
398+
399+
/* Block Version: 1.3.0 */
400+
#ai-results-section[dir="rtl"] > h2,
401+
#ai-results-section[dir="rtl"] #ai-results h3 {
402+
text-align: right;
403+
}
404+
405+
#ai-results-section[dir="rtl"] .markdown-body,
406+
#ai-results-section[dir="rtl"] .markdown-body p,
407+
#ai-results-section[dir="rtl"] .markdown-body h1,
408+
#ai-results-section[dir="rtl"] .markdown-body h2,
409+
#ai-results-section[dir="rtl"] .markdown-body h3,
410+
#ai-results-section[dir="rtl"] .markdown-body li {
411+
text-align: right;
412+
direction: rtl;
413+
}
414+
415+
#ai-results-section[dir="rtl"] .markdown-body ul,
416+
#ai-results-section[dir="rtl"] .markdown-body ol {
417+
padding-left: 0;
418+
padding-right: 2em;
419+
}

static/ui_handlers.js

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// static/ui_handlers.js
1+
// File Version: 1.2.0
2+
// /static/ui_handlers.js
23

34
// # Precision File Search
45
// # Copyright (c) 2025 Ali Kazemi
@@ -38,8 +39,16 @@ export function initializeAISearchUI(defaults) {
3839
aiMaxTokensValue.textContent = aiMaxTokensSlider.value;
3940
}
4041

42+
// Block Version: 1.2.0
4143
function renderAIResults(data) {
4244
aiResultsDiv.innerHTML = '';
45+
aiResultsSection.removeAttribute('dir');
46+
47+
const currentLangDirection = document.documentElement.dir;
48+
if (currentLangDirection === 'rtl') {
49+
aiResultsSection.setAttribute('dir', 'rtl');
50+
}
51+
4352
const summaryDiv = document.createElement('div');
4453
summaryDiv.className = 'markdown-body';
4554

@@ -101,7 +110,7 @@ export async function performAISearch(event) {
101110
endpoint = '/api/ai/summarize-results';
102111
body = {
103112
query: query,
104-
search_results: lastSemanticResults, // Pass the stored context
113+
search_results: lastSemanticResults,
105114
temperature: parseFloat(aiTemperatureSlider.value),
106115
max_tokens: parseInt(aiMaxTokensSlider.value, 10),
107116
};

0 commit comments

Comments (0)