1 | | -# backend\ai_search.py |
| 1 | +# File Version: 1.3.0 |
| 2 | +# /backend/ai_search.py |
| 3 | + |
| 4 | +# Copyright (c) 2025 Ali Kazemi |
| 5 | +# Licensed under MPL 2.0 |
| 6 | +# This file is part of a derivative work and must retain this notice. |
2 | 7 | |
3 | 8 | """ |
4 | 9 | # Precision File Search |
11 | 16 | This module uses Large Language Models (LLMs) via the LangChain library to create |
12 | 17 | a multi-step, intelligent search pipeline. It goes beyond simple keyword matching |
13 | 18 | by first understanding the user's intent and then executing the most appropriate |
14 | | -search strategy. |
| 19 | +search strategy. This version assumes the use of a multilingual embedding model, |
| 20 | +so it passes the user's query directly to the search pipeline without translation. |
15 | 21 | |
16 | 22 | The core orchestration logic is in `run_ai_search`, which performs the following: |
17 | 23 | 1. **Intent Routing (`route_user_query`):** First, an LLM determines if the user |
@@ -359,37 +365,38 @@ async def run_file_search_pipeline( |
359 | 365 | |
360 | 366 | return summarize_results_with_llm(query, raw_results, strategy, temperature, max_tokens) |
361 | 367 | |
362 | | -# 9. MAIN ORCHESTRATOR ########################################################################################## |
| 368 | +# 9. MAIN ORCHESTRATOR (SIMPLIFIED) ############################################################################### |
363 | 369 | async def run_ai_search( |
364 | 370 | query: str, temperature: float, max_tokens: int, k_fetch_initial: Optional[int], |
365 | 371 | vector_score_threshold: Optional[float], vector_top_n: Optional[int], |
366 | 372 | enable_reranker: Optional[bool], rerank_top_n: Optional[int], rerank_score_threshold: Optional[float] |
367 | 373 | ) -> Dict[str, Any]: |
368 | 374 | """ |
369 | | - The main orchestration function. It routes the user query to the appropriate handler. |
| 375 | + The main orchestration function. It takes the user's query directly and routes it. |
| 376 | + This architecture relies on a multilingual embedding model for non-English queries. |
370 | 377 | """ |
371 | 378 | try: |
372 | 379 | clean_query = query.strip() |
| 380 | + |
373 | 381 | if clean_query.upper().startswith("PFS:"): |
374 | 382 | logger.info("Forcing Knowledge Base search due to 'PFS:' prefix.") |
375 | 383 | actual_question = clean_query[4:].strip() |
376 | 384 | if not actual_question: |
377 | | - logger.warning("User provided 'PFS:' prefix with no question.") |
378 | 385 | return { |
379 | 386 | "summary": "### Knowledge Base Search\n\nYou used the `PFS:` prefix, which is for searching the application's built-in documentation. Please provide a question after the prefix.\n\n**For example:** `PFS: how does the reranker work?`", |
380 | 387 | "relevant_files": [] |
381 | 388 | } |
382 | 389 | return answer_from_knowledge_base(actual_question, temperature, max_tokens) |
383 | 390 | |
384 | | - routing_result = route_user_query(query) |
| 391 | + routing_result = route_user_query(clean_query) |
385 | 392 | intent = routing_result.get("intent") |
386 | 393 | |
387 | 394 | if intent == "app_knowledge_query": |
388 | | - return answer_from_knowledge_base(query, temperature, max_tokens) |
| 395 | + return answer_from_knowledge_base(clean_query, temperature, max_tokens) |
389 | 396 | |
390 | 397 | elif intent == "file_search_query": |
391 | 398 | return await run_file_search_pipeline( |
392 | | - query, temperature, max_tokens, k_fetch_initial, vector_score_threshold, |
| 399 | + clean_query, temperature, max_tokens, k_fetch_initial, vector_score_threshold, |
393 | 400 | vector_top_n, enable_reranker, rerank_top_n, rerank_score_threshold |
394 | 401 | ) |
395 | 402 | else: |
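
For orientation, here is a minimal caller sketch for the simplified orchestrator shown in the diff. The import path (`backend.ai_search`) and the concrete parameter values are illustrative assumptions; only the `run_ai_search` signature, the `PFS:` prefix behaviour, and the `summary` key of the returned dict come from the diff itself.

```python
# Hypothetical usage sketch -- module path and parameter values are assumptions,
# not part of the diff above.
import asyncio

from backend.ai_search import run_ai_search  # assumed import path

async def main() -> None:
    # A "PFS:" prefix forces a Knowledge Base lookup; any other query is
    # routed by intent to the knowledge base or the file-search pipeline.
    result = await run_ai_search(
        query="PFS: how does the reranker work?",
        temperature=0.2,
        max_tokens=1024,
        k_fetch_initial=None,            # None values are assumed to fall back to pipeline defaults
        vector_score_threshold=None,
        vector_top_n=None,
        enable_reranker=None,
        rerank_top_n=None,
        rerank_score_threshold=None,
    )
    print(result["summary"])

if __name__ == "__main__":
    asyncio.run(main())
```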