Commit 0b09089

feat: add support for reasoning llm with thought visualization (#652) bump:patch
* fix: lanceDB query with empty file_ids
* feat: add thinking display
* feat: add low request mode for local llm
1 parent f5b2200 commit 0b09089

File tree

libs/kotaemon/kotaemon/indices/qa/utils.py
libs/kotaemon/kotaemon/storages/docstores/lancedb.py
libs/ktem/ktem/index/file/pipelines.py
libs/ktem/ktem/pages/chat/__init__.py
libs/ktem/ktem/reasoning/simple.py

5 files changed: +53 −9 lines

libs/kotaemon/kotaemon/indices/qa/utils.py

Lines changed: 15 additions & 0 deletions
@@ -80,3 +80,18 @@ def find_start_end_phrase(
     final_match = None
 
     return final_match, matched_length
+
+
+def replace_think_tag_with_details(text):
+    text = text.replace(
+        "<think>",
+        '<details><summary><span style="color:grey">Thought</span></summary><blockquote>',  # noqa
+    )
+    text = text.replace("</think>", "</blockquote></details>")
+    return text
+
+
+def strip_think_tag(text):
+    if "</think>" in text:
+        text = text.split("</think>")[1]
+    return text
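
A quick sketch of how these two helpers behave on a typical reasoning-model response; the sample text below is illustrative, not taken from the codebase:

from kotaemon.indices.qa.utils import replace_think_tag_with_details, strip_think_tag

# Reasoning models wrap their chain of thought in <think>...</think> before the answer.
raw = "<think>The user asks about X, so cite doc 2.</think>The answer is Y."

# For chat rendering: the thought becomes a collapsible <details> block.
rendered = replace_think_tag_with_details(raw)
# -> '<details><summary>...Thought...</summary><blockquote>The user asks about X, so cite doc 2.'
#    '</blockquote></details>The answer is Y.'

# For plain-text uses (e.g. suggesting a conversation name): drop the thought entirely.
clean = strip_think_tag(raw)
assert clean == "The answer is Y."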

libs/kotaemon/kotaemon/storages/docstores/lancedb.py

Lines changed: 3 additions & 0 deletions
@@ -98,6 +98,9 @@ def get(self, ids: Union[List[str], str]) -> List[Document]:
         if not isinstance(ids, list):
             ids = [ids]
 
+        if len(ids) == 0:
+            return []
+
         id_filter = ", ".join([f"'{_id}'" for _id in ids])
         try:
             document_collection = self.db_connection.open_table(self.collection_name)
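
The early return guards against building an empty SQL-style filter further down. A minimal sketch of the failure mode it avoids; the exact `where` clause is assumed from the surrounding code, not shown in this hunk:

ids: list[str] = []  # e.g. a query scoped to no file_ids
id_filter = ", ".join([f"'{_id}'" for _id in ids])
# id_filter == "", so a predicate built as f"id in ({id_filter})" would render as
# "id in ()" -- malformed -- instead of simply matching nothing.
# Returning [] before the filter is built sidesteps the broken query entirely.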

libs/ktem/ktem/index/file/pipelines.py

Lines changed: 2 additions & 1 deletion
@@ -14,6 +14,7 @@
 from typing import Generator, Optional, Sequence
 
 import tiktoken
+from decouple import config
 from ktem.db.models import engine
 from ktem.embeddings.manager import embedding_models_manager
 from ktem.llms.manager import llms
@@ -270,7 +271,7 @@ def get_user_settings(cls) -> dict:
             },
             "use_llm_reranking": {
                 "name": "Use LLM relevant scoring",
-                "value": True,
+                "value": not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool),
                 "choices": [True, False],
                 "component": "checkbox",
             },
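
`decouple.config` reads the flag from the environment or a `.env` file, so the new default can be flipped without touching code. A minimal sketch, assuming the variable is set the usual way:

# .env (or exported in the shell):
# USE_LOW_LLM_REQUESTS=true

from decouple import config

low_request_mode = config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
# With cast=bool, decouple accepts strings like "true"/"false" or "1"/"0".
use_llm_reranking_default = not low_request_mode  # False when low-request mode is on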

libs/ktem/ktem/pages/chat/__init__.py

Lines changed: 19 additions & 7 deletions
@@ -5,6 +5,7 @@
 from typing import Optional
 
 import gradio as gr
+from decouple import config
 from ktem.app import BasePage
 from ktem.components import reasonings
 from ktem.db.models import Conversation, engine
@@ -23,6 +24,7 @@
 
 from kotaemon.base import Document
 from kotaemon.indices.ingests.files import KH_DEFAULT_FILE_EXTRACTORS
+from kotaemon.indices.qa.utils import strip_think_tag
 
 from ...utils import SUPPORTED_LANGUAGE_MAP, get_file_names_regex, get_urls
 from ...utils.commands import WEB_SEARCH_COMMAND
@@ -367,13 +369,22 @@ def on_building_ui(self):
             elem_id="citation-dropdown",
         )
 
-        self.use_mindmap = gr.State(value=True)
-        self.use_mindmap_check = gr.Checkbox(
-            label="Mindmap (on)",
-            container=False,
-            elem_id="use-mindmap-checkbox",
-            value=True,
-        )
+        if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool):
+            self.use_mindmap = gr.State(value=True)
+            self.use_mindmap_check = gr.Checkbox(
+                label="Mindmap (on)",
+                container=False,
+                elem_id="use-mindmap-checkbox",
+                value=True,
+            )
+        else:
+            self.use_mindmap = gr.State(value=False)
+            self.use_mindmap_check = gr.Checkbox(
+                label="Mindmap (off)",
+                container=False,
+                elem_id="use-mindmap-checkbox",
+                value=False,
+            )
 
         with gr.Column(
             scale=INFO_PANEL_SCALES[False], elem_id="chat-info-panel"
@@ -1361,6 +1372,7 @@ def check_and_suggest_name_conv(self, chat_history):
         # check if this is a newly created conversation
         if len(chat_history) == 1:
             suggested_name = suggest_pipeline(chat_history).text
+            suggested_name = strip_think_tag(suggested_name)
             suggested_name = suggested_name.replace('"', "").replace("'", "")[:40]
             new_name = gr.update(value=suggested_name)
             renamed = True
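
With a reasoning model behind `suggest_pipeline`, the suggested title can arrive with its chain of thought attached; the new call drops it before the existing cleanup runs. A sketch of the combined effect, with illustrative sample text:

from kotaemon.indices.qa.utils import strip_think_tag

suggested_name = '<think>A short, descriptive title is needed.</think>"Kotaemon setup questions"'

suggested_name = strip_think_tag(suggested_name)                        # drop the reasoning
suggested_name = suggested_name.replace('"', "").replace("'", "")[:40]  # strip quotes, cap at 40 chars
assert suggested_name == "Kotaemon setup questions"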

libs/ktem/ktem/reasoning/simple.py

Lines changed: 14 additions & 1 deletion
@@ -3,6 +3,7 @@
 from textwrap import dedent
 from typing import Generator
 
+from decouple import config
 from ktem.embeddings.manager import embedding_models_manager as embeddings
 from ktem.llms.manager import llms
 from ktem.reasoning.prompt_optimization import (
@@ -29,6 +30,7 @@
 )
 from kotaemon.indices.qa.citation_qa_inline import AnswerWithInlineCitation
 from kotaemon.indices.qa.format_context import PrepareEvidencePipeline
+from kotaemon.indices.qa.utils import replace_think_tag_with_details
 from kotaemon.llms import ChatLLM
 
 from ..utils import SUPPORTED_LANGUAGE_MAP
@@ -313,6 +315,13 @@ def generate_relevant_scores():
             **kwargs,
         )
 
+        # check <think> tag from reasoning models
+        processed_answer = replace_think_tag_with_details(answer.text)
+        if processed_answer != answer.text:
+            # clear the chat message and render again
+            yield Document(channel="chat", content=None)
+            yield Document(channel="chat", content=processed_answer)
+
         # show the evidence
         if scoring_thread:
             scoring_thread.join()
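
Per the inline comment, a `Document` with `content=None` on the chat channel clears the partially streamed message so the processed text (with its collapsible Thought block) can replace it. A minimal sketch of the pattern in isolation; the helper name is hypothetical and the clearing behavior is assumed from that comment:

from typing import Generator

from kotaemon.base import Document
from kotaemon.indices.qa.utils import replace_think_tag_with_details


def finalize_answer(answer_text: str) -> Generator[Document, None, None]:
    # Hypothetical helper, not part of the commit: it isolates the re-render step.
    processed = replace_think_tag_with_details(answer_text)
    if processed != answer_text:  # a <think> block was present
        yield Document(channel="chat", content=None)       # clear the streamed message
        yield Document(channel="chat", content=processed)  # render the foldable version
    else:
        yield Document(channel="chat", content=answer_text)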
@@ -410,7 +419,11 @@ def get_user_settings(cls) -> dict:
             },
             "highlight_citation": {
                 "name": "Citation style",
-                "value": "highlight",
+                "value": (
+                    "highlight"
+                    if not config("USE_LOW_LLM_REQUESTS", default=False, cast=bool)
+                    else "off"
+                ),
                 "component": "radio",
                 "choices": [
                     ("citation: highlight", "highlight"),
