11import hashlib
2+ import re
23from contextlib import contextmanager
34from datetime import datetime
45from pathlib import Path
1112from ..models import ConversationInStore , Message
1213from ..utils import g_config
1314from ..utils .singleton import Singleton
14- import re
1515
16- def _normalize_content (content : str ) -> str :
17- """Remove <think>...</think> tags and strip whitespace from content."""
18- # Remove think tags
19- cleaned_content = re .sub (r"<think>.*?</think>\n?" , "" , content , flags = re .DOTALL )
20- # Strip leading/trailing whitespace
21- return cleaned_content .strip ()
2216
def _hash_message(message: Message) -> str:
    """Return the SHA-256 hex digest of one message's JSON representation.

    The message is dumped in JSON mode and serialized with sorted keys so
    the resulting bytes do not depend on field ordering.
    """
    canonical_bytes = orjson.dumps(
        message.model_dump(mode="json"),
        option=orjson.OPT_SORT_KEYS,
    )
    digest = hashlib.sha256(canonical_bytes)
    return digest.hexdigest()
2923
3024
def _hash_conversation(client_id: str, model: str, messages: List[Message]) -> str:
    """Return a SHA-256 hex digest identifying a conversation.

    The digest covers, in this order: the client id, the model name, and the
    per-message hex digest of every message in ``messages``.
    """
    hasher = hashlib.sha256()

    # NOTE(review): the two header fields are fed back-to-back with no
    # separator; changing that would alter every previously computed hash.
    for header_field in (client_id, model):
        hasher.update(header_field.encode("utf-8"))

    # Message order is significant: each digest is appended in sequence.
    for item in messages:
        hasher.update(_hash_message(item).encode("utf-8"))

    return hasher.hexdigest()
4135
@@ -123,7 +117,7 @@ def store(
123117 raise ValueError ("Messages list cannot be empty" )
124118
125119 # Generate hash for the message list
126- message_hash = hash_conversation (conv .client_id , conv .model , conv .messages )
120+ message_hash = _hash_conversation (conv .client_id , conv .model , conv .messages )
127121 storage_key = custom_key or message_hash
128122
129123 # Prepare data for storage
@@ -178,23 +172,6 @@ def get(self, key: str) -> Optional[ConversationInStore]:
178172 logger .error (f"Failed to retrieve messages for key { key } : { e } " )
179173 return None
180174
181- def clean_assistant_messages (self , messages : List [Message ]) -> List [Message ]:
182- """Create a new list of messages with assistant content cleaned."""
183- cleaned_messages = []
184- for msg in messages :
185- if msg .role == "assistant" and isinstance (msg .content , str ):
186- # Create a new Message object with cleaned content
187- normalized_content = _normalize_content (msg .content )
188- # Only create a new object if content actually changed
189- if normalized_content != msg .content :
190- cleaned_msg = Message (role = msg .role , content = normalized_content , name = msg .name )
191- cleaned_messages .append (cleaned_msg )
192- else :
193- cleaned_messages .append (msg )
194- else :
195- cleaned_messages .append (msg )
196- return cleaned_messages
197-
198175 def find (self , model : str , messages : List [Message ]) -> Optional [ConversationInStore ]:
199176 """
200177 Search conversation data by message list.
@@ -215,7 +192,7 @@ def find(self, model: str, messages: List[Message]) -> Optional[ConversationInSt
215192 return conv
216193
217194 # --- Find with cleaned messages ---
218- cleaned_messages = self .clean_assistant_messages (messages )
195+ cleaned_messages = self .sanitize_assistant_messages (messages )
219196 if conv := self ._find_by_message_list (model , cleaned_messages ):
220197 logger .debug ("Found conversation with cleaned message history." )
221198 return conv
@@ -228,14 +205,12 @@ def _find_by_message_list(
228205 ) -> Optional [ConversationInStore ]:
229206 """Internal find implementation based on a message list."""
230207 for c in g_config .gemini .clients :
231- message_hash = hash_conversation (c .id , model , messages )
208+ message_hash = _hash_conversation (c .id , model , messages )
232209
233210 key = f"{ self .HASH_LOOKUP_PREFIX } { message_hash } "
234211 try :
235212 with self ._get_transaction (write = False ) as txn :
236- mapped = txn .get (key .encode ("utf-8" ))
237- if mapped :
238- logger .debug (f"Found mapped key '{ mapped .decode ('utf-8' )} ' for hash '{ message_hash } '." )
213+ if mapped := txn .get (key .encode ("utf-8" )): # type: ignore
239214 return self .get (mapped .decode ("utf-8" )) # type: ignore
240215 except Exception as e :
241216 logger .error (
@@ -283,7 +258,7 @@ def delete(self, key: str) -> Optional[ConversationInStore]:
283258
284259 storage_data = orjson .loads (data ) # type: ignore
285260 conv = ConversationInStore .model_validate (storage_data )
286- message_hash = hash_conversation (conv .client_id , conv .model , conv .messages )
261+ message_hash = _hash_conversation (conv .client_id , conv .model , conv .messages )
287262
288263 # Delete main data
289264 txn .delete (key .encode ("utf-8" ))
@@ -362,3 +337,32 @@ def close(self) -> None:
def __del__(self):
    """Call ``close()`` when the instance is being finalized."""
    self.close()
340+
341+ @staticmethod
342+ def remove_think_tags (text : str ) -> str :
343+ """
344+ Remove <think>...</think> tags at the start of text and strip whitespace.
345+ """
346+ cleaned_content = re .sub (r"^(\s*<think>.*?</think>\n?)" , "" , text , flags = re .DOTALL )
347+ return cleaned_content .strip ()
348+
@staticmethod
def sanitize_assistant_messages(messages: list[Message]) -> list[Message]:
    """
    Build a new message list in which assistant text has its leading
    <think> block removed.

    This is useful when storing the chat history. Messages that are not
    from the assistant, whose content is not a string, or whose content is
    unchanged by the cleanup are passed through as the same objects.
    """
    sanitized = []
    for original in messages:
        is_text_assistant = original.role == "assistant" and isinstance(original.content, str)
        if not is_text_assistant:
            sanitized.append(original)
            continue

        stripped = LMDBConversationStore.remove_think_tags(original.content)
        if stripped == original.content:
            # Nothing was removed, so reuse the existing object.
            sanitized.append(original)
            continue

        # NOTE(review): only role, content and name are carried over to the
        # rebuilt Message; confirm no other fields need to be preserved.
        sanitized.append(Message(role=original.role, content=stripped, name=original.name))

    return sanitized
0 commit comments