diff --git a/preprocessors/text-followup/text-followup.py b/preprocessors/text-followup/text-followup.py index bba395ab..13a0a8a4 100644 --- a/preprocessors/text-followup/text-followup.py +++ b/preprocessors/text-followup/text-followup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 IMAGE Project, Shared Reality Lab, McGill University +# Copyright (c) 2025 IMAGE Project, Shared Reality Lab, McGill University # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as @@ -452,13 +452,16 @@ def followup(): {"error": "Failed to process focus area on image"} ), 500 + # get followup prompt from env as an override if it exists + followup_prompt = os.getenv('FOLLOWUP_PROMPT_OVERRIDE', FOLLOWUP_PROMPT) + if not focus: - system_prompt = FOLLOWUP_PROMPT + system_prompt = followup_prompt else: - system_prompt = FOLLOWUP_PROMPT + FOLLOWUP_PROMPT_FOCUS + system_prompt = followup_prompt + FOLLOWUP_PROMPT_FOCUS system_message = { - "role": "developer", + "role": "system", "content": system_prompt } @@ -508,7 +511,9 @@ def followup(): followup_response_json = llm_client.chat_completion( prompt="", # Empty since we're using full messages via kwargs - json_schema=FOLLOWUP_RESPONSE_SCHEMA, + system_prompt=system_prompt, + json_schema=None, # qwen3 wants json_object not rigid schema + response_format={"type": "json_object"}, temperature=0.0, messages=messages, # Pass full conversation history via kwargs parse_json=True, @@ -518,7 +523,7 @@ def followup(): if followup_response_json is None: logging.error("Failed to receive response from LLM.") return jsonify( - {"error": "Failed to get graphic caption from LLM"} + {"error": "Failed to receive response from LLM"} ), 500 response_text, token_usage = followup_response_json diff --git a/utils/llm/client.py b/utils/llm/client.py index 5e38650a..a0019237 100644 --- a/utils/llm/client.py +++ b/utils/llm/client.py @@ -121,7 +121,7 @@ def chat_completion( # Add system prompt if provided if system_prompt: messages.append( - {"role": "developer", + {"role": "system", "content": system_prompt} ) @@ -140,6 +140,8 @@ def chat_completion( messages.append({"role": "user", "content": user_content}) + logging.pii(messages) + # Build API call parameters params = { "model": self.model, diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index 586cade9..8e1dbd89 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -101,7 +101,8 @@ "response_brief": "One sentence response to the user request.", "response_full": "Further details. Maximum three sentences." } - +""" +OLD_END_OF_FOLLOWUP_PROMPT = """ The user may add a note to focus on a specific part of the image and an updated picture with the area of interest marked with a red rectangle. In this case, answer the question ONLY about the contents