From 8e0cff96637d91ebb21202a85cd32a0fcd06c840 Mon Sep 17 00:00:00 2001 From: Mike Gvozdev Date: Sun, 16 Nov 2025 14:31:37 -0500 Subject: [PATCH] change coordinate calculation --- .../object-detection-llm/object-detection-llm.py | 8 ++++---- utils/llm/client.py | 2 +- utils/llm/prompts.py | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/preprocessors/object-detection-llm/object-detection-llm.py b/preprocessors/object-detection-llm/object-detection-llm.py index 76e0e366..b2930c8c 100644 --- a/preprocessors/object-detection-llm/object-detection-llm.py +++ b/preprocessors/object-detection-llm/object-detection-llm.py @@ -61,10 +61,10 @@ def normalize_bbox(bbox, width, height): """ x1, y1, x2, y2 = bbox return [ - max(0.0, min(x1 / width, 1.0)), - max(0.0, min(y1 / height, 1.0)), - max(0.0, min(x2 / width, 1.0)), - max(0.0, min(y2 / height, 1.0)) + max(0.0, min(x1 / 1000, 1.0)), + max(0.0, min(y1 / 1000, 1.0)), + max(0.0, min(x2 / 1000, 1.0)), + max(0.0, min(y2 / 1000, 1.0)) ] diff --git a/utils/llm/client.py b/utils/llm/client.py index 9be52e38..5e38650a 100644 --- a/utils/llm/client.py +++ b/utils/llm/client.py @@ -68,7 +68,7 @@ def chat_completion( image_base64: Optional[str] = None, json_schema: Optional[Dict[str, Any]] = None, schema_name: str = "response-format", - temperature: float = 0.5, + temperature: float = 0.0, max_tokens: Optional[int] = None, response_format: Optional[Dict[str, str]] = None, system_prompt: Optional[str] = None, diff --git a/utils/llm/prompts.py b/utils/llm/prompts.py index c6b95124..ce686ce7 100644 --- a/utils/llm/prompts.py +++ b/utils/llm/prompts.py @@ -40,11 +40,11 @@ Ensure that the bounding boxes are in the format [x1, y1, x2, y2]. Rules: -1. The graphic can contain any number of objects, from zero to many. -2. If no objects are detected, return an empty list: {"objects": []}. -3. Use simple and common object labels (e.g., "car", "person", "tree"). -4. Include only objects that are clearly visible and identifiable. -5. Focus on the major and important objects in the image. +1. Focus ONLY on the major and important objects in the image. +2. The graphic can contain any number of objects, from zero to many. +3. If no objects are detected, return an empty list: {"objects": []}. +4. Use simple and common object labels (e.g., "car", "person", "tree"). +5. Include ONLY objects that are clearly visible and identifiable. 6. Multiple objects can have the same confidence score. """ ###