diff --git a/addon/globalPlugins/visionAssistant/__init__.py b/addon/globalPlugins/visionAssistant/__init__.py
index 089a7b4..702d21b 100644
--- a/addon/globalPlugins/visionAssistant/__init__.py
+++ b/addon/globalPlugins/visionAssistant/__init__.py
@@ -49,654 +49,36 @@
log = logging.getLogger(__name__)
addonHandler.initTranslation()
-_vision_assistant_instance = None
-
-ADDON_NAME = addonHandler.getCodeAddon().manifest["summary"]
-GITHUB_REPO = "mahmoodhozhabri/VisionAssistantPro"
-
-# --- Constants & Config ---
-
-CHROME_OCR_KEYS = [
- "AIzaSyA2KlwBX3mkFo30om9LUFYQhpqLoa_BNhE",
- "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
-]
-
-MODELS = [
- # --- 1. Recommended (Auto-Updating) ---
- # Translators: AI Model info. [Auto] = Automatic updates. (Latest) = Newest version.
- (_("[Auto]") + " Gemini Flash " + _("(Latest)"), "gemini-flash-latest"),
- (_("[Auto]") + " Gemini Flash Lite " + _("(Latest)"), "gemini-flash-lite-latest"),
-
- # --- 2. Current Standard (Free & Fast) ---
- # Translators: AI Model info. [Free] = Generous usage limits. (Preview) = Experimental or early-access version.
- (_("[Free]") + " Gemini 3.0 Flash " + _("(Preview)"), "gemini-3-flash-preview"),
- (_("[Free]") + " Gemini 2.5 Flash", "gemini-2.5-flash"),
- (_("[Free]") + " Gemini 2.5 Flash Lite", "gemini-2.5-flash-lite"),
-
- # --- 3. High Intelligence (Paid/Pro/Preview) ---
- # Translators: AI Model info. [Pro] = High intelligence/Paid tier. (Preview) = Experimental version.
- (_("[Pro]") + " Gemini 3.0 Pro " + _("(Preview)"), "gemini-3-pro-preview"),
- (_("[Pro]") + " Gemini 2.5 Pro", "gemini-2.5-pro"),
-]
-
-GEMINI_VOICES = [
- # Translators: Adjective describing a bright AI voice style.
- ("Zephyr", _("Bright")),
- # Translators: Adjective describing an upbeat AI voice style.
- ("Puck", _("Upbeat")),
- # Translators: Adjective describing an informative AI voice style.
- ("Charon", _("Informative")),
- # Translators: Adjective describing a firm AI voice style.
- ("Kore", _("Firm")),
- # Translators: Adjective describing an excitable AI voice style.
- ("Fenrir", _("Excitable")),
- # Translators: Adjective describing a youthful AI voice style.
- ("Leda", _("Youthful")),
- # Translators: Adjective describing a firm AI voice style.
- ("Orus", _("Firm")),
- # Translators: Adjective describing a breezy AI voice style.
- ("Aoede", _("Breezy")),
- # Translators: Adjective describing an easy-going AI voice style.
- ("Callirrhoe", _("Easy-going")),
- # Translators: Adjective describing a bright AI voice style.
- ("Autonoe", _("Bright")),
- # Translators: Adjective describing a breathy AI voice style.
- ("Enceladus", _("Breathy")),
- # Translators: Adjective describing a clear AI voice style.
- ("Iapetus", _("Clear")),
- # Translators: Adjective describing an easy-going AI voice style.
- ("Umbriel", _("Easy-going")),
- # Translators: Adjective describing a smooth AI voice style.
- ("Algieba", _("Smooth")),
- # Translators: Adjective describing a smooth AI voice style.
- ("Despina", _("Smooth")),
- # Translators: Adjective describing a clear AI voice style.
- ("Erinome", _("Clear")),
- # Translators: Adjective describing a gravelly AI voice style.
- ("Algenib", _("Gravelly")),
- # Translators: Adjective describing an informative AI voice style.
- ("Rasalgethi", _("Informative")),
- # Translators: Adjective describing an upbeat AI voice style.
- ("Laomedeia", _("Upbeat")),
- # Translators: Adjective describing a soft AI voice style.
- ("Achernar", _("Soft")),
- # Translators: Adjective describing a firm AI voice style.
- ("Alnilam", _("Firm")),
- # Translators: Adjective describing an even AI voice style.
- ("Schedar", _("Even")),
- # Translators: Adjective describing a mature AI voice style.
- ("Gacrux", _("Mature")),
- # Translators: Adjective describing a forward AI voice style.
- ("Pulcherrima", _("Forward")),
- # Translators: Adjective describing a friendly AI voice style.
- ("Achird", _("Friendly")),
- # Translators: Adjective describing a casual AI voice style.
- ("Zubenelgenubi", _("Casual")),
- # Translators: Adjective describing a gentle AI voice style.
- ("Vindemiatrix", _("Gentle")),
- # Translators: Adjective describing a lively AI voice style.
- ("Sadachbia", _("Lively")),
- # Translators: Adjective describing a knowledgeable AI voice style.
- ("Sadaltager", _("Knowledgeable")),
- # Translators: Adjective describing a warm AI voice style.
- ("Sulafat", _("Warm"))
-]
-
-BASE_LANGUAGES = [
- ("Arabic", "ar"), ("Bulgarian", "bg"), ("Chinese", "zh"), ("Czech", "cs"), ("Danish", "da"),
- ("Dutch", "nl"), ("English", "en"), ("Finnish", "fi"), ("French", "fr"),
- ("German", "de"), ("Greek", "el"), ("Hebrew", "he"), ("Hindi", "hi"),
- ("Hungarian", "hu"), ("Indonesian", "id"), ("Italian", "it"), ("Japanese", "ja"),
- ("Korean", "ko"), ("Nepali", "ne"), ("Norwegian", "no"), ("Persian", "fa"), ("Polish", "pl"),
- ("Portuguese", "pt"), ("Romanian", "ro"), ("Russian", "ru"), ("Spanish", "es"),
- ("Swedish", "sv"), ("Thai", "th"), ("Turkish", "tr"), ("Ukrainian", "uk"),
- ("Vietnamese", "vi")
-]
-SOURCE_LIST = [("Auto-detect", "auto")] + BASE_LANGUAGES
-SOURCE_NAMES = [x[0] for x in SOURCE_LIST]
-TARGET_LIST = BASE_LANGUAGES
-TARGET_NAMES = [x[0] for x in TARGET_LIST]
-TARGET_CODES = {x[0]: x[1] for x in BASE_LANGUAGES}
-
-OCR_ENGINES = [
- # Translators: OCR Engine option (Fast but less formatted)
- (_("Chrome (Fast)"), "chrome"),
- # Translators: OCR Engine option (Slower but better formatting)
- (_("Gemini (Formatted)"), "gemini")
-]
-
-confspec = {
- "proxy_url": "string(default='')",
- "api_key": "string(default='')",
- "model_name": "string(default='gemini-flash-lite-latest')",
- "target_language": "string(default='English')",
- "source_language": "string(default='Auto-detect')",
- "ai_response_language": "string(default='English')",
- "smart_swap": "boolean(default=True)",
- "captcha_mode": "string(default='navigator')",
- "custom_prompts": "string(default='')",
- "custom_prompts_v2": "string(default='')",
- "default_refine_prompts": "string(default='')",
- "check_update_startup": "boolean(default=False)",
- "clean_markdown_chat": "boolean(default=True)",
- "copy_to_clipboard": "boolean(default=False)",
- "skip_chat_dialog": "boolean(default=False)",
- "ocr_engine": "string(default='chrome')",
- "tts_voice": "string(default='Puck')"
-}
-
-config.conf.spec["VisionAssistant"] = confspec
-
-PROMPT_TRANSLATE = """
-Task: Translate the text below to "{target_lang}".
-
-Configuration:
-- Target Language: "{target_lang}"
-- Swap Language: "{swap_target}"
-- Smart Swap: {smart_swap}
-
-Rules:
-1. DEFAULT: Translate the input strictly to "{target_lang}".
-2. MIXED CONTENT: If the text contains mixed languages (e.g., Arabic content with English UI terms like 'Reply', 'From', 'Forwarded'), translate EVERYTHING to "{target_lang}".
-3. EXCEPTION: If (and ONLY if) the input is already completely in "{target_lang}" AND "Smart Swap" is True, then translate to "{swap_target}".
-
-Constraints:
-- Output ONLY the translation.
-- Do NOT translate actual programming code (Python, C++, etc.) or URLs.
-- Translate ALL UI elements, menus, and interface labels.
-
-Input Text:
-{text_content}
-"""
-
-PROMPT_UI_LOCATOR = "Analyze UI (Size: {width}x{height}). Request: '{query}'. Output JSON: {{\"x\": int, \"y\": int, \"found\": bool}}."
-
-REFINE_PROMPT_KEYS = ("summarize", "fix_grammar", "fix_translate", "explain")
-
-LEGACY_REFINER_TOKENS = {
- "summarize": "[summarize]",
- "fix_grammar": "[fix_grammar]",
- "fix_translate": "[fix_translate]",
- "explain": "[explain]",
-}
-
-DEFAULT_SYSTEM_PROMPTS = (
- {
- "key": "summarize",
- # Translators: Section header for text refinement prompts in Prompt Manager.
- "section": _("Refine"),
- # Translators: Label for the text summarization prompt.
- "label": _("Summarize"),
- "prompt": "Summarize the text below in {response_lang}.",
- },
- {
- "key": "fix_grammar",
- # Translators: Section header for text refinement prompts in Prompt Manager.
- "section": _("Refine"),
- # Translators: Label for the grammar correction prompt.
- "label": _("Fix Grammar"),
- "prompt": "Fix grammar in the text below. Output ONLY the fixed text.",
- },
- {
- "key": "fix_translate",
- # Translators: Section header for text refinement prompts in Prompt Manager.
- "section": _("Refine"),
- # Translators: Label for the grammar correction and translation prompt.
- "label": _("Fix Grammar & Translate"),
- "prompt": "Fix grammar and translate to {target_lang}.{swap_instruction} Output ONLY the result.",
- },
- {
- "key": "explain",
- # Translators: Section header for text refinement prompts in Prompt Manager.
- "section": _("Refine"),
- # Translators: Label for the text explanation prompt.
- "label": _("Explain"),
- "prompt": "Explain the text below in {response_lang}.",
- },
- {
- "key": "translate_main",
- # Translators: Section header for translation-related prompts in Prompt Manager.
- "section": _("Translation"),
- # Translators: Label for the smart translation prompt.
- "label": _("Smart Translation"),
- "prompt": PROMPT_TRANSLATE.strip(),
- },
- {
- "key": "translate_quick",
- # Translators: Section header for translation-related prompts in Prompt Manager.
- "section": _("Translation"),
- # Translators: Label for the quick translation prompt.
- "label": _("Quick Translation"),
- "prompt": "Translate to {target_lang}. Output ONLY translation.",
- },
- {
- "key": "document_chat_system",
- # Translators: Section header for document-related prompts in Prompt Manager.
- "section": _("Document"),
- # Translators: Label for the initial context prompt in document chat.
- "label": _("Document Chat Context"),
- "prompt": "STRICTLY Respond in {response_lang}. Use Markdown formatting. Analyze the attached content to answer.",
- },
- {
- "key": "document_chat_ack",
- # Translators: Section header for advanced/internal prompts in Prompt Manager.
- "section": _("Advanced"),
- # Translators: Label for the AI's acknowledgement reply in document chat.
- "label": _("Document Chat Bootstrap Reply"),
- "internal": True,
- "prompt": "Context received. Ready for questions.",
- },
- {
- "key": "vision_navigator_object",
- # Translators: Section header for image analysis prompts in Prompt Manager.
- "section": _("Vision"),
- # Translators: Label for the prompt used to analyze the current navigator object.
- "label": _("Navigator Object Analysis"),
- "prompt": (
- "Analyze this image. Describe the layout, visible text, and UI elements. "
- "Use Markdown formatting (headings, lists) to organize the description. "
- "Language: {response_lang}. Ensure the response is strictly in {response_lang}. "
- "IMPORTANT: Start directly with the description content. Do not add introductory "
- "sentences like 'Here is the analysis' or 'The image shows'."
- ),
- },
- {
- "key": "vision_fullscreen",
- # Translators: Section header for image analysis prompts in Prompt Manager.
- "section": _("Vision"),
- # Translators: Label for the prompt used to analyze the entire screen.
- "label": _("Full Screen Analysis"),
- "prompt": (
- "Analyze this image. Describe the layout, visible text, and UI elements. "
- "Use Markdown formatting (headings, lists) to organize the description. "
- "Language: {response_lang}. Ensure the response is strictly in {response_lang}. "
- "IMPORTANT: Start directly with the description content. Do not add introductory "
- "sentences like 'Here is the analysis' or 'The image shows'."
- ),
- },
- {
- "key": "vision_followup_context",
- # Translators: Section header for advanced/internal prompts in Prompt Manager.
- "section": _("Advanced"),
- # Translators: Label for the follow-up context in image analysis chat.
- "label": _("Vision Follow-up Context"),
- "internal": True,
- "prompt": "Image Context. Target Language: {response_lang}",
- },
- {
- "key": "vision_followup_suffix",
- # Translators: Section header for advanced/internal prompts in Prompt Manager.
- "section": _("Advanced"),
- # Translators: Label for the rule enforced during image analysis follow-up questions.
- "label": _("Vision Follow-up Answer Rule"),
- "internal": True,
- "prompt": "Answer strictly in {response_lang}",
- },
- {
- "key": "video_analysis",
- # Translators: Section header for video analysis prompts in Prompt Manager.
- "section": _("Video"),
- # Translators: Label for the video content analysis prompt.
- "label": _("Video Analysis"),
- "prompt": (
- "Analyze this video. Provide a detailed description of the visual content and a "
- "summary of the audio. IMPORTANT: Write the entire response STRICTLY in "
- "{response_lang} language."
- ),
- },
- {
- "key": "audio_transcription",
- # Translators: Section header for audio-related prompts in Prompt Manager.
- "section": _("Audio"),
- # Translators: Label for the audio file transcription prompt.
- "label": _("Audio Transcription"),
- "prompt": "Transcribe this audio in {response_lang}.",
- },
- {
- "key": "dictation_transcribe",
- # Translators: Section header for audio-related prompts in Prompt Manager.
- "section": _("Audio"),
- # Translators: Label for the smart voice dictation prompt.
- "label": _("Smart Dictation"),
- "prompt": (
- "Transcribe speech. Use native script. Fix stutters. If there is no speech, silence, "
- "or background noise only, write exactly: [[[NOSPEECH]]]"
- ),
- },
- {
- "key": "ocr_image_extract",
- # Translators: Section header for OCR-related prompts in Prompt Manager.
- "section": _("OCR"),
- # Translators: Label for the OCR prompt used for image text extraction.
- "label": _("OCR Image Extraction"),
- "prompt": (
- "Extract all visible text from this image. Strictly preserve original formatting "
- "(headings, lists, tables) using Markdown. Do not output any system messages or "
- "code block backticks (```). Output ONLY the raw content."
- ),
- },
- {
- "key": "ocr_document_extract",
- # Translators: Section header for OCR-related prompts in Prompt Manager.
- "section": _("OCR"),
- # Translators: Label for the OCR prompt used for document text extraction.
- "label": _("OCR Document Extraction"),
- "prompt": (
- "Extract all visible text from this document. Strictly preserve original formatting "
- "(headings, lists, tables) using Markdown. You MUST insert the exact delimiter "
- "'[[[PAGE_SEP]]]' immediately after the content of every single page. Do not output "
- "any system messages or code block backticks (```). Output ONLY the raw content."
- ),
- },
- {
- "key": "ocr_document_translate",
- # Translators: Section header for document-related prompts in Prompt Manager.
- "section": _("Document"),
- # Translators: Label for the combined OCR and translation prompt for documents.
- "label": _("Document OCR + Translate"),
- "prompt": (
- "Extract all text from this document. Preserve formatting (Markdown). Then translate "
- "the content to {target_lang}. Output ONLY the translated content. Do not add "
- "explanations."
- ),
- },
- {
- "key": "captcha_solver_base",
- # Translators: Section header for CAPTCHA-related prompts in Prompt Manager.
- "section": _("CAPTCHA"),
- # Translators: Label for the CAPTCHA solving prompt.
- "label": _("CAPTCHA Solver"),
- "internal": True,
- "prompt": (
- "Blind user. Return CAPTCHA code only. If NO CAPTCHA is detected in the image, "
- "strictly return: [[[NO_CAPTCHA]]].{captcha_extra}"
- ),
- },
- {
- "key": "refine_files_only",
- # Translators: Section header for advanced/internal prompts in Prompt Manager.
- "section": _("Advanced"),
- # Translators: Label for the fallback prompt when only files are provided in Refine.
- "label": _("Refine Files-Only Fallback"),
- "internal": True,
- "prompt": "Analyze these files.",
- },
+from .constants import (
+ ADDON_NAME,
+ CHROME_OCR_KEYS,
+ GEMINI_VOICES,
+ GITHUB_REPO,
+ MODELS,
+ OCR_ENGINES,
+ PROMPT_VARIABLES_GUIDE,
+ REFINE_PROMPT_KEYS,
+ SOURCE_NAMES,
+ TARGET_CODES,
+ TARGET_NAMES,
)
-
-PROMPT_VARIABLES_GUIDE = (
- # Translators: Description and input type for the [selection] variable in the Variables Guide.
- ("[selection]", _("Currently selected text"), _("Text")),
- # Translators: Description for the [clipboard] variable in the Variables Guide.
- ("[clipboard]", _("Clipboard content"), _("Text")),
- # Translators: Description and input type for the [screen_obj] variable in the Variables Guide.
- ("[screen_obj]", _("Screenshot of the navigator object"), _("Image")),
- # Translators: Description for the [screen_full] variable in the Variables Guide.
- ("[screen_full]", _("Screenshot of the entire screen"), _("Image")),
- # Translators: Description and input type for the [file_ocr] variable in the Variables Guide.
- ("[file_ocr]", _("Select image/PDF/TIFF for text extraction"), _("Image, PDF, TIFF")),
- # Translators: Description and input type for the [file_read] variable in the Variables Guide.
- ("[file_read]", _("Select document for reading"), _("TXT, Code, PDF")),
- # Translators: Description and input type for the [file_audio] variable in the Variables Guide.
- ("[file_audio]", _("Select audio file for analysis"), _("MP3, WAV, OGG")),
+from .markdown_utils import clean_markdown, markdown_to_html
+from .prompt_helpers import (
+ apply_prompt_template,
+ get_builtin_default_prompts,
+ get_builtin_default_prompt_map,
+ get_configured_default_prompt_map,
+ get_configured_default_prompts,
+ get_prompt_text,
+ get_refine_menu_options,
+ load_configured_custom_prompts,
+ migrate_prompt_config_if_needed,
+ serialize_default_prompt_overrides,
+ serialize_custom_prompts_v2,
)
# --- Helpers ---
-def get_builtin_default_prompts():
- builtins = []
- for item in DEFAULT_SYSTEM_PROMPTS:
- p = str(item["prompt"]).strip()
- builtins.append({
- "key": item["key"],
- "section": item["section"],
- "label": item["label"],
- "display_label": f"{item['section']} - {item['label']}",
- "internal": bool(item.get("internal")),
- "prompt": p,
- "default": p,
- })
- return builtins
-
-def get_builtin_default_prompt_map():
- return {item["key"]: item for item in get_builtin_default_prompts()}
-
-def _normalize_custom_prompt_items(items):
- normalized = []
- if not isinstance(items, list):
- return normalized
-
- for item in items:
- if not isinstance(item, dict):
- continue
- name = item.get("name")
- content = item.get("content")
- if not isinstance(name, str) or not isinstance(content, str):
- continue
- name = name.strip()
- content = content.strip()
- if name and content:
- normalized.append({"name": name, "content": content})
- return normalized
-
-def parse_custom_prompts_legacy(raw_value):
- items = []
- if not raw_value:
- return items
-
- normalized = raw_value.replace("\r\n", "\n").replace("\r", "\n")
- for line in normalized.split("\n"):
- for segment in line.split("|"):
- segment = segment.strip()
- if not segment or ":" not in segment:
- continue
- name, content = segment.split(":", 1)
- name = name.strip()
- content = content.strip()
- if name and content:
- items.append({"name": name, "content": content})
- return items
-
-def parse_custom_prompts_v2(raw_value):
- if not isinstance(raw_value, str) or not raw_value.strip():
- return None
- try:
- data = json.loads(raw_value)
- except Exception as e:
- log.warning(f"Invalid custom_prompts_v2 config, falling back to legacy format: {e}")
- return None
- return _normalize_custom_prompt_items(data)
-
-def serialize_custom_prompts_v2(items):
- normalized = _normalize_custom_prompt_items(items)
- if not normalized:
- return ""
- return json.dumps(normalized, ensure_ascii=False)
-
-def load_configured_custom_prompts():
- try:
- raw_v2 = config.conf["VisionAssistant"]["custom_prompts_v2"]
- except Exception:
- raw_v2 = ""
- items_v2 = parse_custom_prompts_v2(raw_v2)
- if items_v2 is not None:
- return items_v2
- return parse_custom_prompts_legacy(config.conf["VisionAssistant"]["custom_prompts"])
-
-def _sanitize_default_prompt_overrides(data):
- if not isinstance(data, dict):
- return {}, False
-
- changed = False
- mutable = dict(data)
- # Migrate old key used in previous versions.
- legacy_vision = mutable.pop("vision_image_analysis", None)
- if legacy_vision is not None:
- changed = True
- if isinstance(legacy_vision, str) and legacy_vision.strip():
- legacy_text = legacy_vision.strip()
- nav_value = mutable.get("vision_navigator_object")
- if not isinstance(nav_value, str) or not nav_value.strip():
- mutable["vision_navigator_object"] = legacy_text
- changed = True
- full_value = mutable.get("vision_fullscreen")
- if not isinstance(full_value, str) or not full_value.strip():
- mutable["vision_fullscreen"] = legacy_text
- changed = True
-
- valid_keys = set(get_builtin_default_prompt_map().keys())
- sanitized = {}
- for key, value in mutable.items():
- if key not in valid_keys or not isinstance(value, str):
- changed = True
- continue
- prompt_text = value.strip()
- if not prompt_text:
- changed = True
- continue
- if key in LEGACY_REFINER_TOKENS and prompt_text == LEGACY_REFINER_TOKENS[key]:
- # Drop old token-only overrides and fallback to current built-ins.
- changed = True
- continue
- if prompt_text != value:
- changed = True
- sanitized[key] = prompt_text
- return sanitized, changed
-
-def migrate_prompt_config_if_needed():
- changed = False
-
- try:
- raw_v2 = config.conf["VisionAssistant"]["custom_prompts_v2"]
- except Exception:
- raw_v2 = ""
- raw_legacy = config.conf["VisionAssistant"]["custom_prompts"]
-
- v2_items = parse_custom_prompts_v2(raw_v2)
- if v2_items is None:
- target_items = parse_custom_prompts_legacy(raw_legacy)
- else:
- target_items = v2_items
-
- serialized_v2 = serialize_custom_prompts_v2(target_items)
- if serialized_v2 != (raw_v2 or ""):
- config.conf["VisionAssistant"]["custom_prompts_v2"] = serialized_v2
- changed = True
-
- # Legacy mirror is disabled. Clear old storage to prevent stale fallback data.
- if raw_legacy:
- config.conf["VisionAssistant"]["custom_prompts"] = ""
- changed = True
-
- try:
- raw_defaults = config.conf["VisionAssistant"]["default_refine_prompts"]
- except Exception:
- raw_defaults = ""
- if isinstance(raw_defaults, str) and raw_defaults.strip():
- try:
- defaults_data = json.loads(raw_defaults)
- except Exception:
- defaults_data = None
- if isinstance(defaults_data, dict):
- sanitized, migrated = _sanitize_default_prompt_overrides(defaults_data)
- if migrated:
- config.conf["VisionAssistant"]["default_refine_prompts"] = (
- json.dumps(sanitized, ensure_ascii=False) if sanitized else ""
- )
- changed = True
-
- return changed
-
-def load_default_prompt_overrides():
- try:
- raw = config.conf["VisionAssistant"]["default_refine_prompts"]
- except Exception:
- raw = ""
- if not isinstance(raw, str) or not raw.strip():
- return {}
-
- try:
- data = json.loads(raw)
- except Exception as e:
- log.warning(f"Invalid default_refine_prompts config, using built-ins: {e}")
- return {}
-
- overrides, _ = _sanitize_default_prompt_overrides(data)
- return overrides
-
-def get_configured_default_prompt_map():
- prompt_map = get_builtin_default_prompt_map()
- overrides = load_default_prompt_overrides()
- for key, override in overrides.items():
- if key not in prompt_map:
- continue
- if key in LEGACY_REFINER_TOKENS and override == LEGACY_REFINER_TOKENS[key]:
- continue
- prompt_map[key]["prompt"] = override
- return prompt_map
-
-def get_configured_default_prompts():
- prompt_map = get_configured_default_prompt_map()
- items = []
- for item in DEFAULT_SYSTEM_PROMPTS:
- if item.get("internal"):
- continue
- key = item["key"]
- if key in prompt_map:
- items.append(dict(prompt_map[key]))
- items.sort(key=lambda item: item.get("display_label", "").casefold())
- return items
-
-def get_prompt_text(prompt_key):
- prompt_map = get_configured_default_prompt_map()
- item = prompt_map.get(prompt_key)
- if item:
- return item["prompt"]
- return ""
-
-def serialize_default_prompt_overrides(items):
- if not items:
- return ""
-
- base_map = {item["key"]: item["prompt"] for item in get_builtin_default_prompts()}
- overrides = {}
- for item in items:
- key = item.get("key")
- prompt_text = item.get("prompt", "")
- if key not in base_map:
- continue
- if not isinstance(prompt_text, str):
- continue
- prompt_text = prompt_text.strip()
- if prompt_text and prompt_text != base_map[key]:
- overrides[key] = prompt_text
-
- if not overrides:
- return ""
- return json.dumps(overrides, ensure_ascii=False)
-
-def get_refine_menu_options():
- options = []
- prompt_map = get_configured_default_prompt_map()
- for key in REFINE_PROMPT_KEYS:
- item = prompt_map.get(key)
- if item:
- options.append((item["label"], item["prompt"]))
-
- for item in load_configured_custom_prompts():
- # Translators: Prefix for custom prompts in the Refine menu
- options.append((_("Custom: ") + item["name"], item["content"]))
- return options
-
-def apply_prompt_template(template, replacements):
- if not isinstance(template, str):
- return ""
-
- text = template
- for key, value in replacements:
- text = text.replace("{" + key + "}", str(value))
-
- return text.strip()
-
def finally_(func, final):
@wraps(func)
def new(*args, **kwargs):
@@ -706,1661 +88,34 @@ def new(*args, **kwargs):
final()
return new
-def clean_markdown(text):
- if not text: return ""
- text = re.sub(r'\*\*|__|[*_]', '', text)
- text = re.sub(r'^#+\s*', '', text, flags=re.MULTILINE)
- text = re.sub(r'```', '', text)
- text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
- text = re.sub(r'^\s*-\s+', '', text, flags=re.MULTILINE)
- return text.strip()
-
-def markdown_to_html(text, full_page=False):
- if not text: return ""
-
- html = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
- html = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', html)
- html = re.sub(r'__(.*?)__', r'<b>\1</b>', html)
- html = re.sub(r'^### (.*)', r'<h3>\1</h3>', html, flags=re.M)
- html = re.sub(r'^## (.*)', r'<h2>\1</h2>', html, flags=re.M)
- html = re.sub(r'^# (.*)', r'<h1>\1</h1>', html, flags=re.M)
-
- lines = html.split('\n')
- in_table = False
- new_lines = []
- table_style = 'border="1" style="border-collapse: collapse; width: 100%; margin-bottom: 10px;"'
- td_style = 'style="padding: 5px; border: 1px solid #ccc;"'
-
- for line in lines:
- stripped = line.strip()
- if stripped.startswith('|') or (stripped.count('|') > 1 and len(stripped) > 5):
- if not in_table:
- new_lines.append(f'<table {table_style}>')
- in_table = True
- if '---' in stripped: continue
- row_content = stripped.strip('|').split('|')
- cells = "".join([f'<td {td_style}>{c.strip()}</td>' for c in row_content])
- new_lines.append(f'<tr>{cells}</tr>')
- else:
- if in_table:
- new_lines.append('</table>')
- in_table = False
- if stripped: new_lines.append(line + "<br>")
- else: new_lines.append("<br>")
- if in_table: new_lines.append('</table>')
- html_body = "".join(new_lines)
-
- if not full_page: return html_body
- return f"""{html_body}"""
-
-def get_mime_type(path):
- ext = os.path.splitext(path)[1].lower()
- if ext == '.pdf': return 'application/pdf'
- if ext in ['.jpg', '.jpeg']: return 'image/jpeg'
- if ext == '.png': return 'image/png'
- if ext == '.webp': return 'image/webp'
- if ext in ['.tif', '.tiff']: return 'image/jpeg'
- if ext == '.mp3': return 'audio/mpeg'
- if ext == '.wav': return 'audio/wav'
- if ext == '.ogg': return 'audio/ogg'
- if ext == '.mp4': return 'video/mp4'
- return 'application/octet-stream'
-
-def show_error_dialog(message):
- # Translators: Title of the error dialog box
- title = _("{name} Error").format(name=ADDON_NAME)
- wx.CallAfter(gui.messageBox, message, title, wx.OK | wx.ICON_ERROR)
-
-def send_ctrl_v():
- try:
- user32 = ctypes.windll.user32
- VK_CONTROL = 0x11; VK_V = 0x56; KEYEVENTF_KEYUP = 0x0002
- user32.keybd_event(VK_CONTROL, 0, 0, 0)
- user32.keybd_event(VK_V, 0, 0, 0)
- user32.keybd_event(VK_V, 0, KEYEVENTF_KEYUP, 0)
- user32.keybd_event(VK_CONTROL, 0, KEYEVENTF_KEYUP, 0)
- except: pass
-
-def get_proxy_opener():
- proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
- if proxy_url:
- if "127.0.0.1" in proxy_url or "localhost" in proxy_url or ":" in proxy_url.split("/")[-1]:
- handler = request.ProxyHandler({'http': proxy_url, 'https': proxy_url})
- return request.build_opener(handler)
- return request.build_opener()
-
-def get_twitter_download_link(tweet_url):
- cj = cookiejar.CookieJar()
- opener = request.build_opener(request.HTTPCookieProcessor(cj))
- base_url = "https://savetwitter.net/en4"
- api_url = "https://savetwitter.net/api/ajaxSearch"
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'X-Requested-With': 'XMLHttpRequest', 'Referer': base_url}
- try:
- req_init = request.Request(base_url, headers=headers)
- opener.open(req_init)
- params = {'q': tweet_url, 'lang': 'en', 'cftoken': ''}
- data = urlencode(params).encode('utf-8')
- req_post = request.Request(api_url, data=data, headers=headers, method='POST')
- with opener.open(req_post) as response:
- res_data = json.loads(response.read().decode('utf-8'))
- if res_data.get('status') == 'ok':
- html = res_data.get('data', '')
- match = re.search(r'href="(https?://dl\.snapcdn\.app/[^"]+)"', html)
- if match: return match.group(1)
- except: pass
- return None
-
-def get_instagram_download_link(insta_url):
- cj = cookiejar.CookieJar()
- opener = request.build_opener(request.HTTPCookieProcessor(cj))
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36',
- 'X-Requested-With': 'XMLHttpRequest',
- 'Referer': 'https://anon-viewer.com/',
- 'Accept': '*/*'
- }
- opener.addheaders = list(headers.items())
- try:
- opener.open("https://anon-viewer.com/", timeout=30)
-
- if "/stories/" in insta_url:
- parts = insta_url.split("/")
- username = parts[parts.index("stories") + 1]
- api_url = f"https://anon-viewer.com/content.php?url={username}&method=allstories"
- else:
- encoded_url = quote(insta_url, safe='')
- api_url = f"https://anon-viewer.com/content.php?url={encoded_url}"
-
- response = opener.open(api_url, timeout=60)
- if response.getcode() == 200:
- res_content = response.read().decode('utf-8')
- data = json.loads(res_content)
- html_text = data.get('html', '')
-
- match = re.search(r'href="([^"]+anon-viewer\.com/media\.php\?media=[^"]+)"', html_text)
- if match:
- return match.group(1).replace('&amp;', '&')
-
- source_match = re.search(r' 0:
- result_parts = [x[0] for x in data[0] if x[0]]
- return "".join(result_parts)
- except Exception as e:
- log.error(f"Google Translate Failed: {e}", exc_info=True)
- return text
- return text
-
-class GeminiHandler:
- _working_key_idx = 0
- _file_uri_keys = {}
- _max_retries = 5
-
- @staticmethod
- def _get_api_keys():
- raw = config.conf["VisionAssistant"]["api_key"]
- clean_raw = raw.replace('\r\n', ',').replace('\n', ',')
- return [k.strip() for k in clean_raw.split(',') if k.strip()]
-
- @staticmethod
- def _get_opener():
- return get_proxy_opener()
-
- @staticmethod
- def _handle_error(e):
- if hasattr(e, 'code'):
- # Translators: Error message for Bad Request (400)
- if e.code == 400: return _("Error 400: Bad Request (Check API Key)")
- # Translators: Error message for Forbidden (403)
- if e.code == 403: return _("Error 403: Forbidden (Check Region)")
- if e.code == 429: return "QUOTA_EXCEEDED"
- if e.code >= 500: return "SERVER_ERROR"
- return str(e)
-
- @staticmethod
- def _call_with_retry(func_logic, key, *args):
- last_exc = None
- for attempt in range(GeminiHandler._max_retries):
- try:
- return func_logic(key, *args)
- except error.HTTPError as e:
- err_msg = GeminiHandler._handle_error(e)
- if err_msg not in ["QUOTA_EXCEEDED", "SERVER_ERROR"]:
- raise
- last_exc = e
- except error.URLError as e:
- last_exc = e
- if attempt < GeminiHandler._max_retries - 1:
- time.sleep(0.5 * (attempt + 1))
- raise last_exc
-
- @staticmethod
- def _register_file_uri(uri, key):
- if uri and key:
- GeminiHandler._file_uri_keys[uri] = key
- while len(GeminiHandler._file_uri_keys) > 200:
- GeminiHandler._file_uri_keys.pop(next(iter(GeminiHandler._file_uri_keys)))
-
- @staticmethod
- def _get_registered_key(uri):
- if not uri:
- return None
- return GeminiHandler._file_uri_keys.get(uri)
-
- @staticmethod
- def _call_with_key(func_logic, key, *args):
- try:
- return GeminiHandler._call_with_retry(func_logic, key, *args)
- except error.HTTPError as e:
- err_msg = GeminiHandler._handle_error(e)
- if err_msg == "QUOTA_EXCEEDED":
- # Translators: Message of a dialog which may pop up while performing an AI call
- err_msg = _("Error 429: Quota Exceeded (Try later)")
- elif err_msg == "SERVER_ERROR":
- # Translators: Message of a dialog which may pop up while performing an AI call
- err_msg = _("Server Error {code}: {reason}").format(code=e.code, reason=e.reason)
- return "ERROR:" + err_msg
- except Exception as e:
- return "ERROR:" + str(e)
-
- @staticmethod
- def _call_with_rotation(func_logic, *args):
- keys = GeminiHandler._get_api_keys()
- if not keys:
- # Translators: Error when no API keys are found in settings
- return "ERROR:" + _("No API Keys configured.")
-
- num_keys = len(keys)
- for i in range(num_keys):
- idx = (GeminiHandler._working_key_idx + i) % num_keys
- key = keys[idx]
- try:
- res = GeminiHandler._call_with_retry(func_logic, key, *args)
- GeminiHandler._working_key_idx = idx
- return res
- except error.HTTPError as e:
- err_msg = GeminiHandler._handle_error(e)
- if err_msg in ["QUOTA_EXCEEDED", "SERVER_ERROR"]:
- if i < num_keys - 1: continue
- # Translators: Error when all available API keys fail
- return "ERROR:" + _("All API Keys failed (Quota/Server).")
- return "ERROR:" + err_msg
- except Exception as e:
- return "ERROR:" + str(e)
- return "ERROR:" + _("Unknown error occurred.")
-
- @staticmethod
- def translate(text, target_lang):
- def _logic(key, txt, lang):
- model = config.conf["VisionAssistant"]["model_name"]
- url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
- quick_template = get_prompt_text("translate_quick") or "Translate to {target_lang}. Output ONLY translation."
- quick_prompt = apply_prompt_template(quick_template, [("target_lang", lang)])
- payload = {"contents": [{"parts": [{"text": quick_prompt}, {"text": txt}]}]}
- req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key})
- with GeminiHandler._get_opener().open(req, timeout=90) as r:
- return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
- return GeminiHandler._call_with_rotation(_logic, text, target_lang)
-
- @staticmethod
- def ocr_page(image_bytes):
- def _logic(key, img_data):
- model = config.conf["VisionAssistant"]["model_name"]
- url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
- ocr_image_prompt = get_prompt_text("ocr_image_extract")
- payload = {"contents": [{"parts": [{"inline_data": {"mime_type": "image/jpeg", "data": base64.b64encode(img_data).decode('utf-8')}}, {"text": ocr_image_prompt}]}]}
- req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key})
- with GeminiHandler._get_opener().open(req, timeout=120) as r:
- return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
- return GeminiHandler._call_with_rotation(_logic, image_bytes)
-
- @staticmethod
- def upload_and_process_batch(file_path, mime_type, page_count):
- keys = GeminiHandler._get_api_keys()
- if not keys:
- # Translators: Error message for missing API Keys
- return [ "ERROR:" + _("No API Keys.") ]
- model = config.conf["VisionAssistant"]["model_name"]
-
- opener = GeminiHandler._get_opener()
- proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
- base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
-
- for i, key in enumerate(keys):
- try:
- f_size = os.path.getsize(file_path)
- init_url = f"{base_url}/upload/v1beta/files"
- headers = {"X-Goog-Upload-Protocol": "resumable", "X-Goog-Upload-Command": "start", "X-Goog-Upload-Header-Content-Length": str(f_size), "X-Goog-Upload-Header-Content-Type": mime_type, "Content-Type": "application/json", "x-goog-api-key": key}
-
- req = request.Request(init_url, data=json.dumps({"file": {"display_name": "batch"}}).encode(), headers=headers, method="POST")
- with opener.open(req, timeout=120) as r: upload_url = r.headers.get("x-goog-upload-url")
-
- with open(file_path, 'rb') as f: f_data = f.read()
- req_up = request.Request(upload_url, data=f_data, headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"}, method="POST")
- with opener.open(req_up, timeout=180) as r:
- res = json.loads(r.read().decode())
- uri, name = res['file']['uri'], res['file']['name']
-
- active = False
- for attempt in range(30):
- req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key})
- with opener.open(req_check, timeout=30) as r:
- state = json.loads(r.read().decode()).get('state')
- if state == "ACTIVE":
- active = True
- break
- if state == "FAILED":
- break
- time.sleep(2)
-
- if not active:
- if i < len(keys) - 1:
- continue
- return [ "ERROR:" + _("Upload failed.") ]
-
- GeminiHandler._register_file_uri(uri, key)
-
- url = f"{base_url}/v1beta/models/{model}:generateContent"
- prompt = get_prompt_text("ocr_document_extract")
- contents = [{"parts": [{"file_data": {"mime_type": mime_type, "file_uri": uri}}, {"text": prompt}]}]
-
- req_gen = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key})
- with opener.open(req_gen, timeout=180) as r:
- res = json.loads(r.read().decode())
- text = res['candidates'][0]['content']['parts'][0]['text']
- return text.split('[[[PAGE_SEP]]]')
-
- except error.HTTPError as e:
- err_code = GeminiHandler._handle_error(e)
- if err_code in ["QUOTA_EXCEEDED", "SERVER_ERROR"] and i < len(keys) - 1:
- continue
- if err_code == "QUOTA_EXCEEDED":
- # Translators: Message of a dialog which may pop up while performing an AI call
- err_msg = _("Error 429: Quota Exceeded (Try later)")
- elif err_code == "SERVER_ERROR":
- # Translators: Message of a dialog which may pop up while performing an AI call
- err_msg = _("Server Error {code}: {reason}").format(code=e.code, reason=e.reason)
- else:
- err_msg = err_code
- return ["ERROR:" + err_msg]
- except Exception as e:
- return ["ERROR:" + str(e)]
- return ["ERROR:" + _("All keys failed.")]
-
- @staticmethod
- def chat(history, new_msg, file_uri, mime_type):
- def _logic(key, hist, msg, uri, mime):
- model = config.conf["VisionAssistant"]["model_name"]
- proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
- base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
- url = f"{base_url}/v1beta/models/{model}:generateContent"
-
- contents = list(hist)
- if uri:
- user_parts = [{"file_data": {"mime_type": mime, "file_uri": uri}}]
- else:
- user_parts = []
- user_parts.append({"text": msg})
- contents.append({"role": "user", "parts": user_parts})
-
- req = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key})
- with GeminiHandler._get_opener().open(req, timeout=120) as r:
- return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
- forced_key = GeminiHandler._get_registered_key(file_uri) if file_uri else None
- if forced_key:
- return GeminiHandler._call_with_key(_logic, forced_key, history, new_msg, file_uri, mime_type)
- return GeminiHandler._call_with_rotation(_logic, history, new_msg, file_uri, mime_type)
-
- @staticmethod
- def upload_for_chat(file_path, mime_type):
- keys = GeminiHandler._get_api_keys()
- if not keys: return None
- opener = GeminiHandler._get_opener()
- proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
- base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
-
- for key in keys:
- try:
- f_size = os.path.getsize(file_path)
- init_url = f"{base_url}/upload/v1beta/files"
- headers = {"X-Goog-Upload-Protocol": "resumable", "X-Goog-Upload-Command": "start", "X-Goog-Upload-Header-Content-Length": str(f_size), "X-Goog-Upload-Header-Content-Type": mime_type, "Content-Type": "application/json", "x-goog-api-key": key}
- req = request.Request(init_url, data=json.dumps({"file": {"display_name": os.path.basename(file_path)}}).encode(), headers=headers, method="POST")
- with opener.open(req, timeout=120) as r: upload_url = r.headers.get("x-goog-upload-url")
- with open(file_path, 'rb') as f: f_data = f.read()
- req_up = request.Request(upload_url, data=f_data, headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"}, method="POST")
- with opener.open(req_up, timeout=180) as r:
- res = json.loads(r.read().decode())
- uri, name = res['file']['uri'], res['file']['name']
- for attempt in range(30):
- req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key})
- with opener.open(req_check, timeout=30) as r:
- state = json.loads(r.read().decode()).get('state')
- if state == "ACTIVE":
- GeminiHandler._register_file_uri(uri, key)
- return uri
- time.sleep(2)
- return None
- except: continue
- return None
-
- @staticmethod
- def generate_speech(text, voice_name):
- def _logic(key, txt, voice):
- main_model = config.conf["VisionAssistant"]["model_name"]
- if "pro" in main_model.lower():
- tts_model = "gemini-2.5-pro-preview-tts"
- else:
- tts_model = "gemini-2.5-flash-preview-tts"
-
- proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
- base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
- url = f"{base_url}/v1beta/models/{tts_model}:generateContent"
-
- payload = {
- "contents": [{"parts": [{"text": txt}]}],
- "generationConfig": {
- "responseModalities": ["AUDIO"],
- "speechConfig": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": voice}}}
- }
- }
- req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key})
- with GeminiHandler._get_opener().open(req, timeout=600) as r:
- res = json.loads(r.read().decode())
- candidates = res.get('candidates', [])
- if not candidates: raise Exception("No candidates returned")
- content = candidates[0].get('content', {})
- parts = content.get('parts', [])
- if not parts: raise Exception("No parts in response")
- part = parts[0]
- if 'inlineData' in part: return part['inlineData']['data']
- if 'inline_data' in part: return part['inline_data']['data']
- if 'text' in part: raise Exception(f"Model refused audio: {part['text']}")
- raise Exception("Unknown response format")
- return GeminiHandler._call_with_rotation(_logic, text, voice_name)
+from .services import (
+ ChromeOCREngine,
+ GeminiHandler,
+ GoogleTranslator,
+ SmartProgrammersOCREngine,
+ VirtualDocument,
+ _download_temp_video,
+ get_file_path,
+ get_instagram_download_link,
+ get_mime_type,
+ get_proxy_opener,
+ get_tiktok_download_link,
+ get_twitter_download_link,
+ send_ctrl_v,
+ show_error_dialog,
+)
# --- Update Manager ---
-class UpdateDialog(wx.Dialog):
- def __init__(self, parent, version, name, changes):
- # Translators: Title of update confirmation dialog
- super().__init__(parent, title=_("Update Available"), size=(500, 450))
- self.Centre()
-
- panel = wx.Panel(self)
- vbox = wx.BoxSizer(wx.VERTICAL)
-
- # Translators: Message asking user to update. {version} is version number.
- msg = _("A new version ({version}) of {name} is available.").format(version=version, name=name)
- header = wx.StaticText(panel, label=msg)
- vbox.Add(header, 0, wx.ALL, 15)
-
- # Translators: Label for the changes text box
- change_lbl = wx.StaticText(panel, label=_("Changes:"))
- vbox.Add(change_lbl, 0, wx.LEFT | wx.RIGHT, 15)
-
- self.changes_ctrl = wx.TextCtrl(panel, value=changes, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2)
- vbox.Add(self.changes_ctrl, 1, wx.EXPAND | wx.ALL, 15)
-
- # Translators: Question to download and install
- question = wx.StaticText(panel, label=_("Download and Install?"))
- vbox.Add(question, 0, wx.LEFT | wx.RIGHT | wx.BOTTOM, 15)
-
- btn_sizer = wx.BoxSizer(wx.HORIZONTAL)
- # Translators: Button to accept update
- self.yes_btn = wx.Button(panel, wx.ID_YES, label=_("&Yes"))
- # Translators: Button to reject update
- self.no_btn = wx.Button(panel, wx.ID_NO, label=_("&No"))
-
- btn_sizer.Add(self.yes_btn, 0, wx.RIGHT, 10)
- btn_sizer.Add(self.no_btn, 0)
- vbox.Add(btn_sizer, 0, wx.ALIGN_RIGHT | wx.ALL, 15)
-
- panel.SetSizer(vbox)
- self.yes_btn.SetDefault()
- self.yes_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_YES))
- self.no_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_NO))
-
-class UpdateManager:
- def __init__(self, repo_name):
- self.repo_name = repo_name
- self.current_version = addonHandler.getCodeAddon().manifest['version']
-
- def check_for_updates(self, silent=True):
- threading.Thread(target=self._check_thread, args=(silent,), daemon=True).start()
-
- def _check_thread(self, silent):
- try:
- url = f"https://api.github.com/repos/{self.repo_name}/releases/latest"
- req = request.Request(url, headers={"User-Agent": "NVDA-Addon"})
- with request.urlopen(req, timeout=60) as response:
- if response.status == 200:
- data = json.loads(response.read().decode('utf-8'))
- latest_tag = data.get("tag_name", "").lstrip("v")
- if self._compare_versions(latest_tag, self.current_version) > 0:
- download_url = None
- for asset in data.get("assets", []):
- if asset["name"].endswith(".nvda-addon"):
- download_url = asset["browser_download_url"]
- break
- if download_url:
- raw_changes = data.get("body", "")
-
- clean_changes = re.split(r'SHA256|Checklist|---', raw_changes, flags=re.I)[0].strip()
- clean_changes = clean_markdown(clean_changes)
-
- wx.CallAfter(self._prompt_update, latest_tag, download_url, clean_changes)
- elif not silent:
- # Translators: Error message when an update is found but the addon file is missing from GitHub.
- msg = _("Update found but no .nvda-addon file in release.")
- show_error_dialog(msg)
- elif not silent:
- # Translators: Status message informing the user they are already on the latest version.
- msg = _("You have the latest version.")
- wx.CallAfter(ui.message, msg)
- except Exception as e:
- if not silent:
- msg = _("Update check failed: {error}").format(error=e)
- show_error_dialog(msg)
-
- def _compare_versions(self, v1, v2):
- try:
- parts1 = [int(x) for x in v1.split('.')]
- parts2 = [int(x) for x in v2.split('.')]
- return (parts1 > parts2) - (parts1 < parts2)
- except: return 0 if v1 == v2 else 1
-
- def _prompt_update(self, version, url, changes):
- dlg = UpdateDialog(gui.mainFrame, version, ADDON_NAME, changes)
- if dlg.ShowModal() == wx.ID_YES:
- threading.Thread(target=self._download_install_worker, args=(url,), daemon=True).start()
- dlg.Destroy()
-
- def _download_install_worker(self, url):
- try:
- # Translators: Message shown while downloading update
- msg = _("Downloading update...")
- wx.CallAfter(ui.message, msg)
- temp_dir = tempfile.gettempdir()
- file_path = os.path.join(temp_dir, "VisionAssistant_Update.nvda-addon")
- with request.urlopen(url) as response, open(file_path, 'wb') as out_file:
- out_file.write(response.read())
- wx.CallAfter(os.startfile, file_path)
- except Exception as e:
- # Translators: Error message for download failure
- msg = _("Download failed: {error}").format(error=e)
- show_error_dialog(msg)
-
-# --- UI Classes ---
-
-
-class VisionQADialog(wx.Dialog):
- def __init__(self, parent, title, initial_text, context_data, callback_fn, extra_info=None, raw_content=None, status_callback=None, announce_on_open=True, allow_questions=True):
- super(VisionQADialog, self).__init__(parent, title=title, size=(550, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER)
- self.context_data = context_data
- self.callback_fn = callback_fn
- self.extra_info = extra_info
- self.chat_history = []
- self.raw_content = raw_content
- self.status_callback = status_callback
- self.announce_on_open = announce_on_open
- self.allow_questions = allow_questions
-
- mainSizer = wx.BoxSizer(wx.VERTICAL)
- # Translators: Label for the AI response text area in a chat dialog
- lbl_text = _("AI Response:")
- lbl = wx.StaticText(self, label=lbl_text)
- mainSizer.Add(lbl, 0, wx.ALL, 5)
- self.outputArea = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY)
- mainSizer.Add(self.outputArea, 1, wx.EXPAND | wx.ALL, 5)
-
- self.should_clean = config.conf["VisionAssistant"]["clean_markdown_chat"]
- display_text = clean_markdown(initial_text) if self.should_clean else initial_text
- if display_text:
- # Translators: Format for displaying AI message in a chat dialog
- init_msg = _("AI: {text}\n").format(text=display_text)
- self.outputArea.AppendText(init_msg)
- if config.conf["VisionAssistant"]["copy_to_clipboard"]:
- api.copyToClip(raw_content if raw_content else display_text)
-
- if not (extra_info and extra_info.get('skip_init_history')):
- self.chat_history.append({"role": "model", "parts": [{"text": initial_text}]})
-
- self.inputArea = None
- if allow_questions:
- # Translators: Label for user input field in a chat dialog
- ask_text = _("Ask:")
- inputLbl = wx.StaticText(self, label=ask_text)
- mainSizer.Add(inputLbl, 0, wx.ALL, 5)
- self.inputArea = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30))
- mainSizer.Add(self.inputArea, 0, wx.EXPAND | wx.ALL, 5)
-
- btnSizer = wx.BoxSizer(wx.HORIZONTAL)
- self.askBtn = None
- if allow_questions:
- # Translators: Button to send message in a chat dialog
- self.askBtn = wx.Button(self, label=_("Send"))
- # Translators: Button to view the content in a formatted HTML window
- self.viewBtn = wx.Button(self, label=_("View Formatted"))
- self.viewBtn.Bind(wx.EVT_BUTTON, self.onView)
- # Translators: Button to save only the result content without chat history
- self.saveContentBtn = wx.Button(self, label=_("Save Content"))
- self.saveContentBtn.Bind(wx.EVT_BUTTON, self.onSaveContent)
- # Translators: Button to save chat in a chat dialog
- self.saveBtn = wx.Button(self, label=_("Save Chat"))
- # Translators: Button to close chat dialog
- self.closeBtn = wx.Button(self, wx.ID_CANCEL, label=_("Close"))
-
- self.saveBtn.Enable(bool(initial_text.strip()))
- self.viewBtn.Enable(bool(self.raw_content))
- self.saveContentBtn.Enable(bool(self.raw_content))
-
- if self.askBtn:
- btnSizer.Add(self.askBtn, 0, wx.ALL, 5)
- btnSizer.Add(self.viewBtn, 0, wx.ALL, 5)
- btnSizer.Add(self.saveContentBtn, 0, wx.ALL, 5)
- btnSizer.Add(self.saveBtn, 0, wx.ALL, 5)
- btnSizer.Add(self.closeBtn, 0, wx.ALL, 5)
- mainSizer.Add(btnSizer, 0, wx.ALIGN_RIGHT)
-
- self.SetSizer(mainSizer)
- if self.inputArea:
- self.inputArea.SetFocus()
- else:
- self.outputArea.SetFocus()
- if self.askBtn:
- self.askBtn.Bind(wx.EVT_BUTTON, self.onAsk)
- self.saveBtn.Bind(wx.EVT_BUTTON, self.onSave)
- if self.inputArea:
- self.inputArea.Bind(wx.EVT_TEXT_ENTER, self.onAsk)
- if display_text and self.announce_on_open:
- wx.CallLater(300, ui.message, display_text)
-
- def onAsk(self, event):
- if not self.inputArea:
- return
- question = self.inputArea.Value
- if not question.strip(): return
- # Translators: Format for displaying User message in a chat dialog
- user_msg = _("\nYou: {text}\n").format(text=question)
- self.outputArea.AppendText(user_msg)
- self.inputArea.Clear()
- # Translators: Message shown while processing in a chat dialog
- msg = _("Thinking...")
- ui.message(msg)
- threading.Thread(target=self.process_question, args=(question,), daemon=True).start()
-
- def process_question(self, question):
- result_tuple = self.callback_fn(self.context_data, question, self.chat_history, self.extra_info)
- response_text, _ = result_tuple
- if response_text:
- if not (self.extra_info and self.extra_info.get('file_context')):
- self.chat_history.append({"role": "user", "parts": [{"text": question}]})
- self.chat_history.append({"role": "model", "parts": [{"text": response_text}]})
- final_text = clean_markdown(response_text) if self.should_clean else response_text
- wx.CallAfter(self.update_response, final_text, response_text)
-
- def update_response(self, display_text, raw_text=None):
- if raw_text:
- self.raw_content = raw_text
- self.viewBtn.Enable(True)
- self.saveContentBtn.Enable(True)
- # Translators: Format for displaying AI message in a chat dialog
- ai_msg = _("AI: {text}\n").format(text=display_text)
- self.outputArea.AppendText(ai_msg)
- self.saveBtn.Enable(True)
- if config.conf["VisionAssistant"]["copy_to_clipboard"]:
- api.copyToClip(raw_text if raw_text else display_text)
- self.outputArea.ShowPosition(self.outputArea.GetLastPosition())
- ui.message(display_text)
-
- def report_save(self, msg):
- if self.status_callback: self.status_callback(msg)
- else: ui.message(msg)
-
- def onView(self, event):
- full_html = ""
- # Translators: Format for displaying User message in a chat dialog
- user_label = _("\nYou: {text}\n").format(text="").strip()
- # Translators: Format for displaying AI message in a chat dialog
- ai_label = _("AI: {text}\n").format(text="").strip()
-
- if self.chat_history:
- for item in self.chat_history:
- role = item.get("role", "")
- text = item.get("parts", [{}])[0].get("text", "")
- if role == "user":
- safe_text = text.replace("&", "&").replace("<", "<").replace(">", ">")
- full_html += f"{user_label}
{safe_text}
"
- elif role == "model":
- formatted_text = markdown_to_html(text, full_page=False)
- full_html += f"{ai_label}
{formatted_text}
"
-
- if not full_html and self.raw_content:
- formatted_text = markdown_to_html(self.raw_content, full_page=False)
- full_html += f"{ai_label}
{formatted_text}"
-
- if not full_html: return
- try:
- # Translators: Title of the formatted result window
- ui.browseableMessage(full_html, _("Formatted Conversation"), isHtml=True)
- except Exception as e:
- # Translators: Error message if viewing fails
- msg = _("Error displaying content: {error}").format(error=e)
- show_error_dialog(msg)
-
- def onSave(self, event):
- # Translators: Save dialog title
- path = get_file_path(_("Save Chat Log"), "Text files (*.txt)|*.txt", mode="save")
- if path:
- try:
- with open(path, "w", encoding="utf-8") as f: f.write(self.outputArea.GetValue())
- # Translators: Message shown on successful save of a file.
- self.report_save(_("Saved."))
- except Exception as e:
- # Translators: Message in the error dialog when saving fails.
- msg = _("Save failed: {error}").format(error=e)
- show_error_dialog(msg)
-
- def onSaveContent(self, event):
- # Translators: Save dialog title
- path = get_file_path(_("Save Result"), "HTML files (*.html)|*.html", mode="save")
- if path:
- try:
- full_html = markdown_to_html(self.raw_content, full_page=True)
- with open(path, "w", encoding="utf-8") as f: f.write(full_html)
- # Translators: Message on successful save
- self.report_save(_("Saved."))
- except Exception as e:
- # Translators: Message in the error dialog when saving fails.
- msg = _("Save failed: {error}").format(error=e)
- show_error_dialog(msg)
-
-class SettingsPanel(gui.settingsDialogs.SettingsPanel):
- title = ADDON_NAME
- def makeSettings(self, settingsSizer):
- # --- Connection Group ---
- # Translators: Title of the settings group for connection and updates
- groupLabel = _("Connection")
- self.connectionBox = wx.StaticBox(self, label=groupLabel)
- connectionSizer = wx.StaticBoxSizer(self.connectionBox, wx.VERTICAL)
- cHelper = gui.guiHelper.BoxSizerHelper(self.connectionBox, sizer=connectionSizer)
-
- # Translators: Label for API Key input
- apiLabel = wx.StaticText(self.connectionBox, label=_("Gemini API Key (Separate multiple keys with comma or newline):"))
- cHelper.addItem(apiLabel)
-
- api_value = config.conf["VisionAssistant"]["api_key"]
-
- self.apiKeyCtrl_hidden = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_PASSWORD, size=(-1, -1))
-
- self.apiKeyCtrl_visible = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_MULTILINE | wx.TE_DONTWRAP, size=(-1, 60))
- self.apiKeyCtrl_visible.Hide()
-
- cHelper.addItem(self.apiKeyCtrl_hidden)
- cHelper.addItem(self.apiKeyCtrl_visible)
-
- # Translators: Checkbox to toggle API Key visibility
- self.showApiCheck = wx.CheckBox(self.connectionBox, label=_("Show API Key"))
- self.showApiCheck.Bind(wx.EVT_CHECKBOX, self.onToggleApiVisibility)
- cHelper.addItem(self.showApiCheck)
-
- model_display_names = [opt[0] for opt in MODELS]
- # Translators: Label for Model selection
- self.model = cHelper.addLabeledControl(_("AI Model:"), wx.Choice, choices=model_display_names)
- current_id = config.conf["VisionAssistant"]["model_name"]
- try:
- index = next(i for i, v in enumerate(MODELS) if v[1] == current_id)
- self.model.SetSelection(index)
- except StopIteration: self.model.SetSelection(0)
-
- # Translators: Label for Proxy URL input
- self.proxyUrl = cHelper.addLabeledControl(_("Proxy URL:"), wx.TextCtrl)
- self.proxyUrl.Value = config.conf["VisionAssistant"]["proxy_url"]
-
- # Translators: Checkbox to enable/disable automatic update checks on NVDA startup
- self.checkUpdateStartup = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Check for updates on startup")))
- self.checkUpdateStartup.Value = config.conf["VisionAssistant"]["check_update_startup"]
- # Translators: Checkbox to toggle markdown cleaning in chat windows
- self.cleanMarkdown = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Clean Markdown in Chat")))
- self.cleanMarkdown.Value = config.conf["VisionAssistant"]["clean_markdown_chat"]
- # Translators: Checkbox to enable copying AI responses to clipboard
- self.copyToClipboard = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Copy AI responses to clipboard")))
- self.copyToClipboard.Value = config.conf["VisionAssistant"]["copy_to_clipboard"]
- # Translators: Checkbox to skip chat window and only speak AI responses
- self.skipChatDialog = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Direct Output (No Chat Window)")))
- self.skipChatDialog.Value = config.conf["VisionAssistant"]["skip_chat_dialog"]
- settingsSizer.Add(connectionSizer, 0, wx.EXPAND | wx.ALL, 5)
-
- # --- Translation Languages Group ---
- # Translators: Title of the settings group for translation languages configuration
- groupLabel = _("Translation Languages")
- langBox = wx.StaticBox(self, label=groupLabel)
- langSizer = wx.StaticBoxSizer(langBox, wx.VERTICAL)
- lHelper = gui.guiHelper.BoxSizerHelper(langBox, sizer=langSizer)
-
- # Translators: Label for Source Language selection
- self.sourceLang = lHelper.addLabeledControl(_("Source:"), wx.Choice, choices=SOURCE_NAMES)
- try: self.sourceLang.SetSelection(SOURCE_NAMES.index(config.conf["VisionAssistant"]["source_language"]))
- except: self.sourceLang.SetSelection(0)
-
- # Translators: Label for Target Language selection
- self.targetLang = lHelper.addLabeledControl(_("Target:"), wx.Choice, choices=TARGET_NAMES)
- try: self.targetLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["target_language"]))
- except: self.targetLang.SetSelection(0)
-
- # Translators: Label for AI Response Language selection
- self.aiResponseLang = lHelper.addLabeledControl(_("AI Response:"), wx.Choice, choices=TARGET_NAMES)
- try: self.aiResponseLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["ai_response_language"]))
- except: self.aiResponseLang.SetSelection(0)
-
- # Translators: Checkbox for Smart Swap feature
- self.smartSwap = lHelper.addItem(wx.CheckBox(langBox, label=_("Smart Swap")))
- self.smartSwap.Value = config.conf["VisionAssistant"]["smart_swap"]
- settingsSizer.Add(langSizer, 0, wx.EXPAND | wx.ALL, 5)
-
- # --- Document Reader Settings ---
- # Translators: Title of settings group for Document Reader features
- groupLabel = _("Document Reader")
- docBox = wx.StaticBox(self, label=groupLabel)
- docSizer = wx.StaticBoxSizer(docBox, wx.VERTICAL)
- dHelper = gui.guiHelper.BoxSizerHelper(docBox, sizer=docSizer)
-
- # Translators: Label for OCR Engine selection
- self.ocr_sel = dHelper.addLabeledControl(_("OCR Engine:"), wx.Choice, choices=[x[0] for x in OCR_ENGINES])
- curr_ocr = config.conf["VisionAssistant"]["ocr_engine"]
- try:
- o_idx = next(i for i, v in enumerate(OCR_ENGINES) if v[1] == curr_ocr)
- self.ocr_sel.SetSelection(o_idx)
- except: self.ocr_sel.SetSelection(0)
-
- voice_choices = [f"{v[0]} - {v[1]}" for v in GEMINI_VOICES]
- # Translators: Label for TTS Voice selection
- self.voice_sel = dHelper.addLabeledControl(_("TTS Voice:"), wx.Choice, choices=voice_choices)
- curr_voice = config.conf["VisionAssistant"]["tts_voice"]
- try:
- v_idx = next(i for i, v in enumerate(GEMINI_VOICES) if v[0] == curr_voice)
- self.voice_sel.SetSelection(v_idx)
- except: self.voice_sel.SetSelection(1)
- settingsSizer.Add(docSizer, 0, wx.EXPAND | wx.ALL, 5)
-
- # --- CAPTCHA Group ---
- # Translators: Title of the settings group for CAPTCHA options
- groupLabel = _("CAPTCHA")
- capBox = wx.StaticBox(self, label=groupLabel)
- capSizer = wx.StaticBoxSizer(capBox, wx.VERTICAL)
- capHelper = gui.guiHelper.BoxSizerHelper(capBox, sizer=capSizer)
- # Translators: Label for CAPTCHA capture method selection
- self.captchaMode = capHelper.addLabeledControl(_("Capture Method:"), wx.Choice, choices=[
- # Translators: A choice for capture method. Captures only the specific object under the NVDA navigator cursor.
- _("Navigator Object"),
- # Translators: A choice for capture method. Captures the entire visible screen area.
- _("Full Screen")
- ])
- self.captchaMode.SetSelection(0 if config.conf["VisionAssistant"]["captcha_mode"] == 'navigator' else 1)
- settingsSizer.Add(capSizer, 0, wx.EXPAND | wx.ALL, 5)
-
- self.defaultPromptItems = get_configured_default_prompts()
- self.customPromptItems = load_configured_custom_prompts()
-
- # --- Prompt Manager Group ---
- # Translators: Title of the settings group for prompt management
- groupLabel = _("Prompts")
- promptsBox = wx.StaticBox(self, label=groupLabel)
- promptsSizer = wx.StaticBoxSizer(promptsBox, wx.VERTICAL)
- pHelper = gui.guiHelper.BoxSizerHelper(promptsBox, sizer=promptsSizer)
- # Translators: Description for the prompt manager button.
- pHelper.addItem(wx.StaticText(promptsBox, label=_("Manage default and custom prompts.")))
- # Translators: Button label to open prompt manager dialog.
- self.managePromptsBtn = wx.Button(promptsBox, label=_("Manage Prompts..."))
- self.managePromptsBtn.Bind(wx.EVT_BUTTON, self.onManagePrompts)
- pHelper.addItem(self.managePromptsBtn)
- self.promptsSummary = wx.StaticText(promptsBox)
- pHelper.addItem(self.promptsSummary)
- self._refreshPromptSummary()
- settingsSizer.Add(promptsSizer, 0, wx.EXPAND | wx.ALL, 5)
-
- def _refreshPromptSummary(self):
- # Translators: Summary text for prompt counts in settings.
- summary = _("Default prompts: {defaultCount}, Custom prompts: {customCount}").format(
- defaultCount=len(self.defaultPromptItems),
- customCount=len(self.customPromptItems),
- )
- self.promptsSummary.SetLabel(summary)
-
- def onManagePrompts(self, event):
- top = wx.GetTopLevelParent(self)
- dlg = PromptManagerDialog(
- self,
- self.defaultPromptItems,
- self.customPromptItems,
- PROMPT_VARIABLES_GUIDE,
- )
- try:
- if dlg.ShowModal() == wx.ID_OK:
- self.defaultPromptItems = dlg.get_default_items()
- self.customPromptItems = dlg.get_custom_items()
- self._refreshPromptSummary()
- finally:
- dlg.Destroy()
- if top:
- top.Enable(True)
- top.SetFocus()
-
- def onToggleApiVisibility(self, event):
- if self.showApiCheck.IsChecked():
- self.apiKeyCtrl_visible.SetValue(self.apiKeyCtrl_hidden.GetValue())
- self.apiKeyCtrl_hidden.Hide()
- self.apiKeyCtrl_visible.Show()
- else:
- self.apiKeyCtrl_hidden.SetValue(self.apiKeyCtrl_visible.GetValue())
- self.apiKeyCtrl_visible.Hide()
- self.apiKeyCtrl_hidden.Show()
-
- self.connectionBox.GetParent().Layout()
-
- def onSave(self):
- val = self.apiKeyCtrl_visible.GetValue() if self.showApiCheck.IsChecked() else self.apiKeyCtrl_hidden.GetValue()
- config.conf["VisionAssistant"]["api_key"] = val.strip()
- config.conf["VisionAssistant"]["model_name"] = MODELS[self.model.GetSelection()][1]
- config.conf["VisionAssistant"]["proxy_url"] = self.proxyUrl.Value.strip()
- config.conf["VisionAssistant"]["source_language"] = SOURCE_NAMES[self.sourceLang.GetSelection()]
- config.conf["VisionAssistant"]["target_language"] = TARGET_NAMES[self.targetLang.GetSelection()]
- config.conf["VisionAssistant"]["ai_response_language"] = TARGET_NAMES[self.aiResponseLang.GetSelection()]
- config.conf["VisionAssistant"]["smart_swap"] = self.smartSwap.Value
- config.conf["VisionAssistant"]["check_update_startup"] = self.checkUpdateStartup.Value
- config.conf["VisionAssistant"]["clean_markdown_chat"] = self.cleanMarkdown.Value
- config.conf["VisionAssistant"]["copy_to_clipboard"] = self.copyToClipboard.Value
- config.conf["VisionAssistant"]["skip_chat_dialog"] = self.skipChatDialog.Value
- config.conf["VisionAssistant"]["captcha_mode"] = 'navigator' if self.captchaMode.GetSelection() == 0 else 'fullscreen'
- config.conf["VisionAssistant"]["custom_prompts_v2"] = serialize_custom_prompts_v2(self.customPromptItems)
- config.conf["VisionAssistant"]["custom_prompts"] = ""
- config.conf["VisionAssistant"]["default_refine_prompts"] = serialize_default_prompt_overrides(self.defaultPromptItems)
- config.conf["VisionAssistant"]["ocr_engine"] = OCR_ENGINES[self.ocr_sel.GetSelection()][1]
- config.conf["VisionAssistant"]["tts_voice"] = GEMINI_VOICES[self.voice_sel.GetSelection()][0]
-
-class RangeDialog(wx.Dialog):
- def __init__(self, parent, total_pages):
- # Translators: Title of the PDF options dialog
- super().__init__(parent, title=_("Options"), size=(350, 320))
- sizer = wx.BoxSizer(wx.VERTICAL)
- # Translators: Label showing total pages found
- sizer.Add(wx.StaticText(self, label=_("Total Pages (All Files): {count}").format(count=total_pages)), 0, wx.ALL, 10)
-
- # Translators: Box title for page range selection
- box_range = wx.StaticBoxSizer(wx.VERTICAL, self, _("Range"))
- g_sizer = wx.FlexGridSizer(2, 2, 10, 10)
- # Translators: Label for start page
- g_sizer.Add(wx.StaticText(self, label=_("From:")), 0, wx.ALIGN_CENTER_VERTICAL)
- self.spin_from = wx.SpinCtrl(self, min=1, max=total_pages, initial=1)
- g_sizer.Add(self.spin_from, 1, wx.EXPAND)
- # Translators: Label for end page
- g_sizer.Add(wx.StaticText(self, label=_("To:")), 0, wx.ALIGN_CENTER_VERTICAL)
- self.spin_to = wx.SpinCtrl(self, min=1, max=total_pages, initial=total_pages)
- g_sizer.Add(self.spin_to, 1, wx.EXPAND)
- box_range.Add(g_sizer, 1, wx.EXPAND | wx.ALL, 5)
- sizer.Add(box_range, 0, wx.EXPAND | wx.ALL, 10)
-
- # Translators: Box title for translation options
- box_trans = wx.StaticBoxSizer(wx.VERTICAL, self, _("Translation"))
- # Translators: Checkbox to enable translation
- self.chk_trans = wx.CheckBox(self, label=_("Translate Output"))
- box_trans.Add(self.chk_trans, 0, wx.ALL, 5)
- h_sizer = wx.BoxSizer(wx.HORIZONTAL)
- # Translators: Label for target language
- h_sizer.Add(wx.StaticText(self, label=_("Target:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
- self.cmb_lang = wx.Choice(self, choices=TARGET_NAMES)
- self.cmb_lang.SetSelection(0)
- h_sizer.Add(self.cmb_lang, 1)
- box_trans.Add(h_sizer, 1, wx.EXPAND | wx.ALL, 5)
- sizer.Add(box_trans, 0, wx.EXPAND | wx.ALL, 10)
-
- btn_sizer = wx.BoxSizer(wx.HORIZONTAL)
- # Translators: Button to start processing
- btn_ok = wx.Button(self, wx.ID_OK, label=_("Start"))
- btn_ok.SetDefault()
- # Translators: Button to cancel
- btn_cancel = wx.Button(self, wx.ID_CANCEL, label=_("Cancel"))
- btn_sizer.Add(btn_ok, 0, wx.RIGHT, 10)
- btn_sizer.Add(btn_cancel, 0)
- sizer.Add(btn_sizer, 0, wx.ALIGN_CENTER | wx.ALL, 10)
- self.SetSizer(sizer)
-
- self.chk_trans.Bind(wx.EVT_CHECKBOX, self.on_check)
- self.cmb_lang.Disable()
-
- def on_check(self, event):
- self.cmb_lang.Enable(self.chk_trans.IsChecked())
-
- def get_settings(self):
- return {
- 'start': self.spin_from.GetValue() - 1,
- 'end': self.spin_to.GetValue() - 1,
- 'translate': self.chk_trans.IsChecked(),
- 'lang': TARGET_NAMES[self.cmb_lang.GetSelection()]
- }
-
-class ChatDialog(wx.Dialog):
- instance = None
-
- def __init__(self, parent, file_path):
- # Translators: Title of the chat dialog
- super().__init__(parent, title=_("Ask about Document"), size=(600, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER)
- ChatDialog.instance = self
- self.file_path = file_path
- self.file_uri = None
- self.mime_type = get_mime_type(file_path)
- self.history = []
-
- sizer = wx.BoxSizer(wx.VERTICAL)
- # Translators: Label showing the analyzed file name
- lbl_info = wx.StaticText(self, label=_("File: {name}").format(name=os.path.basename(file_path)))
- sizer.Add(lbl_info, 0, wx.ALL, 5)
- self.display = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2)
- sizer.Add(self.display, 1, wx.EXPAND | wx.ALL, 10)
- # Translators: Status message while uploading
- self.display.SetValue(_("Uploading to Gemini...\n"))
-
- input_sizer = wx.BoxSizer(wx.HORIZONTAL)
- # Translators: Label for the chat input field
- input_sizer.Add(wx.StaticText(self, label=_("Your Question:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
- self.input = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30))
- self.input.Bind(wx.EVT_TEXT_ENTER, self.on_send)
- input_sizer.Add(self.input, 1, wx.EXPAND | wx.RIGHT, 5)
-
- # Translators: Button to send message
- self.btn_send = wx.Button(self, label=_("Send"))
- self.btn_send.Bind(wx.EVT_BUTTON, self.on_send)
- self.btn_send.Disable()
- input_sizer.Add(self.btn_send, 0)
- sizer.Add(input_sizer, 0, wx.EXPAND | wx.ALL, 10)
- self.SetSizer(sizer)
- self.Bind(wx.EVT_CLOSE, self.on_close)
- threading.Thread(target=self.init_upload, daemon=True).start()
-
- def on_close(self, event):
- ChatDialog.instance = None
- self.Destroy()
-
- def init_upload(self):
- uri = GeminiHandler.upload_for_chat(self.file_path, self.mime_type)
- if uri and not str(uri).startswith("ERROR:"):
- self.file_uri = uri
- wx.CallAfter(self.on_ready)
- else:
- err_msg = str(uri)[6:] if uri else _("Upload failed.")
- wx.CallAfter(show_error_dialog, err_msg)
- wx.CallAfter(self.Close)
-
- def on_ready(self):
- # Translators: Message when ready to chat
- self.display.AppendText(_("Ready! Ask your questions.\n"))
- self.btn_send.Enable()
- self.input.SetFocus()
-
- def on_send(self, event):
- msg = self.input.GetValue().strip()
- if not msg: return
- self.input.Clear()
- self.display.AppendText(f"You: {msg}\n")
- # Translators: Message showing AI is thinking
- ui.message(_("Thinking..."))
- threading.Thread(target=self.do_chat, args=(msg,), daemon=True).start()
-
- def do_chat(self, msg):
- resp = GeminiHandler.chat(self.history, msg, self.file_uri, self.mime_type)
-
- if str(resp).startswith("ERROR:"):
- show_error_dialog(resp[6:])
- if _vision_assistant_instance:
- # Translators: Initial status when the add-on is doing nothing
- _vision_assistant_instance.current_status = _("Idle")
- return
-
- self.history.append({"role": "user", "parts": [{"text": msg}]})
- self.history.append({"role": "model", "parts": [{"text": resp}]})
- wx.CallAfter(self.display.AppendText, f"AI: {resp}\n\n")
- # Translators: Spoken prefix for AI response
- wx.CallAfter(ui.message, _("AI: ") + resp)
-
-class DocumentViewerDialog(wx.Dialog):
- def __init__(self, parent, virtual_doc, settings):
- # Translators: Title of the Document Reader window.
- title_text = f"{ADDON_NAME} - {_('Document Reader')}"
- super().__init__(parent, title=title_text, size=(800, 600), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER | wx.MAXIMIZE_BOX)
- self.v_doc = virtual_doc
- self.start_page = settings['start']
- self.end_page = settings['end']
- self.do_translate = settings['translate']
- self.target_lang = settings['lang']
- self.range_count = self.end_page - self.start_page + 1
- self.page_cache = {}
- self.current_page = self.start_page
- self.thread_pool = ThreadPoolExecutor(max_workers=5)
-
- self.init_ui()
- self.Centre()
- threading.Thread(target=self.start_auto_processing, daemon=True).start()
-
- def init_ui(self):
- panel = wx.Panel(self)
- vbox = wx.BoxSizer(wx.VERTICAL)
- # Translators: Initial status message
- self.lbl_status = wx.StaticText(panel, label=_("Initializing..."))
- vbox.Add(self.lbl_status, 0, wx.ALL, 5)
- self.txt_content = wx.TextCtrl(panel, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2)
- vbox.Add(self.txt_content, 1, wx.EXPAND | wx.LEFT | wx.RIGHT, 10)
- hbox_nav = wx.BoxSizer(wx.HORIZONTAL)
- # Translators: Button to go to previous page
- self.btn_prev = wx.Button(panel, label=_("Previous (Ctrl+PageUp)"))
- self.btn_prev.Bind(wx.EVT_BUTTON, self.on_prev)
- hbox_nav.Add(self.btn_prev, 0, wx.RIGHT, 5)
- # Translators: Button to go to next page
- self.btn_next = wx.Button(panel, label=_("Next (Ctrl+PageDown)"))
- self.btn_next.Bind(wx.EVT_BUTTON, self.on_next)
- hbox_nav.Add(self.btn_next, 0, wx.RIGHT, 15)
- # Translators: Label for Go To Page
- hbox_nav.Add(wx.StaticText(panel, label=_("Go to:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
- choices = [str(i+1) for i in range(self.start_page, self.end_page + 1)]
- self.cmb_pages = wx.Choice(panel, choices=choices)
- self.cmb_pages.Bind(wx.EVT_CHOICE, self.on_page_select)
- hbox_nav.Add(self.cmb_pages, 0, wx.RIGHT, 15)
- vbox.Add(hbox_nav, 0, wx.ALIGN_CENTER | wx.ALL, 10)
- hbox_actions = wx.BoxSizer(wx.HORIZONTAL)
- # Translators: Button to Ask questions about the document
- self.btn_ask = wx.Button(panel, label=_("Ask AI (Alt+A)"))
- self.btn_ask.Bind(wx.EVT_BUTTON, self.on_ask)
- hbox_actions.Add(self.btn_ask, 0, wx.RIGHT, 5)
-
- # Translators: Button to force re-scan
- self.btn_gemini = wx.Button(panel, label=_("Re-scan with Gemini (Alt+R)"))
- self.btn_gemini.Bind(wx.EVT_BUTTON, self.on_gemini_scan)
- hbox_actions.Add(self.btn_gemini, 0, wx.RIGHT, 5)
-
- # Translators: Button to generate audio
- self.btn_tts = wx.Button(panel, label=_("Generate Audio (Alt+G)"))
- self.btn_tts.Bind(wx.EVT_BUTTON, self.on_tts)
- hbox_actions.Add(self.btn_tts, 0, wx.RIGHT, 5)
-
- # Translators: Button to view formatted content
- self.btn_view = wx.Button(panel, label=_("View Formatted"))
- self.btn_view.Bind(wx.EVT_BUTTON, self.on_view)
- hbox_actions.Add(self.btn_view, 0, wx.RIGHT, 5)
-
- # Translators: Button to save text
- self.btn_save = wx.Button(panel, label=_("Save (Alt+S)"))
- self.btn_save.Bind(wx.EVT_BUTTON, self.on_save_all)
- hbox_actions.Add(self.btn_save, 0)
-
- vbox.Add(hbox_actions, 0, wx.ALIGN_CENTER | wx.ALL, 5)
- btn_close = wx.Button(panel, wx.ID_CLOSE, label=_("Close"))
- btn_close.Bind(wx.EVT_BUTTON, lambda e: self.Destroy())
- vbox.Add(btn_close, 0, wx.ALIGN_RIGHT | wx.ALL, 10)
- panel.SetSizer(vbox)
- accel_tbl = wx.AcceleratorTable([
- (wx.ACCEL_CTRL, wx.WXK_PAGEDOWN, self.btn_next.GetId()),
- (wx.ACCEL_CTRL, wx.WXK_PAGEUP, self.btn_prev.GetId()),
- (wx.ACCEL_CTRL, ord('S'), self.btn_save.GetId()),
- (wx.ACCEL_ALT, ord('S'), self.btn_save.GetId()),
- (wx.ACCEL_ALT, ord('A'), self.btn_ask.GetId()),
- (wx.ACCEL_ALT, ord('R'), self.btn_gemini.GetId()),
- (wx.ACCEL_ALT, ord('G'), self.btn_tts.GetId())
- ])
- self.SetAcceleratorTable(accel_tbl)
- self.cmb_pages.SetSelection(0)
- self.update_view()
- self.txt_content.SetFocus()
-
- def start_auto_processing(self):
- engine = config.conf["VisionAssistant"]["ocr_engine"]
-
- if engine == 'gemini':
- threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start()
- else:
- for i in range(self.start_page, self.end_page + 1):
- self.thread_pool.submit(self.process_page_worker, i)
-
- def process_page_worker(self, page_num):
- if page_num in self.page_cache: return
- text = self._get_page_text_logic(page_num)
- self.page_cache[page_num] = text
- if page_num == self.current_page:
- wx.CallAfter(self.update_view)
- # Translators: Spoken message when the current page is ready
- wx.CallAfter(ui.message, _("Page {num} ready").format(num=page_num + 1))
-
- def _get_page_text_logic(self, page_num):
- file_path, page_idx = self.v_doc.get_page_info(page_num)
- if not file_path: return ""
- try:
- doc = fitz.open(file_path)
- page = doc.load_page(page_idx)
- pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
- img_bytes = pix.tobytes("jpg")
- doc.close()
- engine = config.conf["VisionAssistant"]["ocr_engine"]
- text = None
- if engine == 'gemini':
- try: text = GeminiHandler.ocr_page(img_bytes)
- except: text = None
- if not text or not text.strip() or engine == 'chrome':
- text = ChromeOCREngine.recognize(img_bytes)
- if not text or not text.strip():
- text = SmartProgrammersOCREngine.recognize(img_bytes)
- if not text or not text.strip():
- # Translators: Placeholder text when OCR fails
- text = _("[OCR failed. Try Gemini Re-scan.]")
- if self.do_translate and text and "[OCR failed" not in text:
- if engine == 'gemini':
- text = GeminiHandler.translate(text, self.target_lang)
- else:
- text = GoogleTranslator.translate(text, self.target_lang)
- return text
- except:
- # Translators: Error message for page processing failure
- return _("Error processing page.")
-
- def update_view(self):
- rel_page = self.current_page - self.start_page + 1
- # Translators: Status label format
- self.lbl_status.SetLabel(_("Page {current} of {total}").format(current=rel_page, total=self.range_count))
- if self.current_page in self.page_cache:
- self.txt_content.SetValue(self.page_cache[self.current_page])
- self.txt_content.SetInsertionPoint(0)
- self.txt_content.SetFocus()
- else:
- # Translators: Status when page is loading
- self.txt_content.SetValue(_("Processing in background..."))
- self.txt_content.SetInsertionPoint(0)
- self.txt_content.SetFocus()
- self.btn_prev.Enable(self.current_page > self.start_page)
- self.btn_next.Enable(self.current_page < self.end_page)
-
- def load_page(self, page_num):
- if page_num < self.start_page or page_num > self.end_page: return
- self.current_page = page_num
- self.cmb_pages.SetSelection(page_num - self.start_page)
- # Translators: Spoken message when switching pages
- ui.message(_("Page {num}").format(num=page_num + 1))
- self.update_view()
-
- def on_prev(self, event):
- if self.current_page > self.start_page: self.load_page(self.current_page - 1)
-
- def on_next(self, event):
- if self.current_page < self.end_page: self.load_page(self.current_page + 1)
-
- def on_page_select(self, event):
- self.load_page(self.start_page + self.cmb_pages.GetSelection())
-
- def on_view(self, event):
- full_html = []
- for i in range(self.start_page, self.end_page + 1):
- if i in self.page_cache:
- page_text = self.page_cache[i]
- page_content = markdown_to_html(page_text, full_page=False)
- # Translators: Heading for each page in the formatted content view.
- page_label = _("Page {num}").format(num=i+1)
- full_html.append(f"<h2>{page_label}</h2>")
- full_html.append(page_content)
- full_html.append("<hr>")
-
- if not full_html:
- text = self.txt_content.GetValue()
- if not text: return
- full_html.append(markdown_to_html(text, full_page=False))
-
- combined_html = "".join(full_html)
- try:
- # Translators: Title of the formatted result window
- ui.browseableMessage(combined_html, _("Formatted Content"), isHtml=True)
- except Exception as e:
- show_error_dialog(str(e))
-
- def on_gemini_scan(self, event):
- if not config.conf["VisionAssistant"]["api_key"]:
- wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR)
- return
- menu = wx.Menu()
- # Translators: Menu option for current page
- item_curr = menu.Append(wx.ID_ANY, _("Current Page"))
- # Translators: Menu option for all pages
- item_all = menu.Append(wx.ID_ANY, _("All Pages (In Range)"))
- self.Bind(wx.EVT_MENU, self.do_rescan_current, item_curr)
- self.Bind(wx.EVT_MENU, self.do_rescan_all, item_all)
- self.PopupMenu(menu)
- menu.Destroy()
-
- def do_rescan_current(self, event):
- if self.current_page in self.page_cache: del self.page_cache[self.current_page]
- self.update_view()
- # Translators: Message during manual scan
- ui.message(_("Scanning with Gemini..."))
- threading.Thread(target=self.gemini_scan_single_thread, args=(self.current_page,), daemon=True).start()
-
- def gemini_scan_single_thread(self, page_num):
- try:
- file_path, page_idx = self.v_doc.get_page_info(page_num)
- doc = fitz.open(file_path)
- page = doc.load_page(page_idx)
- pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
- text = GeminiHandler.ocr_page(pix.tobytes("jpg"))
- doc.close()
- if self.do_translate: text = GeminiHandler.translate(text, self.target_lang)
- self.page_cache[page_num] = text
- if self.current_page == page_num:
- wx.CallAfter(self.update_view)
- # Translators: Message when scan is complete
- wx.CallAfter(ui.message, _("Scan complete"))
- except: pass
-
- def do_rescan_all(self, event):
- threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start()
-
- def gemini_scan_batch_thread(self):
- # Translators: Message when batch scan starts
- msg = _("Batch Processing Started")
- if _vision_assistant_instance: _vision_assistant_instance.current_status = msg
- wx.CallAfter(ui.message, msg)
-
- for i in range(self.start_page, self.end_page + 1):
- if i in self.page_cache: del self.page_cache[i]
- wx.CallAfter(self.update_view)
-
- upload_path = self.v_doc.create_merged_pdf(self.start_page, self.end_page)
- if not upload_path:
- # Translators: Error message if PDF creation fails
- wx.CallAfter(self.lbl_status.SetLabel, _("Error creating temporary PDF."))
- return
-
- try:
- count = (self.end_page - self.start_page) + 1
- results = GeminiHandler.upload_and_process_batch(upload_path, "application/pdf", count)
-
- if not results or (len(results) == 1 and str(results[0]).startswith("ERROR:")):
- err_msg = results[0][6:] if results else _("Unknown error")
- # Translators: Message reported when batch scan fails
- error_text = _("Scan failed: {err}").format(err=err_msg)
- for i in range(self.start_page, self.end_page + 1):
- self.page_cache[i] = error_text
-
- wx.CallAfter(self.update_view)
- wx.CallAfter(ui.message, error_text)
- return
-
- for i, text_part in enumerate(results):
- if i >= count: break
- idx = self.start_page + i
- clean = text_part.strip()
- if self.do_translate:
- clean = GeminiHandler.translate(clean, self.target_lang)
- self.page_cache[idx] = clean
-
- wx.CallAfter(self.update_view)
- # Translators: Message when batch scan is complete
- final_msg = _("Batch Scan Complete")
- if _vision_assistant_instance:
- # Translators: Initial status when the add-on is doing nothing
- _vision_assistant_instance.current_status = _("Idle")
- wx.CallAfter(ui.message, final_msg)
- finally:
- if upload_path and os.path.exists(upload_path):
- try: os.remove(upload_path)
- except: pass
-
- def on_tts(self, event):
- if not config.conf["VisionAssistant"]["api_key"]:
- wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR)
- return
- menu = wx.Menu()
- # Translators: Menu option for TTS current page
- item_curr = menu.Append(wx.ID_ANY, _("Generate for Current Page"))
- # Translators: Menu option for TTS all pages
- item_all = menu.Append(wx.ID_ANY, _("Generate for All Pages (In Range)"))
- self.Bind(wx.EVT_MENU, self.do_tts_current, item_curr)
- self.Bind(wx.EVT_MENU, self.do_tts_all, item_all)
- self.PopupMenu(menu)
- menu.Destroy()
-
- def do_tts_current(self, event):
- text = self.txt_content.GetValue().strip()
- if not text:
- # Translators: Error message when text field is empty
- wx.MessageBox(_("No text to read."), "Error")
- return
- self._save_tts(text)
-
- def do_tts_all(self, event):
- threading.Thread(target=self.tts_batch_thread, daemon=True).start()
-
- def tts_batch_thread(self):
- full_text = []
- # Translators: Message while gathering text
- wx.CallAfter(ui.message, _("Gathering text for audio..."))
- for i in range(self.start_page, self.end_page + 1):
- while i not in self.page_cache: time.sleep(0.1)
- full_text.append(self.page_cache[i])
- final_text = "\n".join(full_text).strip()
- if not final_text: return
- wx.CallAfter(self._save_tts, final_text)
-
- def _save_tts(self, text):
- # Translators: File dialog title for saving audio
- path = get_file_path(_("Save Audio"), "MP3 Files (*.mp3)|*.mp3|WAV Files (*.wav)|*.wav", mode="save")
- if path:
- voice = config.conf["VisionAssistant"]["tts_voice"]
- threading.Thread(target=self.tts_worker, args=(text, voice, path), daemon=True).start()
-
- def tts_worker(self, text, voice, path):
- # Translators: Message while generating audio
- msg = _("Generating Audio...")
- if _vision_assistant_instance: _vision_assistant_instance.current_status = msg
- wx.CallAfter(ui.message, msg)
- try:
- audio_b64 = GeminiHandler.generate_speech(text, voice)
- if not audio_b64 or len(audio_b64) < 100:
- wx.CallAfter(wx.MessageBox, f"TTS Error: {audio_b64}", "Error", wx.ICON_ERROR)
- return
- missing_padding = len(audio_b64) % 4
- if missing_padding: audio_b64 += '=' * (4 - missing_padding)
- pcm_data = base64.b64decode(audio_b64)
-
- if path.lower().endswith(".mp3"):
- import subprocess
- lame_path = os.path.join(os.path.dirname(__file__), "lib", "lame.exe")
- if not os.path.exists(lame_path):
- wx.CallAfter(wx.MessageBox, _("lame.exe not found in lib folder."), "Error", wx.ICON_ERROR)
- return
-
- process = subprocess.Popen(
- [lame_path, "-r", "-s", "24", "-m", "m", "-b", "128", "--bitwidth", "16", "--resample", "24", "-q", "0", "-", path],
- stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- creationflags=getattr(subprocess, 'CREATE_NO_WINDOW', 0)
- )
-
- process.communicate(input=pcm_data)
- else:
- with wave.open(path, "wb") as wf:
- wf.setnchannels(1)
- wf.setsampwidth(2)
- wf.setframerate(24000)
- wf.writeframes(pcm_data)
-
- # Translators: Spoken message when audio is saved
- res_msg = _("Audio Saved")
- if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle")
- wx.CallAfter(ui.message, res_msg)
- wx.CallAfter(wx.MessageBox, _("Audio file generated and saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION)
- except Exception as e:
- if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle")
- wx.CallAfter(wx.MessageBox, f"TTS Error: {e}", "Error", wx.ICON_ERROR)
-
- def on_ask(self, event):
- if not config.conf["VisionAssistant"]["api_key"]:
- wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR)
- return
- if ChatDialog.instance:
- ChatDialog.instance.Raise()
- ChatDialog.instance.SetFocus()
- return
- file_path, _ = self.v_doc.get_page_info(self.current_page)
- if file_path:
- dlg = ChatDialog(self, file_path)
- dlg.Show()
-
- def on_save_all(self, event):
- # Translators: File dialog filter for saving text/html
- wildcard = "Text File (*.txt)|*.txt|HTML File (*.html)|*.html"
- # Translators: File dialog title for saving
- path = get_file_path(_("Save"), wildcard, mode="save")
- if path:
- is_html = path.lower().endswith('.html')
- self.btn_save.Disable()
- threading.Thread(target=self.save_thread, args=(path, is_html), daemon=True).start()
-
- def save_thread(self, path, is_html):
- full_content = []
- try:
- for i in range(self.start_page, self.end_page + 1):
- # Translators: Message showing save progress
- wx.CallAfter(self.lbl_status.SetLabel, _("Saving Page {num}...").format(num=i+1))
- while i not in self.page_cache: time.sleep(0.1)
- txt = self.page_cache[i]
- if is_html:
- h = markdown_to_html(txt)
- if "<body>" in h: h = h.split("<body>")[1].split("</body>")[0]
- full_content.append(f"<h1>Page {i+1}</h1>{h}")
- else:
- full_content.append(f"--- Page {i+1} ---\n{txt}\n")
- with open(path, "w", encoding="utf-8") as f:
- if is_html: f.write(f"<html><body>{''.join(full_content)}</body></html>")
- else: f.write("\n".join(full_content))
- # Translators: Status label when save is complete
- wx.CallAfter(self.lbl_status.SetLabel, _("Saved"))
- # Translators: Message box content for successful save
- wx.CallAfter(wx.MessageBox, _("File saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION)
- except Exception as e:
- wx.CallAfter(wx.MessageBox, f"Save Error: {e}", "Error", wx.ICON_ERROR)
- finally: wx.CallAfter(self.btn_save.Enable)
+from .updater import UpdateDialog, UpdateManager
+
+from .dialogs import (
+ ChatDialog,
+ DocumentViewerDialog,
+ RangeDialog,
+ SettingsPanel,
+ VisionQADialog,
+ set_vision_assistant_instance,
+)
class GlobalPlugin(globalPluginHandler.GlobalPlugin):
scriptCategory = ADDON_NAME
@@ -2379,8 +134,7 @@ class GlobalPlugin(globalPluginHandler.GlobalPlugin):
def __init__(self):
super(GlobalPlugin, self).__init__()
- global _vision_assistant_instance
- _vision_assistant_instance = self
+ set_vision_assistant_instance(self)
try:
migrate_prompt_config_if_needed()
except Exception as e:
@@ -2503,7 +257,6 @@ def script_activateLayer(self, gesture):
tones.beep(500, 100)
def terminate(self):
- global _vision_assistant_instance
try:
if hasattr(self, 'va_submenu_item') and self.va_submenu_item:
self.tools_menu.Remove(self.va_submenu_item.GetId())
@@ -2529,7 +282,7 @@ def terminate(self):
self.translation_cache = {}
self._last_source_text = None
- _vision_assistant_instance = None
+ set_vision_assistant_instance(None)
gc.collect()
def report_status(self, msg):
diff --git a/addon/globalPlugins/visionAssistant/constants.py b/addon/globalPlugins/visionAssistant/constants.py
new file mode 100644
index 0000000..bcd8ac2
--- /dev/null
+++ b/addon/globalPlugins/visionAssistant/constants.py
@@ -0,0 +1,393 @@
+# -*- coding: utf-8 -*-
+
+import addonHandler
+import config
+
+addonHandler.initTranslation()
+
+ADDON_NAME = addonHandler.getCodeAddon().manifest["summary"]
+GITHUB_REPO = "mahmoodhozhabri/VisionAssistantPro"
+
+CHROME_OCR_KEYS = [
+ "AIzaSyA2KlwBX3mkFo30om9LUFYQhpqLoa_BNhE",
+ "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
+]
+
+MODELS = [
+ # --- 1. Recommended (Auto-Updating) ---
+ # Translators: AI Model info. [Auto] = Automatic updates. (Latest) = Newest version.
+ (_("[Auto]") + " Gemini Flash " + _("(Latest)"), "gemini-flash-latest"),
+ (_("[Auto]") + " Gemini Flash Lite " + _("(Latest)"), "gemini-flash-lite-latest"),
+
+ # --- 2. Current Standard (Free & Fast) ---
+ # Translators: AI Model info. [Free] = Generous usage limits. (Preview) = Experimental or early-access version.
+ (_("[Free]") + " Gemini 3.0 Flash " + _("(Preview)"), "gemini-3-flash-preview"),
+ (_("[Free]") + " Gemini 2.5 Flash", "gemini-2.5-flash"),
+ (_("[Free]") + " Gemini 2.5 Flash Lite", "gemini-2.5-flash-lite"),
+
+ # --- 3. High Intelligence (Paid/Pro/Preview) ---
+ # Translators: AI Model info. [Pro] = High intelligence/Paid tier. (Preview) = Experimental version.
+ (_("[Pro]") + " Gemini 3.0 Pro " + _("(Preview)"), "gemini-3-pro-preview"),
+ (_("[Pro]") + " Gemini 2.5 Pro", "gemini-2.5-pro"),
+]
+
+GEMINI_VOICES = [
+ # Translators: Adjective describing a bright AI voice style.
+ ("Zephyr", _("Bright")),
+ # Translators: Adjective describing an upbeat AI voice style.
+ ("Puck", _("Upbeat")),
+ # Translators: Adjective describing an informative AI voice style.
+ ("Charon", _("Informative")),
+ # Translators: Adjective describing a firm AI voice style.
+ ("Kore", _("Firm")),
+ # Translators: Adjective describing an excitable AI voice style.
+ ("Fenrir", _("Excitable")),
+ # Translators: Adjective describing a youthful AI voice style.
+ ("Leda", _("Youthful")),
+ # Translators: Adjective describing a firm AI voice style.
+ ("Orus", _("Firm")),
+ # Translators: Adjective describing a breezy AI voice style.
+ ("Aoede", _("Breezy")),
+ # Translators: Adjective describing an easy-going AI voice style.
+ ("Callirrhoe", _("Easy-going")),
+ # Translators: Adjective describing a bright AI voice style.
+ ("Autonoe", _("Bright")),
+ # Translators: Adjective describing a breathy AI voice style.
+ ("Enceladus", _("Breathy")),
+ # Translators: Adjective describing a clear AI voice style.
+ ("Iapetus", _("Clear")),
+ # Translators: Adjective describing an easy-going AI voice style.
+ ("Umbriel", _("Easy-going")),
+ # Translators: Adjective describing a smooth AI voice style.
+ ("Algieba", _("Smooth")),
+ # Translators: Adjective describing a smooth AI voice style.
+ ("Despina", _("Smooth")),
+ # Translators: Adjective describing a clear AI voice style.
+ ("Erinome", _("Clear")),
+ # Translators: Adjective describing a gravelly AI voice style.
+ ("Algenib", _("Gravelly")),
+ # Translators: Adjective describing an informative AI voice style.
+ ("Rasalgethi", _("Informative")),
+ # Translators: Adjective describing an upbeat AI voice style.
+ ("Laomedeia", _("Upbeat")),
+ # Translators: Adjective describing a soft AI voice style.
+ ("Achernar", _("Soft")),
+ # Translators: Adjective describing a firm AI voice style.
+ ("Alnilam", _("Firm")),
+ # Translators: Adjective describing an even AI voice style.
+ ("Schedar", _("Even")),
+ # Translators: Adjective describing a mature AI voice style.
+ ("Gacrux", _("Mature")),
+ # Translators: Adjective describing a forward AI voice style.
+ ("Pulcherrima", _("Forward")),
+ # Translators: Adjective describing a friendly AI voice style.
+ ("Achird", _("Friendly")),
+ # Translators: Adjective describing a casual AI voice style.
+ ("Zubenelgenubi", _("Casual")),
+ # Translators: Adjective describing a gentle AI voice style.
+ ("Vindemiatrix", _("Gentle")),
+ # Translators: Adjective describing a lively AI voice style.
+ ("Sadachbia", _("Lively")),
+ # Translators: Adjective describing a knowledgeable AI voice style.
+ ("Sadaltager", _("Knowledgeable")),
+ # Translators: Adjective describing a warm AI voice style.
+ ("Sulafat", _("Warm"))
+]
+
+BASE_LANGUAGES = [
+ ("Arabic", "ar"), ("Bulgarian", "bg"), ("Chinese", "zh"), ("Czech", "cs"), ("Danish", "da"),
+ ("Dutch", "nl"), ("English", "en"), ("Finnish", "fi"), ("French", "fr"),
+ ("German", "de"), ("Greek", "el"), ("Hebrew", "he"), ("Hindi", "hi"),
+ ("Hungarian", "hu"), ("Indonesian", "id"), ("Italian", "it"), ("Japanese", "ja"),
+ ("Korean", "ko"), ("Nepali", "ne"), ("Norwegian", "no"), ("Persian", "fa"), ("Polish", "pl"),
+ ("Portuguese", "pt"), ("Romanian", "ro"), ("Russian", "ru"), ("Spanish", "es"),
+ ("Swedish", "sv"), ("Thai", "th"), ("Turkish", "tr"), ("Ukrainian", "uk"),
+ ("Vietnamese", "vi")
+]
+SOURCE_LIST = [("Auto-detect", "auto")] + BASE_LANGUAGES
+SOURCE_NAMES = [x[0] for x in SOURCE_LIST]
+TARGET_LIST = BASE_LANGUAGES
+TARGET_NAMES = [x[0] for x in TARGET_LIST]
+TARGET_CODES = {x[0]: x[1] for x in BASE_LANGUAGES}
+
+OCR_ENGINES = [
+ # Translators: OCR Engine option (Fast but less formatted)
+ (_("Chrome (Fast)"), "chrome"),
+ # Translators: OCR Engine option (Slower but better formatting)
+ (_("Gemini (Formatted)"), "gemini")
+]
+
+confspec = {
+ "proxy_url": "string(default='')",
+ "api_key": "string(default='')",
+ "model_name": "string(default='gemini-flash-lite-latest')",
+ "target_language": "string(default='English')",
+ "source_language": "string(default='Auto-detect')",
+ "ai_response_language": "string(default='English')",
+ "smart_swap": "boolean(default=True)",
+ "captcha_mode": "string(default='navigator')",
+ "custom_prompts": "string(default='')",
+ "custom_prompts_v2": "string(default='')",
+ "default_refine_prompts": "string(default='')",
+ "check_update_startup": "boolean(default=False)",
+ "clean_markdown_chat": "boolean(default=True)",
+ "copy_to_clipboard": "boolean(default=False)",
+ "skip_chat_dialog": "boolean(default=False)",
+ "ocr_engine": "string(default='chrome')",
+ "tts_voice": "string(default='Puck')"
+}
+
+config.conf.spec["VisionAssistant"] = confspec
+
+PROMPT_TRANSLATE = """
+Task: Translate the text below to "{target_lang}".
+
+Configuration:
+- Target Language: "{target_lang}"
+- Swap Language: "{swap_target}"
+- Smart Swap: {smart_swap}
+
+Rules:
+1. DEFAULT: Translate the input strictly to "{target_lang}".
+2. MIXED CONTENT: If the text contains mixed languages (e.g., Arabic content with English UI terms like 'Reply', 'From', 'Forwarded'), translate EVERYTHING to "{target_lang}".
+3. EXCEPTION: If (and ONLY if) the input is already completely in "{target_lang}" AND "Smart Swap" is True, then translate to "{swap_target}".
+
+Constraints:
+- Output ONLY the translation.
+- Do NOT translate actual programming code (Python, C++, etc.) or URLs.
+- Translate ALL UI elements, menus, and interface labels.
+
+Input Text:
+{text_content}
+"""
+
+PROMPT_UI_LOCATOR = "Analyze UI (Size: {width}x{height}). Request: '{query}'. Output JSON: {{\"x\": int, \"y\": int, \"found\": bool}}."
+
+REFINE_PROMPT_KEYS = ("summarize", "fix_grammar", "fix_translate", "explain")
+
+LEGACY_REFINER_TOKENS = {
+ "summarize": "[summarize]",
+ "fix_grammar": "[fix_grammar]",
+ "fix_translate": "[fix_translate]",
+ "explain": "[explain]",
+}
+
+DEFAULT_SYSTEM_PROMPTS = (
+ {
+ "key": "summarize",
+ # Translators: Section header for text refinement prompts in Prompt Manager.
+ "section": _("Refine"),
+ # Translators: Label for the text summarization prompt.
+ "label": _("Summarize"),
+ "prompt": "Summarize the text below in {response_lang}.",
+ },
+ {
+ "key": "fix_grammar",
+ # Translators: Section header for text refinement prompts in Prompt Manager.
+ "section": _("Refine"),
+ # Translators: Label for the grammar correction prompt.
+ "label": _("Fix Grammar"),
+ "prompt": "Fix grammar in the text below. Output ONLY the fixed text.",
+ },
+ {
+ "key": "fix_translate",
+ # Translators: Section header for text refinement prompts in Prompt Manager.
+ "section": _("Refine"),
+ # Translators: Label for the grammar correction and translation prompt.
+ "label": _("Fix Grammar & Translate"),
+ "prompt": "Fix grammar and translate to {target_lang}.{swap_instruction} Output ONLY the result.",
+ },
+ {
+ "key": "explain",
+ # Translators: Section header for text refinement prompts in Prompt Manager.
+ "section": _("Refine"),
+ # Translators: Label for the text explanation prompt.
+ "label": _("Explain"),
+ "prompt": "Explain the text below in {response_lang}.",
+ },
+ {
+ "key": "translate_main",
+ # Translators: Section header for translation-related prompts in Prompt Manager.
+ "section": _("Translation"),
+ # Translators: Label for the smart translation prompt.
+ "label": _("Smart Translation"),
+ "prompt": PROMPT_TRANSLATE.strip(),
+ },
+ {
+ "key": "translate_quick",
+ # Translators: Section header for translation-related prompts in Prompt Manager.
+ "section": _("Translation"),
+ # Translators: Label for the quick translation prompt.
+ "label": _("Quick Translation"),
+ "prompt": "Translate to {target_lang}. Output ONLY translation.",
+ },
+ {
+ "key": "document_chat_system",
+ # Translators: Section header for document-related prompts in Prompt Manager.
+ "section": _("Document"),
+ # Translators: Label for the initial context prompt in document chat.
+ "label": _("Document Chat Context"),
+ "prompt": "STRICTLY Respond in {response_lang}. Use Markdown formatting. Analyze the attached content to answer.",
+ },
+ {
+ "key": "document_chat_ack",
+ # Translators: Section header for advanced/internal prompts in Prompt Manager.
+ "section": _("Advanced"),
+ # Translators: Label for the AI's acknowledgement reply in document chat.
+ "label": _("Document Chat Bootstrap Reply"),
+ "internal": True,
+ "prompt": "Context received. Ready for questions.",
+ },
+ {
+ "key": "vision_navigator_object",
+ # Translators: Section header for image analysis prompts in Prompt Manager.
+ "section": _("Vision"),
+ # Translators: Label for the prompt used to analyze the current navigator object.
+ "label": _("Navigator Object Analysis"),
+ "prompt": (
+ "Analyze this image. Describe the layout, visible text, and UI elements. "
+ "Use Markdown formatting (headings, lists) to organize the description. "
+ "Language: {response_lang}. Ensure the response is strictly in {response_lang}. "
+ "IMPORTANT: Start directly with the description content. Do not add introductory "
+ "sentences like 'Here is the analysis' or 'The image shows'."
+ ),
+ },
+ {
+ "key": "vision_fullscreen",
+ # Translators: Section header for image analysis prompts in Prompt Manager.
+ "section": _("Vision"),
+ # Translators: Label for the prompt used to analyze the entire screen.
+ "label": _("Full Screen Analysis"),
+ "prompt": (
+ "Analyze this image. Describe the layout, visible text, and UI elements. "
+ "Use Markdown formatting (headings, lists) to organize the description. "
+ "Language: {response_lang}. Ensure the response is strictly in {response_lang}. "
+ "IMPORTANT: Start directly with the description content. Do not add introductory "
+ "sentences like 'Here is the analysis' or 'The image shows'."
+ ),
+ },
+ {
+ "key": "vision_followup_context",
+ # Translators: Section header for advanced/internal prompts in Prompt Manager.
+ "section": _("Advanced"),
+ # Translators: Label for the follow-up context in image analysis chat.
+ "label": _("Vision Follow-up Context"),
+ "internal": True,
+ "prompt": "Image Context. Target Language: {response_lang}",
+ },
+ {
+ "key": "vision_followup_suffix",
+ # Translators: Section header for advanced/internal prompts in Prompt Manager.
+ "section": _("Advanced"),
+ # Translators: Label for the rule enforced during image analysis follow-up questions.
+ "label": _("Vision Follow-up Answer Rule"),
+ "internal": True,
+ "prompt": "Answer strictly in {response_lang}",
+ },
+ {
+ "key": "video_analysis",
+ # Translators: Section header for video analysis prompts in Prompt Manager.
+ "section": _("Video"),
+ # Translators: Label for the video content analysis prompt.
+ "label": _("Video Analysis"),
+ "prompt": (
+ "Analyze this video. Provide a detailed description of the visual content and a "
+ "summary of the audio. IMPORTANT: Write the entire response STRICTLY in "
+ "{response_lang} language."
+ ),
+ },
+ {
+ "key": "audio_transcription",
+ # Translators: Section header for audio-related prompts in Prompt Manager.
+ "section": _("Audio"),
+ # Translators: Label for the audio file transcription prompt.
+ "label": _("Audio Transcription"),
+ "prompt": "Transcribe this audio in {response_lang}.",
+ },
+ {
+ "key": "dictation_transcribe",
+ # Translators: Section header for audio-related prompts in Prompt Manager.
+ "section": _("Audio"),
+ # Translators: Label for the smart voice dictation prompt.
+ "label": _("Smart Dictation"),
+ "prompt": (
+ "Transcribe speech. Use native script. Fix stutters. If there is no speech, silence, "
+ "or background noise only, write exactly: [[[NOSPEECH]]]"
+ ),
+ },
+ {
+ "key": "ocr_image_extract",
+ # Translators: Section header for OCR-related prompts in Prompt Manager.
+ "section": _("OCR"),
+ # Translators: Label for the OCR prompt used for image text extraction.
+ "label": _("OCR Image Extraction"),
+ "prompt": (
+ "Extract all visible text from this image. Strictly preserve original formatting "
+ "(headings, lists, tables) using Markdown. Do not output any system messages or "
+ "code block backticks (```). Output ONLY the raw content."
+ ),
+ },
+ {
+ "key": "ocr_document_extract",
+ # Translators: Section header for OCR-related prompts in Prompt Manager.
+ "section": _("OCR"),
+ # Translators: Label for the OCR prompt used for document text extraction.
+ "label": _("OCR Document Extraction"),
+ "prompt": (
+ "Extract all visible text from this document. Strictly preserve original formatting "
+ "(headings, lists, tables) using Markdown. You MUST insert the exact delimiter "
+ "'[[[PAGE_SEP]]]' immediately after the content of every single page. Do not output "
+ "any system messages or code block backticks (```). Output ONLY the raw content."
+ ),
+ },
+ {
+ "key": "ocr_document_translate",
+ # Translators: Section header for document-related prompts in Prompt Manager.
+ "section": _("Document"),
+ # Translators: Label for the combined OCR and translation prompt for documents.
+ "label": _("Document OCR + Translate"),
+ "prompt": (
+ "Extract all text from this document. Preserve formatting (Markdown). Then translate "
+ "the content to {target_lang}. Output ONLY the translated content. Do not add "
+ "explanations."
+ ),
+ },
+ {
+ "key": "captcha_solver_base",
+ # Translators: Section header for CAPTCHA-related prompts in Prompt Manager.
+ "section": _("CAPTCHA"),
+ # Translators: Label for the CAPTCHA solving prompt.
+ "label": _("CAPTCHA Solver"),
+ "internal": True,
+ "prompt": (
+ "Blind user. Return CAPTCHA code only. If NO CAPTCHA is detected in the image, "
+ "strictly return: [[[NO_CAPTCHA]]].{captcha_extra}"
+ ),
+ },
+ {
+ "key": "refine_files_only",
+ # Translators: Section header for advanced/internal prompts in Prompt Manager.
+ "section": _("Advanced"),
+ # Translators: Label for the fallback prompt when only files are provided in Refine.
+ "label": _("Refine Files-Only Fallback"),
+ "internal": True,
+ "prompt": "Analyze these files.",
+ },
+)
+
+PROMPT_VARIABLES_GUIDE = (
+ # Translators: Description and input type for the [selection] variable in the Variables Guide.
+ ("[selection]", _("Currently selected text"), _("Text")),
+ # Translators: Description for the [clipboard] variable in the Variables Guide.
+ ("[clipboard]", _("Clipboard content"), _("Text")),
+ # Translators: Description and input type for the [screen_obj] variable in the Variables Guide.
+ ("[screen_obj]", _("Screenshot of the navigator object"), _("Image")),
+ # Translators: Description for the [screen_full] variable in the Variables Guide.
+ ("[screen_full]", _("Screenshot of the entire screen"), _("Image")),
+ # Translators: Description and input type for the [file_ocr] variable in the Variables Guide.
+ ("[file_ocr]", _("Select image/PDF/TIFF for text extraction"), _("Image, PDF, TIFF")),
+ # Translators: Description and input type for the [file_read] variable in the Variables Guide.
+ ("[file_read]", _("Select document for reading"), _("TXT, Code, PDF")),
+ # Translators: Description and input type for the [file_audio] variable in the Variables Guide.
+ ("[file_audio]", _("Select audio file for analysis"), _("MP3, WAV, OGG")),
+)
diff --git a/addon/globalPlugins/visionAssistant/dialogs.py b/addon/globalPlugins/visionAssistant/dialogs.py
new file mode 100644
index 0000000..8d2e6fd
--- /dev/null
+++ b/addon/globalPlugins/visionAssistant/dialogs.py
@@ -0,0 +1,1014 @@
+# -*- coding: utf-8 -*-
+
+import os
+import json
+import io
+import tempfile
+import threading
+import time
+import gc
+import wave
+import logging
+import base64
+from concurrent.futures import ThreadPoolExecutor
+
+import wx
+
+import addonHandler
+import config
+import gui
+import ui
+import api
+import textInfos
+import tones
+import scriptHandler
+
+from .prompt_manager_dialog import PromptManagerDialog
+from .constants import (
+ ADDON_NAME,
+ GEMINI_VOICES,
+ MODELS,
+ OCR_ENGINES,
+ PROMPT_VARIABLES_GUIDE,
+ SOURCE_NAMES,
+ TARGET_NAMES,
+)
+from .markdown_utils import clean_markdown, markdown_to_html
+from .prompt_helpers import (
+ get_configured_default_prompts,
+ load_configured_custom_prompts,
+ serialize_custom_prompts_v2,
+ serialize_default_prompt_overrides,
+)
+from .services import (
+ ChromeOCREngine,
+ GeminiHandler,
+ GoogleTranslator,
+ SmartProgrammersOCREngine,
+ get_file_path,
+ get_mime_type,
+ show_error_dialog,
+)
+
+try:
+ import fitz
+except ImportError:
+ fitz = None
+
+log = logging.getLogger(__name__)
+addonHandler.initTranslation()
+_vision_assistant_instance = None
+
+
+def set_vision_assistant_instance(instance):
+ global _vision_assistant_instance
+ _vision_assistant_instance = instance
+
+class VisionQADialog(wx.Dialog):
+ def __init__(self, parent, title, initial_text, context_data, callback_fn, extra_info=None, raw_content=None, status_callback=None, announce_on_open=True, allow_questions=True):
+ super(VisionQADialog, self).__init__(parent, title=title, size=(550, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER)
+ self.context_data = context_data
+ self.callback_fn = callback_fn
+ self.extra_info = extra_info
+ self.chat_history = []
+ self.raw_content = raw_content
+ self.status_callback = status_callback
+ self.announce_on_open = announce_on_open
+ self.allow_questions = allow_questions
+
+ mainSizer = wx.BoxSizer(wx.VERTICAL)
+ # Translators: Label for the AI response text area in a chat dialog
+ lbl_text = _("AI Response:")
+ lbl = wx.StaticText(self, label=lbl_text)
+ mainSizer.Add(lbl, 0, wx.ALL, 5)
+ self.outputArea = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY)
+ mainSizer.Add(self.outputArea, 1, wx.EXPAND | wx.ALL, 5)
+
+ self.should_clean = config.conf["VisionAssistant"]["clean_markdown_chat"]
+ display_text = clean_markdown(initial_text) if self.should_clean else initial_text
+ if display_text:
+ # Translators: Format for displaying AI message in a chat dialog
+ init_msg = _("AI: {text}\n").format(text=display_text)
+ self.outputArea.AppendText(init_msg)
+ if config.conf["VisionAssistant"]["copy_to_clipboard"]:
+ api.copyToClip(raw_content if raw_content else display_text)
+
+ if not (extra_info and extra_info.get('skip_init_history')):
+ self.chat_history.append({"role": "model", "parts": [{"text": initial_text}]})
+
+ self.inputArea = None
+ if allow_questions:
+ # Translators: Label for user input field in a chat dialog
+ ask_text = _("Ask:")
+ inputLbl = wx.StaticText(self, label=ask_text)
+ mainSizer.Add(inputLbl, 0, wx.ALL, 5)
+ self.inputArea = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30))
+ mainSizer.Add(self.inputArea, 0, wx.EXPAND | wx.ALL, 5)
+
+ btnSizer = wx.BoxSizer(wx.HORIZONTAL)
+ self.askBtn = None
+ if allow_questions:
+ # Translators: Button to send message in a chat dialog
+ self.askBtn = wx.Button(self, label=_("Send"))
+ # Translators: Button to view the content in a formatted HTML window
+ self.viewBtn = wx.Button(self, label=_("View Formatted"))
+ self.viewBtn.Bind(wx.EVT_BUTTON, self.onView)
+ # Translators: Button to save only the result content without chat history
+ self.saveContentBtn = wx.Button(self, label=_("Save Content"))
+ self.saveContentBtn.Bind(wx.EVT_BUTTON, self.onSaveContent)
+ # Translators: Button to save chat in a chat dialog
+ self.saveBtn = wx.Button(self, label=_("Save Chat"))
+ # Translators: Button to close chat dialog
+ self.closeBtn = wx.Button(self, wx.ID_CANCEL, label=_("Close"))
+
+ self.saveBtn.Enable(bool(initial_text.strip()))
+ self.viewBtn.Enable(bool(self.raw_content))
+ self.saveContentBtn.Enable(bool(self.raw_content))
+
+ if self.askBtn:
+ btnSizer.Add(self.askBtn, 0, wx.ALL, 5)
+ btnSizer.Add(self.viewBtn, 0, wx.ALL, 5)
+ btnSizer.Add(self.saveContentBtn, 0, wx.ALL, 5)
+ btnSizer.Add(self.saveBtn, 0, wx.ALL, 5)
+ btnSizer.Add(self.closeBtn, 0, wx.ALL, 5)
+ mainSizer.Add(btnSizer, 0, wx.ALIGN_RIGHT)
+
+ self.SetSizer(mainSizer)
+ if self.inputArea:
+ self.inputArea.SetFocus()
+ else:
+ self.outputArea.SetFocus()
+ if self.askBtn:
+ self.askBtn.Bind(wx.EVT_BUTTON, self.onAsk)
+ self.saveBtn.Bind(wx.EVT_BUTTON, self.onSave)
+ if self.inputArea:
+ self.inputArea.Bind(wx.EVT_TEXT_ENTER, self.onAsk)
+ if display_text and self.announce_on_open:
+ wx.CallLater(300, ui.message, display_text)
+
+ def onAsk(self, event):
+ if not self.inputArea:
+ return
+ question = self.inputArea.Value
+ if not question.strip(): return
+ # Translators: Format for displaying User message in a chat dialog
+ user_msg = _("\nYou: {text}\n").format(text=question)
+ self.outputArea.AppendText(user_msg)
+ self.inputArea.Clear()
+ # Translators: Message shown while processing in a chat dialog
+ msg = _("Thinking...")
+ ui.message(msg)
+ threading.Thread(target=self.process_question, args=(question,), daemon=True).start()
+
+ def process_question(self, question):
+ result_tuple = self.callback_fn(self.context_data, question, self.chat_history, self.extra_info)
+ response_text, _ = result_tuple
+ if response_text:
+ if not (self.extra_info and self.extra_info.get('file_context')):
+ self.chat_history.append({"role": "user", "parts": [{"text": question}]})
+ self.chat_history.append({"role": "model", "parts": [{"text": response_text}]})
+ final_text = clean_markdown(response_text) if self.should_clean else response_text
+ wx.CallAfter(self.update_response, final_text, response_text)
+
+ def update_response(self, display_text, raw_text=None):
+ if raw_text:
+ self.raw_content = raw_text
+ self.viewBtn.Enable(True)
+ self.saveContentBtn.Enable(True)
+ # Translators: Format for displaying AI message in a chat dialog
+ ai_msg = _("AI: {text}\n").format(text=display_text)
+ self.outputArea.AppendText(ai_msg)
+ self.saveBtn.Enable(True)
+ if config.conf["VisionAssistant"]["copy_to_clipboard"]:
+ api.copyToClip(raw_text if raw_text else display_text)
+ self.outputArea.ShowPosition(self.outputArea.GetLastPosition())
+ ui.message(display_text)
+
+ def report_save(self, msg):
+ if self.status_callback: self.status_callback(msg)
+ else: ui.message(msg)
+
+ def onView(self, event):
+ full_html = ""
+ # Translators: Format for displaying User message in a chat dialog
+ user_label = _("\nYou: {text}\n").format(text="").strip()
+ # Translators: Format for displaying AI message in a chat dialog
+ ai_label = _("AI: {text}\n").format(text="").strip()
+
+ if self.chat_history:
+ for item in self.chat_history:
+ role = item.get("role", "")
+ text = item.get("parts", [{}])[0].get("text", "")
+ if role == "user":
+ safe_text = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+ full_html += f"<h3>{user_label}</h3>\n<p>{safe_text}</p>\n"
+ elif role == "model":
+ formatted_text = markdown_to_html(text, full_page=False)
+ full_html += f"<h3>{ai_label}</h3>\n{formatted_text}\n"
+
+ if not full_html and self.raw_content:
+ formatted_text = markdown_to_html(self.raw_content, full_page=False)
+ full_html += f"<h3>{ai_label}</h3>\n{formatted_text}"
+
+ if not full_html: return
+ try:
+ # Translators: Title of the formatted result window
+ ui.browseableMessage(full_html, _("Formatted Conversation"), isHtml=True)
+ except Exception as e:
+ # Translators: Error message if viewing fails
+ msg = _("Error displaying content: {error}").format(error=e)
+ show_error_dialog(msg)
+
+ def onSave(self, event):
+ # Translators: Save dialog title
+ path = get_file_path(_("Save Chat Log"), "Text files (*.txt)|*.txt", mode="save")
+ if path:
+ try:
+ with open(path, "w", encoding="utf-8") as f: f.write(self.outputArea.GetValue())
+ # Translators: Message shown on successful save of a file.
+ self.report_save(_("Saved."))
+ except Exception as e:
+ # Translators: Message in the error dialog when saving fails.
+ msg = _("Save failed: {error}").format(error=e)
+ show_error_dialog(msg)
+
+ def onSaveContent(self, event):
+ # Translators: Save dialog title
+ path = get_file_path(_("Save Result"), "HTML files (*.html)|*.html", mode="save")
+ if path:
+ try:
+ full_html = markdown_to_html(self.raw_content, full_page=True)
+ with open(path, "w", encoding="utf-8") as f: f.write(full_html)
+ # Translators: Message on successful save
+ self.report_save(_("Saved."))
+ except Exception as e:
+ # Translators: Message in the error dialog when saving fails.
+ msg = _("Save failed: {error}").format(error=e)
+ show_error_dialog(msg)
+
+class SettingsPanel(gui.settingsDialogs.SettingsPanel):
+ title = ADDON_NAME
+ def makeSettings(self, settingsSizer):
+ # --- Connection Group ---
+ # Translators: Title of the settings group for connection and updates
+ groupLabel = _("Connection")
+ self.connectionBox = wx.StaticBox(self, label=groupLabel)
+ connectionSizer = wx.StaticBoxSizer(self.connectionBox, wx.VERTICAL)
+ cHelper = gui.guiHelper.BoxSizerHelper(self.connectionBox, sizer=connectionSizer)
+
+ # Translators: Label for API Key input
+ apiLabel = wx.StaticText(self.connectionBox, label=_("Gemini API Key (Separate multiple keys with comma or newline):"))
+ cHelper.addItem(apiLabel)
+
+ api_value = config.conf["VisionAssistant"]["api_key"]
+
+ self.apiKeyCtrl_hidden = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_PASSWORD, size=(-1, -1))
+
+ self.apiKeyCtrl_visible = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_MULTILINE | wx.TE_DONTWRAP, size=(-1, 60))
+ self.apiKeyCtrl_visible.Hide()
+
+ cHelper.addItem(self.apiKeyCtrl_hidden)
+ cHelper.addItem(self.apiKeyCtrl_visible)
+
+ # Translators: Checkbox to toggle API Key visibility
+ self.showApiCheck = wx.CheckBox(self.connectionBox, label=_("Show API Key"))
+ self.showApiCheck.Bind(wx.EVT_CHECKBOX, self.onToggleApiVisibility)
+ cHelper.addItem(self.showApiCheck)
+
+ model_display_names = [opt[0] for opt in MODELS]
+ # Translators: Label for Model selection
+ self.model = cHelper.addLabeledControl(_("AI Model:"), wx.Choice, choices=model_display_names)
+ current_id = config.conf["VisionAssistant"]["model_name"]
+ try:
+ index = next(i for i, v in enumerate(MODELS) if v[1] == current_id)
+ self.model.SetSelection(index)
+ except StopIteration: self.model.SetSelection(0)
+
+ # Translators: Label for Proxy URL input
+ self.proxyUrl = cHelper.addLabeledControl(_("Proxy URL:"), wx.TextCtrl)
+ self.proxyUrl.Value = config.conf["VisionAssistant"]["proxy_url"]
+
+ # Translators: Checkbox to enable/disable automatic update checks on NVDA startup
+ self.checkUpdateStartup = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Check for updates on startup")))
+ self.checkUpdateStartup.Value = config.conf["VisionAssistant"]["check_update_startup"]
+ # Translators: Checkbox to toggle markdown cleaning in chat windows
+ self.cleanMarkdown = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Clean Markdown in Chat")))
+ self.cleanMarkdown.Value = config.conf["VisionAssistant"]["clean_markdown_chat"]
+ # Translators: Checkbox to enable copying AI responses to clipboard
+ self.copyToClipboard = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Copy AI responses to clipboard")))
+ self.copyToClipboard.Value = config.conf["VisionAssistant"]["copy_to_clipboard"]
+ # Translators: Checkbox to skip chat window and only speak AI responses
+ self.skipChatDialog = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Direct Output (No Chat Window)")))
+ self.skipChatDialog.Value = config.conf["VisionAssistant"]["skip_chat_dialog"]
+ settingsSizer.Add(connectionSizer, 0, wx.EXPAND | wx.ALL, 5)
+
+ # --- Translation Languages Group ---
+ # Translators: Title of the settings group for translation languages configuration
+ groupLabel = _("Translation Languages")
+ langBox = wx.StaticBox(self, label=groupLabel)
+ langSizer = wx.StaticBoxSizer(langBox, wx.VERTICAL)
+ lHelper = gui.guiHelper.BoxSizerHelper(langBox, sizer=langSizer)
+
+ # Translators: Label for Source Language selection
+ self.sourceLang = lHelper.addLabeledControl(_("Source:"), wx.Choice, choices=SOURCE_NAMES)
+ try: self.sourceLang.SetSelection(SOURCE_NAMES.index(config.conf["VisionAssistant"]["source_language"]))
+ except: self.sourceLang.SetSelection(0)
+
+ # Translators: Label for Target Language selection
+ self.targetLang = lHelper.addLabeledControl(_("Target:"), wx.Choice, choices=TARGET_NAMES)
+ try: self.targetLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["target_language"]))
+ except: self.targetLang.SetSelection(0)
+
+ # Translators: Label for AI Response Language selection
+ self.aiResponseLang = lHelper.addLabeledControl(_("AI Response:"), wx.Choice, choices=TARGET_NAMES)
+ try: self.aiResponseLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["ai_response_language"]))
+ except: self.aiResponseLang.SetSelection(0)
+
+ # Translators: Checkbox for Smart Swap feature
+ self.smartSwap = lHelper.addItem(wx.CheckBox(langBox, label=_("Smart Swap")))
+ self.smartSwap.Value = config.conf["VisionAssistant"]["smart_swap"]
+ settingsSizer.Add(langSizer, 0, wx.EXPAND | wx.ALL, 5)
+
+ # --- Document Reader Settings ---
+ # Translators: Title of settings group for Document Reader features
+ groupLabel = _("Document Reader")
+ docBox = wx.StaticBox(self, label=groupLabel)
+ docSizer = wx.StaticBoxSizer(docBox, wx.VERTICAL)
+ dHelper = gui.guiHelper.BoxSizerHelper(docBox, sizer=docSizer)
+
+ # Translators: Label for OCR Engine selection
+ self.ocr_sel = dHelper.addLabeledControl(_("OCR Engine:"), wx.Choice, choices=[x[0] for x in OCR_ENGINES])
+ curr_ocr = config.conf["VisionAssistant"]["ocr_engine"]
+ try:
+ o_idx = next(i for i, v in enumerate(OCR_ENGINES) if v[1] == curr_ocr)
+ self.ocr_sel.SetSelection(o_idx)
+ except: self.ocr_sel.SetSelection(0)
+
+ voice_choices = [f"{v[0]} - {v[1]}" for v in GEMINI_VOICES]
+ # Translators: Label for TTS Voice selection
+ self.voice_sel = dHelper.addLabeledControl(_("TTS Voice:"), wx.Choice, choices=voice_choices)
+ curr_voice = config.conf["VisionAssistant"]["tts_voice"]
+ try:
+ v_idx = next(i for i, v in enumerate(GEMINI_VOICES) if v[0] == curr_voice)
+ self.voice_sel.SetSelection(v_idx)
+ except: self.voice_sel.SetSelection(1)
+ settingsSizer.Add(docSizer, 0, wx.EXPAND | wx.ALL, 5)
+
+ # --- CAPTCHA Group ---
+ # Translators: Title of the settings group for CAPTCHA options
+ groupLabel = _("CAPTCHA")
+ capBox = wx.StaticBox(self, label=groupLabel)
+ capSizer = wx.StaticBoxSizer(capBox, wx.VERTICAL)
+ capHelper = gui.guiHelper.BoxSizerHelper(capBox, sizer=capSizer)
+ # Translators: Label for CAPTCHA capture method selection
+ self.captchaMode = capHelper.addLabeledControl(_("Capture Method:"), wx.Choice, choices=[
+ # Translators: A choice for capture method. Captures only the specific object under the NVDA navigator cursor.
+ _("Navigator Object"),
+ # Translators: A choice for capture method. Captures the entire visible screen area.
+ _("Full Screen")
+ ])
+ self.captchaMode.SetSelection(0 if config.conf["VisionAssistant"]["captcha_mode"] == 'navigator' else 1)
+ settingsSizer.Add(capSizer, 0, wx.EXPAND | wx.ALL, 5)
+
+ self.defaultPromptItems = get_configured_default_prompts()
+ self.customPromptItems = load_configured_custom_prompts()
+
+ # --- Prompt Manager Group ---
+ # Translators: Title of the settings group for prompt management
+ groupLabel = _("Prompts")
+ promptsBox = wx.StaticBox(self, label=groupLabel)
+ promptsSizer = wx.StaticBoxSizer(promptsBox, wx.VERTICAL)
+ pHelper = gui.guiHelper.BoxSizerHelper(promptsBox, sizer=promptsSizer)
+ # Translators: Description for the prompt manager button.
+ pHelper.addItem(wx.StaticText(promptsBox, label=_("Manage default and custom prompts.")))
+ # Translators: Button label to open prompt manager dialog.
+ self.managePromptsBtn = wx.Button(promptsBox, label=_("Manage Prompts..."))
+ self.managePromptsBtn.Bind(wx.EVT_BUTTON, self.onManagePrompts)
+ pHelper.addItem(self.managePromptsBtn)
+ self.promptsSummary = wx.StaticText(promptsBox)
+ pHelper.addItem(self.promptsSummary)
+ self._refreshPromptSummary()
+ settingsSizer.Add(promptsSizer, 0, wx.EXPAND | wx.ALL, 5)
+
def _refreshPromptSummary(self):
    """Update the static text showing how many default/custom prompts exist."""
    defaultTotal = len(self.defaultPromptItems)
    customTotal = len(self.customPromptItems)
    # Translators: Summary text for prompt counts in settings.
    template = _("Default prompts: {defaultCount}, Custom prompts: {customCount}")
    self.promptsSummary.SetLabel(
        template.format(defaultCount=defaultTotal, customCount=customTotal)
    )
+
def onManagePrompts(self, event):
    """Open the prompt manager dialog and apply changes the user confirmed."""
    parentWindow = wx.GetTopLevelParent(self)
    manager = PromptManagerDialog(
        self,
        self.defaultPromptItems,
        self.customPromptItems,
        PROMPT_VARIABLES_GUIDE,
    )
    try:
        # Copy the edited prompt lists back onto the panel only on OK.
        if manager.ShowModal() == wx.ID_OK:
            self.defaultPromptItems = manager.get_default_items()
            self.customPromptItems = manager.get_custom_items()
            self._refreshPromptSummary()
    finally:
        manager.Destroy()
    if parentWindow:
        # Re-enable and refocus the settings window after the modal closes.
        parentWindow.Enable(True)
        parentWindow.SetFocus()
+
def onToggleApiVisibility(self, event):
    """Swap between the masked and plain-text API key fields, keeping them in sync."""
    showPlain = self.showApiCheck.IsChecked()
    # Copy the current value from the visible control to the one being shown.
    source = self.apiKeyCtrl_hidden if showPlain else self.apiKeyCtrl_visible
    target = self.apiKeyCtrl_visible if showPlain else self.apiKeyCtrl_hidden
    target.SetValue(source.GetValue())
    source.Hide()
    target.Show()

    # Re-layout so the newly shown control occupies the hidden one's slot.
    self.connectionBox.GetParent().Layout()
+
def onSave(self):
    """Persist every control on the settings panel into the add-on config section."""
    # Read the API key from whichever text field is currently shown.
    val = self.apiKeyCtrl_visible.GetValue() if self.showApiCheck.IsChecked() else self.apiKeyCtrl_hidden.GetValue()
    config.conf["VisionAssistant"]["api_key"] = val.strip()
    # Connection / model settings.
    config.conf["VisionAssistant"]["model_name"] = MODELS[self.model.GetSelection()][1]
    config.conf["VisionAssistant"]["proxy_url"] = self.proxyUrl.Value.strip()
    # Language settings.
    config.conf["VisionAssistant"]["source_language"] = SOURCE_NAMES[self.sourceLang.GetSelection()]
    config.conf["VisionAssistant"]["target_language"] = TARGET_NAMES[self.targetLang.GetSelection()]
    config.conf["VisionAssistant"]["ai_response_language"] = TARGET_NAMES[self.aiResponseLang.GetSelection()]
    config.conf["VisionAssistant"]["smart_swap"] = self.smartSwap.Value
    # General behaviour flags.
    config.conf["VisionAssistant"]["check_update_startup"] = self.checkUpdateStartup.Value
    config.conf["VisionAssistant"]["clean_markdown_chat"] = self.cleanMarkdown.Value
    config.conf["VisionAssistant"]["copy_to_clipboard"] = self.copyToClipboard.Value
    config.conf["VisionAssistant"]["skip_chat_dialog"] = self.skipChatDialog.Value
    config.conf["VisionAssistant"]["captcha_mode"] = 'navigator' if self.captchaMode.GetSelection() == 0 else 'fullscreen'
    # Prompt storage: v2 JSON replaces the legacy string format, which is cleared.
    config.conf["VisionAssistant"]["custom_prompts_v2"] = serialize_custom_prompts_v2(self.customPromptItems)
    config.conf["VisionAssistant"]["custom_prompts"] = ""
    config.conf["VisionAssistant"]["default_refine_prompts"] = serialize_default_prompt_overrides(self.defaultPromptItems)
    # Document Reader settings.
    config.conf["VisionAssistant"]["ocr_engine"] = OCR_ENGINES[self.ocr_sel.GetSelection()][1]
    config.conf["VisionAssistant"]["tts_voice"] = GEMINI_VOICES[self.voice_sel.GetSelection()][0]
+
class RangeDialog(wx.Dialog):
    """Pre-processing options dialog: page range plus optional translation.

    Shown before the Document Reader opens; the caller reads the chosen
    options via get_settings().
    """

    def __init__(self, parent, total_pages):
        """``total_pages`` is the combined page count of all selected files."""
        # Translators: Title of the PDF options dialog
        super().__init__(parent, title=_("Options"), size=(350, 320))
        sizer = wx.BoxSizer(wx.VERTICAL)
        # Translators: Label showing total pages found
        sizer.Add(wx.StaticText(self, label=_("Total Pages (All Files): {count}").format(count=total_pages)), 0, wx.ALL, 10)

        # Translators: Box title for page range selection
        box_range = wx.StaticBoxSizer(wx.VERTICAL, self, _("Range"))
        g_sizer = wx.FlexGridSizer(2, 2, 10, 10)
        # Translators: Label for start page
        g_sizer.Add(wx.StaticText(self, label=_("From:")), 0, wx.ALIGN_CENTER_VERTICAL)
        self.spin_from = wx.SpinCtrl(self, min=1, max=total_pages, initial=1)
        g_sizer.Add(self.spin_from, 1, wx.EXPAND)
        # Translators: Label for end page
        g_sizer.Add(wx.StaticText(self, label=_("To:")), 0, wx.ALIGN_CENTER_VERTICAL)
        self.spin_to = wx.SpinCtrl(self, min=1, max=total_pages, initial=total_pages)
        g_sizer.Add(self.spin_to, 1, wx.EXPAND)
        box_range.Add(g_sizer, 1, wx.EXPAND | wx.ALL, 5)
        sizer.Add(box_range, 0, wx.EXPAND | wx.ALL, 10)

        # Translators: Box title for translation options
        box_trans = wx.StaticBoxSizer(wx.VERTICAL, self, _("Translation"))
        # Translators: Checkbox to enable translation
        self.chk_trans = wx.CheckBox(self, label=_("Translate Output"))
        box_trans.Add(self.chk_trans, 0, wx.ALL, 5)
        h_sizer = wx.BoxSizer(wx.HORIZONTAL)
        # Translators: Label for target language
        h_sizer.Add(wx.StaticText(self, label=_("Target:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
        self.cmb_lang = wx.Choice(self, choices=TARGET_NAMES)
        self.cmb_lang.SetSelection(0)
        h_sizer.Add(self.cmb_lang, 1)
        box_trans.Add(h_sizer, 1, wx.EXPAND | wx.ALL, 5)
        sizer.Add(box_trans, 0, wx.EXPAND | wx.ALL, 10)

        btn_sizer = wx.BoxSizer(wx.HORIZONTAL)
        # Translators: Button to start processing
        btn_ok = wx.Button(self, wx.ID_OK, label=_("Start"))
        btn_ok.SetDefault()
        # Translators: Button to cancel
        btn_cancel = wx.Button(self, wx.ID_CANCEL, label=_("Cancel"))
        btn_sizer.Add(btn_ok, 0, wx.RIGHT, 10)
        btn_sizer.Add(btn_cancel, 0)
        sizer.Add(btn_sizer, 0, wx.ALIGN_CENTER | wx.ALL, 10)
        self.SetSizer(sizer)

        self.chk_trans.Bind(wx.EVT_CHECKBOX, self.on_check)
        # The language choice is only meaningful when translation is enabled.
        self.cmb_lang.Disable()

    def on_check(self, event):
        """Enable the target-language choice only while translation is checked."""
        self.cmb_lang.Enable(self.chk_trans.IsChecked())

    def get_settings(self):
        """Return the chosen options as a dict.

        Keys: 'start'/'end' (0-based page indices), 'translate' (bool) and
        'lang' (target language name).

        Bug fix: the two independent spin controls allowed "From" to exceed
        "To", which previously produced an inverted (negative-length) range
        downstream; the bounds are now swapped so start <= end always holds.
        """
        start = self.spin_from.GetValue() - 1
        end = self.spin_to.GetValue() - 1
        if end < start:
            start, end = end, start
        return {
            'start': start,
            'end': end,
            'translate': self.chk_trans.IsChecked(),
            'lang': TARGET_NAMES[self.cmb_lang.GetSelection()]
        }
+
class ChatDialog(wx.Dialog):
    """Chat window for asking Gemini questions about an uploaded document.

    Only one instance may exist at a time; it is tracked via the
    class-level ``instance`` attribute so callers can raise it instead of
    opening a second dialog.
    """

    instance = None

    def __init__(self, parent, file_path):
        """Create the chat UI and start uploading ``file_path`` in the background."""
        # Translators: Title of the chat dialog
        super().__init__(parent, title=_("Ask about Document"), size=(600, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER)
        ChatDialog.instance = self
        self.file_path = file_path
        self.file_uri = None  # Set once the background upload succeeds.
        self.mime_type = get_mime_type(file_path)
        self.history = []  # Gemini chat history: alternating user/model parts.

        sizer = wx.BoxSizer(wx.VERTICAL)
        # Translators: Label showing the analyzed file name
        lbl_info = wx.StaticText(self, label=_("File: {name}").format(name=os.path.basename(file_path)))
        sizer.Add(lbl_info, 0, wx.ALL, 5)
        self.display = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2)
        sizer.Add(self.display, 1, wx.EXPAND | wx.ALL, 10)
        # Translators: Status message while uploading
        self.display.SetValue(_("Uploading to Gemini...\n"))

        input_sizer = wx.BoxSizer(wx.HORIZONTAL)
        # Translators: Label for the chat input field
        input_sizer.Add(wx.StaticText(self, label=_("Your Question:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
        self.input = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30))
        self.input.Bind(wx.EVT_TEXT_ENTER, self.on_send)
        input_sizer.Add(self.input, 1, wx.EXPAND | wx.RIGHT, 5)

        # Translators: Button to send message
        self.btn_send = wx.Button(self, label=_("Send"))
        self.btn_send.Bind(wx.EVT_BUTTON, self.on_send)
        # Disabled until the upload completes.
        self.btn_send.Disable()
        input_sizer.Add(self.btn_send, 0)
        sizer.Add(input_sizer, 0, wx.EXPAND | wx.ALL, 10)
        self.SetSizer(sizer)
        self.Bind(wx.EVT_CLOSE, self.on_close)
        threading.Thread(target=self.init_upload, daemon=True).start()

    def on_close(self, event):
        """Clear the singleton reference and destroy the window."""
        ChatDialog.instance = None
        self.Destroy()

    def init_upload(self):
        """Worker thread: upload the document, then enable chat (or close on error)."""
        uri = GeminiHandler.upload_for_chat(self.file_path, self.mime_type)
        if uri and not str(uri).startswith("ERROR:"):
            self.file_uri = uri
            wx.CallAfter(self.on_ready)
        else:
            # Strip the "ERROR:" prefix (6 characters) before showing.
            err_msg = str(uri)[6:] if uri else _("Upload failed.")
            wx.CallAfter(show_error_dialog, err_msg)
            wx.CallAfter(self.Close)

    def on_ready(self):
        """Called on the GUI thread once the upload has finished."""
        # Translators: Message when ready to chat
        self.display.AppendText(_("Ready! Ask your questions.\n"))
        self.btn_send.Enable()
        self.input.SetFocus()

    def on_send(self, event):
        """Echo the user's question and dispatch it to a worker thread."""
        msg = self.input.GetValue().strip()
        if not msg: return
        self.input.Clear()
        self.display.AppendText(f"You: {msg}\n")
        # Translators: Message showing AI is thinking
        ui.message(_("Thinking..."))
        threading.Thread(target=self.do_chat, args=(msg,), daemon=True).start()

    def do_chat(self, msg):
        """Worker thread: send the question to Gemini and report the reply.

        Bug fix: the error dialog was previously shown directly from this
        worker thread; GUI work must be marshalled to the main thread with
        wx.CallAfter, as every other thread path in this module does.
        """
        resp = GeminiHandler.chat(self.history, msg, self.file_uri, self.mime_type)

        if str(resp).startswith("ERROR:"):
            wx.CallAfter(show_error_dialog, resp[6:])
            if _vision_assistant_instance:
                # Translators: Initial status when the add-on is doing nothing
                _vision_assistant_instance.current_status = _("Idle")
            return

        # Record both turns so follow-up questions keep the conversation context.
        self.history.append({"role": "user", "parts": [{"text": msg}]})
        self.history.append({"role": "model", "parts": [{"text": resp}]})
        wx.CallAfter(self.display.AppendText, f"AI: {resp}\n\n")
        # Translators: Spoken prefix for AI response
        wx.CallAfter(ui.message, _("AI: ") + resp)
+
class DocumentViewerDialog(wx.Dialog):
    """Main Document Reader window.

    Shows OCR'd (and optionally translated) text one page at a time, with
    navigation, Gemini re-scan, document chat, text-to-speech and save
    facilities.  Page text is produced by background threads and collected
    in ``self.page_cache`` (absolute 0-based page number -> text).
    """

    def __init__(self, parent, virtual_doc, settings):
        """``virtual_doc`` spans one or more PDFs; ``settings`` is the dict
        produced by RangeDialog.get_settings() (0-based 'start'/'end',
        'translate' flag, target 'lang')."""
        # Translators: Title of the Document Reader window.
        title_text = f"{ADDON_NAME} - {_('Document Reader')}"
        super().__init__(parent, title=title_text, size=(800, 600), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER | wx.MAXIMIZE_BOX)
        self.v_doc = virtual_doc
        self.start_page = settings['start']
        self.end_page = settings['end']
        self.do_translate = settings['translate']
        self.target_lang = settings['lang']
        self.range_count = self.end_page - self.start_page + 1
        # Absolute page number -> recognized text; filled by worker threads.
        self.page_cache = {}
        self.current_page = self.start_page
        # Up to 5 pages are OCR'd concurrently by the per-page engines.
        self.thread_pool = ThreadPoolExecutor(max_workers=5)

        self.init_ui()
        self.Centre()
        threading.Thread(target=self.start_auto_processing, daemon=True).start()

    def init_ui(self):
        """Build the layout: status line, text area, navigation row, action
        row, close button and keyboard accelerators."""
        panel = wx.Panel(self)
        vbox = wx.BoxSizer(wx.VERTICAL)
        # Translators: Initial status message
        self.lbl_status = wx.StaticText(panel, label=_("Initializing..."))
        vbox.Add(self.lbl_status, 0, wx.ALL, 5)
        self.txt_content = wx.TextCtrl(panel, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2)
        vbox.Add(self.txt_content, 1, wx.EXPAND | wx.LEFT | wx.RIGHT, 10)
        hbox_nav = wx.BoxSizer(wx.HORIZONTAL)
        # Translators: Button to go to previous page
        self.btn_prev = wx.Button(panel, label=_("Previous (Ctrl+PageUp)"))
        self.btn_prev.Bind(wx.EVT_BUTTON, self.on_prev)
        hbox_nav.Add(self.btn_prev, 0, wx.RIGHT, 5)
        # Translators: Button to go to next page
        self.btn_next = wx.Button(panel, label=_("Next (Ctrl+PageDown)"))
        self.btn_next.Bind(wx.EVT_BUTTON, self.on_next)
        hbox_nav.Add(self.btn_next, 0, wx.RIGHT, 15)
        # Translators: Label for Go To Page
        hbox_nav.Add(wx.StaticText(panel, label=_("Go to:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
        # Combo entries are 1-based page numbers of the selected range.
        choices = [str(i+1) for i in range(self.start_page, self.end_page + 1)]
        self.cmb_pages = wx.Choice(panel, choices=choices)
        self.cmb_pages.Bind(wx.EVT_CHOICE, self.on_page_select)
        hbox_nav.Add(self.cmb_pages, 0, wx.RIGHT, 15)
        vbox.Add(hbox_nav, 0, wx.ALIGN_CENTER | wx.ALL, 10)
        hbox_actions = wx.BoxSizer(wx.HORIZONTAL)
        # Translators: Button to Ask questions about the document
        self.btn_ask = wx.Button(panel, label=_("Ask AI (Alt+A)"))
        self.btn_ask.Bind(wx.EVT_BUTTON, self.on_ask)
        hbox_actions.Add(self.btn_ask, 0, wx.RIGHT, 5)

        # Translators: Button to force re-scan
        self.btn_gemini = wx.Button(panel, label=_("Re-scan with Gemini (Alt+R)"))
        self.btn_gemini.Bind(wx.EVT_BUTTON, self.on_gemini_scan)
        hbox_actions.Add(self.btn_gemini, 0, wx.RIGHT, 5)

        # Translators: Button to generate audio
        self.btn_tts = wx.Button(panel, label=_("Generate Audio (Alt+G)"))
        self.btn_tts.Bind(wx.EVT_BUTTON, self.on_tts)
        hbox_actions.Add(self.btn_tts, 0, wx.RIGHT, 5)

        # Translators: Button to view formatted content
        self.btn_view = wx.Button(panel, label=_("View Formatted"))
        self.btn_view.Bind(wx.EVT_BUTTON, self.on_view)
        hbox_actions.Add(self.btn_view, 0, wx.RIGHT, 5)

        # Translators: Button to save text
        self.btn_save = wx.Button(panel, label=_("Save (Alt+S)"))
        self.btn_save.Bind(wx.EVT_BUTTON, self.on_save_all)
        hbox_actions.Add(self.btn_save, 0)

        vbox.Add(hbox_actions, 0, wx.ALIGN_CENTER | wx.ALL, 5)
        btn_close = wx.Button(panel, wx.ID_CLOSE, label=_("Close"))
        btn_close.Bind(wx.EVT_BUTTON, lambda e: self.Destroy())
        vbox.Add(btn_close, 0, wx.ALIGN_RIGHT | wx.ALL, 10)
        panel.SetSizer(vbox)
        # Keyboard shortcuts; note Ctrl+S and Alt+S both trigger Save.
        accel_tbl = wx.AcceleratorTable([
            (wx.ACCEL_CTRL, wx.WXK_PAGEDOWN, self.btn_next.GetId()),
            (wx.ACCEL_CTRL, wx.WXK_PAGEUP, self.btn_prev.GetId()),
            (wx.ACCEL_CTRL, ord('S'), self.btn_save.GetId()),
            (wx.ACCEL_ALT, ord('S'), self.btn_save.GetId()),
            (wx.ACCEL_ALT, ord('A'), self.btn_ask.GetId()),
            (wx.ACCEL_ALT, ord('R'), self.btn_gemini.GetId()),
            (wx.ACCEL_ALT, ord('G'), self.btn_tts.GetId())
        ])
        self.SetAcceleratorTable(accel_tbl)
        self.cmb_pages.SetSelection(0)
        self.update_view()
        self.txt_content.SetFocus()

    def start_auto_processing(self):
        """Worker entry point: kick off OCR for every page in range.

        Gemini uses a single batch upload; the other engines process pages
        individually on the thread pool."""
        engine = config.conf["VisionAssistant"]["ocr_engine"]

        if engine == 'gemini':
            threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start()
        else:
            for i in range(self.start_page, self.end_page + 1):
                self.thread_pool.submit(self.process_page_worker, i)

    def process_page_worker(self, page_num):
        """Thread-pool task: OCR one page, then refresh the view if it is current."""
        if page_num in self.page_cache: return
        text = self._get_page_text_logic(page_num)
        self.page_cache[page_num] = text
        if page_num == self.current_page:
            wx.CallAfter(self.update_view)
            # Translators: Spoken message when the current page is ready
            wx.CallAfter(ui.message, _("Page {num} ready").format(num=page_num + 1))

    def _get_page_text_logic(self, page_num):
        """Render the page to a JPEG and OCR it, falling back through
        Gemini -> Chrome -> SmartProgrammers engines; optionally translate.

        Returns the recognized text, a translated version of it, an OCR
        failure placeholder, or a generic error string on exception."""
        file_path, page_idx = self.v_doc.get_page_info(page_num)
        if not file_path: return ""
        try:
            doc = fitz.open(file_path)
            page = doc.load_page(page_idx)
            # 2x zoom matrix for better OCR accuracy on small text.
            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
            img_bytes = pix.tobytes("jpg")
            doc.close()
            engine = config.conf["VisionAssistant"]["ocr_engine"]
            text = None
            if engine == 'gemini':
                try: text = GeminiHandler.ocr_page(img_bytes)
                except: text = None
            if not text or not text.strip() or engine == 'chrome':
                text = ChromeOCREngine.recognize(img_bytes)
            if not text or not text.strip():
                text = SmartProgrammersOCREngine.recognize(img_bytes)
            if not text or not text.strip():
                # Translators: Placeholder text when OCR fails
                text = _("[OCR failed. Try Gemini Re-scan.]")
            # NOTE(review): this substring check assumes the English
            # placeholder text; it may not match once the message above is
            # localized — confirm.
            if self.do_translate and text and "[OCR failed" not in text:
                if engine == 'gemini':
                    text = GeminiHandler.translate(text, self.target_lang)
                else:
                    text = GoogleTranslator.translate(text, self.target_lang)
            return text
        except:
            # Translators: Error message for page processing failure
            return _("Error processing page.")

    def update_view(self):
        """Refresh status label, text area and nav-button state for current_page."""
        # rel_page is 1-based within the selected range.
        rel_page = self.current_page - self.start_page + 1
        # Translators: Status label format
        self.lbl_status.SetLabel(_("Page {current} of {total}").format(current=rel_page, total=self.range_count))
        if self.current_page in self.page_cache:
            self.txt_content.SetValue(self.page_cache[self.current_page])
            self.txt_content.SetInsertionPoint(0)
            self.txt_content.SetFocus()
        else:
            # Translators: Status when page is loading
            self.txt_content.SetValue(_("Processing in background..."))
            self.txt_content.SetInsertionPoint(0)
            self.txt_content.SetFocus()
        self.btn_prev.Enable(self.current_page > self.start_page)
        self.btn_next.Enable(self.current_page < self.end_page)

    def load_page(self, page_num):
        """Jump to an absolute page number; ignored when outside the range."""
        if page_num < self.start_page or page_num > self.end_page: return
        self.current_page = page_num
        self.cmb_pages.SetSelection(page_num - self.start_page)
        # Translators: Spoken message when switching pages
        ui.message(_("Page {num}").format(num=page_num + 1))
        self.update_view()

    def on_prev(self, event):
        # Step back one page unless already at the start of the range.
        if self.current_page > self.start_page: self.load_page(self.current_page - 1)

    def on_next(self, event):
        # Step forward one page unless already at the end of the range.
        if self.current_page < self.end_page: self.load_page(self.current_page + 1)

    def on_page_select(self, event):
        # The combo index is relative to the start of the selected range.
        self.load_page(self.start_page + self.cmb_pages.GetSelection())

    def on_view(self, event):
        """Show all cached pages (or just the current text) as formatted HTML
        in NVDA's browseable message window."""
        full_html = []
        for i in range(self.start_page, self.end_page + 1):
            if i in self.page_cache:
                page_text = self.page_cache[i]
                page_content = markdown_to_html(page_text, full_page=False)
                # Translators: Heading for each page in the formatted content view.
                page_label = _("Page {num}").format(num=i+1)
                # NOTE(review): the two appends below appear to have lost
                # their HTML tags (likely a heading wrapper and a separator,
                # e.g. <h2>...</h2> and <hr>) — confirm against version
                # history before shipping.
                full_html.append(f"{page_label}
")
                full_html.append(page_content)
                full_html.append("
")

        # Fall back to whatever is in the text control when nothing is cached.
        if not full_html:
            text = self.txt_content.GetValue()
            if not text: return
            full_html.append(markdown_to_html(text, full_page=False))

        combined_html = "".join(full_html)
        try:
            # Translators: Title of the formatted result window
            ui.browseableMessage(combined_html, _("Formatted Content"), isHtml=True)
        except Exception as e:
            show_error_dialog(str(e))

    def on_gemini_scan(self, event):
        """Offer a popup menu to re-scan the current page or the whole range."""
        if not config.conf["VisionAssistant"]["api_key"]:
            wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR)
            return
        menu = wx.Menu()
        # Translators: Menu option for current page
        item_curr = menu.Append(wx.ID_ANY, _("Current Page"))
        # Translators: Menu option for all pages
        item_all = menu.Append(wx.ID_ANY, _("All Pages (In Range)"))
        self.Bind(wx.EVT_MENU, self.do_rescan_current, item_curr)
        self.Bind(wx.EVT_MENU, self.do_rescan_all, item_all)
        self.PopupMenu(menu)
        menu.Destroy()

    def do_rescan_current(self, event):
        """Drop the cached text for the current page and re-scan it with Gemini."""
        if self.current_page in self.page_cache: del self.page_cache[self.current_page]
        self.update_view()
        # Translators: Message during manual scan
        ui.message(_("Scanning with Gemini..."))
        threading.Thread(target=self.gemini_scan_single_thread, args=(self.current_page,), daemon=True).start()

    def gemini_scan_single_thread(self, page_num):
        """Worker thread: Gemini-OCR a single page and update the cache/view.

        Failures are silently ignored (best-effort re-scan)."""
        try:
            file_path, page_idx = self.v_doc.get_page_info(page_num)
            doc = fitz.open(file_path)
            page = doc.load_page(page_idx)
            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
            text = GeminiHandler.ocr_page(pix.tobytes("jpg"))
            doc.close()
            if self.do_translate: text = GeminiHandler.translate(text, self.target_lang)
            self.page_cache[page_num] = text
            if self.current_page == page_num:
                wx.CallAfter(self.update_view)
                # Translators: Message when scan is complete
                wx.CallAfter(ui.message, _("Scan complete"))
        except: pass

    def do_rescan_all(self, event):
        """Re-scan the whole range with Gemini on a background thread."""
        threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start()

    def gemini_scan_batch_thread(self):
        """Worker thread: merge the range into one PDF, upload it to Gemini
        and split the response back into per-page cache entries."""
        # Translators: Message when batch scan starts
        msg = _("Batch Processing Started")
        if _vision_assistant_instance: _vision_assistant_instance.current_status = msg
        wx.CallAfter(ui.message, msg)

        # Invalidate all cached pages so the view shows the processing state.
        for i in range(self.start_page, self.end_page + 1):
            if i in self.page_cache: del self.page_cache[i]
        wx.CallAfter(self.update_view)

        upload_path = self.v_doc.create_merged_pdf(self.start_page, self.end_page)
        if not upload_path:
            # Translators: Error message if PDF creation fails
            wx.CallAfter(self.lbl_status.SetLabel, _("Error creating temporary PDF."))
            return

        try:
            count = (self.end_page - self.start_page) + 1
            results = GeminiHandler.upload_and_process_batch(upload_path, "application/pdf", count)

            # A single "ERROR:"-prefixed entry signals a whole-batch failure.
            if not results or (len(results) == 1 and str(results[0]).startswith("ERROR:")):
                err_msg = results[0][6:] if results else _("Unknown error")
                # Translators: Message reported when batch scan fails
                error_text = _("Scan failed: {err}").format(err=err_msg)
                for i in range(self.start_page, self.end_page + 1):
                    self.page_cache[i] = error_text

                wx.CallAfter(self.update_view)
                wx.CallAfter(ui.message, error_text)
                return

            # Map each returned chunk onto its absolute page number.
            for i, text_part in enumerate(results):
                if i >= count: break
                idx = self.start_page + i
                clean = text_part.strip()
                if self.do_translate:
                    clean = GeminiHandler.translate(clean, self.target_lang)
                self.page_cache[idx] = clean

            wx.CallAfter(self.update_view)
            # Translators: Message when batch scan is complete
            final_msg = _("Batch Scan Complete")
            if _vision_assistant_instance:
                # Translators: Initial status when the add-on is doing nothing
                _vision_assistant_instance.current_status = _("Idle")
            wx.CallAfter(ui.message, final_msg)
        finally:
            # Always remove the temporary merged PDF.
            if upload_path and os.path.exists(upload_path):
                try: os.remove(upload_path)
                except: pass

    def on_tts(self, event):
        """Offer a popup menu to generate audio for the current page or the range."""
        if not config.conf["VisionAssistant"]["api_key"]:
            wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR)
            return
        menu = wx.Menu()
        # Translators: Menu option for TTS current page
        item_curr = menu.Append(wx.ID_ANY, _("Generate for Current Page"))
        # Translators: Menu option for TTS all pages
        item_all = menu.Append(wx.ID_ANY, _("Generate for All Pages (In Range)"))
        self.Bind(wx.EVT_MENU, self.do_tts_current, item_curr)
        self.Bind(wx.EVT_MENU, self.do_tts_all, item_all)
        self.PopupMenu(menu)
        menu.Destroy()

    def do_tts_current(self, event):
        """Generate audio for whatever text is currently displayed."""
        text = self.txt_content.GetValue().strip()
        if not text:
            # Translators: Error message when text field is empty
            wx.MessageBox(_("No text to read."), "Error")
            return
        self._save_tts(text)

    def do_tts_all(self, event):
        """Collect every page's text on a background thread, then generate audio."""
        threading.Thread(target=self.tts_batch_thread, daemon=True).start()

    def tts_batch_thread(self):
        """Worker thread: wait for all pages in range, then hand the combined
        text to the save-audio flow on the GUI thread.

        NOTE(review): the busy-wait below never times out; if a page worker
        dies this thread spins forever — confirm acceptable."""
        full_text = []
        # Translators: Message while gathering text
        wx.CallAfter(ui.message, _("Gathering text for audio..."))
        for i in range(self.start_page, self.end_page + 1):
            while i not in self.page_cache: time.sleep(0.1)
            full_text.append(self.page_cache[i])
        final_text = "\n".join(full_text).strip()
        if not final_text: return
        wx.CallAfter(self._save_tts, final_text)

    def _save_tts(self, text):
        """Ask for an output path, then start the TTS worker with the configured voice."""
        # Translators: File dialog title for saving audio
        path = get_file_path(_("Save Audio"), "MP3 Files (*.mp3)|*.mp3|WAV Files (*.wav)|*.wav", mode="save")
        if path:
            voice = config.conf["VisionAssistant"]["tts_voice"]
            threading.Thread(target=self.tts_worker, args=(text, voice, path), daemon=True).start()

    def tts_worker(self, text, voice, path):
        """Worker thread: fetch PCM audio from Gemini and write MP3 (via the
        bundled lame.exe) or WAV (stdlib wave module)."""
        # Translators: Message while generating audio
        msg = _("Generating Audio...")
        if _vision_assistant_instance: _vision_assistant_instance.current_status = msg
        wx.CallAfter(ui.message, msg)
        try:
            audio_b64 = GeminiHandler.generate_speech(text, voice)
            # Very short responses are treated as error strings, not audio.
            if not audio_b64 or len(audio_b64) < 100:
                wx.CallAfter(wx.MessageBox, f"TTS Error: {audio_b64}", "Error", wx.ICON_ERROR)
                return
            # Repair base64 padding before decoding.
            missing_padding = len(audio_b64) % 4
            if missing_padding: audio_b64 += '=' * (4 - missing_padding)
            pcm_data = base64.b64decode(audio_b64)

            if path.lower().endswith(".mp3"):
                import subprocess
                lame_path = os.path.join(os.path.dirname(__file__), "lib", "lame.exe")
                if not os.path.exists(lame_path):
                    wx.CallAfter(wx.MessageBox, _("lame.exe not found in lib folder."), "Error", wx.ICON_ERROR)
                    return

                # Encode raw 24 kHz mono 16-bit PCM from stdin to MP3.
                process = subprocess.Popen(
                    [lame_path, "-r", "-s", "24", "-m", "m", "-b", "128", "--bitwidth", "16", "--resample", "24", "-q", "0", "-", path],
                    stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    creationflags=getattr(subprocess, 'CREATE_NO_WINDOW', 0)
                )

                process.communicate(input=pcm_data)
            else:
                # WAV output: mono, 16-bit, 24 kHz to match the PCM stream.
                with wave.open(path, "wb") as wf:
                    wf.setnchannels(1)
                    wf.setsampwidth(2)
                    wf.setframerate(24000)
                    wf.writeframes(pcm_data)

            # Translators: Spoken message when audio is saved
            res_msg = _("Audio Saved")
            if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle")
            wx.CallAfter(ui.message, res_msg)
            wx.CallAfter(wx.MessageBox, _("Audio file generated and saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION)
        except Exception as e:
            if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle")
            wx.CallAfter(wx.MessageBox, f"TTS Error: {e}", "Error", wx.ICON_ERROR)

    def on_ask(self, event):
        """Open (or raise) the document chat dialog for the current page's file."""
        if not config.conf["VisionAssistant"]["api_key"]:
            wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR)
            return
        if ChatDialog.instance:
            ChatDialog.instance.Raise()
            ChatDialog.instance.SetFocus()
            return
        # NOTE(review): this rebinds the gettext "_" function locally as a
        # throwaway; harmless here (no later "_" use) but fragile.
        file_path, _ = self.v_doc.get_page_info(self.current_page)
        if file_path:
            dlg = ChatDialog(self, file_path)
            dlg.Show()

    def on_save_all(self, event):
        """Ask for a target path and save all pages as text or HTML."""
        # Translators: File dialog filter for saving text/html
        wildcard = "Text File (*.txt)|*.txt|HTML File (*.html)|*.html"
        # Translators: File dialog title for saving
        path = get_file_path(_("Save"), wildcard, mode="save")
        if path:
            is_html = path.lower().endswith('.html')
            self.btn_save.Disable()
            threading.Thread(target=self.save_thread, args=(path, is_html), daemon=True).start()

    def save_thread(self, path, is_html):
        """Worker thread: wait for each page in range and write them all out.

        NOTE(review): like tts_batch_thread, the per-page busy-wait has no
        timeout — confirm acceptable."""
        full_content = []
        try:
            for i in range(self.start_page, self.end_page + 1):
                # Translators: Message showing save progress
                wx.CallAfter(self.lbl_status.SetLabel, _("Saving Page {num}...").format(num=i+1))
                while i not in self.page_cache: time.sleep(0.1)
                txt = self.page_cache[i]
                if is_html:
                    h = markdown_to_html(txt)
                    # NOTE(review): the empty-string literals below look like
                    # stripped HTML tags (likely "<body>"/"</body>" splitting
                    # and an <h2>/<hr> page wrapper) — as written,
                    # str.split("") raises ValueError; confirm against
                    # version history.
                    if "" in h: h = h.split("")[1].split("")[0]
                    full_content.append(f"
Page {i+1}
{h}")
                else:
                    full_content.append(f"--- Page {i+1} ---\n{txt}\n")
            with open(path, "w", encoding="utf-8") as f:
                if is_html: f.write(f"{''.join(full_content)}")
                else: f.write("\n".join(full_content))
            # Translators: Status label when save is complete
            wx.CallAfter(self.lbl_status.SetLabel, _("Saved"))
            # Translators: Message box content for successful save
            wx.CallAfter(wx.MessageBox, _("File saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION)
        except Exception as e:
            wx.CallAfter(wx.MessageBox, f"Save Error: {e}", "Error", wx.ICON_ERROR)
        finally: wx.CallAfter(self.btn_save.Enable)
diff --git a/addon/globalPlugins/visionAssistant/markdown_utils.py b/addon/globalPlugins/visionAssistant/markdown_utils.py
new file mode 100644
index 0000000..ba438a6
--- /dev/null
+++ b/addon/globalPlugins/visionAssistant/markdown_utils.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+
def clean_markdown(text):
    """Strip common Markdown markup (bold/italic, headings, code fences,
    links, list bullets) from *text*, returning plain stripped text."""
    if not text:
        return ""
    # (pattern, replacement, flags) applied in order; order matters because
    # emphasis markers are removed before link syntax is collapsed.
    transformations = (
        (r'\*\*|__|[*_]', '', 0),               # bold/italic markers
        (r'^#+\s*', '', re.MULTILINE),          # heading hashes
        (r'```', '', 0),                        # code fences
        (r'\[([^\]]+)\]\([^\)]+\)', r'\1', 0),  # [text](url) -> text
        (r'^\s*-\s+', '', re.MULTILINE),        # leading list bullets
    )
    for pattern, replacement, flags in transformations:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text.strip()
+
+
def markdown_to_html(text, full_page=False):
    """Convert a small Markdown subset (bold, headings, pipe tables, line
    breaks) to HTML.

    Bug fix: the previous revision had its HTML tags and character-entity
    escapes stripped (e.g. ``.replace("&", "&")`` and string literals broken
    across lines), leaving the function syntactically invalid; the markup is
    reconstructed here.

    :param text: Markdown-ish source text (may be empty/None).
    :param full_page: when True, wrap the result in a minimal HTML document.
    :return: HTML fragment, or a full page when ``full_page`` is True.
    """
    if not text:
        return ""

    # Escape HTML special characters before inserting our own tags.
    html = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    html = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', html)
    html = re.sub(r'__(.*?)__', r'<b>\1</b>', html)
    # Headings: most specific (###) first so '#' does not swallow '###'.
    html = re.sub(r'^### (.*)', r'<h3>\1</h3>', html, flags=re.M)
    html = re.sub(r'^## (.*)', r'<h2>\1</h2>', html, flags=re.M)
    html = re.sub(r'^# (.*)', r'<h1>\1</h1>', html, flags=re.M)

    lines = html.split('\n')
    in_table = False
    new_lines = []
    table_style = 'border="1" style="border-collapse: collapse; width: 100%; margin-bottom: 10px;"'
    td_style = 'style="padding: 5px; border: 1px solid #ccc;"'

    for line in lines:
        stripped = line.strip()
        # Heuristic: a line starting with '|' (or containing several) is a table row.
        if stripped.startswith('|') or (stripped.count('|') > 1 and len(stripped) > 5):
            if not in_table:
                new_lines.append(f'<table {table_style}>')
                in_table = True
            if '---' in stripped:
                # Markdown header-separator row; no HTML equivalent needed.
                continue
            row_content = stripped.strip('|').split('|')
            cells = "".join([f'<td {td_style}>{c.strip()}</td>' for c in row_content])
            new_lines.append(f'<tr>{cells}</tr>')
        else:
            if in_table:
                new_lines.append('</table>')
                in_table = False
            if stripped:
                new_lines.append(line + "<br/>")
            else:
                new_lines.append("<br/>")
    if in_table:
        # Close a table that ran to the end of the text.
        new_lines.append('</table>')
    html_body = "".join(new_lines)

    if not full_page:
        return html_body
    return f"""<html><head><meta charset="utf-8"></head><body>{html_body}</body></html>"""
diff --git a/addon/globalPlugins/visionAssistant/prompt_helpers.py b/addon/globalPlugins/visionAssistant/prompt_helpers.py
new file mode 100644
index 0000000..64d8c01
--- /dev/null
+++ b/addon/globalPlugins/visionAssistant/prompt_helpers.py
@@ -0,0 +1,283 @@
+# -*- coding: utf-8 -*-
+
+import json
+import logging
+
+import addonHandler
+import config
+
+from .constants import DEFAULT_SYSTEM_PROMPTS, LEGACY_REFINER_TOKENS, REFINE_PROMPT_KEYS
+
+addonHandler.initTranslation()
+log = logging.getLogger(__name__)
+
+
def get_builtin_default_prompts():
    """Return the built-in prompt definitions as dicts carrying both the
    active prompt text and its pristine default."""
    result = []
    for entry in DEFAULT_SYSTEM_PROMPTS:
        prompt_text = str(entry["prompt"]).strip()
        result.append({
            "key": entry["key"],
            "section": entry["section"],
            "label": entry["label"],
            "display_label": f"{entry['section']} - {entry['label']}",
            "internal": bool(entry.get("internal")),
            "prompt": prompt_text,
            "default": prompt_text,
        })
    return result
+
+
def get_builtin_default_prompt_map():
    """Map each prompt key to its built-in prompt record."""
    mapping = {}
    for record in get_builtin_default_prompts():
        mapping[record["key"]] = record
    return mapping
+
+
+def _normalize_custom_prompt_items(items):
+ normalized = []
+ if not isinstance(items, list):
+ return normalized
+
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ name = item.get("name")
+ content = item.get("content")
+ if not isinstance(name, str) or not isinstance(content, str):
+ continue
+ name = name.strip()
+ content = content.strip()
+ if name and content:
+ normalized.append({"name": name, "content": content})
+ return normalized
+
+
def parse_custom_prompts_legacy(raw_value):
    """Parse the old ``name:content|name:content`` custom-prompt format.

    Entries may be separated by '|' and/or any newline style; malformed
    segments (no ':' or an empty side) are skipped.
    """
    results = []
    if not raw_value:
        return results
    unified = raw_value.replace("\r\n", "\n").replace("\r", "\n")
    segments = (seg for line in unified.split("\n") for seg in line.split("|"))
    for segment in segments:
        segment = segment.strip()
        if not segment or ":" not in segment:
            continue
        name, _sep, content = segment.partition(":")
        name = name.strip()
        content = content.strip()
        if name and content:
            results.append({"name": name, "content": content})
    return results
+
+
def parse_custom_prompts_v2(raw_value):
    """Decode the JSON (v2) custom-prompt store.

    Returns a normalized list on success, or None when the value is
    empty/absent or not valid JSON (the caller then falls back to the
    legacy format).
    """
    if not isinstance(raw_value, str) or not raw_value.strip():
        return None
    try:
        parsed = json.loads(raw_value)
    except Exception as e:
        log.warning(f"Invalid custom_prompts_v2 config, falling back to legacy format: {e}")
        return None
    return _normalize_custom_prompt_items(parsed)
+
+
def serialize_custom_prompts_v2(items):
    """Serialize custom prompts to the JSON v2 format ("" when empty)."""
    cleaned = _normalize_custom_prompt_items(items)
    return json.dumps(cleaned, ensure_ascii=False) if cleaned else ""
+
+
def load_configured_custom_prompts():
    """Load custom prompts, preferring the v2 JSON store over legacy."""
    try:
        stored = config.conf["VisionAssistant"]["custom_prompts_v2"]
    except Exception:
        # Config key may not exist yet on first run after upgrade.
        stored = ""
    parsed = parse_custom_prompts_v2(stored)
    if parsed is not None:
        return parsed
    return parse_custom_prompts_legacy(config.conf["VisionAssistant"]["custom_prompts"])
+
+
def _sanitize_default_prompt_overrides(data):
    """Validate and migrate a dict of default-prompt overrides.

    Returns ``(sanitized, changed)``: *sanitized* keeps only known prompt
    keys with non-empty string values; *changed* is True when anything was
    dropped, trimmed or migrated, signalling the caller to re-persist.
    """
    if not isinstance(data, dict):
        return {}, False

    changed = False
    mutable = dict(data)
    # Migrate old key used in previous versions.
    legacy_vision = mutable.pop("vision_image_analysis", None)
    if legacy_vision is not None:
        changed = True
        if isinstance(legacy_vision, str) and legacy_vision.strip():
            legacy_text = legacy_vision.strip()
            # Copy the legacy prompt into both successor keys, but never
            # clobber a value the user has already set for them.
            nav_value = mutable.get("vision_navigator_object")
            if not isinstance(nav_value, str) or not nav_value.strip():
                mutable["vision_navigator_object"] = legacy_text
                changed = True
            full_value = mutable.get("vision_fullscreen")
            if not isinstance(full_value, str) or not full_value.strip():
                mutable["vision_fullscreen"] = legacy_text
                changed = True

    valid_keys = set(get_builtin_default_prompt_map().keys())
    sanitized = {}
    for key, value in mutable.items():
        # Unknown keys and non-string values are silently discarded.
        if key not in valid_keys or not isinstance(value, str):
            changed = True
            continue
        prompt_text = value.strip()
        if not prompt_text:
            changed = True
            continue
        if key in LEGACY_REFINER_TOKENS and prompt_text == LEGACY_REFINER_TOKENS[key]:
            # Drop old token-only overrides and fallback to current built-ins.
            changed = True
            continue
        if prompt_text != value:
            # Whitespace was trimmed — still counts as a change to persist.
            changed = True
        sanitized[key] = prompt_text
    return sanitized, changed
+
+
def migrate_prompt_config_if_needed():
    """One-time migration of prompt storage in the user config.

    - Moves custom prompts into the JSON v2 store (converting the legacy
      ``name:content|...`` string when v2 is absent/invalid) and clears
      the legacy slot so stale data cannot resurface.
    - Re-sanitizes stored default-prompt overrides, dropping unknown keys
      and obsolete token-only values.

    Returns True when any config value was rewritten.
    """
    changed = False

    try:
        raw_v2 = config.conf["VisionAssistant"]["custom_prompts_v2"]
    except Exception:
        # Key may be missing on profiles created by older versions.
        raw_v2 = ""
    raw_legacy = config.conf["VisionAssistant"]["custom_prompts"]

    v2_items = parse_custom_prompts_v2(raw_v2)
    if v2_items is None:
        # No usable v2 data: derive it from the legacy string.
        target_items = parse_custom_prompts_legacy(raw_legacy)
    else:
        target_items = v2_items

    serialized_v2 = serialize_custom_prompts_v2(target_items)
    if serialized_v2 != (raw_v2 or ""):
        config.conf["VisionAssistant"]["custom_prompts_v2"] = serialized_v2
        changed = True

    # Legacy mirror is disabled. Clear old storage to prevent stale fallback data.
    if raw_legacy:
        config.conf["VisionAssistant"]["custom_prompts"] = ""
        changed = True

    try:
        raw_defaults = config.conf["VisionAssistant"]["default_refine_prompts"]
    except Exception:
        raw_defaults = ""
    if isinstance(raw_defaults, str) and raw_defaults.strip():
        try:
            defaults_data = json.loads(raw_defaults)
        except Exception:
            defaults_data = None
        if isinstance(defaults_data, dict):
            sanitized, migrated = _sanitize_default_prompt_overrides(defaults_data)
            if migrated:
                # Persist the cleaned overrides ("" when nothing survives).
                config.conf["VisionAssistant"]["default_refine_prompts"] = (
                    json.dumps(sanitized, ensure_ascii=False) if sanitized else ""
                )
                changed = True

    return changed
+
+
def load_default_prompt_overrides():
    """Read user overrides for the built-in prompts from config.

    Returns a sanitized ``{key: prompt}`` dict; empty on missing or
    malformed data.
    """
    try:
        stored = config.conf["VisionAssistant"]["default_refine_prompts"]
    except Exception:
        stored = ""
    if not isinstance(stored, str) or not stored.strip():
        return {}

    try:
        parsed = json.loads(stored)
    except Exception as e:
        log.warning(f"Invalid default_refine_prompts config, using built-ins: {e}")
        return {}

    sanitized, _unused = _sanitize_default_prompt_overrides(parsed)
    return sanitized
+
+
def get_configured_default_prompt_map():
    """Return the built-in prompt map with user overrides applied on top."""
    prompt_map = get_builtin_default_prompt_map()
    for key, override in load_default_prompt_overrides().items():
        if key not in prompt_map:
            continue
        # Ignore stale token-only overrides left over from older releases.
        if key in LEGACY_REFINER_TOKENS and LEGACY_REFINER_TOKENS[key] == override:
            continue
        prompt_map[key]["prompt"] = override
    return prompt_map
+
+
def get_configured_default_prompts():
    """Return the non-internal configured prompts, sorted by display label."""
    prompt_map = get_configured_default_prompt_map()
    results = []
    for definition in DEFAULT_SYSTEM_PROMPTS:
        if definition.get("internal"):
            continue
        configured = prompt_map.get(definition["key"])
        if configured is not None:
            results.append(dict(configured))
    # Case-insensitive sort; note the key var no longer shadows the loop var.
    results.sort(key=lambda rec: rec.get("display_label", "").casefold())
    return results
+
+
def get_prompt_text(prompt_key):
    """Return the configured prompt text for *prompt_key* ("" if unknown)."""
    entry = get_configured_default_prompt_map().get(prompt_key)
    return entry["prompt"] if entry else ""
+
+
def serialize_default_prompt_overrides(items):
    """Serialize only the prompts that differ from their built-in defaults.

    Returns a JSON object string, or "" when nothing is overridden.
    """
    if not items:
        return ""

    defaults = {rec["key"]: rec["prompt"] for rec in get_builtin_default_prompts()}
    overrides = {}
    for entry in items:
        key = entry.get("key")
        text = entry.get("prompt", "")
        if key not in defaults or not isinstance(text, str):
            continue
        text = text.strip()
        if text and text != defaults[key]:
            overrides[key] = text

    return json.dumps(overrides, ensure_ascii=False) if overrides else ""
+
+
def get_refine_menu_options():
    """Build (label, prompt) pairs for the Refine menu: built-in refine
    prompts first, then user-defined custom prompts."""
    prompt_map = get_configured_default_prompt_map()
    options = [
        (prompt_map[key]["label"], prompt_map[key]["prompt"])
        for key in REFINE_PROMPT_KEYS
        if key in prompt_map
    ]
    for custom in load_configured_custom_prompts():
        # Translators: Prefix for custom prompts in the Refine menu
        options.append((_("Custom: ") + custom["name"], custom["content"]))
    return options
+
+
def apply_prompt_template(template, replacements):
    """Fill ``{key}`` placeholders in *template* from (key, value) pairs.

    Non-string templates yield ""; the result is whitespace-trimmed.
    """
    if not isinstance(template, str):
        return ""
    filled = template
    for placeholder, value in replacements:
        filled = filled.replace("{%s}" % placeholder, str(value))
    return filled.strip()
diff --git a/addon/globalPlugins/visionAssistant/services.py b/addon/globalPlugins/visionAssistant/services.py
new file mode 100644
index 0000000..b944d78
--- /dev/null
+++ b/addon/globalPlugins/visionAssistant/services.py
@@ -0,0 +1,586 @@
+# -*- coding: utf-8 -*-
+
+import os
+import json
+import base64
+import tempfile
+import time
+import ctypes
+import re
+import logging
+
+import wx
+from urllib import request, error, parse
+from urllib.parse import quote, urlencode
+from http import cookiejar
+from uuid import uuid4
+
+try:
+ import fitz
+except ImportError:
+ fitz = None
+
+import addonHandler
+import config
+import gui
+
+from .constants import ADDON_NAME, CHROME_OCR_KEYS, TARGET_CODES
+from .prompt_helpers import apply_prompt_template, get_prompt_text
+
+log = logging.getLogger(__name__)
+addonHandler.initTranslation()
+
def get_mime_type(path):
    """Guess a MIME type from the file extension (case-insensitive)."""
    mime_by_ext = {
        '.pdf': 'application/pdf',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        # NOTE(review): .tif/.tiff deliberately map to image/jpeg in the
        # original — presumably TIFFs get converted before upload; confirm.
        '.tif': 'image/jpeg',
        '.tiff': 'image/jpeg',
        '.mp3': 'audio/mpeg',
        '.wav': 'audio/wav',
        '.ogg': 'audio/ogg',
        '.mp4': 'video/mp4',
    }
    ext = os.path.splitext(path)[1].lower()
    return mime_by_ext.get(ext, 'application/octet-stream')
+
def show_error_dialog(message):
    """Display *message* in a modal error box, marshalled to the GUI thread."""
    # Translators: Title of the error dialog box
    title = _("{name} Error").format(name=ADDON_NAME)
    wx.CallAfter(gui.messageBox, message, title, wx.OK | wx.ICON_ERROR)
+
def send_ctrl_v():
    """Synthesize a Ctrl+V keystroke via the Win32 ``keybd_event`` API.

    Returns True on success, False when the call failed (failure logged).
    """
    VK_CONTROL = 0x11
    VK_V = 0x56
    KEYEVENTF_KEYUP = 0x0002
    try:
        keybd_event = ctypes.windll.user32.keybd_event
        keybd_event(VK_CONTROL, 0, 0, 0)
        keybd_event(VK_V, 0, 0, 0)
        keybd_event(VK_V, 0, KEYEVENTF_KEYUP, 0)
        keybd_event(VK_CONTROL, 0, KEYEVENTF_KEYUP, 0)
        return True
    except Exception:
        log.warning("Failed to send Ctrl+V", exc_info=True)
        return False
+
def get_proxy_opener():
    """Return a urllib opener, routed through the configured proxy when set.

    NOTE(review): the condition below looks like a loose "local proxy or
    host:port" heuristic — any URL whose last path segment contains ':'
    also matches; confirm this is intended.
    """
    proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
    if proxy_url:
        if "127.0.0.1" in proxy_url or "localhost" in proxy_url or ":" in proxy_url.split("/")[-1]:
            handler = request.ProxyHandler({'http': proxy_url, 'https': proxy_url})
            return request.build_opener(handler)
    return request.build_opener()
+
def get_twitter_download_link(tweet_url):
    """Resolve a tweet URL to a direct media download URL via the
    savetwitter.net scraping API; returns None on any failure.
    """
    cj = cookiejar.CookieJar()
    opener = request.build_opener(request.HTTPCookieProcessor(cj))
    base_url = "https://savetwitter.net/en4"
    api_url = "https://savetwitter.net/api/ajaxSearch"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'X-Requested-With': 'XMLHttpRequest', 'Referer': base_url}
    try:
        # Initial GET establishes the session cookies the API requires.
        req_init = request.Request(base_url, headers=headers)
        opener.open(req_init)
        params = {'q': tweet_url, 'lang': 'en', 'cftoken': ''}
        data = urlencode(params).encode('utf-8')
        req_post = request.Request(api_url, data=data, headers=headers, method='POST')
        with opener.open(req_post) as response:
            res_data = json.loads(response.read().decode('utf-8'))
            if res_data.get('status') == 'ok':
                # The API returns an HTML fragment; pull the CDN link out.
                html = res_data.get('data', '')
                match = re.search(r'href="(https?://dl\.snapcdn\.app/[^"]+)"', html)
                if match: return match.group(1)
    except Exception:
        log.warning("Failed to fetch Twitter download link", exc_info=True)
    return None
+
def get_instagram_download_link(insta_url):
    """Resolve an Instagram post/story URL to a direct media download URL
    via the anon-viewer.com scraping API; returns None on failure.

    NOTE(review): the tail of this function and the whole google_translate
    helper were corrupted in the source (HTML/regex fragments stripped and
    the two functions fused). Both are reconstructed below — verify
    against the upstream repository.
    """
    cj = cookiejar.CookieJar()
    opener = request.build_opener(request.HTTPCookieProcessor(cj))
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://anon-viewer.com/',
        'Accept': '*/*'
    }
    opener.addheaders = list(headers.items())
    try:
        # Prime session cookies first; the API rejects cookie-less requests.
        opener.open("https://anon-viewer.com/", timeout=30)

        if "/stories/" in insta_url:
            parts = insta_url.split("/")
            username = parts[parts.index("stories") + 1]
            api_url = f"https://anon-viewer.com/content.php?url={username}&method=allstories"
        else:
            encoded_url = quote(insta_url, safe='')
            api_url = f"https://anon-viewer.com/content.php?url={encoded_url}"

        response = opener.open(api_url, timeout=60)
        if response.getcode() == 200:
            res_content = response.read().decode('utf-8')
            data = json.loads(res_content)
            html_text = data.get('html', '')

            match = re.search(r'href="([^"]+anon-viewer\.com/media\.php\?media=[^"]+)"', html_text)
            if match:
                # Undo HTML entity-escaping of query separators.
                return match.group(1).replace('&amp;', '&')

            # Fallback: a direct <source src="..."> media URL in the page.
            source_match = re.search(r'<source[^>]*src="([^"]+)"', html_text)
            if source_match:
                return source_match.group(1).replace('&amp;', '&')
    except Exception:
        log.warning("Failed to fetch Instagram download link", exc_info=True)
    return None


def google_translate(text, target_lang):
    """Translate *text* with the free Google Translate web endpoint.

    Returns the translated string, or the original *text* on any failure.
    """
    try:
        params = urlencode({'client': 'gtx', 'sl': 'auto', 'tl': target_lang, 'dt': 't', 'q': text})
        url = f"https://translate.googleapis.com/translate_a/single?{params}"
        req = request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with get_proxy_opener().open(req, timeout=60) as r:
            data = json.loads(r.read().decode('utf-8'))
        # Response shape: [[ [translated, original, ...], ... ], ...]
        if data and isinstance(data[0], list) and len(data[0]) > 0:
            result_parts = [x[0] for x in data[0] if x[0]]
            return "".join(result_parts)
    except Exception as e:
        log.error(f"Google Translate Failed: {e}", exc_info=True)
        return text
    return text
+
class GeminiHandler:
    """Stateless helpers for calling the Gemini generative-language REST API.

    Supports multiple comma/newline-separated API keys (rotated past
    quota/server failures), per-key retry with linear backoff, optional
    HTTP(S) proxying, resumable file uploads, chat, OCR, translation and
    text-to-speech. API-level failures are returned as "ERROR:..." strings
    rather than raised.
    """
    # Index of the key that last succeeded; rotation starts from it.
    _working_key_idx = 0
    # Uploaded file URI -> the API key that owns it (uploads are key-scoped,
    # so follow-up calls referencing a URI must reuse the same key).
    _file_uri_keys = {}
    # Max attempts per key for transient (quota/server/network) errors.
    _max_retries = 5

    @staticmethod
    def _get_api_keys():
        """Return configured API keys; commas and newlines both separate."""
        raw = config.conf["VisionAssistant"]["api_key"]
        clean_raw = raw.replace('\r\n', ',').replace('\n', ',')
        return [k.strip() for k in clean_raw.split(',') if k.strip()]

    @staticmethod
    def _get_opener():
        # Proxy-aware urllib opener (see get_proxy_opener).
        return get_proxy_opener()

    @staticmethod
    def _handle_error(e):
        """Map an HTTPError to a user message or a retryable sentinel
        ("QUOTA_EXCEEDED" / "SERVER_ERROR")."""
        if hasattr(e, 'code'):
            # Translators: Error message for Bad Request (400)
            if e.code == 400: return _("Error 400: Bad Request (Check API Key)")
            # Translators: Error message for Forbidden (403)
            if e.code == 403: return _("Error 403: Forbidden (Check Region)")
            if e.code == 429: return "QUOTA_EXCEEDED"
            if e.code >= 500: return "SERVER_ERROR"
        return str(e)

    @staticmethod
    def _call_with_retry(func_logic, key, *args):
        """Run ``func_logic(key, *args)``, retrying transient failures with a
        linear backoff; re-raises the last error when retries run out."""
        last_exc = None
        for attempt in range(GeminiHandler._max_retries):
            try:
                return func_logic(key, *args)
            except error.HTTPError as e:
                err_msg = GeminiHandler._handle_error(e)
                if err_msg not in ["QUOTA_EXCEEDED", "SERVER_ERROR"]:
                    # Permanent error (bad key/request): do not retry.
                    raise
                last_exc = e
            except error.URLError as e:
                # Network-level failure: retry.
                last_exc = e
            if attempt < GeminiHandler._max_retries - 1:
                time.sleep(0.5 * (attempt + 1))
        raise last_exc

    @staticmethod
    def _register_file_uri(uri, key):
        """Remember which key uploaded *uri*; cache bounded to 200 entries."""
        if uri and key:
            GeminiHandler._file_uri_keys[uri] = key
        while len(GeminiHandler._file_uri_keys) > 200:
            # Evict oldest entry (insertion order).
            GeminiHandler._file_uri_keys.pop(next(iter(GeminiHandler._file_uri_keys)))

    @staticmethod
    def _get_registered_key(uri):
        """Return the key that uploaded *uri*, or None if unknown."""
        if not uri:
            return None
        return GeminiHandler._file_uri_keys.get(uri)

    @staticmethod
    def _call_with_key(func_logic, key, *args):
        """Call with one fixed key; convert failures to "ERROR:..." strings."""
        try:
            return GeminiHandler._call_with_retry(func_logic, key, *args)
        except error.HTTPError as e:
            err_msg = GeminiHandler._handle_error(e)
            if err_msg == "QUOTA_EXCEEDED":
                # Translators: Message of a dialog which may pop up while performing an AI call
                err_msg = _("Error 429: Quota Exceeded (Try later)")
            elif err_msg == "SERVER_ERROR":
                # Translators: Message of a dialog which may pop up while performing an AI call
                err_msg = _("Server Error {code}: {reason}").format(code=e.code, reason=e.reason)
            return "ERROR:" + err_msg
        except Exception as e:
            return "ERROR:" + str(e)

    @staticmethod
    def _call_with_rotation(func_logic, *args):
        """Try every configured key starting at the last working one,
        rotating past quota/server failures; "ERROR:..." on total failure."""
        keys = GeminiHandler._get_api_keys()
        if not keys:
            # Translators: Error when no API keys are found in settings
            return "ERROR:" + _("No API Keys configured.")

        num_keys = len(keys)
        for i in range(num_keys):
            idx = (GeminiHandler._working_key_idx + i) % num_keys
            key = keys[idx]
            try:
                res = GeminiHandler._call_with_retry(func_logic, key, *args)
                # Remember the successful key so future calls start with it.
                GeminiHandler._working_key_idx = idx
                return res
            except error.HTTPError as e:
                err_msg = GeminiHandler._handle_error(e)
                if err_msg in ["QUOTA_EXCEEDED", "SERVER_ERROR"]:
                    if i < num_keys - 1: continue
                    # Translators: Error when all available API keys fail
                    return "ERROR:" + _("All API Keys failed (Quota/Server).")
                return "ERROR:" + err_msg
            except Exception as e:
                return "ERROR:" + str(e)
        return "ERROR:" + _("Unknown error occurred.")

    @staticmethod
    def translate(text, target_lang):
        """Translate *text* to *target_lang* with the configured model."""
        def _logic(key, txt, lang):
            model = config.conf["VisionAssistant"]["model_name"]
            url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
            # Fall back to a hard-coded prompt if the configured one is empty.
            quick_template = get_prompt_text("translate_quick") or "Translate to {target_lang}. Output ONLY translation."
            quick_prompt = apply_prompt_template(quick_template, [("target_lang", lang)])
            payload = {"contents": [{"parts": [{"text": quick_prompt}, {"text": txt}]}]}
            req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key})
            with GeminiHandler._get_opener().open(req, timeout=90) as r:
                return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
        return GeminiHandler._call_with_rotation(_logic, text, target_lang)

    @staticmethod
    def ocr_page(image_bytes):
        """OCR a single JPEG page passed inline (base64) to the model."""
        def _logic(key, img_data):
            model = config.conf["VisionAssistant"]["model_name"]
            url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
            ocr_image_prompt = get_prompt_text("ocr_image_extract")
            payload = {"contents": [{"parts": [{"inline_data": {"mime_type": "image/jpeg", "data": base64.b64encode(img_data).decode('utf-8')}}, {"text": ocr_image_prompt}]}]}
            req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key})
            with GeminiHandler._get_opener().open(req, timeout=120) as r:
                return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
        return GeminiHandler._call_with_rotation(_logic, image_bytes)

    @staticmethod
    def upload_and_process_batch(file_path, mime_type, page_count):
        """Upload a document via the resumable Files API and OCR it in one
        generateContent call; returns a list of per-page text chunks
        (split on the "[[[PAGE_SEP]]]" marker) or a one-element
        ["ERROR:..."] list.

        NOTE(review): *page_count* is currently unused — confirm intended.
        """
        keys = GeminiHandler._get_api_keys()
        if not keys:
            # Translators: Error message for missing API Keys
            return [ "ERROR:" + _("No API Keys.") ]
        model = config.conf["VisionAssistant"]["model_name"]

        opener = GeminiHandler._get_opener()
        proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
        base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"

        for i, key in enumerate(keys):
            try:
                # 1) Start a resumable upload session.
                f_size = os.path.getsize(file_path)
                init_url = f"{base_url}/upload/v1beta/files"
                headers = {"X-Goog-Upload-Protocol": "resumable", "X-Goog-Upload-Command": "start", "X-Goog-Upload-Header-Content-Length": str(f_size), "X-Goog-Upload-Header-Content-Type": mime_type, "Content-Type": "application/json", "x-goog-api-key": key}

                req = request.Request(init_url, data=json.dumps({"file": {"display_name": "batch"}}).encode(), headers=headers, method="POST")
                with opener.open(req, timeout=120) as r: upload_url = r.headers.get("x-goog-upload-url")

                # 2) Upload the whole file in one request and finalize.
                with open(file_path, 'rb') as f: f_data = f.read()
                req_up = request.Request(upload_url, data=f_data, headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"}, method="POST")
                with opener.open(req_up, timeout=180) as r:
                    res = json.loads(r.read().decode())
                    uri, name = res['file']['uri'], res['file']['name']

                # 3) Poll until server-side processing finishes (max ~60s).
                active = False
                for attempt in range(30):
                    req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key})
                    with opener.open(req_check, timeout=30) as r:
                        state = json.loads(r.read().decode()).get('state')
                    if state == "ACTIVE":
                        active = True
                        break
                    if state == "FAILED":
                        break
                    time.sleep(2)

                if not active:
                    if i < len(keys) - 1:
                        continue
                    return [ "ERROR:" + _("Upload failed.") ]

                GeminiHandler._register_file_uri(uri, key)

                # 4) Ask the model to OCR the uploaded document.
                url = f"{base_url}/v1beta/models/{model}:generateContent"
                prompt = get_prompt_text("ocr_document_extract")
                contents = [{"parts": [{"file_data": {"mime_type": mime_type, "file_uri": uri}}, {"text": prompt}]}]

                req_gen = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key})
                with opener.open(req_gen, timeout=180) as r:
                    res = json.loads(r.read().decode())
                    text = res['candidates'][0]['content']['parts'][0]['text']
                    return text.split('[[[PAGE_SEP]]]')

            except error.HTTPError as e:
                err_code = GeminiHandler._handle_error(e)
                if err_code in ["QUOTA_EXCEEDED", "SERVER_ERROR"] and i < len(keys) - 1:
                    continue
                if err_code == "QUOTA_EXCEEDED":
                    # Translators: Message of a dialog which may pop up while performing an AI call
                    err_msg = _("Error 429: Quota Exceeded (Try later)")
                elif err_code == "SERVER_ERROR":
                    # Translators: Message of a dialog which may pop up while performing an AI call
                    err_msg = _("Server Error {code}: {reason}").format(code=e.code, reason=e.reason)
                else:
                    err_msg = err_code
                return ["ERROR:" + err_msg]
            except Exception as e:
                return ["ERROR:" + str(e)]
        return ["ERROR:" + _("All keys failed.")]

    @staticmethod
    def chat(history, new_msg, file_uri, mime_type):
        """Run one chat turn: prior *history* plus an optional attached file
        and the new user message; returns the model's text reply."""
        def _logic(key, hist, msg, uri, mime):
            model = config.conf["VisionAssistant"]["model_name"]
            proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
            base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
            url = f"{base_url}/v1beta/models/{model}:generateContent"

            contents = list(hist)
            if uri:
                user_parts = [{"file_data": {"mime_type": mime, "file_uri": uri}}]
            else:
                user_parts = []
            user_parts.append({"text": msg})
            contents.append({"role": "user", "parts": user_parts})

            req = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key})
            with GeminiHandler._get_opener().open(req, timeout=120) as r:
                return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
        # Uploaded files are key-scoped: reuse the uploading key when known.
        forced_key = GeminiHandler._get_registered_key(file_uri) if file_uri else None
        if forced_key:
            return GeminiHandler._call_with_key(_logic, forced_key, history, new_msg, file_uri, mime_type)
        return GeminiHandler._call_with_rotation(_logic, history, new_msg, file_uri, mime_type)

    @staticmethod
    def upload_for_chat(file_path, mime_type):
        """Upload a file for use as a chat attachment.

        Returns the file URI once ACTIVE, or None on failure (all keys
        tried; failures logged at debug level).
        """
        keys = GeminiHandler._get_api_keys()
        if not keys: return None
        opener = GeminiHandler._get_opener()
        proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
        base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"

        for key in keys:
            try:
                f_size = os.path.getsize(file_path)
                init_url = f"{base_url}/upload/v1beta/files"
                headers = {"X-Goog-Upload-Protocol": "resumable", "X-Goog-Upload-Command": "start", "X-Goog-Upload-Header-Content-Length": str(f_size), "X-Goog-Upload-Header-Content-Type": mime_type, "Content-Type": "application/json", "x-goog-api-key": key}
                req = request.Request(init_url, data=json.dumps({"file": {"display_name": os.path.basename(file_path)}}).encode(), headers=headers, method="POST")
                with opener.open(req, timeout=120) as r: upload_url = r.headers.get("x-goog-upload-url")
                with open(file_path, 'rb') as f: f_data = f.read()
                req_up = request.Request(upload_url, data=f_data, headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"}, method="POST")
                with opener.open(req_up, timeout=180) as r:
                    res = json.loads(r.read().decode())
                    uri, name = res['file']['uri'], res['file']['name']
                # Poll until the file becomes ACTIVE (max ~60s).
                for attempt in range(30):
                    req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key})
                    with opener.open(req_check, timeout=30) as r:
                        state = json.loads(r.read().decode()).get('state')
                    if state == "ACTIVE":
                        GeminiHandler._register_file_uri(uri, key)
                        return uri
                    time.sleep(2)
                return None
            except Exception:
                log.debug("Failed to upload file for chat with current key", exc_info=True)
                continue
        return None

    @staticmethod
    def generate_speech(text, voice_name):
        """Synthesize *text* with a Gemini TTS preview model.

        Returns the base64-encoded audio payload, or an "ERROR:..." string.
        """
        def _logic(key, txt, voice):
            main_model = config.conf["VisionAssistant"]["model_name"]
            # Mirror the user's quality tier: a "pro" main model selects
            # the pro TTS preview model.
            if "pro" in main_model.lower():
                tts_model = "gemini-2.5-pro-preview-tts"
            else:
                tts_model = "gemini-2.5-flash-preview-tts"

            proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
            base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
            url = f"{base_url}/v1beta/models/{tts_model}:generateContent"

            payload = {
                "contents": [{"parts": [{"text": txt}]}],
                "generationConfig": {
                    "responseModalities": ["AUDIO"],
                    "speechConfig": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": voice}}}
                }
            }
            req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key})
            with GeminiHandler._get_opener().open(req, timeout=600) as r:
                res = json.loads(r.read().decode())
                candidates = res.get('candidates', [])
                if not candidates: raise Exception("No candidates returned")
                content = candidates[0].get('content', {})
                parts = content.get('parts', [])
                if not parts: raise Exception("No parts in response")
                part = parts[0]
                # The API has returned both camelCase and snake_case here.
                if 'inlineData' in part: return part['inlineData']['data']
                if 'inline_data' in part: return part['inline_data']['data']
                if 'text' in part: raise Exception(f"Model refused audio: {part['text']}")
                raise Exception("Unknown response format")
        return GeminiHandler._call_with_rotation(_logic, text, voice_name)
diff --git a/addon/globalPlugins/visionAssistant/updater.py b/addon/globalPlugins/visionAssistant/updater.py
new file mode 100644
index 0000000..16e899d
--- /dev/null
+++ b/addon/globalPlugins/visionAssistant/updater.py
@@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+
+import json
+import threading
+import logging
+import os
+import re
+import tempfile
+
+import wx
+from urllib import request
+
+import addonHandler
+import gui
+import ui
+from .constants import ADDON_NAME
+from .markdown_utils import clean_markdown
+from .services import show_error_dialog
+
+log = logging.getLogger(__name__)
+addonHandler.initTranslation()
+
class UpdateDialog(wx.Dialog):
    """Modal dialog presenting a new version's changelog with Yes/No buttons.

    ``ShowModal()`` returns ``wx.ID_YES`` or ``wx.ID_NO``.
    """

    def __init__(self, parent, version, name, changes):
        # Translators: Title of update confirmation dialog
        super().__init__(parent, title=_("Update Available"), size=(500, 450))
        self.Centre()

        panel = wx.Panel(self)
        vbox = wx.BoxSizer(wx.VERTICAL)

        # Translators: Message asking user to update. {version} is version number.
        msg = _("A new version ({version}) of {name} is available.").format(version=version, name=name)
        header = wx.StaticText(panel, label=msg)
        vbox.Add(header, 0, wx.ALL, 15)

        # Translators: Label for the changes text box
        change_lbl = wx.StaticText(panel, label=_("Changes:"))
        vbox.Add(change_lbl, 0, wx.LEFT | wx.RIGHT, 15)

        # Read-only multiline control so screen readers can review the text.
        self.changes_ctrl = wx.TextCtrl(panel, value=changes, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2)
        vbox.Add(self.changes_ctrl, 1, wx.EXPAND | wx.ALL, 15)

        # Translators: Question to download and install
        question = wx.StaticText(panel, label=_("Download and Install?"))
        vbox.Add(question, 0, wx.LEFT | wx.RIGHT | wx.BOTTOM, 15)

        btn_sizer = wx.BoxSizer(wx.HORIZONTAL)
        # Translators: Button to accept update
        self.yes_btn = wx.Button(panel, wx.ID_YES, label=_("&Yes"))
        # Translators: Button to reject update
        self.no_btn = wx.Button(panel, wx.ID_NO, label=_("&No"))

        btn_sizer.Add(self.yes_btn, 0, wx.RIGHT, 10)
        btn_sizer.Add(self.no_btn, 0)
        vbox.Add(btn_sizer, 0, wx.ALIGN_RIGHT | wx.ALL, 15)

        panel.SetSizer(vbox)
        self.yes_btn.SetDefault()
        # Map button presses to the modal return codes callers check.
        self.yes_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_YES))
        self.no_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_NO))
+
class UpdateManager:
    """Checks the GitHub releases feed for a newer add-on build and, on
    user confirmation, downloads and launches the installer."""

    def __init__(self, repo_name):
        self.repo_name = repo_name
        self.current_version = addonHandler.getCodeAddon().manifest['version']

    def check_for_updates(self, silent=True):
        """Start a background update check.

        When *silent* is True (the automatic startup check), 'up to date'
        and failure messages are suppressed.
        """
        threading.Thread(target=self._check_thread, args=(silent,), daemon=True).start()

    def _check_thread(self, silent):
        """Worker: query GitHub, compare versions, prompt on the GUI thread."""
        try:
            url = f"https://api.github.com/repos/{self.repo_name}/releases/latest"
            req = request.Request(url, headers={"User-Agent": "NVDA-Addon"})
            with request.urlopen(req, timeout=60) as response:
                if response.status == 200:
                    data = json.loads(response.read().decode('utf-8'))
                    latest_tag = data.get("tag_name", "").lstrip("v")
                    if self._compare_versions(latest_tag, self.current_version) > 0:
                        download_url = None
                        for asset in data.get("assets", []):
                            if asset["name"].endswith(".nvda-addon"):
                                download_url = asset["browser_download_url"]
                                break
                        if download_url:
                            raw_changes = data.get("body", "")
                            # Drop checksum/checklist boilerplate from the
                            # release notes, then strip Markdown syntax.
                            clean_changes = re.split(r'SHA256|Checklist|---', raw_changes, flags=re.I)[0].strip()
                            clean_changes = clean_markdown(clean_changes)

                            wx.CallAfter(self._prompt_update, latest_tag, download_url, clean_changes)
                        elif not silent:
                            # Translators: Error message when an update is found but the addon file is missing from GitHub.
                            msg = _("Update found but no .nvda-addon file in release.")
                            show_error_dialog(msg)
                    elif not silent:
                        # Translators: Status message informing the user they are already on the latest version.
                        msg = _("You have the latest version.")
                        wx.CallAfter(ui.message, msg)
        except Exception as e:
            if not silent:
                msg = _("Update check failed: {error}").format(error=e)
                show_error_dialog(msg)

    def _compare_versions(self, v1, v2):
        """Compare dotted version strings; return -1, 0 or 1 (cmp-style).

        Shorter versions are zero-padded so "1.0" compares equal to
        "1.0.0" (the original reported it as older, re-offering the same
        release). Non-numeric versions fall back to string equality,
        treating any difference as newer so the user is still notified.
        """
        try:
            parts1 = [int(x) for x in v1.split('.')]
            parts2 = [int(x) for x in v2.split('.')]
            width = max(len(parts1), len(parts2))
            parts1 += [0] * (width - len(parts1))
            parts2 += [0] * (width - len(parts2))
            return (parts1 > parts2) - (parts1 < parts2)
        except (ValueError, AttributeError):
            return 0 if v1 == v2 else 1

    def _prompt_update(self, version, url, changes):
        """Show the confirmation dialog; start the download if accepted."""
        dlg = UpdateDialog(gui.mainFrame, version, ADDON_NAME, changes)
        if dlg.ShowModal() == wx.ID_YES:
            threading.Thread(target=self._download_install_worker, args=(url,), daemon=True).start()
        dlg.Destroy()

    def _download_install_worker(self, url):
        """Worker: download the .nvda-addon to temp and open it to install."""
        try:
            # Translators: Message shown while downloading update
            msg = _("Downloading update...")
            wx.CallAfter(ui.message, msg)
            temp_dir = tempfile.gettempdir()
            file_path = os.path.join(temp_dir, "VisionAssistant_Update.nvda-addon")
            with request.urlopen(url) as response, open(file_path, 'wb') as out_file:
                out_file.write(response.read())
            # os.startfile hands the file to NVDA's add-on installer.
            wx.CallAfter(os.startfile, file_path)
        except Exception as e:
            # Translators: Error message for download failure
            msg = _("Download failed: {error}").format(error=e)
            show_error_dialog(msg)
+
+# --- UI Classes ---