diff --git a/addon/globalPlugins/visionAssistant/__init__.py b/addon/globalPlugins/visionAssistant/__init__.py index 089a7b4..702d21b 100644 --- a/addon/globalPlugins/visionAssistant/__init__.py +++ b/addon/globalPlugins/visionAssistant/__init__.py @@ -49,654 +49,36 @@ log = logging.getLogger(__name__) addonHandler.initTranslation() -_vision_assistant_instance = None - -ADDON_NAME = addonHandler.getCodeAddon().manifest["summary"] -GITHUB_REPO = "mahmoodhozhabri/VisionAssistantPro" - -# --- Constants & Config --- - -CHROME_OCR_KEYS = [ - "AIzaSyA2KlwBX3mkFo30om9LUFYQhpqLoa_BNhE", - "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw" -] - -MODELS = [ - # --- 1. Recommended (Auto-Updating) --- - # Translators: AI Model info. [Auto] = Automatic updates. (Latest) = Newest version. - (_("[Auto]") + " Gemini Flash " + _("(Latest)"), "gemini-flash-latest"), - (_("[Auto]") + " Gemini Flash Lite " + _("(Latest)"), "gemini-flash-lite-latest"), - - # --- 2. Current Standard (Free & Fast) --- - # Translators: AI Model info. [Free] = Generous usage limits. (Preview) = Experimental or early-access version. - (_("[Free]") + " Gemini 3.0 Flash " + _("(Preview)"), "gemini-3-flash-preview"), - (_("[Free]") + " Gemini 2.5 Flash", "gemini-2.5-flash"), - (_("[Free]") + " Gemini 2.5 Flash Lite", "gemini-2.5-flash-lite"), - - # --- 3. High Intelligence (Paid/Pro/Preview) --- - # Translators: AI Model info. [Pro] = High intelligence/Paid tier. (Preview) = Experimental version. - (_("[Pro]") + " Gemini 3.0 Pro " + _("(Preview)"), "gemini-3-pro-preview"), - (_("[Pro]") + " Gemini 2.5 Pro", "gemini-2.5-pro"), -] - -GEMINI_VOICES = [ - # Translators: Adjective describing a bright AI voice style. - ("Zephyr", _("Bright")), - # Translators: Adjective describing an upbeat AI voice style. - ("Puck", _("Upbeat")), - # Translators: Adjective describing an informative AI voice style. - ("Charon", _("Informative")), - # Translators: Adjective describing a firm AI voice style. 
- ("Kore", _("Firm")), - # Translators: Adjective describing an excitable AI voice style. - ("Fenrir", _("Excitable")), - # Translators: Adjective describing a youthful AI voice style. - ("Leda", _("Youthful")), - # Translators: Adjective describing a firm AI voice style. - ("Orus", _("Firm")), - # Translators: Adjective describing a breezy AI voice style. - ("Aoede", _("Breezy")), - # Translators: Adjective describing an easy-going AI voice style. - ("Callirrhoe", _("Easy-going")), - # Translators: Adjective describing a bright AI voice style. - ("Autonoe", _("Bright")), - # Translators: Adjective describing a breathy AI voice style. - ("Enceladus", _("Breathy")), - # Translators: Adjective describing a clear AI voice style. - ("Iapetus", _("Clear")), - # Translators: Adjective describing an easy-going AI voice style. - ("Umbriel", _("Easy-going")), - # Translators: Adjective describing a smooth AI voice style. - ("Algieba", _("Smooth")), - # Translators: Adjective describing a smooth AI voice style. - ("Despina", _("Smooth")), - # Translators: Adjective describing a clear AI voice style. - ("Erinome", _("Clear")), - # Translators: Adjective describing a gravelly AI voice style. - ("Algenib", _("Gravelly")), - # Translators: Adjective describing an informative AI voice style. - ("Rasalgethi", _("Informative")), - # Translators: Adjective describing an upbeat AI voice style. - ("Laomedeia", _("Upbeat")), - # Translators: Adjective describing a soft AI voice style. - ("Achernar", _("Soft")), - # Translators: Adjective describing a firm AI voice style. - ("Alnilam", _("Firm")), - # Translators: Adjective describing an even AI voice style. - ("Schedar", _("Even")), - # Translators: Adjective describing a mature AI voice style. - ("Gacrux", _("Mature")), - # Translators: Adjective describing a forward AI voice style. - ("Pulcherrima", _("Forward")), - # Translators: Adjective describing a friendly AI voice style. 
- ("Achird", _("Friendly")), - # Translators: Adjective describing a casual AI voice style. - ("Zubenelgenubi", _("Casual")), - # Translators: Adjective describing a gentle AI voice style. - ("Vindemiatrix", _("Gentle")), - # Translators: Adjective describing a lively AI voice style. - ("Sadachbia", _("Lively")), - # Translators: Adjective describing a knowledgeable AI voice style. - ("Sadaltager", _("Knowledgeable")), - # Translators: Adjective describing a warm AI voice style. - ("Sulafat", _("Warm")) -] - -BASE_LANGUAGES = [ - ("Arabic", "ar"), ("Bulgarian", "bg"), ("Chinese", "zh"), ("Czech", "cs"), ("Danish", "da"), - ("Dutch", "nl"), ("English", "en"), ("Finnish", "fi"), ("French", "fr"), - ("German", "de"), ("Greek", "el"), ("Hebrew", "he"), ("Hindi", "hi"), - ("Hungarian", "hu"), ("Indonesian", "id"), ("Italian", "it"), ("Japanese", "ja"), - ("Korean", "ko"), ("Nepali", "ne"), ("Norwegian", "no"), ("Persian", "fa"), ("Polish", "pl"), - ("Portuguese", "pt"), ("Romanian", "ro"), ("Russian", "ru"), ("Spanish", "es"), - ("Swedish", "sv"), ("Thai", "th"), ("Turkish", "tr"), ("Ukrainian", "uk"), - ("Vietnamese", "vi") -] -SOURCE_LIST = [("Auto-detect", "auto")] + BASE_LANGUAGES -SOURCE_NAMES = [x[0] for x in SOURCE_LIST] -TARGET_LIST = BASE_LANGUAGES -TARGET_NAMES = [x[0] for x in TARGET_LIST] -TARGET_CODES = {x[0]: x[1] for x in BASE_LANGUAGES} - -OCR_ENGINES = [ - # Translators: OCR Engine option (Fast but less formatted) - (_("Chrome (Fast)"), "chrome"), - # Translators: OCR Engine option (Slower but better formatting) - (_("Gemini (Formatted)"), "gemini") -] - -confspec = { - "proxy_url": "string(default='')", - "api_key": "string(default='')", - "model_name": "string(default='gemini-flash-lite-latest')", - "target_language": "string(default='English')", - "source_language": "string(default='Auto-detect')", - "ai_response_language": "string(default='English')", - "smart_swap": "boolean(default=True)", - "captcha_mode": "string(default='navigator')", - 
"custom_prompts": "string(default='')", - "custom_prompts_v2": "string(default='')", - "default_refine_prompts": "string(default='')", - "check_update_startup": "boolean(default=False)", - "clean_markdown_chat": "boolean(default=True)", - "copy_to_clipboard": "boolean(default=False)", - "skip_chat_dialog": "boolean(default=False)", - "ocr_engine": "string(default='chrome')", - "tts_voice": "string(default='Puck')" -} - -config.conf.spec["VisionAssistant"] = confspec - -PROMPT_TRANSLATE = """ -Task: Translate the text below to "{target_lang}". - -Configuration: -- Target Language: "{target_lang}" -- Swap Language: "{swap_target}" -- Smart Swap: {smart_swap} - -Rules: -1. DEFAULT: Translate the input strictly to "{target_lang}". -2. MIXED CONTENT: If the text contains mixed languages (e.g., Arabic content with English UI terms like 'Reply', 'From', 'Forwarded'), translate EVERYTHING to "{target_lang}". -3. EXCEPTION: If (and ONLY if) the input is already completely in "{target_lang}" AND "Smart Swap" is True, then translate to "{swap_target}". - -Constraints: -- Output ONLY the translation. -- Do NOT translate actual programming code (Python, C++, etc.) or URLs. -- Translate ALL UI elements, menus, and interface labels. - -Input Text: -{text_content} -""" - -PROMPT_UI_LOCATOR = "Analyze UI (Size: {width}x{height}). Request: '{query}'. Output JSON: {{\"x\": int, \"y\": int, \"found\": bool}}." - -REFINE_PROMPT_KEYS = ("summarize", "fix_grammar", "fix_translate", "explain") - -LEGACY_REFINER_TOKENS = { - "summarize": "[summarize]", - "fix_grammar": "[fix_grammar]", - "fix_translate": "[fix_translate]", - "explain": "[explain]", -} - -DEFAULT_SYSTEM_PROMPTS = ( - { - "key": "summarize", - # Translators: Section header for text refinement prompts in Prompt Manager. - "section": _("Refine"), - # Translators: Label for the text summarization prompt. 
- "label": _("Summarize"), - "prompt": "Summarize the text below in {response_lang}.", - }, - { - "key": "fix_grammar", - # Translators: Section header for text refinement prompts in Prompt Manager. - "section": _("Refine"), - # Translators: Label for the grammar correction prompt. - "label": _("Fix Grammar"), - "prompt": "Fix grammar in the text below. Output ONLY the fixed text.", - }, - { - "key": "fix_translate", - # Translators: Section header for text refinement prompts in Prompt Manager. - "section": _("Refine"), - # Translators: Label for the grammar correction and translation prompt. - "label": _("Fix Grammar & Translate"), - "prompt": "Fix grammar and translate to {target_lang}.{swap_instruction} Output ONLY the result.", - }, - { - "key": "explain", - # Translators: Section header for text refinement prompts in Prompt Manager. - "section": _("Refine"), - # Translators: Label for the text explanation prompt. - "label": _("Explain"), - "prompt": "Explain the text below in {response_lang}.", - }, - { - "key": "translate_main", - # Translators: Section header for translation-related prompts in Prompt Manager. - "section": _("Translation"), - # Translators: Label for the smart translation prompt. - "label": _("Smart Translation"), - "prompt": PROMPT_TRANSLATE.strip(), - }, - { - "key": "translate_quick", - # Translators: Section header for translation-related prompts in Prompt Manager. - "section": _("Translation"), - # Translators: Label for the quick translation prompt. - "label": _("Quick Translation"), - "prompt": "Translate to {target_lang}. Output ONLY translation.", - }, - { - "key": "document_chat_system", - # Translators: Section header for document-related prompts in Prompt Manager. - "section": _("Document"), - # Translators: Label for the initial context prompt in document chat. - "label": _("Document Chat Context"), - "prompt": "STRICTLY Respond in {response_lang}. Use Markdown formatting. 
Analyze the attached content to answer.", - }, - { - "key": "document_chat_ack", - # Translators: Section header for advanced/internal prompts in Prompt Manager. - "section": _("Advanced"), - # Translators: Label for the AI's acknowledgement reply in document chat. - "label": _("Document Chat Bootstrap Reply"), - "internal": True, - "prompt": "Context received. Ready for questions.", - }, - { - "key": "vision_navigator_object", - # Translators: Section header for image analysis prompts in Prompt Manager. - "section": _("Vision"), - # Translators: Label for the prompt used to analyze the current navigator object. - "label": _("Navigator Object Analysis"), - "prompt": ( - "Analyze this image. Describe the layout, visible text, and UI elements. " - "Use Markdown formatting (headings, lists) to organize the description. " - "Language: {response_lang}. Ensure the response is strictly in {response_lang}. " - "IMPORTANT: Start directly with the description content. Do not add introductory " - "sentences like 'Here is the analysis' or 'The image shows'." - ), - }, - { - "key": "vision_fullscreen", - # Translators: Section header for image analysis prompts in Prompt Manager. - "section": _("Vision"), - # Translators: Label for the prompt used to analyze the entire screen. - "label": _("Full Screen Analysis"), - "prompt": ( - "Analyze this image. Describe the layout, visible text, and UI elements. " - "Use Markdown formatting (headings, lists) to organize the description. " - "Language: {response_lang}. Ensure the response is strictly in {response_lang}. " - "IMPORTANT: Start directly with the description content. Do not add introductory " - "sentences like 'Here is the analysis' or 'The image shows'." - ), - }, - { - "key": "vision_followup_context", - # Translators: Section header for advanced/internal prompts in Prompt Manager. - "section": _("Advanced"), - # Translators: Label for the follow-up context in image analysis chat. 
- "label": _("Vision Follow-up Context"), - "internal": True, - "prompt": "Image Context. Target Language: {response_lang}", - }, - { - "key": "vision_followup_suffix", - # Translators: Section header for advanced/internal prompts in Prompt Manager. - "section": _("Advanced"), - # Translators: Label for the rule enforced during image analysis follow-up questions. - "label": _("Vision Follow-up Answer Rule"), - "internal": True, - "prompt": "Answer strictly in {response_lang}", - }, - { - "key": "video_analysis", - # Translators: Section header for video analysis prompts in Prompt Manager. - "section": _("Video"), - # Translators: Label for the video content analysis prompt. - "label": _("Video Analysis"), - "prompt": ( - "Analyze this video. Provide a detailed description of the visual content and a " - "summary of the audio. IMPORTANT: Write the entire response STRICTLY in " - "{response_lang} language." - ), - }, - { - "key": "audio_transcription", - # Translators: Section header for audio-related prompts in Prompt Manager. - "section": _("Audio"), - # Translators: Label for the audio file transcription prompt. - "label": _("Audio Transcription"), - "prompt": "Transcribe this audio in {response_lang}.", - }, - { - "key": "dictation_transcribe", - # Translators: Section header for audio-related prompts in Prompt Manager. - "section": _("Audio"), - # Translators: Label for the smart voice dictation prompt. - "label": _("Smart Dictation"), - "prompt": ( - "Transcribe speech. Use native script. Fix stutters. If there is no speech, silence, " - "or background noise only, write exactly: [[[NOSPEECH]]]" - ), - }, - { - "key": "ocr_image_extract", - # Translators: Section header for OCR-related prompts in Prompt Manager. - "section": _("OCR"), - # Translators: Label for the OCR prompt used for image text extraction. - "label": _("OCR Image Extraction"), - "prompt": ( - "Extract all visible text from this image. 
Strictly preserve original formatting " - "(headings, lists, tables) using Markdown. Do not output any system messages or " - "code block backticks (```). Output ONLY the raw content." - ), - }, - { - "key": "ocr_document_extract", - # Translators: Section header for OCR-related prompts in Prompt Manager. - "section": _("OCR"), - # Translators: Label for the OCR prompt used for document text extraction. - "label": _("OCR Document Extraction"), - "prompt": ( - "Extract all visible text from this document. Strictly preserve original formatting " - "(headings, lists, tables) using Markdown. You MUST insert the exact delimiter " - "'[[[PAGE_SEP]]]' immediately after the content of every single page. Do not output " - "any system messages or code block backticks (```). Output ONLY the raw content." - ), - }, - { - "key": "ocr_document_translate", - # Translators: Section header for document-related prompts in Prompt Manager. - "section": _("Document"), - # Translators: Label for the combined OCR and translation prompt for documents. - "label": _("Document OCR + Translate"), - "prompt": ( - "Extract all text from this document. Preserve formatting (Markdown). Then translate " - "the content to {target_lang}. Output ONLY the translated content. Do not add " - "explanations." - ), - }, - { - "key": "captcha_solver_base", - # Translators: Section header for CAPTCHA-related prompts in Prompt Manager. - "section": _("CAPTCHA"), - # Translators: Label for the CAPTCHA solving prompt. - "label": _("CAPTCHA Solver"), - "internal": True, - "prompt": ( - "Blind user. Return CAPTCHA code only. If NO CAPTCHA is detected in the image, " - "strictly return: [[[NO_CAPTCHA]]].{captcha_extra}" - ), - }, - { - "key": "refine_files_only", - # Translators: Section header for advanced/internal prompts in Prompt Manager. - "section": _("Advanced"), - # Translators: Label for the fallback prompt when only files are provided in Refine. 
- "label": _("Refine Files-Only Fallback"), - "internal": True, - "prompt": "Analyze these files.", - }, +from .constants import ( + ADDON_NAME, + CHROME_OCR_KEYS, + GEMINI_VOICES, + GITHUB_REPO, + MODELS, + OCR_ENGINES, + PROMPT_VARIABLES_GUIDE, + REFINE_PROMPT_KEYS, + SOURCE_NAMES, + TARGET_CODES, + TARGET_NAMES, ) - -PROMPT_VARIABLES_GUIDE = ( - # Translators: Description and input type for the [selection] variable in the Variables Guide. - ("[selection]", _("Currently selected text"), _("Text")), - # Translators: Description for the [clipboard] variable in the Variables Guide. - ("[clipboard]", _("Clipboard content"), _("Text")), - # Translators: Description and input type for the [screen_obj] variable in the Variables Guide. - ("[screen_obj]", _("Screenshot of the navigator object"), _("Image")), - # Translators: Description for the [screen_full] variable in the Variables Guide. - ("[screen_full]", _("Screenshot of the entire screen"), _("Image")), - # Translators: Description and input type for the [file_ocr] variable in the Variables Guide. - ("[file_ocr]", _("Select image/PDF/TIFF for text extraction"), _("Image, PDF, TIFF")), - # Translators: Description and input type for the [file_read] variable in the Variables Guide. - ("[file_read]", _("Select document for reading"), _("TXT, Code, PDF")), - # Translators: Description and input type for the [file_audio] variable in the Variables Guide. 
- ("[file_audio]", _("Select audio file for analysis"), _("MP3, WAV, OGG")), +from .markdown_utils import clean_markdown, markdown_to_html +from .prompt_helpers import ( + apply_prompt_template, + get_builtin_default_prompts, + get_builtin_default_prompt_map, + get_configured_default_prompt_map, + get_configured_default_prompts, + get_prompt_text, + get_refine_menu_options, + load_configured_custom_prompts, + migrate_prompt_config_if_needed, + serialize_default_prompt_overrides, + serialize_custom_prompts_v2, ) # --- Helpers --- -def get_builtin_default_prompts(): - builtins = [] - for item in DEFAULT_SYSTEM_PROMPTS: - p = str(item["prompt"]).strip() - builtins.append({ - "key": item["key"], - "section": item["section"], - "label": item["label"], - "display_label": f"{item['section']} - {item['label']}", - "internal": bool(item.get("internal")), - "prompt": p, - "default": p, - }) - return builtins - -def get_builtin_default_prompt_map(): - return {item["key"]: item for item in get_builtin_default_prompts()} - -def _normalize_custom_prompt_items(items): - normalized = [] - if not isinstance(items, list): - return normalized - - for item in items: - if not isinstance(item, dict): - continue - name = item.get("name") - content = item.get("content") - if not isinstance(name, str) or not isinstance(content, str): - continue - name = name.strip() - content = content.strip() - if name and content: - normalized.append({"name": name, "content": content}) - return normalized - -def parse_custom_prompts_legacy(raw_value): - items = [] - if not raw_value: - return items - - normalized = raw_value.replace("\r\n", "\n").replace("\r", "\n") - for line in normalized.split("\n"): - for segment in line.split("|"): - segment = segment.strip() - if not segment or ":" not in segment: - continue - name, content = segment.split(":", 1) - name = name.strip() - content = content.strip() - if name and content: - items.append({"name": name, "content": content}) - return items - -def 
parse_custom_prompts_v2(raw_value): - if not isinstance(raw_value, str) or not raw_value.strip(): - return None - try: - data = json.loads(raw_value) - except Exception as e: - log.warning(f"Invalid custom_prompts_v2 config, falling back to legacy format: {e}") - return None - return _normalize_custom_prompt_items(data) - -def serialize_custom_prompts_v2(items): - normalized = _normalize_custom_prompt_items(items) - if not normalized: - return "" - return json.dumps(normalized, ensure_ascii=False) - -def load_configured_custom_prompts(): - try: - raw_v2 = config.conf["VisionAssistant"]["custom_prompts_v2"] - except Exception: - raw_v2 = "" - items_v2 = parse_custom_prompts_v2(raw_v2) - if items_v2 is not None: - return items_v2 - return parse_custom_prompts_legacy(config.conf["VisionAssistant"]["custom_prompts"]) - -def _sanitize_default_prompt_overrides(data): - if not isinstance(data, dict): - return {}, False - - changed = False - mutable = dict(data) - # Migrate old key used in previous versions. 
- legacy_vision = mutable.pop("vision_image_analysis", None) - if legacy_vision is not None: - changed = True - if isinstance(legacy_vision, str) and legacy_vision.strip(): - legacy_text = legacy_vision.strip() - nav_value = mutable.get("vision_navigator_object") - if not isinstance(nav_value, str) or not nav_value.strip(): - mutable["vision_navigator_object"] = legacy_text - changed = True - full_value = mutable.get("vision_fullscreen") - if not isinstance(full_value, str) or not full_value.strip(): - mutable["vision_fullscreen"] = legacy_text - changed = True - - valid_keys = set(get_builtin_default_prompt_map().keys()) - sanitized = {} - for key, value in mutable.items(): - if key not in valid_keys or not isinstance(value, str): - changed = True - continue - prompt_text = value.strip() - if not prompt_text: - changed = True - continue - if key in LEGACY_REFINER_TOKENS and prompt_text == LEGACY_REFINER_TOKENS[key]: - # Drop old token-only overrides and fallback to current built-ins. - changed = True - continue - if prompt_text != value: - changed = True - sanitized[key] = prompt_text - return sanitized, changed - -def migrate_prompt_config_if_needed(): - changed = False - - try: - raw_v2 = config.conf["VisionAssistant"]["custom_prompts_v2"] - except Exception: - raw_v2 = "" - raw_legacy = config.conf["VisionAssistant"]["custom_prompts"] - - v2_items = parse_custom_prompts_v2(raw_v2) - if v2_items is None: - target_items = parse_custom_prompts_legacy(raw_legacy) - else: - target_items = v2_items - - serialized_v2 = serialize_custom_prompts_v2(target_items) - if serialized_v2 != (raw_v2 or ""): - config.conf["VisionAssistant"]["custom_prompts_v2"] = serialized_v2 - changed = True - - # Legacy mirror is disabled. Clear old storage to prevent stale fallback data. 
- if raw_legacy: - config.conf["VisionAssistant"]["custom_prompts"] = "" - changed = True - - try: - raw_defaults = config.conf["VisionAssistant"]["default_refine_prompts"] - except Exception: - raw_defaults = "" - if isinstance(raw_defaults, str) and raw_defaults.strip(): - try: - defaults_data = json.loads(raw_defaults) - except Exception: - defaults_data = None - if isinstance(defaults_data, dict): - sanitized, migrated = _sanitize_default_prompt_overrides(defaults_data) - if migrated: - config.conf["VisionAssistant"]["default_refine_prompts"] = ( - json.dumps(sanitized, ensure_ascii=False) if sanitized else "" - ) - changed = True - - return changed - -def load_default_prompt_overrides(): - try: - raw = config.conf["VisionAssistant"]["default_refine_prompts"] - except Exception: - raw = "" - if not isinstance(raw, str) or not raw.strip(): - return {} - - try: - data = json.loads(raw) - except Exception as e: - log.warning(f"Invalid default_refine_prompts config, using built-ins: {e}") - return {} - - overrides, _ = _sanitize_default_prompt_overrides(data) - return overrides - -def get_configured_default_prompt_map(): - prompt_map = get_builtin_default_prompt_map() - overrides = load_default_prompt_overrides() - for key, override in overrides.items(): - if key not in prompt_map: - continue - if key in LEGACY_REFINER_TOKENS and override == LEGACY_REFINER_TOKENS[key]: - continue - prompt_map[key]["prompt"] = override - return prompt_map - -def get_configured_default_prompts(): - prompt_map = get_configured_default_prompt_map() - items = [] - for item in DEFAULT_SYSTEM_PROMPTS: - if item.get("internal"): - continue - key = item["key"] - if key in prompt_map: - items.append(dict(prompt_map[key])) - items.sort(key=lambda item: item.get("display_label", "").casefold()) - return items - -def get_prompt_text(prompt_key): - prompt_map = get_configured_default_prompt_map() - item = prompt_map.get(prompt_key) - if item: - return item["prompt"] - return "" - -def 
serialize_default_prompt_overrides(items): - if not items: - return "" - - base_map = {item["key"]: item["prompt"] for item in get_builtin_default_prompts()} - overrides = {} - for item in items: - key = item.get("key") - prompt_text = item.get("prompt", "") - if key not in base_map: - continue - if not isinstance(prompt_text, str): - continue - prompt_text = prompt_text.strip() - if prompt_text and prompt_text != base_map[key]: - overrides[key] = prompt_text - - if not overrides: - return "" - return json.dumps(overrides, ensure_ascii=False) - -def get_refine_menu_options(): - options = [] - prompt_map = get_configured_default_prompt_map() - for key in REFINE_PROMPT_KEYS: - item = prompt_map.get(key) - if item: - options.append((item["label"], item["prompt"])) - - for item in load_configured_custom_prompts(): - # Translators: Prefix for custom prompts in the Refine menu - options.append((_("Custom: ") + item["name"], item["content"])) - return options - -def apply_prompt_template(template, replacements): - if not isinstance(template, str): - return "" - - text = template - for key, value in replacements: - text = text.replace("{" + key + "}", str(value)) - - return text.strip() - def finally_(func, final): @wraps(func) def new(*args, **kwargs): @@ -706,1661 +88,34 @@ def new(*args, **kwargs): final() return new -def clean_markdown(text): - if not text: return "" - text = re.sub(r'\*\*|__|[*_]', '', text) - text = re.sub(r'^#+\s*', '', text, flags=re.MULTILINE) - text = re.sub(r'```', '', text) - text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) - text = re.sub(r'^\s*-\s+', '', text, flags=re.MULTILINE) - return text.strip() - -def markdown_to_html(text, full_page=False): - if not text: return "" - - html = text.replace("&", "&").replace("<", "<").replace(">", ">") - html = re.sub(r'\*\*(.*?)\*\*', r'\1', html) - html = re.sub(r'__(.*?)__', r'\1', html) - html = re.sub(r'^### (.*)', r'

\1

', html, flags=re.M) - html = re.sub(r'^## (.*)', r'

\1

', html, flags=re.M) - html = re.sub(r'^# (.*)', r'

\1

', html, flags=re.M) - - lines = html.split('\n') - in_table = False - new_lines = [] - table_style = 'border="1" style="border-collapse: collapse; width: 100%; margin-bottom: 10px;"' - td_style = 'style="padding: 5px; border: 1px solid #ccc;"' - - for line in lines: - stripped = line.strip() - if stripped.startswith('|') or (stripped.count('|') > 1 and len(stripped) > 5): - if not in_table: - new_lines.append(f'') - in_table = True - if '---' in stripped: continue - row_content = stripped.strip('|').split('|') - cells = "".join([f'' for c in row_content]) - new_lines.append(f'{cells}') - else: - if in_table: - new_lines.append('
{c.strip()}
') - in_table = False - if stripped: new_lines.append(line + "
") - else: new_lines.append("
") - if in_table: new_lines.append('') - html_body = "".join(new_lines) - - if not full_page: return html_body - return f"""{html_body}""" - -def get_mime_type(path): - ext = os.path.splitext(path)[1].lower() - if ext == '.pdf': return 'application/pdf' - if ext in ['.jpg', '.jpeg']: return 'image/jpeg' - if ext == '.png': return 'image/png' - if ext == '.webp': return 'image/webp' - if ext in ['.tif', '.tiff']: return 'image/jpeg' - if ext == '.mp3': return 'audio/mpeg' - if ext == '.wav': return 'audio/wav' - if ext == '.ogg': return 'audio/ogg' - if ext == '.mp4': return 'video/mp4' - return 'application/octet-stream' - -def show_error_dialog(message): - # Translators: Title of the error dialog box - title = _("{name} Error").format(name=ADDON_NAME) - wx.CallAfter(gui.messageBox, message, title, wx.OK | wx.ICON_ERROR) - -def send_ctrl_v(): - try: - user32 = ctypes.windll.user32 - VK_CONTROL = 0x11; VK_V = 0x56; KEYEVENTF_KEYUP = 0x0002 - user32.keybd_event(VK_CONTROL, 0, 0, 0) - user32.keybd_event(VK_V, 0, 0, 0) - user32.keybd_event(VK_V, 0, KEYEVENTF_KEYUP, 0) - user32.keybd_event(VK_CONTROL, 0, KEYEVENTF_KEYUP, 0) - except: pass - -def get_proxy_opener(): - proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip() - if proxy_url: - if "127.0.0.1" in proxy_url or "localhost" in proxy_url or ":" in proxy_url.split("/")[-1]: - handler = request.ProxyHandler({'http': proxy_url, 'https': proxy_url}) - return request.build_opener(handler) - return request.build_opener() - -def get_twitter_download_link(tweet_url): - cj = cookiejar.CookieJar() - opener = request.build_opener(request.HTTPCookieProcessor(cj)) - base_url = "https://savetwitter.net/en4" - api_url = "https://savetwitter.net/api/ajaxSearch" - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36', 'X-Requested-With': 'XMLHttpRequest', 'Referer': base_url} - try: - req_init = request.Request(base_url, headers=headers) - opener.open(req_init) - params = {'q': tweet_url, 
'lang': 'en', 'cftoken': ''} - data = urlencode(params).encode('utf-8') - req_post = request.Request(api_url, data=data, headers=headers, method='POST') - with opener.open(req_post) as response: - res_data = json.loads(response.read().decode('utf-8')) - if res_data.get('status') == 'ok': - html = res_data.get('data', '') - match = re.search(r'href="(https?://dl\.snapcdn\.app/[^"]+)"', html) - if match: return match.group(1) - except: pass - return None - -def get_instagram_download_link(insta_url): - cj = cookiejar.CookieJar() - opener = request.build_opener(request.HTTPCookieProcessor(cj)) - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36', - 'X-Requested-With': 'XMLHttpRequest', - 'Referer': 'https://anon-viewer.com/', - 'Accept': '*/*' - } - opener.addheaders = list(headers.items()) - try: - opener.open("https://anon-viewer.com/", timeout=30) - - if "/stories/" in insta_url: - parts = insta_url.split("/") - username = parts[parts.index("stories") + 1] - api_url = f"https://anon-viewer.com/content.php?url={username}&method=allstories" - else: - encoded_url = quote(insta_url, safe='') - api_url = f"https://anon-viewer.com/content.php?url={encoded_url}" - - response = opener.open(api_url, timeout=60) - if response.getcode() == 200: - res_content = response.read().decode('utf-8') - data = json.loads(res_content) - html_text = data.get('html', '') - - match = re.search(r'href="([^"]+anon-viewer\.com/media\.php\?media=[^"]+)"', html_text) - if match: - return match.group(1).replace('&', '&') - - source_match = re.search(r' 0: - result_parts = [x[0] for x in data[0] if x[0]] - return "".join(result_parts) - except Exception as e: - log.error(f"Google Translate Failed: {e}", exc_info=True) - return text - return text - -class GeminiHandler: - _working_key_idx = 0 - _file_uri_keys = {} - _max_retries = 5 - - @staticmethod - def _get_api_keys(): - raw = 
config.conf["VisionAssistant"]["api_key"] - clean_raw = raw.replace('\r\n', ',').replace('\n', ',') - return [k.strip() for k in clean_raw.split(',') if k.strip()] - - @staticmethod - def _get_opener(): - return get_proxy_opener() - - @staticmethod - def _handle_error(e): - if hasattr(e, 'code'): - # Translators: Error message for Bad Request (400) - if e.code == 400: return _("Error 400: Bad Request (Check API Key)") - # Translators: Error message for Forbidden (403) - if e.code == 403: return _("Error 403: Forbidden (Check Region)") - if e.code == 429: return "QUOTA_EXCEEDED" - if e.code >= 500: return "SERVER_ERROR" - return str(e) - - @staticmethod - def _call_with_retry(func_logic, key, *args): - last_exc = None - for attempt in range(GeminiHandler._max_retries): - try: - return func_logic(key, *args) - except error.HTTPError as e: - err_msg = GeminiHandler._handle_error(e) - if err_msg not in ["QUOTA_EXCEEDED", "SERVER_ERROR"]: - raise - last_exc = e - except error.URLError as e: - last_exc = e - if attempt < GeminiHandler._max_retries - 1: - time.sleep(0.5 * (attempt + 1)) - raise last_exc - - @staticmethod - def _register_file_uri(uri, key): - if uri and key: - GeminiHandler._file_uri_keys[uri] = key - while len(GeminiHandler._file_uri_keys) > 200: - GeminiHandler._file_uri_keys.pop(next(iter(GeminiHandler._file_uri_keys))) - - @staticmethod - def _get_registered_key(uri): - if not uri: - return None - return GeminiHandler._file_uri_keys.get(uri) - - @staticmethod - def _call_with_key(func_logic, key, *args): - try: - return GeminiHandler._call_with_retry(func_logic, key, *args) - except error.HTTPError as e: - err_msg = GeminiHandler._handle_error(e) - if err_msg == "QUOTA_EXCEEDED": - # Translators: Message of a dialog which may pop up while performing an AI call - err_msg = _("Error 429: Quota Exceeded (Try later)") - elif err_msg == "SERVER_ERROR": - # Translators: Message of a dialog which may pop up while performing an AI call - err_msg = _("Server 
Error {code}: {reason}").format(code=e.code, reason=e.reason) - return "ERROR:" + err_msg - except Exception as e: - return "ERROR:" + str(e) - - @staticmethod - def _call_with_rotation(func_logic, *args): - keys = GeminiHandler._get_api_keys() - if not keys: - # Translators: Error when no API keys are found in settings - return "ERROR:" + _("No API Keys configured.") - - num_keys = len(keys) - for i in range(num_keys): - idx = (GeminiHandler._working_key_idx + i) % num_keys - key = keys[idx] - try: - res = GeminiHandler._call_with_retry(func_logic, key, *args) - GeminiHandler._working_key_idx = idx - return res - except error.HTTPError as e: - err_msg = GeminiHandler._handle_error(e) - if err_msg in ["QUOTA_EXCEEDED", "SERVER_ERROR"]: - if i < num_keys - 1: continue - # Translators: Error when all available API keys fail - return "ERROR:" + _("All API Keys failed (Quota/Server).") - return "ERROR:" + err_msg - except Exception as e: - return "ERROR:" + str(e) - return "ERROR:" + _("Unknown error occurred.") - - @staticmethod - def translate(text, target_lang): - def _logic(key, txt, lang): - model = config.conf["VisionAssistant"]["model_name"] - url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent" - quick_template = get_prompt_text("translate_quick") or "Translate to {target_lang}. Output ONLY translation." 
- quick_prompt = apply_prompt_template(quick_template, [("target_lang", lang)]) - payload = {"contents": [{"parts": [{"text": quick_prompt}, {"text": txt}]}]} - req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key}) - with GeminiHandler._get_opener().open(req, timeout=90) as r: - return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text'] - return GeminiHandler._call_with_rotation(_logic, text, target_lang) - - @staticmethod - def ocr_page(image_bytes): - def _logic(key, img_data): - model = config.conf["VisionAssistant"]["model_name"] - url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent" - ocr_image_prompt = get_prompt_text("ocr_image_extract") - payload = {"contents": [{"parts": [{"inline_data": {"mime_type": "image/jpeg", "data": base64.b64encode(img_data).decode('utf-8')}}, {"text": ocr_image_prompt}]}]} - req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key}) - with GeminiHandler._get_opener().open(req, timeout=120) as r: - return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text'] - return GeminiHandler._call_with_rotation(_logic, image_bytes) - - @staticmethod - def upload_and_process_batch(file_path, mime_type, page_count): - keys = GeminiHandler._get_api_keys() - if not keys: - # Translators: Error message for missing API Keys - return [ "ERROR:" + _("No API Keys.") ] - model = config.conf["VisionAssistant"]["model_name"] - - opener = GeminiHandler._get_opener() - proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip() - base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com" - - for i, key in enumerate(keys): - try: - f_size = os.path.getsize(file_path) - init_url = f"{base_url}/upload/v1beta/files" - headers = {"X-Goog-Upload-Protocol": "resumable", 
"X-Goog-Upload-Command": "start", "X-Goog-Upload-Header-Content-Length": str(f_size), "X-Goog-Upload-Header-Content-Type": mime_type, "Content-Type": "application/json", "x-goog-api-key": key} - - req = request.Request(init_url, data=json.dumps({"file": {"display_name": "batch"}}).encode(), headers=headers, method="POST") - with opener.open(req, timeout=120) as r: upload_url = r.headers.get("x-goog-upload-url") - - with open(file_path, 'rb') as f: f_data = f.read() - req_up = request.Request(upload_url, data=f_data, headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"}, method="POST") - with opener.open(req_up, timeout=180) as r: - res = json.loads(r.read().decode()) - uri, name = res['file']['uri'], res['file']['name'] - - active = False - for attempt in range(30): - req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key}) - with opener.open(req_check, timeout=30) as r: - state = json.loads(r.read().decode()).get('state') - if state == "ACTIVE": - active = True - break - if state == "FAILED": - break - time.sleep(2) - - if not active: - if i < len(keys) - 1: - continue - return [ "ERROR:" + _("Upload failed.") ] - - GeminiHandler._register_file_uri(uri, key) - - url = f"{base_url}/v1beta/models/{model}:generateContent" - prompt = get_prompt_text("ocr_document_extract") - contents = [{"parts": [{"file_data": {"mime_type": mime_type, "file_uri": uri}}, {"text": prompt}]}] - - req_gen = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key}) - with opener.open(req_gen, timeout=180) as r: - res = json.loads(r.read().decode()) - text = res['candidates'][0]['content']['parts'][0]['text'] - return text.split('[[[PAGE_SEP]]]') - - except error.HTTPError as e: - err_code = GeminiHandler._handle_error(e) - if err_code in ["QUOTA_EXCEEDED", "SERVER_ERROR"] and i < len(keys) - 1: - continue - if 
err_code == "QUOTA_EXCEEDED": - # Translators: Message of a dialog which may pop up while performing an AI call - err_msg = _("Error 429: Quota Exceeded (Try later)") - elif err_code == "SERVER_ERROR": - # Translators: Message of a dialog which may pop up while performing an AI call - err_msg = _("Server Error {code}: {reason}").format(code=e.code, reason=e.reason) - else: - err_msg = err_code - return ["ERROR:" + err_msg] - except Exception as e: - return ["ERROR:" + str(e)] - return ["ERROR:" + _("All keys failed.")] - - @staticmethod - def chat(history, new_msg, file_uri, mime_type): - def _logic(key, hist, msg, uri, mime): - model = config.conf["VisionAssistant"]["model_name"] - proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip() - base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com" - url = f"{base_url}/v1beta/models/{model}:generateContent" - - contents = list(hist) - if uri: - user_parts = [{"file_data": {"mime_type": mime, "file_uri": uri}}] - else: - user_parts = [] - user_parts.append({"text": msg}) - contents.append({"role": "user", "parts": user_parts}) - - req = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key}) - with GeminiHandler._get_opener().open(req, timeout=120) as r: - return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text'] - forced_key = GeminiHandler._get_registered_key(file_uri) if file_uri else None - if forced_key: - return GeminiHandler._call_with_key(_logic, forced_key, history, new_msg, file_uri, mime_type) - return GeminiHandler._call_with_rotation(_logic, history, new_msg, file_uri, mime_type) - - @staticmethod - def upload_for_chat(file_path, mime_type): - keys = GeminiHandler._get_api_keys() - if not keys: return None - opener = GeminiHandler._get_opener() - proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip() - base_url = proxy_url.rstrip('/') if 
proxy_url else "https://generativelanguage.googleapis.com" - - for key in keys: - try: - f_size = os.path.getsize(file_path) - init_url = f"{base_url}/upload/v1beta/files" - headers = {"X-Goog-Upload-Protocol": "resumable", "X-Goog-Upload-Command": "start", "X-Goog-Upload-Header-Content-Length": str(f_size), "X-Goog-Upload-Header-Content-Type": mime_type, "Content-Type": "application/json", "x-goog-api-key": key} - req = request.Request(init_url, data=json.dumps({"file": {"display_name": os.path.basename(file_path)}}).encode(), headers=headers, method="POST") - with opener.open(req, timeout=120) as r: upload_url = r.headers.get("x-goog-upload-url") - with open(file_path, 'rb') as f: f_data = f.read() - req_up = request.Request(upload_url, data=f_data, headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"}, method="POST") - with opener.open(req_up, timeout=180) as r: - res = json.loads(r.read().decode()) - uri, name = res['file']['uri'], res['file']['name'] - for attempt in range(30): - req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key}) - with opener.open(req_check, timeout=30) as r: - state = json.loads(r.read().decode()).get('state') - if state == "ACTIVE": - GeminiHandler._register_file_uri(uri, key) - return uri - time.sleep(2) - return None - except: continue - return None - - @staticmethod - def generate_speech(text, voice_name): - def _logic(key, txt, voice): - main_model = config.conf["VisionAssistant"]["model_name"] - if "pro" in main_model.lower(): - tts_model = "gemini-2.5-pro-preview-tts" - else: - tts_model = "gemini-2.5-flash-preview-tts" - - proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip() - base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com" - url = f"{base_url}/v1beta/models/{tts_model}:generateContent" - - payload = { - "contents": [{"parts": [{"text": txt}]}], - "generationConfig": { - 
"responseModalities": ["AUDIO"], - "speechConfig": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": voice}}} - } - } - req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key}) - with GeminiHandler._get_opener().open(req, timeout=600) as r: - res = json.loads(r.read().decode()) - candidates = res.get('candidates', []) - if not candidates: raise Exception("No candidates returned") - content = candidates[0].get('content', {}) - parts = content.get('parts', []) - if not parts: raise Exception("No parts in response") - part = parts[0] - if 'inlineData' in part: return part['inlineData']['data'] - if 'inline_data' in part: return part['inline_data']['data'] - if 'text' in part: raise Exception(f"Model refused audio: {part['text']}") - raise Exception("Unknown response format") - return GeminiHandler._call_with_rotation(_logic, text, voice_name) +from .services import ( + ChromeOCREngine, + GeminiHandler, + GoogleTranslator, + SmartProgrammersOCREngine, + VirtualDocument, + _download_temp_video, + get_file_path, + get_instagram_download_link, + get_mime_type, + get_proxy_opener, + get_tiktok_download_link, + get_twitter_download_link, + send_ctrl_v, + show_error_dialog, +) # --- Update Manager --- -class UpdateDialog(wx.Dialog): - def __init__(self, parent, version, name, changes): - # Translators: Title of update confirmation dialog - super().__init__(parent, title=_("Update Available"), size=(500, 450)) - self.Centre() - - panel = wx.Panel(self) - vbox = wx.BoxSizer(wx.VERTICAL) - - # Translators: Message asking user to update. {version} is version number. 
- msg = _("A new version ({version}) of {name} is available.").format(version=version, name=name) - header = wx.StaticText(panel, label=msg) - vbox.Add(header, 0, wx.ALL, 15) - - # Translators: Label for the changes text box - change_lbl = wx.StaticText(panel, label=_("Changes:")) - vbox.Add(change_lbl, 0, wx.LEFT | wx.RIGHT, 15) - - self.changes_ctrl = wx.TextCtrl(panel, value=changes, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2) - vbox.Add(self.changes_ctrl, 1, wx.EXPAND | wx.ALL, 15) - - # Translators: Question to download and install - question = wx.StaticText(panel, label=_("Download and Install?")) - vbox.Add(question, 0, wx.LEFT | wx.RIGHT | wx.BOTTOM, 15) - - btn_sizer = wx.BoxSizer(wx.HORIZONTAL) - # Translators: Button to accept update - self.yes_btn = wx.Button(panel, wx.ID_YES, label=_("&Yes")) - # Translators: Button to reject update - self.no_btn = wx.Button(panel, wx.ID_NO, label=_("&No")) - - btn_sizer.Add(self.yes_btn, 0, wx.RIGHT, 10) - btn_sizer.Add(self.no_btn, 0) - vbox.Add(btn_sizer, 0, wx.ALIGN_RIGHT | wx.ALL, 15) - - panel.SetSizer(vbox) - self.yes_btn.SetDefault() - self.yes_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_YES)) - self.no_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_NO)) - -class UpdateManager: - def __init__(self, repo_name): - self.repo_name = repo_name - self.current_version = addonHandler.getCodeAddon().manifest['version'] - - def check_for_updates(self, silent=True): - threading.Thread(target=self._check_thread, args=(silent,), daemon=True).start() - - def _check_thread(self, silent): - try: - url = f"https://api.github.com/repos/{self.repo_name}/releases/latest" - req = request.Request(url, headers={"User-Agent": "NVDA-Addon"}) - with request.urlopen(req, timeout=60) as response: - if response.status == 200: - data = json.loads(response.read().decode('utf-8')) - latest_tag = data.get("tag_name", "").lstrip("v") - if self._compare_versions(latest_tag, self.current_version) > 0: - download_url = 
None - for asset in data.get("assets", []): - if asset["name"].endswith(".nvda-addon"): - download_url = asset["browser_download_url"] - break - if download_url: - raw_changes = data.get("body", "") - - clean_changes = re.split(r'SHA256|Checklist|---', raw_changes, flags=re.I)[0].strip() - clean_changes = clean_markdown(clean_changes) - - wx.CallAfter(self._prompt_update, latest_tag, download_url, clean_changes) - elif not silent: - # Translators: Error message when an update is found but the addon file is missing from GitHub. - msg = _("Update found but no .nvda-addon file in release.") - show_error_dialog(msg) - elif not silent: - # Translators: Status message informing the user they are already on the latest version. - msg = _("You have the latest version.") - wx.CallAfter(ui.message, msg) - except Exception as e: - if not silent: - msg = _("Update check failed: {error}").format(error=e) - show_error_dialog(msg) - - def _compare_versions(self, v1, v2): - try: - parts1 = [int(x) for x in v1.split('.')] - parts2 = [int(x) for x in v2.split('.')] - return (parts1 > parts2) - (parts1 < parts2) - except: return 0 if v1 == v2 else 1 - - def _prompt_update(self, version, url, changes): - dlg = UpdateDialog(gui.mainFrame, version, ADDON_NAME, changes) - if dlg.ShowModal() == wx.ID_YES: - threading.Thread(target=self._download_install_worker, args=(url,), daemon=True).start() - dlg.Destroy() - - def _download_install_worker(self, url): - try: - # Translators: Message shown while downloading update - msg = _("Downloading update...") - wx.CallAfter(ui.message, msg) - temp_dir = tempfile.gettempdir() - file_path = os.path.join(temp_dir, "VisionAssistant_Update.nvda-addon") - with request.urlopen(url) as response, open(file_path, 'wb') as out_file: - out_file.write(response.read()) - wx.CallAfter(os.startfile, file_path) - except Exception as e: - # Translators: Error message for download failure - msg = _("Download failed: {error}").format(error=e) - show_error_dialog(msg) 
- -# --- UI Classes --- - - -class VisionQADialog(wx.Dialog): - def __init__(self, parent, title, initial_text, context_data, callback_fn, extra_info=None, raw_content=None, status_callback=None, announce_on_open=True, allow_questions=True): - super(VisionQADialog, self).__init__(parent, title=title, size=(550, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER) - self.context_data = context_data - self.callback_fn = callback_fn - self.extra_info = extra_info - self.chat_history = [] - self.raw_content = raw_content - self.status_callback = status_callback - self.announce_on_open = announce_on_open - self.allow_questions = allow_questions - - mainSizer = wx.BoxSizer(wx.VERTICAL) - # Translators: Label for the AI response text area in a chat dialog - lbl_text = _("AI Response:") - lbl = wx.StaticText(self, label=lbl_text) - mainSizer.Add(lbl, 0, wx.ALL, 5) - self.outputArea = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY) - mainSizer.Add(self.outputArea, 1, wx.EXPAND | wx.ALL, 5) - - self.should_clean = config.conf["VisionAssistant"]["clean_markdown_chat"] - display_text = clean_markdown(initial_text) if self.should_clean else initial_text - if display_text: - # Translators: Format for displaying AI message in a chat dialog - init_msg = _("AI: {text}\n").format(text=display_text) - self.outputArea.AppendText(init_msg) - if config.conf["VisionAssistant"]["copy_to_clipboard"]: - api.copyToClip(raw_content if raw_content else display_text) - - if not (extra_info and extra_info.get('skip_init_history')): - self.chat_history.append({"role": "model", "parts": [{"text": initial_text}]}) - - self.inputArea = None - if allow_questions: - # Translators: Label for user input field in a chat dialog - ask_text = _("Ask:") - inputLbl = wx.StaticText(self, label=ask_text) - mainSizer.Add(inputLbl, 0, wx.ALL, 5) - self.inputArea = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30)) - mainSizer.Add(self.inputArea, 0, wx.EXPAND | wx.ALL, 5) - - btnSizer = 
wx.BoxSizer(wx.HORIZONTAL) - self.askBtn = None - if allow_questions: - # Translators: Button to send message in a chat dialog - self.askBtn = wx.Button(self, label=_("Send")) - # Translators: Button to view the content in a formatted HTML window - self.viewBtn = wx.Button(self, label=_("View Formatted")) - self.viewBtn.Bind(wx.EVT_BUTTON, self.onView) - # Translators: Button to save only the result content without chat history - self.saveContentBtn = wx.Button(self, label=_("Save Content")) - self.saveContentBtn.Bind(wx.EVT_BUTTON, self.onSaveContent) - # Translators: Button to save chat in a chat dialog - self.saveBtn = wx.Button(self, label=_("Save Chat")) - # Translators: Button to close chat dialog - self.closeBtn = wx.Button(self, wx.ID_CANCEL, label=_("Close")) - - self.saveBtn.Enable(bool(initial_text.strip())) - self.viewBtn.Enable(bool(self.raw_content)) - self.saveContentBtn.Enable(bool(self.raw_content)) - - if self.askBtn: - btnSizer.Add(self.askBtn, 0, wx.ALL, 5) - btnSizer.Add(self.viewBtn, 0, wx.ALL, 5) - btnSizer.Add(self.saveContentBtn, 0, wx.ALL, 5) - btnSizer.Add(self.saveBtn, 0, wx.ALL, 5) - btnSizer.Add(self.closeBtn, 0, wx.ALL, 5) - mainSizer.Add(btnSizer, 0, wx.ALIGN_RIGHT) - - self.SetSizer(mainSizer) - if self.inputArea: - self.inputArea.SetFocus() - else: - self.outputArea.SetFocus() - if self.askBtn: - self.askBtn.Bind(wx.EVT_BUTTON, self.onAsk) - self.saveBtn.Bind(wx.EVT_BUTTON, self.onSave) - if self.inputArea: - self.inputArea.Bind(wx.EVT_TEXT_ENTER, self.onAsk) - if display_text and self.announce_on_open: - wx.CallLater(300, ui.message, display_text) - - def onAsk(self, event): - if not self.inputArea: - return - question = self.inputArea.Value - if not question.strip(): return - # Translators: Format for displaying User message in a chat dialog - user_msg = _("\nYou: {text}\n").format(text=question) - self.outputArea.AppendText(user_msg) - self.inputArea.Clear() - # Translators: Message shown while processing in a chat dialog - msg 
= _("Thinking...") - ui.message(msg) - threading.Thread(target=self.process_question, args=(question,), daemon=True).start() - - def process_question(self, question): - result_tuple = self.callback_fn(self.context_data, question, self.chat_history, self.extra_info) - response_text, _ = result_tuple - if response_text: - if not (self.extra_info and self.extra_info.get('file_context')): - self.chat_history.append({"role": "user", "parts": [{"text": question}]}) - self.chat_history.append({"role": "model", "parts": [{"text": response_text}]}) - final_text = clean_markdown(response_text) if self.should_clean else response_text - wx.CallAfter(self.update_response, final_text, response_text) - - def update_response(self, display_text, raw_text=None): - if raw_text: - self.raw_content = raw_text - self.viewBtn.Enable(True) - self.saveContentBtn.Enable(True) - # Translators: Format for displaying AI message in a chat dialog - ai_msg = _("AI: {text}\n").format(text=display_text) - self.outputArea.AppendText(ai_msg) - self.saveBtn.Enable(True) - if config.conf["VisionAssistant"]["copy_to_clipboard"]: - api.copyToClip(raw_text if raw_text else display_text) - self.outputArea.ShowPosition(self.outputArea.GetLastPosition()) - ui.message(display_text) - - def report_save(self, msg): - if self.status_callback: self.status_callback(msg) - else: ui.message(msg) - - def onView(self, event): - full_html = "" - # Translators: Format for displaying User message in a chat dialog - user_label = _("\nYou: {text}\n").format(text="").strip() - # Translators: Format for displaying AI message in a chat dialog - ai_label = _("AI: {text}\n").format(text="").strip() - - if self.chat_history: - for item in self.chat_history: - role = item.get("role", "") - text = item.get("parts", [{}])[0].get("text", "") - if role == "user": - safe_text = text.replace("&", "&").replace("<", "<").replace(">", ">") - full_html += f"

{user_label}

{safe_text}

" - elif role == "model": - formatted_text = markdown_to_html(text, full_page=False) - full_html += f"

{ai_label}

{formatted_text}
" - - if not full_html and self.raw_content: - formatted_text = markdown_to_html(self.raw_content, full_page=False) - full_html += f"

{ai_label}

{formatted_text}" - - if not full_html: return - try: - # Translators: Title of the formatted result window - ui.browseableMessage(full_html, _("Formatted Conversation"), isHtml=True) - except Exception as e: - # Translators: Error message if viewing fails - msg = _("Error displaying content: {error}").format(error=e) - show_error_dialog(msg) - - def onSave(self, event): - # Translators: Save dialog title - path = get_file_path(_("Save Chat Log"), "Text files (*.txt)|*.txt", mode="save") - if path: - try: - with open(path, "w", encoding="utf-8") as f: f.write(self.outputArea.GetValue()) - # Translators: Message shown on successful save of a file. - self.report_save(_("Saved.")) - except Exception as e: - # Translators: Message in the error dialog when saving fails. - msg = _("Save failed: {error}").format(error=e) - show_error_dialog(msg) - - def onSaveContent(self, event): - # Translators: Save dialog title - path = get_file_path(_("Save Result"), "HTML files (*.html)|*.html", mode="save") - if path: - try: - full_html = markdown_to_html(self.raw_content, full_page=True) - with open(path, "w", encoding="utf-8") as f: f.write(full_html) - # Translators: Message on successful save - self.report_save(_("Saved.")) - except Exception as e: - # Translators: Message in the error dialog when saving fails. 
- msg = _("Save failed: {error}").format(error=e) - show_error_dialog(msg) - -class SettingsPanel(gui.settingsDialogs.SettingsPanel): - title = ADDON_NAME - def makeSettings(self, settingsSizer): - # --- Connection Group --- - # Translators: Title of the settings group for connection and updates - groupLabel = _("Connection") - self.connectionBox = wx.StaticBox(self, label=groupLabel) - connectionSizer = wx.StaticBoxSizer(self.connectionBox, wx.VERTICAL) - cHelper = gui.guiHelper.BoxSizerHelper(self.connectionBox, sizer=connectionSizer) - - # Translators: Label for API Key input - apiLabel = wx.StaticText(self.connectionBox, label=_("Gemini API Key (Separate multiple keys with comma or newline):")) - cHelper.addItem(apiLabel) - - api_value = config.conf["VisionAssistant"]["api_key"] - - self.apiKeyCtrl_hidden = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_PASSWORD, size=(-1, -1)) - - self.apiKeyCtrl_visible = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_MULTILINE | wx.TE_DONTWRAP, size=(-1, 60)) - self.apiKeyCtrl_visible.Hide() - - cHelper.addItem(self.apiKeyCtrl_hidden) - cHelper.addItem(self.apiKeyCtrl_visible) - - # Translators: Checkbox to toggle API Key visibility - self.showApiCheck = wx.CheckBox(self.connectionBox, label=_("Show API Key")) - self.showApiCheck.Bind(wx.EVT_CHECKBOX, self.onToggleApiVisibility) - cHelper.addItem(self.showApiCheck) - - model_display_names = [opt[0] for opt in MODELS] - # Translators: Label for Model selection - self.model = cHelper.addLabeledControl(_("AI Model:"), wx.Choice, choices=model_display_names) - current_id = config.conf["VisionAssistant"]["model_name"] - try: - index = next(i for i, v in enumerate(MODELS) if v[1] == current_id) - self.model.SetSelection(index) - except StopIteration: self.model.SetSelection(0) - - # Translators: Label for Proxy URL input - self.proxyUrl = cHelper.addLabeledControl(_("Proxy URL:"), wx.TextCtrl) - self.proxyUrl.Value = 
config.conf["VisionAssistant"]["proxy_url"] - - # Translators: Checkbox to enable/disable automatic update checks on NVDA startup - self.checkUpdateStartup = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Check for updates on startup"))) - self.checkUpdateStartup.Value = config.conf["VisionAssistant"]["check_update_startup"] - # Translators: Checkbox to toggle markdown cleaning in chat windows - self.cleanMarkdown = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Clean Markdown in Chat"))) - self.cleanMarkdown.Value = config.conf["VisionAssistant"]["clean_markdown_chat"] - # Translators: Checkbox to enable copying AI responses to clipboard - self.copyToClipboard = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Copy AI responses to clipboard"))) - self.copyToClipboard.Value = config.conf["VisionAssistant"]["copy_to_clipboard"] - # Translators: Checkbox to skip chat window and only speak AI responses - self.skipChatDialog = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Direct Output (No Chat Window)"))) - self.skipChatDialog.Value = config.conf["VisionAssistant"]["skip_chat_dialog"] - settingsSizer.Add(connectionSizer, 0, wx.EXPAND | wx.ALL, 5) - - # --- Translation Languages Group --- - # Translators: Title of the settings group for translation languages configuration - groupLabel = _("Translation Languages") - langBox = wx.StaticBox(self, label=groupLabel) - langSizer = wx.StaticBoxSizer(langBox, wx.VERTICAL) - lHelper = gui.guiHelper.BoxSizerHelper(langBox, sizer=langSizer) - - # Translators: Label for Source Language selection - self.sourceLang = lHelper.addLabeledControl(_("Source:"), wx.Choice, choices=SOURCE_NAMES) - try: self.sourceLang.SetSelection(SOURCE_NAMES.index(config.conf["VisionAssistant"]["source_language"])) - except: self.sourceLang.SetSelection(0) - - # Translators: Label for Target Language selection - self.targetLang = lHelper.addLabeledControl(_("Target:"), wx.Choice, choices=TARGET_NAMES) - try: 
self.targetLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["target_language"])) - except: self.targetLang.SetSelection(0) - - # Translators: Label for AI Response Language selection - self.aiResponseLang = lHelper.addLabeledControl(_("AI Response:"), wx.Choice, choices=TARGET_NAMES) - try: self.aiResponseLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["ai_response_language"])) - except: self.aiResponseLang.SetSelection(0) - - # Translators: Checkbox for Smart Swap feature - self.smartSwap = lHelper.addItem(wx.CheckBox(langBox, label=_("Smart Swap"))) - self.smartSwap.Value = config.conf["VisionAssistant"]["smart_swap"] - settingsSizer.Add(langSizer, 0, wx.EXPAND | wx.ALL, 5) - - # --- Document Reader Settings --- - # Translators: Title of settings group for Document Reader features - groupLabel = _("Document Reader") - docBox = wx.StaticBox(self, label=groupLabel) - docSizer = wx.StaticBoxSizer(docBox, wx.VERTICAL) - dHelper = gui.guiHelper.BoxSizerHelper(docBox, sizer=docSizer) - - # Translators: Label for OCR Engine selection - self.ocr_sel = dHelper.addLabeledControl(_("OCR Engine:"), wx.Choice, choices=[x[0] for x in OCR_ENGINES]) - curr_ocr = config.conf["VisionAssistant"]["ocr_engine"] - try: - o_idx = next(i for i, v in enumerate(OCR_ENGINES) if v[1] == curr_ocr) - self.ocr_sel.SetSelection(o_idx) - except: self.ocr_sel.SetSelection(0) - - voice_choices = [f"{v[0]} - {v[1]}" for v in GEMINI_VOICES] - # Translators: Label for TTS Voice selection - self.voice_sel = dHelper.addLabeledControl(_("TTS Voice:"), wx.Choice, choices=voice_choices) - curr_voice = config.conf["VisionAssistant"]["tts_voice"] - try: - v_idx = next(i for i, v in enumerate(GEMINI_VOICES) if v[0] == curr_voice) - self.voice_sel.SetSelection(v_idx) - except: self.voice_sel.SetSelection(1) - settingsSizer.Add(docSizer, 0, wx.EXPAND | wx.ALL, 5) - - # --- CAPTCHA Group --- - # Translators: Title of the settings group for CAPTCHA options - groupLabel = 
_("CAPTCHA") - capBox = wx.StaticBox(self, label=groupLabel) - capSizer = wx.StaticBoxSizer(capBox, wx.VERTICAL) - capHelper = gui.guiHelper.BoxSizerHelper(capBox, sizer=capSizer) - # Translators: Label for CAPTCHA capture method selection - self.captchaMode = capHelper.addLabeledControl(_("Capture Method:"), wx.Choice, choices=[ - # Translators: A choice for capture method. Captures only the specific object under the NVDA navigator cursor. - _("Navigator Object"), - # Translators: A choice for capture method. Captures the entire visible screen area. - _("Full Screen") - ]) - self.captchaMode.SetSelection(0 if config.conf["VisionAssistant"]["captcha_mode"] == 'navigator' else 1) - settingsSizer.Add(capSizer, 0, wx.EXPAND | wx.ALL, 5) - - self.defaultPromptItems = get_configured_default_prompts() - self.customPromptItems = load_configured_custom_prompts() - - # --- Prompt Manager Group --- - # Translators: Title of the settings group for prompt management - groupLabel = _("Prompts") - promptsBox = wx.StaticBox(self, label=groupLabel) - promptsSizer = wx.StaticBoxSizer(promptsBox, wx.VERTICAL) - pHelper = gui.guiHelper.BoxSizerHelper(promptsBox, sizer=promptsSizer) - # Translators: Description for the prompt manager button. - pHelper.addItem(wx.StaticText(promptsBox, label=_("Manage default and custom prompts."))) - # Translators: Button label to open prompt manager dialog. - self.managePromptsBtn = wx.Button(promptsBox, label=_("Manage Prompts...")) - self.managePromptsBtn.Bind(wx.EVT_BUTTON, self.onManagePrompts) - pHelper.addItem(self.managePromptsBtn) - self.promptsSummary = wx.StaticText(promptsBox) - pHelper.addItem(self.promptsSummary) - self._refreshPromptSummary() - settingsSizer.Add(promptsSizer, 0, wx.EXPAND | wx.ALL, 5) - - def _refreshPromptSummary(self): - # Translators: Summary text for prompt counts in settings. 
- summary = _("Default prompts: {defaultCount}, Custom prompts: {customCount}").format( - defaultCount=len(self.defaultPromptItems), - customCount=len(self.customPromptItems), - ) - self.promptsSummary.SetLabel(summary) - - def onManagePrompts(self, event): - top = wx.GetTopLevelParent(self) - dlg = PromptManagerDialog( - self, - self.defaultPromptItems, - self.customPromptItems, - PROMPT_VARIABLES_GUIDE, - ) - try: - if dlg.ShowModal() == wx.ID_OK: - self.defaultPromptItems = dlg.get_default_items() - self.customPromptItems = dlg.get_custom_items() - self._refreshPromptSummary() - finally: - dlg.Destroy() - if top: - top.Enable(True) - top.SetFocus() - - def onToggleApiVisibility(self, event): - if self.showApiCheck.IsChecked(): - self.apiKeyCtrl_visible.SetValue(self.apiKeyCtrl_hidden.GetValue()) - self.apiKeyCtrl_hidden.Hide() - self.apiKeyCtrl_visible.Show() - else: - self.apiKeyCtrl_hidden.SetValue(self.apiKeyCtrl_visible.GetValue()) - self.apiKeyCtrl_visible.Hide() - self.apiKeyCtrl_hidden.Show() - - self.connectionBox.GetParent().Layout() - - def onSave(self): - val = self.apiKeyCtrl_visible.GetValue() if self.showApiCheck.IsChecked() else self.apiKeyCtrl_hidden.GetValue() - config.conf["VisionAssistant"]["api_key"] = val.strip() - config.conf["VisionAssistant"]["model_name"] = MODELS[self.model.GetSelection()][1] - config.conf["VisionAssistant"]["proxy_url"] = self.proxyUrl.Value.strip() - config.conf["VisionAssistant"]["source_language"] = SOURCE_NAMES[self.sourceLang.GetSelection()] - config.conf["VisionAssistant"]["target_language"] = TARGET_NAMES[self.targetLang.GetSelection()] - config.conf["VisionAssistant"]["ai_response_language"] = TARGET_NAMES[self.aiResponseLang.GetSelection()] - config.conf["VisionAssistant"]["smart_swap"] = self.smartSwap.Value - config.conf["VisionAssistant"]["check_update_startup"] = self.checkUpdateStartup.Value - config.conf["VisionAssistant"]["clean_markdown_chat"] = self.cleanMarkdown.Value - 
config.conf["VisionAssistant"]["copy_to_clipboard"] = self.copyToClipboard.Value - config.conf["VisionAssistant"]["skip_chat_dialog"] = self.skipChatDialog.Value - config.conf["VisionAssistant"]["captcha_mode"] = 'navigator' if self.captchaMode.GetSelection() == 0 else 'fullscreen' - config.conf["VisionAssistant"]["custom_prompts_v2"] = serialize_custom_prompts_v2(self.customPromptItems) - config.conf["VisionAssistant"]["custom_prompts"] = "" - config.conf["VisionAssistant"]["default_refine_prompts"] = serialize_default_prompt_overrides(self.defaultPromptItems) - config.conf["VisionAssistant"]["ocr_engine"] = OCR_ENGINES[self.ocr_sel.GetSelection()][1] - config.conf["VisionAssistant"]["tts_voice"] = GEMINI_VOICES[self.voice_sel.GetSelection()][0] - -class RangeDialog(wx.Dialog): - def __init__(self, parent, total_pages): - # Translators: Title of the PDF options dialog - super().__init__(parent, title=_("Options"), size=(350, 320)) - sizer = wx.BoxSizer(wx.VERTICAL) - # Translators: Label showing total pages found - sizer.Add(wx.StaticText(self, label=_("Total Pages (All Files): {count}").format(count=total_pages)), 0, wx.ALL, 10) - - # Translators: Box title for page range selection - box_range = wx.StaticBoxSizer(wx.VERTICAL, self, _("Range")) - g_sizer = wx.FlexGridSizer(2, 2, 10, 10) - # Translators: Label for start page - g_sizer.Add(wx.StaticText(self, label=_("From:")), 0, wx.ALIGN_CENTER_VERTICAL) - self.spin_from = wx.SpinCtrl(self, min=1, max=total_pages, initial=1) - g_sizer.Add(self.spin_from, 1, wx.EXPAND) - # Translators: Label for end page - g_sizer.Add(wx.StaticText(self, label=_("To:")), 0, wx.ALIGN_CENTER_VERTICAL) - self.spin_to = wx.SpinCtrl(self, min=1, max=total_pages, initial=total_pages) - g_sizer.Add(self.spin_to, 1, wx.EXPAND) - box_range.Add(g_sizer, 1, wx.EXPAND | wx.ALL, 5) - sizer.Add(box_range, 0, wx.EXPAND | wx.ALL, 10) - - # Translators: Box title for translation options - box_trans = wx.StaticBoxSizer(wx.VERTICAL, self, 
_("Translation")) - # Translators: Checkbox to enable translation - self.chk_trans = wx.CheckBox(self, label=_("Translate Output")) - box_trans.Add(self.chk_trans, 0, wx.ALL, 5) - h_sizer = wx.BoxSizer(wx.HORIZONTAL) - # Translators: Label for target language - h_sizer.Add(wx.StaticText(self, label=_("Target:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5) - self.cmb_lang = wx.Choice(self, choices=TARGET_NAMES) - self.cmb_lang.SetSelection(0) - h_sizer.Add(self.cmb_lang, 1) - box_trans.Add(h_sizer, 1, wx.EXPAND | wx.ALL, 5) - sizer.Add(box_trans, 0, wx.EXPAND | wx.ALL, 10) - - btn_sizer = wx.BoxSizer(wx.HORIZONTAL) - # Translators: Button to start processing - btn_ok = wx.Button(self, wx.ID_OK, label=_("Start")) - btn_ok.SetDefault() - # Translators: Button to cancel - btn_cancel = wx.Button(self, wx.ID_CANCEL, label=_("Cancel")) - btn_sizer.Add(btn_ok, 0, wx.RIGHT, 10) - btn_sizer.Add(btn_cancel, 0) - sizer.Add(btn_sizer, 0, wx.ALIGN_CENTER | wx.ALL, 10) - self.SetSizer(sizer) - - self.chk_trans.Bind(wx.EVT_CHECKBOX, self.on_check) - self.cmb_lang.Disable() - - def on_check(self, event): - self.cmb_lang.Enable(self.chk_trans.IsChecked()) - - def get_settings(self): - return { - 'start': self.spin_from.GetValue() - 1, - 'end': self.spin_to.GetValue() - 1, - 'translate': self.chk_trans.IsChecked(), - 'lang': TARGET_NAMES[self.cmb_lang.GetSelection()] - } - -class ChatDialog(wx.Dialog): - instance = None - - def __init__(self, parent, file_path): - # Translators: Title of the chat dialog - super().__init__(parent, title=_("Ask about Document"), size=(600, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER) - ChatDialog.instance = self - self.file_path = file_path - self.file_uri = None - self.mime_type = get_mime_type(file_path) - self.history = [] - - sizer = wx.BoxSizer(wx.VERTICAL) - # Translators: Label showing the analyzed file name - lbl_info = wx.StaticText(self, label=_("File: {name}").format(name=os.path.basename(file_path))) - sizer.Add(lbl_info, 0, 
wx.ALL, 5) - self.display = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2) - sizer.Add(self.display, 1, wx.EXPAND | wx.ALL, 10) - # Translators: Status message while uploading - self.display.SetValue(_("Uploading to Gemini...\n")) - - input_sizer = wx.BoxSizer(wx.HORIZONTAL) - # Translators: Label for the chat input field - input_sizer.Add(wx.StaticText(self, label=_("Your Question:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5) - self.input = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30)) - self.input.Bind(wx.EVT_TEXT_ENTER, self.on_send) - input_sizer.Add(self.input, 1, wx.EXPAND | wx.RIGHT, 5) - - # Translators: Button to send message - self.btn_send = wx.Button(self, label=_("Send")) - self.btn_send.Bind(wx.EVT_BUTTON, self.on_send) - self.btn_send.Disable() - input_sizer.Add(self.btn_send, 0) - sizer.Add(input_sizer, 0, wx.EXPAND | wx.ALL, 10) - self.SetSizer(sizer) - self.Bind(wx.EVT_CLOSE, self.on_close) - threading.Thread(target=self.init_upload, daemon=True).start() - - def on_close(self, event): - ChatDialog.instance = None - self.Destroy() - - def init_upload(self): - uri = GeminiHandler.upload_for_chat(self.file_path, self.mime_type) - if uri and not str(uri).startswith("ERROR:"): - self.file_uri = uri - wx.CallAfter(self.on_ready) - else: - err_msg = str(uri)[6:] if uri else _("Upload failed.") - wx.CallAfter(show_error_dialog, err_msg) - wx.CallAfter(self.Close) - - def on_ready(self): - # Translators: Message when ready to chat - self.display.AppendText(_("Ready! 
Ask your questions.\n")) - self.btn_send.Enable() - self.input.SetFocus() - - def on_send(self, event): - msg = self.input.GetValue().strip() - if not msg: return - self.input.Clear() - self.display.AppendText(f"You: {msg}\n") - # Translators: Message showing AI is thinking - ui.message(_("Thinking...")) - threading.Thread(target=self.do_chat, args=(msg,), daemon=True).start() - - def do_chat(self, msg): - resp = GeminiHandler.chat(self.history, msg, self.file_uri, self.mime_type) - - if str(resp).startswith("ERROR:"): - show_error_dialog(resp[6:]) - if _vision_assistant_instance: - # Translators: Initial status when the add-on is doing nothing - _vision_assistant_instance.current_status = _("Idle") - return - - self.history.append({"role": "user", "parts": [{"text": msg}]}) - self.history.append({"role": "model", "parts": [{"text": resp}]}) - wx.CallAfter(self.display.AppendText, f"AI: {resp}\n\n") - # Translators: Spoken prefix for AI response - wx.CallAfter(ui.message, _("AI: ") + resp) - -class DocumentViewerDialog(wx.Dialog): - def __init__(self, parent, virtual_doc, settings): - # Translators: Title of the Document Reader window. 
- title_text = f"{ADDON_NAME} - {_('Document Reader')}" - super().__init__(parent, title=title_text, size=(800, 600), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER | wx.MAXIMIZE_BOX) - self.v_doc = virtual_doc - self.start_page = settings['start'] - self.end_page = settings['end'] - self.do_translate = settings['translate'] - self.target_lang = settings['lang'] - self.range_count = self.end_page - self.start_page + 1 - self.page_cache = {} - self.current_page = self.start_page - self.thread_pool = ThreadPoolExecutor(max_workers=5) - - self.init_ui() - self.Centre() - threading.Thread(target=self.start_auto_processing, daemon=True).start() - - def init_ui(self): - panel = wx.Panel(self) - vbox = wx.BoxSizer(wx.VERTICAL) - # Translators: Initial status message - self.lbl_status = wx.StaticText(panel, label=_("Initializing...")) - vbox.Add(self.lbl_status, 0, wx.ALL, 5) - self.txt_content = wx.TextCtrl(panel, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2) - vbox.Add(self.txt_content, 1, wx.EXPAND | wx.LEFT | wx.RIGHT, 10) - hbox_nav = wx.BoxSizer(wx.HORIZONTAL) - # Translators: Button to go to previous page - self.btn_prev = wx.Button(panel, label=_("Previous (Ctrl+PageUp)")) - self.btn_prev.Bind(wx.EVT_BUTTON, self.on_prev) - hbox_nav.Add(self.btn_prev, 0, wx.RIGHT, 5) - # Translators: Button to go to next page - self.btn_next = wx.Button(panel, label=_("Next (Ctrl+PageDown)")) - self.btn_next.Bind(wx.EVT_BUTTON, self.on_next) - hbox_nav.Add(self.btn_next, 0, wx.RIGHT, 15) - # Translators: Label for Go To Page - hbox_nav.Add(wx.StaticText(panel, label=_("Go to:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5) - choices = [str(i+1) for i in range(self.start_page, self.end_page + 1)] - self.cmb_pages = wx.Choice(panel, choices=choices) - self.cmb_pages.Bind(wx.EVT_CHOICE, self.on_page_select) - hbox_nav.Add(self.cmb_pages, 0, wx.RIGHT, 15) - vbox.Add(hbox_nav, 0, wx.ALIGN_CENTER | wx.ALL, 10) - hbox_actions = wx.BoxSizer(wx.HORIZONTAL) - # Translators: Button 
to Ask questions about the document - self.btn_ask = wx.Button(panel, label=_("Ask AI (Alt+A)")) - self.btn_ask.Bind(wx.EVT_BUTTON, self.on_ask) - hbox_actions.Add(self.btn_ask, 0, wx.RIGHT, 5) - - # Translators: Button to force re-scan - self.btn_gemini = wx.Button(panel, label=_("Re-scan with Gemini (Alt+R)")) - self.btn_gemini.Bind(wx.EVT_BUTTON, self.on_gemini_scan) - hbox_actions.Add(self.btn_gemini, 0, wx.RIGHT, 5) - - # Translators: Button to generate audio - self.btn_tts = wx.Button(panel, label=_("Generate Audio (Alt+G)")) - self.btn_tts.Bind(wx.EVT_BUTTON, self.on_tts) - hbox_actions.Add(self.btn_tts, 0, wx.RIGHT, 5) - - # Translators: Button to view formatted content - self.btn_view = wx.Button(panel, label=_("View Formatted")) - self.btn_view.Bind(wx.EVT_BUTTON, self.on_view) - hbox_actions.Add(self.btn_view, 0, wx.RIGHT, 5) - - # Translators: Button to save text - self.btn_save = wx.Button(panel, label=_("Save (Alt+S)")) - self.btn_save.Bind(wx.EVT_BUTTON, self.on_save_all) - hbox_actions.Add(self.btn_save, 0) - - vbox.Add(hbox_actions, 0, wx.ALIGN_CENTER | wx.ALL, 5) - btn_close = wx.Button(panel, wx.ID_CLOSE, label=_("Close")) - btn_close.Bind(wx.EVT_BUTTON, lambda e: self.Destroy()) - vbox.Add(btn_close, 0, wx.ALIGN_RIGHT | wx.ALL, 10) - panel.SetSizer(vbox) - accel_tbl = wx.AcceleratorTable([ - (wx.ACCEL_CTRL, wx.WXK_PAGEDOWN, self.btn_next.GetId()), - (wx.ACCEL_CTRL, wx.WXK_PAGEUP, self.btn_prev.GetId()), - (wx.ACCEL_CTRL, ord('S'), self.btn_save.GetId()), - (wx.ACCEL_ALT, ord('S'), self.btn_save.GetId()), - (wx.ACCEL_ALT, ord('A'), self.btn_ask.GetId()), - (wx.ACCEL_ALT, ord('R'), self.btn_gemini.GetId()), - (wx.ACCEL_ALT, ord('G'), self.btn_tts.GetId()) - ]) - self.SetAcceleratorTable(accel_tbl) - self.cmb_pages.SetSelection(0) - self.update_view() - self.txt_content.SetFocus() - - def start_auto_processing(self): - engine = config.conf["VisionAssistant"]["ocr_engine"] - - if engine == 'gemini': - 
threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start() - else: - for i in range(self.start_page, self.end_page + 1): - self.thread_pool.submit(self.process_page_worker, i) - - def process_page_worker(self, page_num): - if page_num in self.page_cache: return - text = self._get_page_text_logic(page_num) - self.page_cache[page_num] = text - if page_num == self.current_page: - wx.CallAfter(self.update_view) - # Translators: Spoken message when the current page is ready - wx.CallAfter(ui.message, _("Page {num} ready").format(num=page_num + 1)) - - def _get_page_text_logic(self, page_num): - file_path, page_idx = self.v_doc.get_page_info(page_num) - if not file_path: return "" - try: - doc = fitz.open(file_path) - page = doc.load_page(page_idx) - pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) - img_bytes = pix.tobytes("jpg") - doc.close() - engine = config.conf["VisionAssistant"]["ocr_engine"] - text = None - if engine == 'gemini': - try: text = GeminiHandler.ocr_page(img_bytes) - except: text = None - if not text or not text.strip() or engine == 'chrome': - text = ChromeOCREngine.recognize(img_bytes) - if not text or not text.strip(): - text = SmartProgrammersOCREngine.recognize(img_bytes) - if not text or not text.strip(): - # Translators: Placeholder text when OCR fails - text = _("[OCR failed. 
Try Gemini Re-scan.]") - if self.do_translate and text and "[OCR failed" not in text: - if engine == 'gemini': - text = GeminiHandler.translate(text, self.target_lang) - else: - text = GoogleTranslator.translate(text, self.target_lang) - return text - except: - # Translators: Error message for page processing failure - return _("Error processing page.") - - def update_view(self): - rel_page = self.current_page - self.start_page + 1 - # Translators: Status label format - self.lbl_status.SetLabel(_("Page {current} of {total}").format(current=rel_page, total=self.range_count)) - if self.current_page in self.page_cache: - self.txt_content.SetValue(self.page_cache[self.current_page]) - self.txt_content.SetInsertionPoint(0) - self.txt_content.SetFocus() - else: - # Translators: Status when page is loading - self.txt_content.SetValue(_("Processing in background...")) - self.txt_content.SetInsertionPoint(0) - self.txt_content.SetFocus() - self.btn_prev.Enable(self.current_page > self.start_page) - self.btn_next.Enable(self.current_page < self.end_page) - - def load_page(self, page_num): - if page_num < self.start_page or page_num > self.end_page: return - self.current_page = page_num - self.cmb_pages.SetSelection(page_num - self.start_page) - # Translators: Spoken message when switching pages - ui.message(_("Page {num}").format(num=page_num + 1)) - self.update_view() - - def on_prev(self, event): - if self.current_page > self.start_page: self.load_page(self.current_page - 1) - - def on_next(self, event): - if self.current_page < self.end_page: self.load_page(self.current_page + 1) - - def on_page_select(self, event): - self.load_page(self.start_page + self.cmb_pages.GetSelection()) - - def on_view(self, event): - full_html = [] - for i in range(self.start_page, self.end_page + 1): - if i in self.page_cache: - page_text = self.page_cache[i] - page_content = markdown_to_html(page_text, full_page=False) - # Translators: Heading for each page in the formatted content view. 
- page_label = _("Page {num}").format(num=i+1) - full_html.append(f"

{page_label}

") - full_html.append(page_content) - full_html.append("
") - - if not full_html: - text = self.txt_content.GetValue() - if not text: return - full_html.append(markdown_to_html(text, full_page=False)) - - combined_html = "".join(full_html) - try: - # Translators: Title of the formatted result window - ui.browseableMessage(combined_html, _("Formatted Content"), isHtml=True) - except Exception as e: - show_error_dialog(str(e)) - - def on_gemini_scan(self, event): - if not config.conf["VisionAssistant"]["api_key"]: - wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR) - return - menu = wx.Menu() - # Translators: Menu option for current page - item_curr = menu.Append(wx.ID_ANY, _("Current Page")) - # Translators: Menu option for all pages - item_all = menu.Append(wx.ID_ANY, _("All Pages (In Range)")) - self.Bind(wx.EVT_MENU, self.do_rescan_current, item_curr) - self.Bind(wx.EVT_MENU, self.do_rescan_all, item_all) - self.PopupMenu(menu) - menu.Destroy() - - def do_rescan_current(self, event): - if self.current_page in self.page_cache: del self.page_cache[self.current_page] - self.update_view() - # Translators: Message during manual scan - ui.message(_("Scanning with Gemini...")) - threading.Thread(target=self.gemini_scan_single_thread, args=(self.current_page,), daemon=True).start() - - def gemini_scan_single_thread(self, page_num): - try: - file_path, page_idx = self.v_doc.get_page_info(page_num) - doc = fitz.open(file_path) - page = doc.load_page(page_idx) - pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) - text = GeminiHandler.ocr_page(pix.tobytes("jpg")) - doc.close() - if self.do_translate: text = GeminiHandler.translate(text, self.target_lang) - self.page_cache[page_num] = text - if self.current_page == page_num: - wx.CallAfter(self.update_view) - # Translators: Message when scan is complete - wx.CallAfter(ui.message, _("Scan complete")) - except: pass - - def do_rescan_all(self, event): - threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start() - - def 
gemini_scan_batch_thread(self): - # Translators: Message when batch scan starts - msg = _("Batch Processing Started") - if _vision_assistant_instance: _vision_assistant_instance.current_status = msg - wx.CallAfter(ui.message, msg) - - for i in range(self.start_page, self.end_page + 1): - if i in self.page_cache: del self.page_cache[i] - wx.CallAfter(self.update_view) - - upload_path = self.v_doc.create_merged_pdf(self.start_page, self.end_page) - if not upload_path: - # Translators: Error message if PDF creation fails - wx.CallAfter(self.lbl_status.SetLabel, _("Error creating temporary PDF.")) - return - - try: - count = (self.end_page - self.start_page) + 1 - results = GeminiHandler.upload_and_process_batch(upload_path, "application/pdf", count) - - if not results or (len(results) == 1 and str(results[0]).startswith("ERROR:")): - err_msg = results[0][6:] if results else _("Unknown error") - # Translators: Message reported when batch scan fails - error_text = _("Scan failed: {err}").format(err=err_msg) - for i in range(self.start_page, self.end_page + 1): - self.page_cache[i] = error_text - - wx.CallAfter(self.update_view) - wx.CallAfter(ui.message, error_text) - return - - for i, text_part in enumerate(results): - if i >= count: break - idx = self.start_page + i - clean = text_part.strip() - if self.do_translate: - clean = GeminiHandler.translate(clean, self.target_lang) - self.page_cache[idx] = clean - - wx.CallAfter(self.update_view) - # Translators: Message when batch scan is complete - final_msg = _("Batch Scan Complete") - if _vision_assistant_instance: - # Translators: Initial status when the add-on is doing nothing - _vision_assistant_instance.current_status = _("Idle") - wx.CallAfter(ui.message, final_msg) - finally: - if upload_path and os.path.exists(upload_path): - try: os.remove(upload_path) - except: pass - - def on_tts(self, event): - if not config.conf["VisionAssistant"]["api_key"]: - wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), 
wx.ICON_ERROR) - return - menu = wx.Menu() - # Translators: Menu option for TTS current page - item_curr = menu.Append(wx.ID_ANY, _("Generate for Current Page")) - # Translators: Menu option for TTS all pages - item_all = menu.Append(wx.ID_ANY, _("Generate for All Pages (In Range)")) - self.Bind(wx.EVT_MENU, self.do_tts_current, item_curr) - self.Bind(wx.EVT_MENU, self.do_tts_all, item_all) - self.PopupMenu(menu) - menu.Destroy() - - def do_tts_current(self, event): - text = self.txt_content.GetValue().strip() - if not text: - # Translators: Error message when text field is empty - wx.MessageBox(_("No text to read."), "Error") - return - self._save_tts(text) - - def do_tts_all(self, event): - threading.Thread(target=self.tts_batch_thread, daemon=True).start() - - def tts_batch_thread(self): - full_text = [] - # Translators: Message while gathering text - wx.CallAfter(ui.message, _("Gathering text for audio...")) - for i in range(self.start_page, self.end_page + 1): - while i not in self.page_cache: time.sleep(0.1) - full_text.append(self.page_cache[i]) - final_text = "\n".join(full_text).strip() - if not final_text: return - wx.CallAfter(self._save_tts, final_text) - - def _save_tts(self, text): - # Translators: File dialog title for saving audio - path = get_file_path(_("Save Audio"), "MP3 Files (*.mp3)|*.mp3|WAV Files (*.wav)|*.wav", mode="save") - if path: - voice = config.conf["VisionAssistant"]["tts_voice"] - threading.Thread(target=self.tts_worker, args=(text, voice, path), daemon=True).start() - - def tts_worker(self, text, voice, path): - # Translators: Message while generating audio - msg = _("Generating Audio...") - if _vision_assistant_instance: _vision_assistant_instance.current_status = msg - wx.CallAfter(ui.message, msg) - try: - audio_b64 = GeminiHandler.generate_speech(text, voice) - if not audio_b64 or len(audio_b64) < 100: - wx.CallAfter(wx.MessageBox, f"TTS Error: {audio_b64}", "Error", wx.ICON_ERROR) - return - missing_padding = len(audio_b64) % 
4 - if missing_padding: audio_b64 += '=' * (4 - missing_padding) - pcm_data = base64.b64decode(audio_b64) - - if path.lower().endswith(".mp3"): - import subprocess - lame_path = os.path.join(os.path.dirname(__file__), "lib", "lame.exe") - if not os.path.exists(lame_path): - wx.CallAfter(wx.MessageBox, _("lame.exe not found in lib folder."), "Error", wx.ICON_ERROR) - return - - process = subprocess.Popen( - [lame_path, "-r", "-s", "24", "-m", "m", "-b", "128", "--bitwidth", "16", "--resample", "24", "-q", "0", "-", path], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - creationflags=getattr(subprocess, 'CREATE_NO_WINDOW', 0) - ) - - process.communicate(input=pcm_data) - else: - with wave.open(path, "wb") as wf: - wf.setnchannels(1) - wf.setsampwidth(2) - wf.setframerate(24000) - wf.writeframes(pcm_data) - - # Translators: Spoken message when audio is saved - res_msg = _("Audio Saved") - if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle") - wx.CallAfter(ui.message, res_msg) - wx.CallAfter(wx.MessageBox, _("Audio file generated and saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION) - except Exception as e: - if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle") - wx.CallAfter(wx.MessageBox, f"TTS Error: {e}", "Error", wx.ICON_ERROR) - - def on_ask(self, event): - if not config.conf["VisionAssistant"]["api_key"]: - wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR) - return - if ChatDialog.instance: - ChatDialog.instance.Raise() - ChatDialog.instance.SetFocus() - return - file_path, _ = self.v_doc.get_page_info(self.current_page) - if file_path: - dlg = ChatDialog(self, file_path) - dlg.Show() - - def on_save_all(self, event): - # Translators: File dialog filter for saving text/html - wildcard = "Text File (*.txt)|*.txt|HTML File (*.html)|*.html" - # Translators: File dialog title for saving - path = get_file_path(_("Save"), wildcard, 
mode="save") - if path: - is_html = path.lower().endswith('.html') - self.btn_save.Disable() - threading.Thread(target=self.save_thread, args=(path, is_html), daemon=True).start() - - def save_thread(self, path, is_html): - full_content = [] - try: - for i in range(self.start_page, self.end_page + 1): - # Translators: Message showing save progress - wx.CallAfter(self.lbl_status.SetLabel, _("Saving Page {num}...").format(num=i+1)) - while i not in self.page_cache: time.sleep(0.1) - txt = self.page_cache[i] - if is_html: - h = markdown_to_html(txt) - if "" in h: h = h.split("")[1].split("")[0] - full_content.append(f"

Page {i+1}

{h}") - else: - full_content.append(f"--- Page {i+1} ---\n{txt}\n") - with open(path, "w", encoding="utf-8") as f: - if is_html: f.write(f"{''.join(full_content)}") - else: f.write("\n".join(full_content)) - # Translators: Status label when save is complete - wx.CallAfter(self.lbl_status.SetLabel, _("Saved")) - # Translators: Message box content for successful save - wx.CallAfter(wx.MessageBox, _("File saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION) - except Exception as e: - wx.CallAfter(wx.MessageBox, f"Save Error: {e}", "Error", wx.ICON_ERROR) - finally: wx.CallAfter(self.btn_save.Enable) +from .updater import UpdateDialog, UpdateManager + +from .dialogs import ( + ChatDialog, + DocumentViewerDialog, + RangeDialog, + SettingsPanel, + VisionQADialog, + set_vision_assistant_instance, +) class GlobalPlugin(globalPluginHandler.GlobalPlugin): scriptCategory = ADDON_NAME @@ -2379,8 +134,7 @@ class GlobalPlugin(globalPluginHandler.GlobalPlugin): def __init__(self): super(GlobalPlugin, self).__init__() - global _vision_assistant_instance - _vision_assistant_instance = self + set_vision_assistant_instance(self) try: migrate_prompt_config_if_needed() except Exception as e: @@ -2503,7 +257,6 @@ def script_activateLayer(self, gesture): tones.beep(500, 100) def terminate(self): - global _vision_assistant_instance try: if hasattr(self, 'va_submenu_item') and self.va_submenu_item: self.tools_menu.Remove(self.va_submenu_item.GetId()) @@ -2529,7 +282,7 @@ def terminate(self): self.translation_cache = {} self._last_source_text = None - _vision_assistant_instance = None + set_vision_assistant_instance(None) gc.collect() def report_status(self, msg): diff --git a/addon/globalPlugins/visionAssistant/constants.py b/addon/globalPlugins/visionAssistant/constants.py new file mode 100644 index 0000000..bcd8ac2 --- /dev/null +++ b/addon/globalPlugins/visionAssistant/constants.py @@ -0,0 +1,393 @@ +# -*- coding: utf-8 -*- + +import addonHandler +import config + 
+addonHandler.initTranslation() + +ADDON_NAME = addonHandler.getCodeAddon().manifest["summary"] +GITHUB_REPO = "mahmoodhozhabri/VisionAssistantPro" + +CHROME_OCR_KEYS = [ + "AIzaSyA2KlwBX3mkFo30om9LUFYQhpqLoa_BNhE", + "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw" +] + +MODELS = [ + # --- 1. Recommended (Auto-Updating) --- + # Translators: AI Model info. [Auto] = Automatic updates. (Latest) = Newest version. + (_("[Auto]") + " Gemini Flash " + _("(Latest)"), "gemini-flash-latest"), + (_("[Auto]") + " Gemini Flash Lite " + _("(Latest)"), "gemini-flash-lite-latest"), + + # --- 2. Current Standard (Free & Fast) --- + # Translators: AI Model info. [Free] = Generous usage limits. (Preview) = Experimental or early-access version. + (_("[Free]") + " Gemini 3.0 Flash " + _("(Preview)"), "gemini-3-flash-preview"), + (_("[Free]") + " Gemini 2.5 Flash", "gemini-2.5-flash"), + (_("[Free]") + " Gemini 2.5 Flash Lite", "gemini-2.5-flash-lite"), + + # --- 3. High Intelligence (Paid/Pro/Preview) --- + # Translators: AI Model info. [Pro] = High intelligence/Paid tier. (Preview) = Experimental version. + (_("[Pro]") + " Gemini 3.0 Pro " + _("(Preview)"), "gemini-3-pro-preview"), + (_("[Pro]") + " Gemini 2.5 Pro", "gemini-2.5-pro"), +] + +GEMINI_VOICES = [ + # Translators: Adjective describing a bright AI voice style. + ("Zephyr", _("Bright")), + # Translators: Adjective describing an upbeat AI voice style. + ("Puck", _("Upbeat")), + # Translators: Adjective describing an informative AI voice style. + ("Charon", _("Informative")), + # Translators: Adjective describing a firm AI voice style. + ("Kore", _("Firm")), + # Translators: Adjective describing an excitable AI voice style. + ("Fenrir", _("Excitable")), + # Translators: Adjective describing a youthful AI voice style. + ("Leda", _("Youthful")), + # Translators: Adjective describing a firm AI voice style. + ("Orus", _("Firm")), + # Translators: Adjective describing a breezy AI voice style. 
+ ("Aoede", _("Breezy")), + # Translators: Adjective describing an easy-going AI voice style. + ("Callirrhoe", _("Easy-going")), + # Translators: Adjective describing a bright AI voice style. + ("Autonoe", _("Bright")), + # Translators: Adjective describing a breathy AI voice style. + ("Enceladus", _("Breathy")), + # Translators: Adjective describing a clear AI voice style. + ("Iapetus", _("Clear")), + # Translators: Adjective describing an easy-going AI voice style. + ("Umbriel", _("Easy-going")), + # Translators: Adjective describing a smooth AI voice style. + ("Algieba", _("Smooth")), + # Translators: Adjective describing a smooth AI voice style. + ("Despina", _("Smooth")), + # Translators: Adjective describing a clear AI voice style. + ("Erinome", _("Clear")), + # Translators: Adjective describing a gravelly AI voice style. + ("Algenib", _("Gravelly")), + # Translators: Adjective describing an informative AI voice style. + ("Rasalgethi", _("Informative")), + # Translators: Adjective describing an upbeat AI voice style. + ("Laomedeia", _("Upbeat")), + # Translators: Adjective describing a soft AI voice style. + ("Achernar", _("Soft")), + # Translators: Adjective describing a firm AI voice style. + ("Alnilam", _("Firm")), + # Translators: Adjective describing an even AI voice style. + ("Schedar", _("Even")), + # Translators: Adjective describing a mature AI voice style. + ("Gacrux", _("Mature")), + # Translators: Adjective describing a forward AI voice style. + ("Pulcherrima", _("Forward")), + # Translators: Adjective describing a friendly AI voice style. + ("Achird", _("Friendly")), + # Translators: Adjective describing a casual AI voice style. + ("Zubenelgenubi", _("Casual")), + # Translators: Adjective describing a gentle AI voice style. + ("Vindemiatrix", _("Gentle")), + # Translators: Adjective describing a lively AI voice style. + ("Sadachbia", _("Lively")), + # Translators: Adjective describing a knowledgeable AI voice style. 
	("Sadaltager", _("Knowledgeable")),
	# Translators: Adjective describing a warm AI voice style.
	("Sulafat", _("Warm"))
]

# (English name, ISO 639-1 code) pairs for every language the add-on supports;
# kept roughly alphabetical by English name.
BASE_LANGUAGES = [
	("Arabic", "ar"), ("Bulgarian", "bg"), ("Chinese", "zh"), ("Czech", "cs"), ("Danish", "da"),
	("Dutch", "nl"), ("English", "en"), ("Finnish", "fi"), ("French", "fr"),
	("German", "de"), ("Greek", "el"), ("Hebrew", "he"), ("Hindi", "hi"),
	("Hungarian", "hu"), ("Indonesian", "id"), ("Italian", "it"), ("Japanese", "ja"),
	("Korean", "ko"), ("Nepali", "ne"), ("Norwegian", "no"), ("Persian", "fa"), ("Polish", "pl"),
	("Portuguese", "pt"), ("Romanian", "ro"), ("Russian", "ru"), ("Spanish", "es"),
	("Swedish", "sv"), ("Thai", "th"), ("Turkish", "tr"), ("Ukrainian", "uk"),
	("Vietnamese", "vi")
]
# Source-language choices additionally offer automatic detection.
SOURCE_LIST = [("Auto-detect", "auto")] + BASE_LANGUAGES
SOURCE_NAMES = [x[0] for x in SOURCE_LIST]
TARGET_LIST = BASE_LANGUAGES
TARGET_NAMES = [x[0] for x in TARGET_LIST]
# Display name -> ISO code lookup for target languages.
TARGET_CODES = {x[0]: x[1] for x in BASE_LANGUAGES}

OCR_ENGINES = [
	# Translators: OCR Engine option (Fast but less formatted)
	(_("Chrome (Fast)"), "chrome"),
	# Translators: OCR Engine option (Slower but better formatting)
	(_("Gemini (Formatted)"), "gemini")
]

# NVDA configuration specification for the "VisionAssistant" config section.
confspec = {
	"proxy_url": "string(default='')",
	"api_key": "string(default='')",
	"model_name": "string(default='gemini-flash-lite-latest')",
	"target_language": "string(default='English')",
	"source_language": "string(default='Auto-detect')",
	"ai_response_language": "string(default='English')",
	"smart_swap": "boolean(default=True)",
	"captcha_mode": "string(default='navigator')",
	"custom_prompts": "string(default='')",
	"custom_prompts_v2": "string(default='')",
	"default_refine_prompts": "string(default='')",
	"check_update_startup": "boolean(default=False)",
	"clean_markdown_chat": "boolean(default=True)",
	"copy_to_clipboard": "boolean(default=False)",
	"skip_chat_dialog": "boolean(default=False)",
	"ocr_engine": "string(default='chrome')",
	"tts_voice": "string(default='Puck')"
}

config.conf.spec["VisionAssistant"] = confspec

# System prompt driving "smart" translation; placeholders are filled via str.format.
PROMPT_TRANSLATE = """
Task: Translate the text below to "{target_lang}".

Configuration:
- Target Language: "{target_lang}"
- Swap Language: "{swap_target}"
- Smart Swap: {smart_swap}

Rules:
1. DEFAULT: Translate the input strictly to "{target_lang}".
2. MIXED CONTENT: If the text contains mixed languages (e.g., Arabic content with English UI terms like 'Reply', 'From', 'Forwarded'), translate EVERYTHING to "{target_lang}".
3. EXCEPTION: If (and ONLY if) the input is already completely in "{target_lang}" AND "Smart Swap" is True, then translate to "{swap_target}".

Constraints:
- Output ONLY the translation.
- Do NOT translate actual programming code (Python, C++, etc.) or URLs.
- Translate ALL UI elements, menus, and interface labels.

Input Text:
{text_content}
"""

# Prompt asking the model to locate a UI element; the reply is parsed as JSON.
PROMPT_UI_LOCATOR = "Analyze UI (Size: {width}x{height}). Request: '{query}'. Output JSON: {{\"x\": int, \"y\": int, \"found\": bool}}."

# Keys of the text-refinement prompts shown in the Refine menu.
REFINE_PROMPT_KEYS = ("summarize", "fix_grammar", "fix_translate", "explain")

# Old bracket-token syntax kept for backwards compatibility with v1 custom prompts.
LEGACY_REFINER_TOKENS = {
	"summarize": "[summarize]",
	"fix_grammar": "[fix_grammar]",
	"fix_translate": "[fix_translate]",
	"explain": "[explain]",
}

# Catalog of all built-in prompts, editable through the Prompt Manager.
# Entries marked "internal": True are used by the add-on itself and hidden
# from the default listing.
DEFAULT_SYSTEM_PROMPTS = (
	{
		"key": "summarize",
		# Translators: Section header for text refinement prompts in Prompt Manager.
		"section": _("Refine"),
		# Translators: Label for the text summarization prompt.
		"label": _("Summarize"),
		"prompt": "Summarize the text below in {response_lang}.",
	},
	{
		"key": "fix_grammar",
		# Translators: Section header for text refinement prompts in Prompt Manager.
		"section": _("Refine"),
		# Translators: Label for the grammar correction prompt.
		"label": _("Fix Grammar"),
		"prompt": "Fix grammar in the text below. Output ONLY the fixed text.",
	},
	{
		"key": "fix_translate",
		# Translators: Section header for text refinement prompts in Prompt Manager.
		"section": _("Refine"),
		# Translators: Label for the grammar correction and translation prompt.
		"label": _("Fix Grammar & Translate"),
		"prompt": "Fix grammar and translate to {target_lang}.{swap_instruction} Output ONLY the result.",
	},
	{
		"key": "explain",
		# Translators: Section header for text refinement prompts in Prompt Manager.
		"section": _("Refine"),
		# Translators: Label for the text explanation prompt.
		"label": _("Explain"),
		"prompt": "Explain the text below in {response_lang}.",
	},
	{
		"key": "translate_main",
		# Translators: Section header for translation-related prompts in Prompt Manager.
		"section": _("Translation"),
		# Translators: Label for the smart translation prompt.
		"label": _("Smart Translation"),
		"prompt": PROMPT_TRANSLATE.strip(),
	},
	{
		"key": "translate_quick",
		# Translators: Section header for translation-related prompts in Prompt Manager.
		"section": _("Translation"),
		# Translators: Label for the quick translation prompt.
		"label": _("Quick Translation"),
		"prompt": "Translate to {target_lang}. Output ONLY translation.",
	},
	{
		"key": "document_chat_system",
		# Translators: Section header for document-related prompts in Prompt Manager.
		"section": _("Document"),
		# Translators: Label for the initial context prompt in document chat.
		"label": _("Document Chat Context"),
		"prompt": "STRICTLY Respond in {response_lang}. Use Markdown formatting. Analyze the attached content to answer.",
	},
	{
		"key": "document_chat_ack",
		# Translators: Section header for advanced/internal prompts in Prompt Manager.
		"section": _("Advanced"),
		# Translators: Label for the AI's acknowledgement reply in document chat.
		"label": _("Document Chat Bootstrap Reply"),
		"internal": True,
		"prompt": "Context received. Ready for questions.",
	},
	{
		"key": "vision_navigator_object",
		# Translators: Section header for image analysis prompts in Prompt Manager.
		"section": _("Vision"),
		# Translators: Label for the prompt used to analyze the current navigator object.
		"label": _("Navigator Object Analysis"),
		"prompt": (
			"Analyze this image. Describe the layout, visible text, and UI elements. "
			"Use Markdown formatting (headings, lists) to organize the description. "
			"Language: {response_lang}. Ensure the response is strictly in {response_lang}. "
			"IMPORTANT: Start directly with the description content. Do not add introductory "
			"sentences like 'Here is the analysis' or 'The image shows'."
		),
	},
	{
		"key": "vision_fullscreen",
		# Translators: Section header for image analysis prompts in Prompt Manager.
		"section": _("Vision"),
		# Translators: Label for the prompt used to analyze the entire screen.
		"label": _("Full Screen Analysis"),
		"prompt": (
			"Analyze this image. Describe the layout, visible text, and UI elements. "
			"Use Markdown formatting (headings, lists) to organize the description. "
			"Language: {response_lang}. Ensure the response is strictly in {response_lang}. "
			"IMPORTANT: Start directly with the description content. Do not add introductory "
			"sentences like 'Here is the analysis' or 'The image shows'."
		),
	},
	{
		"key": "vision_followup_context",
		# Translators: Section header for advanced/internal prompts in Prompt Manager.
		"section": _("Advanced"),
		# Translators: Label for the follow-up context in image analysis chat.
		"label": _("Vision Follow-up Context"),
		"internal": True,
		"prompt": "Image Context. Target Language: {response_lang}",
	},
	{
		"key": "vision_followup_suffix",
		# Translators: Section header for advanced/internal prompts in Prompt Manager.
		"section": _("Advanced"),
		# Translators: Label for the rule enforced during image analysis follow-up questions.
		"label": _("Vision Follow-up Answer Rule"),
		"internal": True,
		"prompt": "Answer strictly in {response_lang}",
	},
	{
		"key": "video_analysis",
		# Translators: Section header for video analysis prompts in Prompt Manager.
		"section": _("Video"),
		# Translators: Label for the video content analysis prompt.
		"label": _("Video Analysis"),
		"prompt": (
			"Analyze this video. Provide a detailed description of the visual content and a "
			"summary of the audio. IMPORTANT: Write the entire response STRICTLY in "
			"{response_lang} language."
		),
	},
	{
		"key": "audio_transcription",
		# Translators: Section header for audio-related prompts in Prompt Manager.
		"section": _("Audio"),
		# Translators: Label for the audio file transcription prompt.
		"label": _("Audio Transcription"),
		"prompt": "Transcribe this audio in {response_lang}.",
	},
	{
		"key": "dictation_transcribe",
		# Translators: Section header for audio-related prompts in Prompt Manager.
		"section": _("Audio"),
		# Translators: Label for the smart voice dictation prompt.
		"label": _("Smart Dictation"),
		"prompt": (
			"Transcribe speech. Use native script. Fix stutters. If there is no speech, silence, "
			"or background noise only, write exactly: [[[NOSPEECH]]]"
		),
	},
	{
		"key": "ocr_image_extract",
		# Translators: Section header for OCR-related prompts in Prompt Manager.
		"section": _("OCR"),
		# Translators: Label for the OCR prompt used for image text extraction.
		"label": _("OCR Image Extraction"),
		"prompt": (
			"Extract all visible text from this image. Strictly preserve original formatting "
			"(headings, lists, tables) using Markdown. Do not output any system messages or "
			"code block backticks (```). Output ONLY the raw content."
		),
	},
	{
		"key": "ocr_document_extract",
		# Translators: Section header for OCR-related prompts in Prompt Manager.
		"section": _("OCR"),
		# Translators: Label for the OCR prompt used for document text extraction.
		"label": _("OCR Document Extraction"),
		"prompt": (
			"Extract all visible text from this document. Strictly preserve original formatting "
			"(headings, lists, tables) using Markdown. You MUST insert the exact delimiter "
			"'[[[PAGE_SEP]]]' immediately after the content of every single page. Do not output "
			"any system messages or code block backticks (```). Output ONLY the raw content."
		),
	},
	{
		"key": "ocr_document_translate",
		# Translators: Section header for document-related prompts in Prompt Manager.
		"section": _("Document"),
		# Translators: Label for the combined OCR and translation prompt for documents.
		"label": _("Document OCR + Translate"),
		"prompt": (
			"Extract all text from this document. Preserve formatting (Markdown). Then translate "
			"the content to {target_lang}. Output ONLY the translated content. Do not add "
			"explanations."
		),
	},
	{
		"key": "captcha_solver_base",
		# Translators: Section header for CAPTCHA-related prompts in Prompt Manager.
		"section": _("CAPTCHA"),
		# Translators: Label for the CAPTCHA solving prompt.
		"label": _("CAPTCHA Solver"),
		"internal": True,
		"prompt": (
			"Blind user. Return CAPTCHA code only. If NO CAPTCHA is detected in the image, "
			"strictly return: [[[NO_CAPTCHA]]].{captcha_extra}"
		),
	},
	{
		"key": "refine_files_only",
		# Translators: Section header for advanced/internal prompts in Prompt Manager.
		"section": _("Advanced"),
		# Translators: Label for the fallback prompt when only files are provided in Refine.
		"label": _("Refine Files-Only Fallback"),
		"internal": True,
		"prompt": "Analyze these files.",
	},
)

# (token, description, accepted input type) rows shown in the Variables Guide
# of the Prompt Manager.
PROMPT_VARIABLES_GUIDE = (
	# Translators: Description and input type for the [selection] variable in the Variables Guide.
	("[selection]", _("Currently selected text"), _("Text")),
	# Translators: Description for the [clipboard] variable in the Variables Guide.
	("[clipboard]", _("Clipboard content"), _("Text")),
	# Translators: Description and input type for the [screen_obj] variable in the Variables Guide.
	("[screen_obj]", _("Screenshot of the navigator object"), _("Image")),
	# Translators: Description for the [screen_full] variable in the Variables Guide.
	("[screen_full]", _("Screenshot of the entire screen"), _("Image")),
	# Translators: Description and input type for the [file_ocr] variable in the Variables Guide.
	("[file_ocr]", _("Select image/PDF/TIFF for text extraction"), _("Image, PDF, TIFF")),
	# Translators: Description and input type for the [file_read] variable in the Variables Guide.
	("[file_read]", _("Select document for reading"), _("TXT, Code, PDF")),
	# Translators: Description and input type for the [file_audio] variable in the Variables Guide.
	("[file_audio]", _("Select audio file for analysis"), _("MP3, WAV, OGG")),
)
diff --git a/addon/globalPlugins/visionAssistant/dialogs.py b/addon/globalPlugins/visionAssistant/dialogs.py
new file mode 100644
index 0000000..8d2e6fd
--- /dev/null
+++ b/addon/globalPlugins/visionAssistant/dialogs.py
@@ -0,0 +1,1014 @@
# -*- coding: utf-8 -*-

import os
import json
import io
import tempfile
import threading
import time
import gc
import wave
import logging
import base64
from concurrent.futures import ThreadPoolExecutor

import wx

import addonHandler
import config
import gui
import ui
import api
import textInfos
import tones
import scriptHandler

from .prompt_manager_dialog import PromptManagerDialog
from .constants import (
	ADDON_NAME,
	GEMINI_VOICES,
	MODELS,
	OCR_ENGINES,
	PROMPT_VARIABLES_GUIDE,
	SOURCE_NAMES,
	TARGET_NAMES,
)
from .markdown_utils import clean_markdown, markdown_to_html
from .prompt_helpers import (
	get_configured_default_prompts,
	load_configured_custom_prompts,
	serialize_custom_prompts_v2,
	serialize_default_prompt_overrides,
)
from .services import (
	ChromeOCREngine,
GeminiHandler, + GoogleTranslator, + SmartProgrammersOCREngine, + get_file_path, + get_mime_type, + show_error_dialog, +) + +try: + import fitz +except ImportError: + fitz = None + +log = logging.getLogger(__name__) +addonHandler.initTranslation() +_vision_assistant_instance = None + + +def set_vision_assistant_instance(instance): + global _vision_assistant_instance + _vision_assistant_instance = instance + +class VisionQADialog(wx.Dialog): + def __init__(self, parent, title, initial_text, context_data, callback_fn, extra_info=None, raw_content=None, status_callback=None, announce_on_open=True, allow_questions=True): + super(VisionQADialog, self).__init__(parent, title=title, size=(550, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER) + self.context_data = context_data + self.callback_fn = callback_fn + self.extra_info = extra_info + self.chat_history = [] + self.raw_content = raw_content + self.status_callback = status_callback + self.announce_on_open = announce_on_open + self.allow_questions = allow_questions + + mainSizer = wx.BoxSizer(wx.VERTICAL) + # Translators: Label for the AI response text area in a chat dialog + lbl_text = _("AI Response:") + lbl = wx.StaticText(self, label=lbl_text) + mainSizer.Add(lbl, 0, wx.ALL, 5) + self.outputArea = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY) + mainSizer.Add(self.outputArea, 1, wx.EXPAND | wx.ALL, 5) + + self.should_clean = config.conf["VisionAssistant"]["clean_markdown_chat"] + display_text = clean_markdown(initial_text) if self.should_clean else initial_text + if display_text: + # Translators: Format for displaying AI message in a chat dialog + init_msg = _("AI: {text}\n").format(text=display_text) + self.outputArea.AppendText(init_msg) + if config.conf["VisionAssistant"]["copy_to_clipboard"]: + api.copyToClip(raw_content if raw_content else display_text) + + if not (extra_info and extra_info.get('skip_init_history')): + self.chat_history.append({"role": "model", "parts": [{"text": 
initial_text}]}) + + self.inputArea = None + if allow_questions: + # Translators: Label for user input field in a chat dialog + ask_text = _("Ask:") + inputLbl = wx.StaticText(self, label=ask_text) + mainSizer.Add(inputLbl, 0, wx.ALL, 5) + self.inputArea = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30)) + mainSizer.Add(self.inputArea, 0, wx.EXPAND | wx.ALL, 5) + + btnSizer = wx.BoxSizer(wx.HORIZONTAL) + self.askBtn = None + if allow_questions: + # Translators: Button to send message in a chat dialog + self.askBtn = wx.Button(self, label=_("Send")) + # Translators: Button to view the content in a formatted HTML window + self.viewBtn = wx.Button(self, label=_("View Formatted")) + self.viewBtn.Bind(wx.EVT_BUTTON, self.onView) + # Translators: Button to save only the result content without chat history + self.saveContentBtn = wx.Button(self, label=_("Save Content")) + self.saveContentBtn.Bind(wx.EVT_BUTTON, self.onSaveContent) + # Translators: Button to save chat in a chat dialog + self.saveBtn = wx.Button(self, label=_("Save Chat")) + # Translators: Button to close chat dialog + self.closeBtn = wx.Button(self, wx.ID_CANCEL, label=_("Close")) + + self.saveBtn.Enable(bool(initial_text.strip())) + self.viewBtn.Enable(bool(self.raw_content)) + self.saveContentBtn.Enable(bool(self.raw_content)) + + if self.askBtn: + btnSizer.Add(self.askBtn, 0, wx.ALL, 5) + btnSizer.Add(self.viewBtn, 0, wx.ALL, 5) + btnSizer.Add(self.saveContentBtn, 0, wx.ALL, 5) + btnSizer.Add(self.saveBtn, 0, wx.ALL, 5) + btnSizer.Add(self.closeBtn, 0, wx.ALL, 5) + mainSizer.Add(btnSizer, 0, wx.ALIGN_RIGHT) + + self.SetSizer(mainSizer) + if self.inputArea: + self.inputArea.SetFocus() + else: + self.outputArea.SetFocus() + if self.askBtn: + self.askBtn.Bind(wx.EVT_BUTTON, self.onAsk) + self.saveBtn.Bind(wx.EVT_BUTTON, self.onSave) + if self.inputArea: + self.inputArea.Bind(wx.EVT_TEXT_ENTER, self.onAsk) + if display_text and self.announce_on_open: + wx.CallLater(300, ui.message, display_text) 
+ + def onAsk(self, event): + if not self.inputArea: + return + question = self.inputArea.Value + if not question.strip(): return + # Translators: Format for displaying User message in a chat dialog + user_msg = _("\nYou: {text}\n").format(text=question) + self.outputArea.AppendText(user_msg) + self.inputArea.Clear() + # Translators: Message shown while processing in a chat dialog + msg = _("Thinking...") + ui.message(msg) + threading.Thread(target=self.process_question, args=(question,), daemon=True).start() + + def process_question(self, question): + result_tuple = self.callback_fn(self.context_data, question, self.chat_history, self.extra_info) + response_text, _ = result_tuple + if response_text: + if not (self.extra_info and self.extra_info.get('file_context')): + self.chat_history.append({"role": "user", "parts": [{"text": question}]}) + self.chat_history.append({"role": "model", "parts": [{"text": response_text}]}) + final_text = clean_markdown(response_text) if self.should_clean else response_text + wx.CallAfter(self.update_response, final_text, response_text) + + def update_response(self, display_text, raw_text=None): + if raw_text: + self.raw_content = raw_text + self.viewBtn.Enable(True) + self.saveContentBtn.Enable(True) + # Translators: Format for displaying AI message in a chat dialog + ai_msg = _("AI: {text}\n").format(text=display_text) + self.outputArea.AppendText(ai_msg) + self.saveBtn.Enable(True) + if config.conf["VisionAssistant"]["copy_to_clipboard"]: + api.copyToClip(raw_text if raw_text else display_text) + self.outputArea.ShowPosition(self.outputArea.GetLastPosition()) + ui.message(display_text) + + def report_save(self, msg): + if self.status_callback: self.status_callback(msg) + else: ui.message(msg) + + def onView(self, event): + full_html = "" + # Translators: Format for displaying User message in a chat dialog + user_label = _("\nYou: {text}\n").format(text="").strip() + # Translators: Format for displaying AI message in a chat dialog 
+ ai_label = _("AI: {text}\n").format(text="").strip() + + if self.chat_history: + for item in self.chat_history: + role = item.get("role", "") + text = item.get("parts", [{}])[0].get("text", "") + if role == "user": + safe_text = text.replace("&", "&").replace("<", "<").replace(">", ">") + full_html += f"

{user_label}

{safe_text}

" + elif role == "model": + formatted_text = markdown_to_html(text, full_page=False) + full_html += f"

{ai_label}

{formatted_text}
" + + if not full_html and self.raw_content: + formatted_text = markdown_to_html(self.raw_content, full_page=False) + full_html += f"

{ai_label}

{formatted_text}" + + if not full_html: return + try: + # Translators: Title of the formatted result window + ui.browseableMessage(full_html, _("Formatted Conversation"), isHtml=True) + except Exception as e: + # Translators: Error message if viewing fails + msg = _("Error displaying content: {error}").format(error=e) + show_error_dialog(msg) + + def onSave(self, event): + # Translators: Save dialog title + path = get_file_path(_("Save Chat Log"), "Text files (*.txt)|*.txt", mode="save") + if path: + try: + with open(path, "w", encoding="utf-8") as f: f.write(self.outputArea.GetValue()) + # Translators: Message shown on successful save of a file. + self.report_save(_("Saved.")) + except Exception as e: + # Translators: Message in the error dialog when saving fails. + msg = _("Save failed: {error}").format(error=e) + show_error_dialog(msg) + + def onSaveContent(self, event): + # Translators: Save dialog title + path = get_file_path(_("Save Result"), "HTML files (*.html)|*.html", mode="save") + if path: + try: + full_html = markdown_to_html(self.raw_content, full_page=True) + with open(path, "w", encoding="utf-8") as f: f.write(full_html) + # Translators: Message on successful save + self.report_save(_("Saved.")) + except Exception as e: + # Translators: Message in the error dialog when saving fails. 
+ msg = _("Save failed: {error}").format(error=e) + show_error_dialog(msg) + +class SettingsPanel(gui.settingsDialogs.SettingsPanel): + title = ADDON_NAME + def makeSettings(self, settingsSizer): + # --- Connection Group --- + # Translators: Title of the settings group for connection and updates + groupLabel = _("Connection") + self.connectionBox = wx.StaticBox(self, label=groupLabel) + connectionSizer = wx.StaticBoxSizer(self.connectionBox, wx.VERTICAL) + cHelper = gui.guiHelper.BoxSizerHelper(self.connectionBox, sizer=connectionSizer) + + # Translators: Label for API Key input + apiLabel = wx.StaticText(self.connectionBox, label=_("Gemini API Key (Separate multiple keys with comma or newline):")) + cHelper.addItem(apiLabel) + + api_value = config.conf["VisionAssistant"]["api_key"] + + self.apiKeyCtrl_hidden = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_PASSWORD, size=(-1, -1)) + + self.apiKeyCtrl_visible = wx.TextCtrl(self.connectionBox, value=api_value, style=wx.TE_MULTILINE | wx.TE_DONTWRAP, size=(-1, 60)) + self.apiKeyCtrl_visible.Hide() + + cHelper.addItem(self.apiKeyCtrl_hidden) + cHelper.addItem(self.apiKeyCtrl_visible) + + # Translators: Checkbox to toggle API Key visibility + self.showApiCheck = wx.CheckBox(self.connectionBox, label=_("Show API Key")) + self.showApiCheck.Bind(wx.EVT_CHECKBOX, self.onToggleApiVisibility) + cHelper.addItem(self.showApiCheck) + + model_display_names = [opt[0] for opt in MODELS] + # Translators: Label for Model selection + self.model = cHelper.addLabeledControl(_("AI Model:"), wx.Choice, choices=model_display_names) + current_id = config.conf["VisionAssistant"]["model_name"] + try: + index = next(i for i, v in enumerate(MODELS) if v[1] == current_id) + self.model.SetSelection(index) + except StopIteration: self.model.SetSelection(0) + + # Translators: Label for Proxy URL input + self.proxyUrl = cHelper.addLabeledControl(_("Proxy URL:"), wx.TextCtrl) + self.proxyUrl.Value = 
config.conf["VisionAssistant"]["proxy_url"] + + # Translators: Checkbox to enable/disable automatic update checks on NVDA startup + self.checkUpdateStartup = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Check for updates on startup"))) + self.checkUpdateStartup.Value = config.conf["VisionAssistant"]["check_update_startup"] + # Translators: Checkbox to toggle markdown cleaning in chat windows + self.cleanMarkdown = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Clean Markdown in Chat"))) + self.cleanMarkdown.Value = config.conf["VisionAssistant"]["clean_markdown_chat"] + # Translators: Checkbox to enable copying AI responses to clipboard + self.copyToClipboard = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Copy AI responses to clipboard"))) + self.copyToClipboard.Value = config.conf["VisionAssistant"]["copy_to_clipboard"] + # Translators: Checkbox to skip chat window and only speak AI responses + self.skipChatDialog = cHelper.addItem(wx.CheckBox(self.connectionBox, label=_("Direct Output (No Chat Window)"))) + self.skipChatDialog.Value = config.conf["VisionAssistant"]["skip_chat_dialog"] + settingsSizer.Add(connectionSizer, 0, wx.EXPAND | wx.ALL, 5) + + # --- Translation Languages Group --- + # Translators: Title of the settings group for translation languages configuration + groupLabel = _("Translation Languages") + langBox = wx.StaticBox(self, label=groupLabel) + langSizer = wx.StaticBoxSizer(langBox, wx.VERTICAL) + lHelper = gui.guiHelper.BoxSizerHelper(langBox, sizer=langSizer) + + # Translators: Label for Source Language selection + self.sourceLang = lHelper.addLabeledControl(_("Source:"), wx.Choice, choices=SOURCE_NAMES) + try: self.sourceLang.SetSelection(SOURCE_NAMES.index(config.conf["VisionAssistant"]["source_language"])) + except: self.sourceLang.SetSelection(0) + + # Translators: Label for Target Language selection + self.targetLang = lHelper.addLabeledControl(_("Target:"), wx.Choice, choices=TARGET_NAMES) + try: 
self.targetLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["target_language"])) + except: self.targetLang.SetSelection(0) + + # Translators: Label for AI Response Language selection + self.aiResponseLang = lHelper.addLabeledControl(_("AI Response:"), wx.Choice, choices=TARGET_NAMES) + try: self.aiResponseLang.SetSelection(TARGET_NAMES.index(config.conf["VisionAssistant"]["ai_response_language"])) + except: self.aiResponseLang.SetSelection(0) + + # Translators: Checkbox for Smart Swap feature + self.smartSwap = lHelper.addItem(wx.CheckBox(langBox, label=_("Smart Swap"))) + self.smartSwap.Value = config.conf["VisionAssistant"]["smart_swap"] + settingsSizer.Add(langSizer, 0, wx.EXPAND | wx.ALL, 5) + + # --- Document Reader Settings --- + # Translators: Title of settings group for Document Reader features + groupLabel = _("Document Reader") + docBox = wx.StaticBox(self, label=groupLabel) + docSizer = wx.StaticBoxSizer(docBox, wx.VERTICAL) + dHelper = gui.guiHelper.BoxSizerHelper(docBox, sizer=docSizer) + + # Translators: Label for OCR Engine selection + self.ocr_sel = dHelper.addLabeledControl(_("OCR Engine:"), wx.Choice, choices=[x[0] for x in OCR_ENGINES]) + curr_ocr = config.conf["VisionAssistant"]["ocr_engine"] + try: + o_idx = next(i for i, v in enumerate(OCR_ENGINES) if v[1] == curr_ocr) + self.ocr_sel.SetSelection(o_idx) + except: self.ocr_sel.SetSelection(0) + + voice_choices = [f"{v[0]} - {v[1]}" for v in GEMINI_VOICES] + # Translators: Label for TTS Voice selection + self.voice_sel = dHelper.addLabeledControl(_("TTS Voice:"), wx.Choice, choices=voice_choices) + curr_voice = config.conf["VisionAssistant"]["tts_voice"] + try: + v_idx = next(i for i, v in enumerate(GEMINI_VOICES) if v[0] == curr_voice) + self.voice_sel.SetSelection(v_idx) + except: self.voice_sel.SetSelection(1) + settingsSizer.Add(docSizer, 0, wx.EXPAND | wx.ALL, 5) + + # --- CAPTCHA Group --- + # Translators: Title of the settings group for CAPTCHA options + groupLabel = 
_("CAPTCHA") + capBox = wx.StaticBox(self, label=groupLabel) + capSizer = wx.StaticBoxSizer(capBox, wx.VERTICAL) + capHelper = gui.guiHelper.BoxSizerHelper(capBox, sizer=capSizer) + # Translators: Label for CAPTCHA capture method selection + self.captchaMode = capHelper.addLabeledControl(_("Capture Method:"), wx.Choice, choices=[ + # Translators: A choice for capture method. Captures only the specific object under the NVDA navigator cursor. + _("Navigator Object"), + # Translators: A choice for capture method. Captures the entire visible screen area. + _("Full Screen") + ]) + self.captchaMode.SetSelection(0 if config.conf["VisionAssistant"]["captcha_mode"] == 'navigator' else 1) + settingsSizer.Add(capSizer, 0, wx.EXPAND | wx.ALL, 5) + + self.defaultPromptItems = get_configured_default_prompts() + self.customPromptItems = load_configured_custom_prompts() + + # --- Prompt Manager Group --- + # Translators: Title of the settings group for prompt management + groupLabel = _("Prompts") + promptsBox = wx.StaticBox(self, label=groupLabel) + promptsSizer = wx.StaticBoxSizer(promptsBox, wx.VERTICAL) + pHelper = gui.guiHelper.BoxSizerHelper(promptsBox, sizer=promptsSizer) + # Translators: Description for the prompt manager button. + pHelper.addItem(wx.StaticText(promptsBox, label=_("Manage default and custom prompts."))) + # Translators: Button label to open prompt manager dialog. + self.managePromptsBtn = wx.Button(promptsBox, label=_("Manage Prompts...")) + self.managePromptsBtn.Bind(wx.EVT_BUTTON, self.onManagePrompts) + pHelper.addItem(self.managePromptsBtn) + self.promptsSummary = wx.StaticText(promptsBox) + pHelper.addItem(self.promptsSummary) + self._refreshPromptSummary() + settingsSizer.Add(promptsSizer, 0, wx.EXPAND | wx.ALL, 5) + + def _refreshPromptSummary(self): + # Translators: Summary text for prompt counts in settings. 
+ summary = _("Default prompts: {defaultCount}, Custom prompts: {customCount}").format( + defaultCount=len(self.defaultPromptItems), + customCount=len(self.customPromptItems), + ) + self.promptsSummary.SetLabel(summary) + + def onManagePrompts(self, event): + top = wx.GetTopLevelParent(self) + dlg = PromptManagerDialog( + self, + self.defaultPromptItems, + self.customPromptItems, + PROMPT_VARIABLES_GUIDE, + ) + try: + if dlg.ShowModal() == wx.ID_OK: + self.defaultPromptItems = dlg.get_default_items() + self.customPromptItems = dlg.get_custom_items() + self._refreshPromptSummary() + finally: + dlg.Destroy() + if top: + top.Enable(True) + top.SetFocus() + + def onToggleApiVisibility(self, event): + if self.showApiCheck.IsChecked(): + self.apiKeyCtrl_visible.SetValue(self.apiKeyCtrl_hidden.GetValue()) + self.apiKeyCtrl_hidden.Hide() + self.apiKeyCtrl_visible.Show() + else: + self.apiKeyCtrl_hidden.SetValue(self.apiKeyCtrl_visible.GetValue()) + self.apiKeyCtrl_visible.Hide() + self.apiKeyCtrl_hidden.Show() + + self.connectionBox.GetParent().Layout() + + def onSave(self): + val = self.apiKeyCtrl_visible.GetValue() if self.showApiCheck.IsChecked() else self.apiKeyCtrl_hidden.GetValue() + config.conf["VisionAssistant"]["api_key"] = val.strip() + config.conf["VisionAssistant"]["model_name"] = MODELS[self.model.GetSelection()][1] + config.conf["VisionAssistant"]["proxy_url"] = self.proxyUrl.Value.strip() + config.conf["VisionAssistant"]["source_language"] = SOURCE_NAMES[self.sourceLang.GetSelection()] + config.conf["VisionAssistant"]["target_language"] = TARGET_NAMES[self.targetLang.GetSelection()] + config.conf["VisionAssistant"]["ai_response_language"] = TARGET_NAMES[self.aiResponseLang.GetSelection()] + config.conf["VisionAssistant"]["smart_swap"] = self.smartSwap.Value + config.conf["VisionAssistant"]["check_update_startup"] = self.checkUpdateStartup.Value + config.conf["VisionAssistant"]["clean_markdown_chat"] = self.cleanMarkdown.Value + 
		config.conf["VisionAssistant"]["copy_to_clipboard"] = self.copyToClipboard.Value
		config.conf["VisionAssistant"]["skip_chat_dialog"] = self.skipChatDialog.Value
		config.conf["VisionAssistant"]["captcha_mode"] = 'navigator' if self.captchaMode.GetSelection() == 0 else 'fullscreen'
		config.conf["VisionAssistant"]["custom_prompts_v2"] = serialize_custom_prompts_v2(self.customPromptItems)
		# Legacy v1 custom prompts are cleared once the v2 serialization is written.
		config.conf["VisionAssistant"]["custom_prompts"] = ""
		config.conf["VisionAssistant"]["default_refine_prompts"] = serialize_default_prompt_overrides(self.defaultPromptItems)
		config.conf["VisionAssistant"]["ocr_engine"] = OCR_ENGINES[self.ocr_sel.GetSelection()][1]
		config.conf["VisionAssistant"]["tts_voice"] = GEMINI_VOICES[self.voice_sel.GetSelection()][0]

class RangeDialog(wx.Dialog):
	"""Modal dialog selecting a page range and optional translation for processing."""
	def __init__(self, parent, total_pages):
		# Translators: Title of the PDF options dialog
		super().__init__(parent, title=_("Options"), size=(350, 320))
		sizer = wx.BoxSizer(wx.VERTICAL)
		# Translators: Label showing total pages found
		sizer.Add(wx.StaticText(self, label=_("Total Pages (All Files): {count}").format(count=total_pages)), 0, wx.ALL, 10)

		# Translators: Box title for page range selection
		box_range = wx.StaticBoxSizer(wx.VERTICAL, self, _("Range"))
		g_sizer = wx.FlexGridSizer(2, 2, 10, 10)
		# Translators: Label for start page
		g_sizer.Add(wx.StaticText(self, label=_("From:")), 0, wx.ALIGN_CENTER_VERTICAL)
		self.spin_from = wx.SpinCtrl(self, min=1, max=total_pages, initial=1)
		g_sizer.Add(self.spin_from, 1, wx.EXPAND)
		# Translators: Label for end page
		g_sizer.Add(wx.StaticText(self, label=_("To:")), 0, wx.ALIGN_CENTER_VERTICAL)
		self.spin_to = wx.SpinCtrl(self, min=1, max=total_pages, initial=total_pages)
		g_sizer.Add(self.spin_to, 1, wx.EXPAND)
		box_range.Add(g_sizer, 1, wx.EXPAND | wx.ALL, 5)
		sizer.Add(box_range, 0, wx.EXPAND | wx.ALL, 10)

		# Translators: Box title for translation options
		box_trans = wx.StaticBoxSizer(wx.VERTICAL, self, _("Translation"))
		# Translators: Checkbox to enable translation
		self.chk_trans = wx.CheckBox(self, label=_("Translate Output"))
		box_trans.Add(self.chk_trans, 0, wx.ALL, 5)
		h_sizer = wx.BoxSizer(wx.HORIZONTAL)
		# Translators: Label for target language
		h_sizer.Add(wx.StaticText(self, label=_("Target:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
		self.cmb_lang = wx.Choice(self, choices=TARGET_NAMES)
		self.cmb_lang.SetSelection(0)
		h_sizer.Add(self.cmb_lang, 1)
		box_trans.Add(h_sizer, 1, wx.EXPAND | wx.ALL, 5)
		sizer.Add(box_trans, 0, wx.EXPAND | wx.ALL, 10)

		btn_sizer = wx.BoxSizer(wx.HORIZONTAL)
		# Translators: Button to start processing
		btn_ok = wx.Button(self, wx.ID_OK, label=_("Start"))
		btn_ok.SetDefault()
		# Translators: Button to cancel
		btn_cancel = wx.Button(self, wx.ID_CANCEL, label=_("Cancel"))
		btn_sizer.Add(btn_ok, 0, wx.RIGHT, 10)
		btn_sizer.Add(btn_cancel, 0)
		sizer.Add(btn_sizer, 0, wx.ALIGN_CENTER | wx.ALL, 10)
		self.SetSizer(sizer)

		# Language choice is only meaningful while translation is enabled.
		self.chk_trans.Bind(wx.EVT_CHECKBOX, self.on_check)
		self.cmb_lang.Disable()

	def on_check(self, event):
		# Enable the target-language picker only when translation is requested.
		self.cmb_lang.Enable(self.chk_trans.IsChecked())

	def get_settings(self):
		"""Return the user's choices; page indices are converted to 0-based."""
		return {
			'start': self.spin_from.GetValue() - 1,
			'end': self.spin_to.GetValue() - 1,
			'translate': self.chk_trans.IsChecked(),
			'lang': TARGET_NAMES[self.cmb_lang.GetSelection()]
		}

class ChatDialog(wx.Dialog):
	"""Dialog for chatting about an uploaded document file via Gemini."""
	# Singleton reference to the currently open dialog, if any.
	instance = None

	def __init__(self, parent, file_path):
		# Translators: Title of the chat dialog
		super().__init__(parent, title=_("Ask about Document"), size=(600, 500), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER)
		ChatDialog.instance = self
		self.file_path = file_path
		self.file_uri = None  # Set by init_upload once the upload succeeds.
		self.mime_type = get_mime_type(file_path)
		self.history = []

		sizer = wx.BoxSizer(wx.VERTICAL)
		# Translators: Label showing the analyzed file name
		lbl_info = wx.StaticText(self, label=_("File: {name}").format(name=os.path.basename(file_path)))
		sizer.Add(lbl_info, 0,
			wx.ALL, 5)
		self.display = wx.TextCtrl(self, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2)
		sizer.Add(self.display, 1, wx.EXPAND | wx.ALL, 10)
		# Translators: Status message while uploading
		self.display.SetValue(_("Uploading to Gemini...\n"))

		input_sizer = wx.BoxSizer(wx.HORIZONTAL)
		# Translators: Label for the chat input field
		input_sizer.Add(wx.StaticText(self, label=_("Your Question:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
		self.input = wx.TextCtrl(self, style=wx.TE_PROCESS_ENTER, size=(-1, 30))
		self.input.Bind(wx.EVT_TEXT_ENTER, self.on_send)
		input_sizer.Add(self.input, 1, wx.EXPAND | wx.RIGHT, 5)

		# Translators: Button to send message
		self.btn_send = wx.Button(self, label=_("Send"))
		self.btn_send.Bind(wx.EVT_BUTTON, self.on_send)
		# Disabled until the upload completes (see on_ready).
		self.btn_send.Disable()
		input_sizer.Add(self.btn_send, 0)
		sizer.Add(input_sizer, 0, wx.EXPAND | wx.ALL, 10)
		self.SetSizer(sizer)
		self.Bind(wx.EVT_CLOSE, self.on_close)
		# Upload the file in the background so the dialog stays responsive.
		threading.Thread(target=self.init_upload, daemon=True).start()

	def on_close(self, event):
		# Clear the singleton so a new dialog can be opened later.
		ChatDialog.instance = None
		self.Destroy()

	def init_upload(self):
		"""Worker-thread body: upload the file and enable chat on success."""
		uri = GeminiHandler.upload_for_chat(self.file_path, self.mime_type)
		# An error is signalled in-band as a string starting with "ERROR:".
		if uri and not str(uri).startswith("ERROR:"):
			self.file_uri = uri
			wx.CallAfter(self.on_ready)
		else:
			# Strip the "ERROR:" prefix (6 characters) before showing it.
			err_msg = str(uri)[6:] if uri else _("Upload failed.")
			wx.CallAfter(show_error_dialog, err_msg)
			wx.CallAfter(self.Close)

	def on_ready(self):
		"""Runs on the UI thread once the upload has finished."""
		# Translators: Message when ready to chat
		self.display.AppendText(_("Ready! Ask your questions.\n"))
		self.btn_send.Enable()
		self.input.SetFocus()

	def on_send(self, event):
		"""Send the typed question to Gemini on a background thread."""
		msg = self.input.GetValue().strip()
		if not msg: return
		self.input.Clear()
		self.display.AppendText(f"You: {msg}\n")
		# Translators: Message showing AI is thinking
		ui.message(_("Thinking..."))
		threading.Thread(target=self.do_chat, args=(msg,), daemon=True).start()

	def do_chat(self, msg):
		"""Worker-thread body: run one chat turn and append the reply."""
		resp = GeminiHandler.chat(self.history, msg, self.file_uri, self.mime_type)

		# Errors are returned in-band with an "ERROR:" prefix.
		if str(resp).startswith("ERROR:"):
			show_error_dialog(resp[6:])
			if _vision_assistant_instance:
				# Translators: Initial status when the add-on is doing nothing
				_vision_assistant_instance.current_status = _("Idle")
			return

		self.history.append({"role": "user", "parts": [{"text": msg}]})
		self.history.append({"role": "model", "parts": [{"text": resp}]})
		wx.CallAfter(self.display.AppendText, f"AI: {resp}\n\n")
		# Translators: Spoken prefix for AI response
		wx.CallAfter(ui.message, _("AI: ") + resp)

class DocumentViewerDialog(wx.Dialog):
	# Paginated viewer for OCR/translation results of a document.
	def __init__(self, parent, virtual_doc, settings):
		# Translators: Title of the Document Reader window.
+ title_text = f"{ADDON_NAME} - {_('Document Reader')}" + super().__init__(parent, title=title_text, size=(800, 600), style=wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER | wx.MAXIMIZE_BOX) + self.v_doc = virtual_doc + self.start_page = settings['start'] + self.end_page = settings['end'] + self.do_translate = settings['translate'] + self.target_lang = settings['lang'] + self.range_count = self.end_page - self.start_page + 1 + self.page_cache = {} + self.current_page = self.start_page + self.thread_pool = ThreadPoolExecutor(max_workers=5) + + self.init_ui() + self.Centre() + threading.Thread(target=self.start_auto_processing, daemon=True).start() + + def init_ui(self): + panel = wx.Panel(self) + vbox = wx.BoxSizer(wx.VERTICAL) + # Translators: Initial status message + self.lbl_status = wx.StaticText(panel, label=_("Initializing...")) + vbox.Add(self.lbl_status, 0, wx.ALL, 5) + self.txt_content = wx.TextCtrl(panel, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2) + vbox.Add(self.txt_content, 1, wx.EXPAND | wx.LEFT | wx.RIGHT, 10) + hbox_nav = wx.BoxSizer(wx.HORIZONTAL) + # Translators: Button to go to previous page + self.btn_prev = wx.Button(panel, label=_("Previous (Ctrl+PageUp)")) + self.btn_prev.Bind(wx.EVT_BUTTON, self.on_prev) + hbox_nav.Add(self.btn_prev, 0, wx.RIGHT, 5) + # Translators: Button to go to next page + self.btn_next = wx.Button(panel, label=_("Next (Ctrl+PageDown)")) + self.btn_next.Bind(wx.EVT_BUTTON, self.on_next) + hbox_nav.Add(self.btn_next, 0, wx.RIGHT, 15) + # Translators: Label for Go To Page + hbox_nav.Add(wx.StaticText(panel, label=_("Go to:")), 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5) + choices = [str(i+1) for i in range(self.start_page, self.end_page + 1)] + self.cmb_pages = wx.Choice(panel, choices=choices) + self.cmb_pages.Bind(wx.EVT_CHOICE, self.on_page_select) + hbox_nav.Add(self.cmb_pages, 0, wx.RIGHT, 15) + vbox.Add(hbox_nav, 0, wx.ALIGN_CENTER | wx.ALL, 10) + hbox_actions = wx.BoxSizer(wx.HORIZONTAL) + # Translators: Button 
to Ask questions about the document + self.btn_ask = wx.Button(panel, label=_("Ask AI (Alt+A)")) + self.btn_ask.Bind(wx.EVT_BUTTON, self.on_ask) + hbox_actions.Add(self.btn_ask, 0, wx.RIGHT, 5) + + # Translators: Button to force re-scan + self.btn_gemini = wx.Button(panel, label=_("Re-scan with Gemini (Alt+R)")) + self.btn_gemini.Bind(wx.EVT_BUTTON, self.on_gemini_scan) + hbox_actions.Add(self.btn_gemini, 0, wx.RIGHT, 5) + + # Translators: Button to generate audio + self.btn_tts = wx.Button(panel, label=_("Generate Audio (Alt+G)")) + self.btn_tts.Bind(wx.EVT_BUTTON, self.on_tts) + hbox_actions.Add(self.btn_tts, 0, wx.RIGHT, 5) + + # Translators: Button to view formatted content + self.btn_view = wx.Button(panel, label=_("View Formatted")) + self.btn_view.Bind(wx.EVT_BUTTON, self.on_view) + hbox_actions.Add(self.btn_view, 0, wx.RIGHT, 5) + + # Translators: Button to save text + self.btn_save = wx.Button(panel, label=_("Save (Alt+S)")) + self.btn_save.Bind(wx.EVT_BUTTON, self.on_save_all) + hbox_actions.Add(self.btn_save, 0) + + vbox.Add(hbox_actions, 0, wx.ALIGN_CENTER | wx.ALL, 5) + btn_close = wx.Button(panel, wx.ID_CLOSE, label=_("Close")) + btn_close.Bind(wx.EVT_BUTTON, lambda e: self.Destroy()) + vbox.Add(btn_close, 0, wx.ALIGN_RIGHT | wx.ALL, 10) + panel.SetSizer(vbox) + accel_tbl = wx.AcceleratorTable([ + (wx.ACCEL_CTRL, wx.WXK_PAGEDOWN, self.btn_next.GetId()), + (wx.ACCEL_CTRL, wx.WXK_PAGEUP, self.btn_prev.GetId()), + (wx.ACCEL_CTRL, ord('S'), self.btn_save.GetId()), + (wx.ACCEL_ALT, ord('S'), self.btn_save.GetId()), + (wx.ACCEL_ALT, ord('A'), self.btn_ask.GetId()), + (wx.ACCEL_ALT, ord('R'), self.btn_gemini.GetId()), + (wx.ACCEL_ALT, ord('G'), self.btn_tts.GetId()) + ]) + self.SetAcceleratorTable(accel_tbl) + self.cmb_pages.SetSelection(0) + self.update_view() + self.txt_content.SetFocus() + + def start_auto_processing(self): + engine = config.conf["VisionAssistant"]["ocr_engine"] + + if engine == 'gemini': + 
threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start() + else: + for i in range(self.start_page, self.end_page + 1): + self.thread_pool.submit(self.process_page_worker, i) + + def process_page_worker(self, page_num): + if page_num in self.page_cache: return + text = self._get_page_text_logic(page_num) + self.page_cache[page_num] = text + if page_num == self.current_page: + wx.CallAfter(self.update_view) + # Translators: Spoken message when the current page is ready + wx.CallAfter(ui.message, _("Page {num} ready").format(num=page_num + 1)) + + def _get_page_text_logic(self, page_num): + file_path, page_idx = self.v_doc.get_page_info(page_num) + if not file_path: return "" + try: + doc = fitz.open(file_path) + page = doc.load_page(page_idx) + pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) + img_bytes = pix.tobytes("jpg") + doc.close() + engine = config.conf["VisionAssistant"]["ocr_engine"] + text = None + if engine == 'gemini': + try: text = GeminiHandler.ocr_page(img_bytes) + except: text = None + if not text or not text.strip() or engine == 'chrome': + text = ChromeOCREngine.recognize(img_bytes) + if not text or not text.strip(): + text = SmartProgrammersOCREngine.recognize(img_bytes) + if not text or not text.strip(): + # Translators: Placeholder text when OCR fails + text = _("[OCR failed. 
Try Gemini Re-scan.]") + if self.do_translate and text and "[OCR failed" not in text: + if engine == 'gemini': + text = GeminiHandler.translate(text, self.target_lang) + else: + text = GoogleTranslator.translate(text, self.target_lang) + return text + except: + # Translators: Error message for page processing failure + return _("Error processing page.") + + def update_view(self): + rel_page = self.current_page - self.start_page + 1 + # Translators: Status label format + self.lbl_status.SetLabel(_("Page {current} of {total}").format(current=rel_page, total=self.range_count)) + if self.current_page in self.page_cache: + self.txt_content.SetValue(self.page_cache[self.current_page]) + self.txt_content.SetInsertionPoint(0) + self.txt_content.SetFocus() + else: + # Translators: Status when page is loading + self.txt_content.SetValue(_("Processing in background...")) + self.txt_content.SetInsertionPoint(0) + self.txt_content.SetFocus() + self.btn_prev.Enable(self.current_page > self.start_page) + self.btn_next.Enable(self.current_page < self.end_page) + + def load_page(self, page_num): + if page_num < self.start_page or page_num > self.end_page: return + self.current_page = page_num + self.cmb_pages.SetSelection(page_num - self.start_page) + # Translators: Spoken message when switching pages + ui.message(_("Page {num}").format(num=page_num + 1)) + self.update_view() + + def on_prev(self, event): + if self.current_page > self.start_page: self.load_page(self.current_page - 1) + + def on_next(self, event): + if self.current_page < self.end_page: self.load_page(self.current_page + 1) + + def on_page_select(self, event): + self.load_page(self.start_page + self.cmb_pages.GetSelection()) + + def on_view(self, event): + full_html = [] + for i in range(self.start_page, self.end_page + 1): + if i in self.page_cache: + page_text = self.page_cache[i] + page_content = markdown_to_html(page_text, full_page=False) + # Translators: Heading for each page in the formatted content view. 
+ page_label = _("Page {num}").format(num=i+1) + full_html.append(f"

{page_label}

") + full_html.append(page_content) + full_html.append("
") + + if not full_html: + text = self.txt_content.GetValue() + if not text: return + full_html.append(markdown_to_html(text, full_page=False)) + + combined_html = "".join(full_html) + try: + # Translators: Title of the formatted result window + ui.browseableMessage(combined_html, _("Formatted Content"), isHtml=True) + except Exception as e: + show_error_dialog(str(e)) + + def on_gemini_scan(self, event): + if not config.conf["VisionAssistant"]["api_key"]: + wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR) + return + menu = wx.Menu() + # Translators: Menu option for current page + item_curr = menu.Append(wx.ID_ANY, _("Current Page")) + # Translators: Menu option for all pages + item_all = menu.Append(wx.ID_ANY, _("All Pages (In Range)")) + self.Bind(wx.EVT_MENU, self.do_rescan_current, item_curr) + self.Bind(wx.EVT_MENU, self.do_rescan_all, item_all) + self.PopupMenu(menu) + menu.Destroy() + + def do_rescan_current(self, event): + if self.current_page in self.page_cache: del self.page_cache[self.current_page] + self.update_view() + # Translators: Message during manual scan + ui.message(_("Scanning with Gemini...")) + threading.Thread(target=self.gemini_scan_single_thread, args=(self.current_page,), daemon=True).start() + + def gemini_scan_single_thread(self, page_num): + try: + file_path, page_idx = self.v_doc.get_page_info(page_num) + doc = fitz.open(file_path) + page = doc.load_page(page_idx) + pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) + text = GeminiHandler.ocr_page(pix.tobytes("jpg")) + doc.close() + if self.do_translate: text = GeminiHandler.translate(text, self.target_lang) + self.page_cache[page_num] = text + if self.current_page == page_num: + wx.CallAfter(self.update_view) + # Translators: Message when scan is complete + wx.CallAfter(ui.message, _("Scan complete")) + except: pass + + def do_rescan_all(self, event): + threading.Thread(target=self.gemini_scan_batch_thread, daemon=True).start() + + def 
gemini_scan_batch_thread(self): + # Translators: Message when batch scan starts + msg = _("Batch Processing Started") + if _vision_assistant_instance: _vision_assistant_instance.current_status = msg + wx.CallAfter(ui.message, msg) + + for i in range(self.start_page, self.end_page + 1): + if i in self.page_cache: del self.page_cache[i] + wx.CallAfter(self.update_view) + + upload_path = self.v_doc.create_merged_pdf(self.start_page, self.end_page) + if not upload_path: + # Translators: Error message if PDF creation fails + wx.CallAfter(self.lbl_status.SetLabel, _("Error creating temporary PDF.")) + return + + try: + count = (self.end_page - self.start_page) + 1 + results = GeminiHandler.upload_and_process_batch(upload_path, "application/pdf", count) + + if not results or (len(results) == 1 and str(results[0]).startswith("ERROR:")): + err_msg = results[0][6:] if results else _("Unknown error") + # Translators: Message reported when batch scan fails + error_text = _("Scan failed: {err}").format(err=err_msg) + for i in range(self.start_page, self.end_page + 1): + self.page_cache[i] = error_text + + wx.CallAfter(self.update_view) + wx.CallAfter(ui.message, error_text) + return + + for i, text_part in enumerate(results): + if i >= count: break + idx = self.start_page + i + clean = text_part.strip() + if self.do_translate: + clean = GeminiHandler.translate(clean, self.target_lang) + self.page_cache[idx] = clean + + wx.CallAfter(self.update_view) + # Translators: Message when batch scan is complete + final_msg = _("Batch Scan Complete") + if _vision_assistant_instance: + # Translators: Initial status when the add-on is doing nothing + _vision_assistant_instance.current_status = _("Idle") + wx.CallAfter(ui.message, final_msg) + finally: + if upload_path and os.path.exists(upload_path): + try: os.remove(upload_path) + except: pass + + def on_tts(self, event): + if not config.conf["VisionAssistant"]["api_key"]: + wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), 
wx.ICON_ERROR) + return + menu = wx.Menu() + # Translators: Menu option for TTS current page + item_curr = menu.Append(wx.ID_ANY, _("Generate for Current Page")) + # Translators: Menu option for TTS all pages + item_all = menu.Append(wx.ID_ANY, _("Generate for All Pages (In Range)")) + self.Bind(wx.EVT_MENU, self.do_tts_current, item_curr) + self.Bind(wx.EVT_MENU, self.do_tts_all, item_all) + self.PopupMenu(menu) + menu.Destroy() + + def do_tts_current(self, event): + text = self.txt_content.GetValue().strip() + if not text: + # Translators: Error message when text field is empty + wx.MessageBox(_("No text to read."), "Error") + return + self._save_tts(text) + + def do_tts_all(self, event): + threading.Thread(target=self.tts_batch_thread, daemon=True).start() + + def tts_batch_thread(self): + full_text = [] + # Translators: Message while gathering text + wx.CallAfter(ui.message, _("Gathering text for audio...")) + for i in range(self.start_page, self.end_page + 1): + while i not in self.page_cache: time.sleep(0.1) + full_text.append(self.page_cache[i]) + final_text = "\n".join(full_text).strip() + if not final_text: return + wx.CallAfter(self._save_tts, final_text) + + def _save_tts(self, text): + # Translators: File dialog title for saving audio + path = get_file_path(_("Save Audio"), "MP3 Files (*.mp3)|*.mp3|WAV Files (*.wav)|*.wav", mode="save") + if path: + voice = config.conf["VisionAssistant"]["tts_voice"] + threading.Thread(target=self.tts_worker, args=(text, voice, path), daemon=True).start() + + def tts_worker(self, text, voice, path): + # Translators: Message while generating audio + msg = _("Generating Audio...") + if _vision_assistant_instance: _vision_assistant_instance.current_status = msg + wx.CallAfter(ui.message, msg) + try: + audio_b64 = GeminiHandler.generate_speech(text, voice) + if not audio_b64 or len(audio_b64) < 100: + wx.CallAfter(wx.MessageBox, f"TTS Error: {audio_b64}", "Error", wx.ICON_ERROR) + return + missing_padding = len(audio_b64) % 
4 + if missing_padding: audio_b64 += '=' * (4 - missing_padding) + pcm_data = base64.b64decode(audio_b64) + + if path.lower().endswith(".mp3"): + import subprocess + lame_path = os.path.join(os.path.dirname(__file__), "lib", "lame.exe") + if not os.path.exists(lame_path): + wx.CallAfter(wx.MessageBox, _("lame.exe not found in lib folder."), "Error", wx.ICON_ERROR) + return + + process = subprocess.Popen( + [lame_path, "-r", "-s", "24", "-m", "m", "-b", "128", "--bitwidth", "16", "--resample", "24", "-q", "0", "-", path], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + creationflags=getattr(subprocess, 'CREATE_NO_WINDOW', 0) + ) + + process.communicate(input=pcm_data) + else: + with wave.open(path, "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(24000) + wf.writeframes(pcm_data) + + # Translators: Spoken message when audio is saved + res_msg = _("Audio Saved") + if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle") + wx.CallAfter(ui.message, res_msg) + wx.CallAfter(wx.MessageBox, _("Audio file generated and saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION) + except Exception as e: + if _vision_assistant_instance: _vision_assistant_instance.current_status = _("Idle") + wx.CallAfter(wx.MessageBox, f"TTS Error: {e}", "Error", wx.ICON_ERROR) + + def on_ask(self, event): + if not config.conf["VisionAssistant"]["api_key"]: + wx.MessageBox(_("Please configure Gemini API Key."), _("Error"), wx.ICON_ERROR) + return + if ChatDialog.instance: + ChatDialog.instance.Raise() + ChatDialog.instance.SetFocus() + return + file_path, _ = self.v_doc.get_page_info(self.current_page) + if file_path: + dlg = ChatDialog(self, file_path) + dlg.Show() + + def on_save_all(self, event): + # Translators: File dialog filter for saving text/html + wildcard = "Text File (*.txt)|*.txt|HTML File (*.html)|*.html" + # Translators: File dialog title for saving + path = get_file_path(_("Save"), wildcard, 
mode="save") + if path: + is_html = path.lower().endswith('.html') + self.btn_save.Disable() + threading.Thread(target=self.save_thread, args=(path, is_html), daemon=True).start() + + def save_thread(self, path, is_html): + full_content = [] + try: + for i in range(self.start_page, self.end_page + 1): + # Translators: Message showing save progress + wx.CallAfter(self.lbl_status.SetLabel, _("Saving Page {num}...").format(num=i+1)) + while i not in self.page_cache: time.sleep(0.1) + txt = self.page_cache[i] + if is_html: + h = markdown_to_html(txt) + if "" in h: h = h.split("")[1].split("")[0] + full_content.append(f"

Page {i+1}

{h}") + else: + full_content.append(f"--- Page {i+1} ---\n{txt}\n") + with open(path, "w", encoding="utf-8") as f: + if is_html: f.write(f"{''.join(full_content)}") + else: f.write("\n".join(full_content)) + # Translators: Status label when save is complete + wx.CallAfter(self.lbl_status.SetLabel, _("Saved")) + # Translators: Message box content for successful save + wx.CallAfter(wx.MessageBox, _("File saved successfully."), _("Success"), wx.OK | wx.ICON_INFORMATION) + except Exception as e: + wx.CallAfter(wx.MessageBox, f"Save Error: {e}", "Error", wx.ICON_ERROR) + finally: wx.CallAfter(self.btn_save.Enable) diff --git a/addon/globalPlugins/visionAssistant/markdown_utils.py b/addon/globalPlugins/visionAssistant/markdown_utils.py new file mode 100644 index 0000000..ba438a6 --- /dev/null +++ b/addon/globalPlugins/visionAssistant/markdown_utils.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- + +import re + + +def clean_markdown(text): + if not text: + return "" + text = re.sub(r'\*\*|__|[*_]', '', text) + text = re.sub(r'^#+\s*', '', text, flags=re.MULTILINE) + text = re.sub(r'```', '', text) + text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) + text = re.sub(r'^\s*-\s+', '', text, flags=re.MULTILINE) + return text.strip() + + +def markdown_to_html(text, full_page=False): + if not text: + return "" + + html = text.replace("&", "&").replace("<", "<").replace(">", ">") + html = re.sub(r'\*\*(.*?)\*\*', r'\1', html) + html = re.sub(r'__(.*?)__', r'\1', html) + html = re.sub(r'^### (.*)', r'

\1

', html, flags=re.M) + html = re.sub(r'^## (.*)', r'

\1

', html, flags=re.M) + html = re.sub(r'^# (.*)', r'

\1

', html, flags=re.M) + + lines = html.split('\n') + in_table = False + new_lines = [] + table_style = 'border="1" style="border-collapse: collapse; width: 100%; margin-bottom: 10px;"' + td_style = 'style="padding: 5px; border: 1px solid #ccc;"' + + for line in lines: + stripped = line.strip() + if stripped.startswith('|') or (stripped.count('|') > 1 and len(stripped) > 5): + if not in_table: + new_lines.append(f'') + in_table = True + if '---' in stripped: + continue + row_content = stripped.strip('|').split('|') + cells = "".join([f'' for c in row_content]) + new_lines.append(f'{cells}') + else: + if in_table: + new_lines.append('
{c.strip()}
') + in_table = False + if stripped: + new_lines.append(line + "
") + else: + new_lines.append("
") + if in_table: + new_lines.append('') + html_body = "".join(new_lines) + + if not full_page: + return html_body + return f"""{html_body}""" diff --git a/addon/globalPlugins/visionAssistant/prompt_helpers.py b/addon/globalPlugins/visionAssistant/prompt_helpers.py new file mode 100644 index 0000000..64d8c01 --- /dev/null +++ b/addon/globalPlugins/visionAssistant/prompt_helpers.py @@ -0,0 +1,283 @@ +# -*- coding: utf-8 -*- + +import json +import logging + +import addonHandler +import config + +from .constants import DEFAULT_SYSTEM_PROMPTS, LEGACY_REFINER_TOKENS, REFINE_PROMPT_KEYS + +addonHandler.initTranslation() +log = logging.getLogger(__name__) + + +def get_builtin_default_prompts(): + builtins = [] + for item in DEFAULT_SYSTEM_PROMPTS: + p = str(item["prompt"]).strip() + builtins.append({ + "key": item["key"], + "section": item["section"], + "label": item["label"], + "display_label": f"{item['section']} - {item['label']}", + "internal": bool(item.get("internal")), + "prompt": p, + "default": p, + }) + return builtins + + +def get_builtin_default_prompt_map(): + return {item["key"]: item for item in get_builtin_default_prompts()} + + +def _normalize_custom_prompt_items(items): + normalized = [] + if not isinstance(items, list): + return normalized + + for item in items: + if not isinstance(item, dict): + continue + name = item.get("name") + content = item.get("content") + if not isinstance(name, str) or not isinstance(content, str): + continue + name = name.strip() + content = content.strip() + if name and content: + normalized.append({"name": name, "content": content}) + return normalized + + +def parse_custom_prompts_legacy(raw_value): + items = [] + if not raw_value: + return items + + normalized = raw_value.replace("\r\n", "\n").replace("\r", "\n") + for line in normalized.split("\n"): + for segment in line.split("|"): + segment = segment.strip() + if not segment or ":" not in segment: + continue + name, content = segment.split(":", 1) + name = name.strip() 
def parse_custom_prompts_v2(raw_value):
    """Decode the JSON-based (v2) custom-prompt store.

    Returns a normalized list of {"name", "content"} dicts on success, or
    None when the value is empty/absent or not valid JSON — callers then
    fall back to the legacy pipe-separated format.
    """
    if not (isinstance(raw_value, str) and raw_value.strip()):
        return None
    try:
        decoded = json.loads(raw_value)
    except Exception as exc:
        log.warning(f"Invalid custom_prompts_v2 config, falling back to legacy format: {exc}")
        return None
    return _normalize_custom_prompt_items(decoded)


def serialize_custom_prompts_v2(items):
    """Encode custom prompts as the canonical v2 JSON string ("" when empty)."""
    cleaned = _normalize_custom_prompt_items(items)
    return json.dumps(cleaned, ensure_ascii=False) if cleaned else ""


def load_configured_custom_prompts():
    """Read custom prompts from config, preferring the v2 store over legacy."""
    try:
        raw_v2 = config.conf["VisionAssistant"]["custom_prompts_v2"]
    except Exception:
        raw_v2 = ""
    parsed = parse_custom_prompts_v2(raw_v2)
    if parsed is not None:
        return parsed
    # v2 missing or invalid: fall back to the legacy "name:content|..." format.
    return parse_custom_prompts_legacy(config.conf["VisionAssistant"]["custom_prompts"])
+ legacy_vision = mutable.pop("vision_image_analysis", None) + if legacy_vision is not None: + changed = True + if isinstance(legacy_vision, str) and legacy_vision.strip(): + legacy_text = legacy_vision.strip() + nav_value = mutable.get("vision_navigator_object") + if not isinstance(nav_value, str) or not nav_value.strip(): + mutable["vision_navigator_object"] = legacy_text + changed = True + full_value = mutable.get("vision_fullscreen") + if not isinstance(full_value, str) or not full_value.strip(): + mutable["vision_fullscreen"] = legacy_text + changed = True + + valid_keys = set(get_builtin_default_prompt_map().keys()) + sanitized = {} + for key, value in mutable.items(): + if key not in valid_keys or not isinstance(value, str): + changed = True + continue + prompt_text = value.strip() + if not prompt_text: + changed = True + continue + if key in LEGACY_REFINER_TOKENS and prompt_text == LEGACY_REFINER_TOKENS[key]: + # Drop old token-only overrides and fallback to current built-ins. + changed = True + continue + if prompt_text != value: + changed = True + sanitized[key] = prompt_text + return sanitized, changed + + +def migrate_prompt_config_if_needed(): + changed = False + + try: + raw_v2 = config.conf["VisionAssistant"]["custom_prompts_v2"] + except Exception: + raw_v2 = "" + raw_legacy = config.conf["VisionAssistant"]["custom_prompts"] + + v2_items = parse_custom_prompts_v2(raw_v2) + if v2_items is None: + target_items = parse_custom_prompts_legacy(raw_legacy) + else: + target_items = v2_items + + serialized_v2 = serialize_custom_prompts_v2(target_items) + if serialized_v2 != (raw_v2 or ""): + config.conf["VisionAssistant"]["custom_prompts_v2"] = serialized_v2 + changed = True + + # Legacy mirror is disabled. Clear old storage to prevent stale fallback data. 
def load_default_prompt_overrides():
    """Load user overrides for built-in refine prompts from config.

    Returns a dict mapping prompt key -> override text; an empty dict when
    the setting is absent, empty, or not valid JSON.
    """
    try:
        raw = config.conf["VisionAssistant"]["default_refine_prompts"]
    except Exception:
        raw = ""
    if not isinstance(raw, str) or not raw.strip():
        return {}

    try:
        data = json.loads(raw)
    except Exception as e:
        log.warning(f"Invalid default_refine_prompts config, using built-ins: {e}")
        return {}

    # Bug fix: unpack into a named variable instead of `_`, which shadowed
    # the gettext translation alias inside this function.
    overrides, _changed = _sanitize_default_prompt_overrides(data)
    return overrides


def get_configured_default_prompt_map():
    """Return the built-in prompt map with valid user overrides applied."""
    prompt_map = get_builtin_default_prompt_map()
    for key, override in load_default_prompt_overrides().items():
        if key not in prompt_map:
            continue
        # Ignore stale token-only overrides left behind by older releases.
        if key in LEGACY_REFINER_TOKENS and override == LEGACY_REFINER_TOKENS[key]:
            continue
        prompt_map[key]["prompt"] = override
    return prompt_map


def get_configured_default_prompts():
    """Return the non-internal configured prompts, sorted by display label."""
    prompt_map = get_configured_default_prompt_map()
    items = []
    for item in DEFAULT_SYSTEM_PROMPTS:
        if item.get("internal"):
            continue
        key = item["key"]
        if key in prompt_map:
            items.append(dict(prompt_map[key]))
    # Bug fix: the sort key no longer shadows the loop variable `item`.
    items.sort(key=lambda entry: entry.get("display_label", "").casefold())
    return items


def get_prompt_text(prompt_key):
    """Return the configured prompt text for *prompt_key*, or "" if unknown."""
    item = get_configured_default_prompt_map().get(prompt_key)
    return item["prompt"] if item else ""
def serialize_default_prompt_overrides(items):
    """Serialize prompt items into the JSON override map stored in config.

    Only prompts whose text differs from the built-in default are kept;
    returns "" when there is nothing to override.
    """
    if not items:
        return ""

    defaults = {entry["key"]: entry["prompt"] for entry in get_builtin_default_prompts()}
    overrides = {}
    for entry in items:
        key = entry.get("key")
        text = entry.get("prompt", "")
        if key not in defaults or not isinstance(text, str):
            continue
        text = text.strip()
        if text and text != defaults[key]:
            overrides[key] = text

    return json.dumps(overrides, ensure_ascii=False) if overrides else ""


def get_refine_menu_options():
    """Build (label, prompt) pairs for the Refine menu: built-ins, then customs."""
    prompt_map = get_configured_default_prompt_map()
    options = [
        (prompt_map[key]["label"], prompt_map[key]["prompt"])
        for key in REFINE_PROMPT_KEYS
        if key in prompt_map
    ]
    for custom in load_configured_custom_prompts():
        # Translators: Prefix for custom prompts in the Refine menu
        options.append((_("Custom: ") + custom["name"], custom["content"]))
    return options


def apply_prompt_template(template, replacements):
    """Substitute {key} placeholders in *template* and return the stripped result.

    *replacements* is an iterable of (key, value) pairs; values are coerced
    with str(). A non-string template yields "".
    """
    if not isinstance(template, str):
        return ""
    result = template
    for key, value in replacements:
        result = result.replace(f"{{{key}}}", str(value))
    return result.strip()
def get_mime_type(path):
    """Map a file extension to the MIME type used for Gemini uploads."""
    # NOTE(review): .tif/.tiff deliberately report image/jpeg here
    # (original behavior preserved) — confirm against the upload pipeline.
    mime_by_ext = {
        '.pdf': 'application/pdf',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        '.tif': 'image/jpeg',
        '.tiff': 'image/jpeg',
        '.mp3': 'audio/mpeg',
        '.wav': 'audio/wav',
        '.ogg': 'audio/ogg',
        '.mp4': 'video/mp4',
    }
    ext = os.path.splitext(path)[1].lower()
    return mime_by_ext.get(ext, 'application/octet-stream')


def show_error_dialog(message):
    """Show *message* in a modal error box, marshalled to the GUI thread."""
    # Translators: Title of the error dialog box
    title = _("{name} Error").format(name=ADDON_NAME)
    wx.CallAfter(gui.messageBox, message, title, wx.OK | wx.ICON_ERROR)


def send_ctrl_v():
    """Synthesize a Ctrl+V keystroke via user32.keybd_event; True on success."""
    VK_CONTROL = 0x11
    VK_V = 0x56
    KEYEVENTF_KEYUP = 0x0002
    try:
        user32 = ctypes.windll.user32
        user32.keybd_event(VK_CONTROL, 0, 0, 0)
        user32.keybd_event(VK_V, 0, 0, 0)
        user32.keybd_event(VK_V, 0, KEYEVENTF_KEYUP, 0)
        user32.keybd_event(VK_CONTROL, 0, KEYEVENTF_KEYUP, 0)
    except Exception:
        log.warning("Failed to send Ctrl+V", exc_info=True)
        return False
    return True


def get_proxy_opener():
    """Return a urllib opener honoring the configured proxy, if any.

    The proxy is only applied when the URL looks like a local address or a
    host:port style address (heuristic kept from the original code).
    """
    proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
    if proxy_url and (
        "127.0.0.1" in proxy_url
        or "localhost" in proxy_url
        or ":" in proxy_url.split("/")[-1]
    ):
        handler = request.ProxyHandler({'http': proxy_url, 'https': proxy_url})
        return request.build_opener(handler)
    return request.build_opener()
def get_instagram_download_link(insta_url):
    """Resolve an Instagram post/story URL to a direct media URL via anon-viewer.com.

    Story URLs are queried by username with the ``allstories`` method; other
    URLs are passed through verbatim. Returns the media URL, or None on failure.
    """
    jar = cookiejar.CookieJar()
    opener = request.build_opener(request.HTTPCookieProcessor(jar))
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'https://anon-viewer.com/',
        'Accept': '*/*',
    }
    opener.addheaders = list(headers.items())
    try:
        # Prime the session cookies first.
        opener.open("https://anon-viewer.com/", timeout=30)

        if "/stories/" in insta_url:
            parts = insta_url.split("/")
            username = parts[parts.index("stories") + 1]
            api_url = f"https://anon-viewer.com/content.php?url={username}&method=allstories"
        else:
            encoded_url = quote(insta_url, safe='')
            api_url = f"https://anon-viewer.com/content.php?url={encoded_url}"

        response = opener.open(api_url, timeout=60)
        if response.getcode() == 200:
            data = json.loads(response.read().decode('utf-8'))
            html_text = data.get('html', '')
            match = re.search(r'href="([^"]+anon-viewer\.com/media\.php\?media=[^"]+)"', html_text)
            if match:
                # FIX: the API returns HTML-escaped links; the original read
                # ``.replace('&', '&')`` (a no-op — almost certainly a
                # transmission-corrupted ``.replace('&amp;', '&')``).
                return match.group(1).replace('&amp;', '&')
            # NOTE(review): the original continued with a fallback that
            # scanned the HTML for a <source src="..."> tag; that region of
            # the file was corrupted in transit and could not be recovered.
            # TODO: restore the fallback from version control.
    except Exception:
        log.warning("Failed to fetch Instagram download link", exc_info=True)
    return None


# NOTE(review): a Google-Translate fallback helper originally followed here;
# only its tail survived the corrupted transfer (joins ``x[0]`` items of
# ``data[0]`` and logs "Google Translate Failed" on error). It is not
# reproduced — restore it from version control.


class GeminiHandler:
    """Static helper around the Gemini REST API.

    All calls rotate through the comma/newline-separated API keys from the
    add-on configuration. Files uploaded to the File API are pinned to the
    key that uploaded them, because a file URI is only visible to its owner.
    """

    _working_key_idx = 0   # index of the key that last succeeded
    _file_uri_keys = {}    # uploaded file URI -> API key that owns it
    _max_retries = 5       # attempts per key for transient failures

    @staticmethod
    def _get_api_keys():
        """Return configured API keys (comma- or newline-separated, trimmed)."""
        raw = config.conf["VisionAssistant"]["api_key"]
        normalized = raw.replace('\r\n', ',').replace('\n', ',')
        return [key.strip() for key in normalized.split(',') if key.strip()]

    @staticmethod
    def _get_opener():
        """Return the module-wide (optionally proxied) urllib opener."""
        return get_proxy_opener()

    @staticmethod
    def _handle_error(e):
        """Map an HTTPError to a user message or a retry sentinel string."""
        if hasattr(e, 'code'):
            if e.code == 400:
                # Translators: Error message for Bad Request (400)
                return _("Error 400: Bad Request (Check API Key)")
            if e.code == 403:
                # Translators: Error message for Forbidden (403)
                return _("Error 403: Forbidden (Check Region)")
            if e.code == 429:
                return "QUOTA_EXCEEDED"
            if e.code >= 500:
                return "SERVER_ERROR"
        return str(e)

    @staticmethod
    def _call_with_retry(func_logic, key, *args):
        """Run *func_logic* up to ``_max_retries`` times with one key.

        Quota (429) and 5xx responses plus plain URL errors are retried with
        a linear backoff; any other HTTPError propagates immediately.
        """
        last_exc = None
        for attempt in range(GeminiHandler._max_retries):
            try:
                return func_logic(key, *args)
            except error.HTTPError as e:
                if GeminiHandler._handle_error(e) not in ("QUOTA_EXCEEDED", "SERVER_ERROR"):
                    raise
                last_exc = e
            except error.URLError as e:
                last_exc = e
            if attempt < GeminiHandler._max_retries - 1:
                time.sleep(0.5 * (attempt + 1))
        raise last_exc

    @staticmethod
    def _register_file_uri(uri, key):
        """Remember which key uploaded *uri*; evict oldest entries past 200."""
        if uri and key:
            GeminiHandler._file_uri_keys[uri] = key
            while len(GeminiHandler._file_uri_keys) > 200:
                GeminiHandler._file_uri_keys.pop(next(iter(GeminiHandler._file_uri_keys)))

    @staticmethod
    def _get_registered_key(uri):
        """Return the key that uploaded *uri*, or None when unknown."""
        return GeminiHandler._file_uri_keys.get(uri) if uri else None

    @staticmethod
    def _call_with_key(func_logic, key, *args):
        """Run *func_logic* with one fixed key; map failures to 'ERROR:...'."""
        try:
            return GeminiHandler._call_with_retry(func_logic, key, *args)
        except error.HTTPError as e:
            err_msg = GeminiHandler._handle_error(e)
            if err_msg == "QUOTA_EXCEEDED":
                # Translators: Message of a dialog which may pop up while performing an AI call
                err_msg = _("Error 429: Quota Exceeded (Try later)")
            elif err_msg == "SERVER_ERROR":
                # Translators: Message of a dialog which may pop up while performing an AI call
                err_msg = _("Server Error {code}: {reason}").format(code=e.code, reason=e.reason)
            return "ERROR:" + err_msg
        except Exception as e:
            return "ERROR:" + str(e)

    @staticmethod
    def _call_with_rotation(func_logic, *args):
        """Try *func_logic* with each configured key, starting from the last
        one that worked; return its result or an 'ERROR:...' string."""
        keys = GeminiHandler._get_api_keys()
        if not keys:
            # Translators: Error when no API keys are found in settings
            return "ERROR:" + _("No API Keys configured.")

        num_keys = len(keys)
        for i in range(num_keys):
            idx = (GeminiHandler._working_key_idx + i) % num_keys
            try:
                result = GeminiHandler._call_with_retry(func_logic, keys[idx], *args)
                GeminiHandler._working_key_idx = idx  # remember the good key
                return result
            except error.HTTPError as e:
                err_msg = GeminiHandler._handle_error(e)
                if err_msg in ("QUOTA_EXCEEDED", "SERVER_ERROR"):
                    if i < num_keys - 1:
                        continue
                    # Translators: Error when all available API keys fail
                    return "ERROR:" + _("All API Keys failed (Quota/Server).")
                return "ERROR:" + err_msg
            except Exception as e:
                return "ERROR:" + str(e)
        return "ERROR:" + _("Unknown error occurred.")

    @staticmethod
    def translate(text, target_lang):
        """Translate *text* to *target_lang* with the configured Gemini model."""
        def _logic(key, txt, lang):
            model = config.conf["VisionAssistant"]["model_name"]
            url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
            quick_template = get_prompt_text("translate_quick") or "Translate to {target_lang}. Output ONLY translation."
            quick_prompt = apply_prompt_template(quick_template, [("target_lang", lang)])
            payload = {"contents": [{"parts": [{"text": quick_prompt}, {"text": txt}]}]}
            req = request.Request(
                url,
                data=json.dumps(payload).encode('utf-8'),
                headers={"Content-Type": "application/json", "x-goog-api-key": key},
            )
            with GeminiHandler._get_opener().open(req, timeout=90) as r:
                return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
        return GeminiHandler._call_with_rotation(_logic, text, target_lang)

    @staticmethod
    def ocr_page(image_bytes):
        """OCR one JPEG page (raw bytes) and return the extracted text."""
        def _logic(key, img_data):
            model = config.conf["VisionAssistant"]["model_name"]
            url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
            ocr_image_prompt = get_prompt_text("ocr_image_extract")
            payload = {"contents": [{"parts": [
                {"inline_data": {"mime_type": "image/jpeg", "data": base64.b64encode(img_data).decode('utf-8')}},
                {"text": ocr_image_prompt},
            ]}]}
            req = request.Request(
                url,
                data=json.dumps(payload).encode('utf-8'),
                headers={"Content-Type": "application/json", "x-goog-api-key": key},
            )
            with GeminiHandler._get_opener().open(req, timeout=120) as r:
                return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']
        return GeminiHandler._call_with_rotation(_logic, image_bytes)

    @staticmethod
    def upload_and_process_batch(file_path, mime_type, page_count):
        """Upload *file_path* via the File API and OCR it in a single request.

        Returns a list of page texts (split on the ``[[[PAGE_SEP]]]`` marker)
        or a single-element ``["ERROR:..."]`` list. *page_count* is currently
        unused but kept for interface compatibility with callers.
        """
        keys = GeminiHandler._get_api_keys()
        if not keys:
            # Translators: Error message for missing API Keys
            return ["ERROR:" + _("No API Keys.")]
        model = config.conf["VisionAssistant"]["model_name"]

        opener = GeminiHandler._get_opener()
        proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
        # The proxy setting doubles as a mirror base URL when configured.
        base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"

        for i, key in enumerate(keys):
            try:
                f_size = os.path.getsize(file_path)
                # Step 1: start a resumable upload session.
                init_url = f"{base_url}/upload/v1beta/files"
                headers = {
                    "X-Goog-Upload-Protocol": "resumable",
                    "X-Goog-Upload-Command": "start",
                    "X-Goog-Upload-Header-Content-Length": str(f_size),
                    "X-Goog-Upload-Header-Content-Type": mime_type,
                    "Content-Type": "application/json",
                    "x-goog-api-key": key,
                }
                req = request.Request(init_url, data=json.dumps({"file": {"display_name": "batch"}}).encode(), headers=headers, method="POST")
                with opener.open(req, timeout=120) as r:
                    upload_url = r.headers.get("x-goog-upload-url")

                # Step 2: upload the bytes and finalize in one shot.
                with open(file_path, 'rb') as f:
                    f_data = f.read()
                req_up = request.Request(
                    upload_url,
                    data=f_data,
                    headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"},
                    method="POST",
                )
                with opener.open(req_up, timeout=180) as r:
                    res = json.loads(r.read().decode())
                    uri, name = res['file']['uri'], res['file']['name']

                # Step 3: poll until the file is processed (up to ~60s).
                active = False
                for attempt in range(30):
                    req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key})
                    with opener.open(req_check, timeout=30) as r:
                        state = json.loads(r.read().decode()).get('state')
                    if state == "ACTIVE":
                        active = True
                        break
                    if state == "FAILED":
                        break
                    time.sleep(2)

                if not active:
                    if i < len(keys) - 1:
                        continue  # try the upload again with the next key
                    return ["ERROR:" + _("Upload failed.")]

                GeminiHandler._register_file_uri(uri, key)

                # Step 4: run the OCR prompt against the uploaded file.
                url = f"{base_url}/v1beta/models/{model}:generateContent"
                prompt = get_prompt_text("ocr_document_extract")
                contents = [{"parts": [{"file_data": {"mime_type": mime_type, "file_uri": uri}}, {"text": prompt}]}]
                req_gen = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key})
                with opener.open(req_gen, timeout=180) as r:
                    res = json.loads(r.read().decode())
                    text = res['candidates'][0]['content']['parts'][0]['text']
                return text.split('[[[PAGE_SEP]]]')

            except error.HTTPError as e:
                err_code = GeminiHandler._handle_error(e)
                if err_code in ("QUOTA_EXCEEDED", "SERVER_ERROR") and i < len(keys) - 1:
                    continue
                if err_code == "QUOTA_EXCEEDED":
                    # Translators: Message of a dialog which may pop up while performing an AI call
                    err_msg = _("Error 429: Quota Exceeded (Try later)")
                elif err_code == "SERVER_ERROR":
                    # Translators: Message of a dialog which may pop up while performing an AI call
                    err_msg = _("Server Error {code}: {reason}").format(code=e.code, reason=e.reason)
                else:
                    err_msg = err_code
                return ["ERROR:" + err_msg]
            except Exception as e:
                return ["ERROR:" + str(e)]
        return ["ERROR:" + _("All keys failed.")]

    @staticmethod
    def chat(history, new_msg, file_uri, mime_type):
        """Send one chat turn; a *file_uri* forces the key that uploaded it."""
        def _logic(key, hist, msg, uri, mime):
            model = config.conf["VisionAssistant"]["model_name"]
            proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
            base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
            url = f"{base_url}/v1beta/models/{model}:generateContent"

            user_parts = [{"file_data": {"mime_type": mime, "file_uri": uri}}] if uri else []
            user_parts.append({"text": msg})
            contents = list(hist)
            contents.append({"role": "user", "parts": user_parts})

            req = request.Request(url, data=json.dumps({"contents": contents}).encode(), headers={"Content-Type": "application/json", "x-goog-api-key": key})
            with GeminiHandler._get_opener().open(req, timeout=120) as r:
                return json.loads(r.read().decode())['candidates'][0]['content']['parts'][0]['text']

        # Files are only visible to the key that uploaded them.
        forced_key = GeminiHandler._get_registered_key(file_uri) if file_uri else None
        if forced_key:
            return GeminiHandler._call_with_key(_logic, forced_key, history, new_msg, file_uri, mime_type)
        return GeminiHandler._call_with_rotation(_logic, history, new_msg, file_uri, mime_type)

    @staticmethod
    def upload_for_chat(file_path, mime_type):
        """Upload *file_path* for chat use; return its file URI or None."""
        keys = GeminiHandler._get_api_keys()
        if not keys:
            return None
        opener = GeminiHandler._get_opener()
        proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
        base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"

        for key in keys:
            try:
                f_size = os.path.getsize(file_path)
                init_url = f"{base_url}/upload/v1beta/files"
                headers = {
                    "X-Goog-Upload-Protocol": "resumable",
                    "X-Goog-Upload-Command": "start",
                    "X-Goog-Upload-Header-Content-Length": str(f_size),
                    "X-Goog-Upload-Header-Content-Type": mime_type,
                    "Content-Type": "application/json",
                    "x-goog-api-key": key,
                }
                req = request.Request(init_url, data=json.dumps({"file": {"display_name": os.path.basename(file_path)}}).encode(), headers=headers, method="POST")
                with opener.open(req, timeout=120) as r:
                    upload_url = r.headers.get("x-goog-upload-url")
                with open(file_path, 'rb') as f:
                    f_data = f.read()
                req_up = request.Request(
                    upload_url,
                    data=f_data,
                    headers={"Content-Length": str(f_size), "X-Goog-Upload-Offset": "0", "X-Goog-Upload-Command": "upload, finalize"},
                    method="POST",
                )
                with opener.open(req_up, timeout=180) as r:
                    res = json.loads(r.read().decode())
                    uri, name = res['file']['uri'], res['file']['name']
                # Poll for the ACTIVE state (up to ~60s); no FAILED short-circuit here.
                for attempt in range(30):
                    req_check = request.Request(f"{base_url}/v1beta/{name}", headers={"x-goog-api-key": key})
                    with opener.open(req_check, timeout=30) as r:
                        state = json.loads(r.read().decode()).get('state')
                    if state == "ACTIVE":
                        GeminiHandler._register_file_uri(uri, key)
                        return uri
                    time.sleep(2)
                return None
            except Exception:
                log.debug("Failed to upload file for chat with current key", exc_info=True)
                continue
        return None

    @staticmethod
    def generate_speech(text, voice_name):
        """Return base64-encoded audio for *text* using a prebuilt TTS voice."""
        def _logic(key, txt, voice):
            # TTS uses a dedicated preview model matching the main model's tier.
            main_model = config.conf["VisionAssistant"]["model_name"]
            if "pro" in main_model.lower():
                tts_model = "gemini-2.5-pro-preview-tts"
            else:
                tts_model = "gemini-2.5-flash-preview-tts"

            proxy_url = config.conf["VisionAssistant"]["proxy_url"].strip()
            base_url = proxy_url.rstrip('/') if proxy_url else "https://generativelanguage.googleapis.com"
            url = f"{base_url}/v1beta/models/{tts_model}:generateContent"

            payload = {
                "contents": [{"parts": [{"text": txt}]}],
                "generationConfig": {
                    "responseModalities": ["AUDIO"],
                    "speechConfig": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": voice}}}
                }
            }
            req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key})
            with GeminiHandler._get_opener().open(req, timeout=600) as r:
                res = json.loads(r.read().decode())
            candidates = res.get('candidates', [])
            if not candidates:
                raise Exception("No candidates returned")
            content = candidates[0].get('content', {})
            parts = content.get('parts', [])
            if not parts:
                raise Exception("No parts in response")
            part = parts[0]
            # The REST API may return camelCase or snake_case field names.
            if 'inlineData' in part:
                return part['inlineData']['data']
            if 'inline_data' in part:
                return part['inline_data']['data']
            if 'text' in part:
                raise Exception(f"Model refused audio: {part['text']}")
            raise Exception("Unknown response format")
        return GeminiHandler._call_with_rotation(_logic, text, voice_name)
payload = { + "contents": [{"parts": [{"text": txt}]}], + "generationConfig": { + "responseModalities": ["AUDIO"], + "speechConfig": {"voiceConfig": {"prebuiltVoiceConfig": {"voiceName": voice}}} + } + } + req = request.Request(url, data=json.dumps(payload).encode('utf-8'), headers={"Content-Type": "application/json", "x-goog-api-key": key}) + with GeminiHandler._get_opener().open(req, timeout=600) as r: + res = json.loads(r.read().decode()) + candidates = res.get('candidates', []) + if not candidates: raise Exception("No candidates returned") + content = candidates[0].get('content', {}) + parts = content.get('parts', []) + if not parts: raise Exception("No parts in response") + part = parts[0] + if 'inlineData' in part: return part['inlineData']['data'] + if 'inline_data' in part: return part['inline_data']['data'] + if 'text' in part: raise Exception(f"Model refused audio: {part['text']}") + raise Exception("Unknown response format") + return GeminiHandler._call_with_rotation(_logic, text, voice_name) diff --git a/addon/globalPlugins/visionAssistant/updater.py b/addon/globalPlugins/visionAssistant/updater.py new file mode 100644 index 0000000..16e899d --- /dev/null +++ b/addon/globalPlugins/visionAssistant/updater.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- + +import json +import threading +import logging +import os +import re +import tempfile + +import wx +from urllib import request + +import addonHandler +import gui +import ui +from .constants import ADDON_NAME +from .markdown_utils import clean_markdown +from .services import show_error_dialog + +log = logging.getLogger(__name__) +addonHandler.initTranslation() + +class UpdateDialog(wx.Dialog): + def __init__(self, parent, version, name, changes): + # Translators: Title of update confirmation dialog + super().__init__(parent, title=_("Update Available"), size=(500, 450)) + self.Centre() + + panel = wx.Panel(self) + vbox = wx.BoxSizer(wx.VERTICAL) + + # Translators: Message asking user to update. 
{version} is version number. + msg = _("A new version ({version}) of {name} is available.").format(version=version, name=name) + header = wx.StaticText(panel, label=msg) + vbox.Add(header, 0, wx.ALL, 15) + + # Translators: Label for the changes text box + change_lbl = wx.StaticText(panel, label=_("Changes:")) + vbox.Add(change_lbl, 0, wx.LEFT | wx.RIGHT, 15) + + self.changes_ctrl = wx.TextCtrl(panel, value=changes, style=wx.TE_MULTILINE | wx.TE_READONLY | wx.TE_RICH2) + vbox.Add(self.changes_ctrl, 1, wx.EXPAND | wx.ALL, 15) + + # Translators: Question to download and install + question = wx.StaticText(panel, label=_("Download and Install?")) + vbox.Add(question, 0, wx.LEFT | wx.RIGHT | wx.BOTTOM, 15) + + btn_sizer = wx.BoxSizer(wx.HORIZONTAL) + # Translators: Button to accept update + self.yes_btn = wx.Button(panel, wx.ID_YES, label=_("&Yes")) + # Translators: Button to reject update + self.no_btn = wx.Button(panel, wx.ID_NO, label=_("&No")) + + btn_sizer.Add(self.yes_btn, 0, wx.RIGHT, 10) + btn_sizer.Add(self.no_btn, 0) + vbox.Add(btn_sizer, 0, wx.ALIGN_RIGHT | wx.ALL, 15) + + panel.SetSizer(vbox) + self.yes_btn.SetDefault() + self.yes_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_YES)) + self.no_btn.Bind(wx.EVT_BUTTON, lambda e: self.EndModal(wx.ID_NO)) + +class UpdateManager: + def __init__(self, repo_name): + self.repo_name = repo_name + self.current_version = addonHandler.getCodeAddon().manifest['version'] + + def check_for_updates(self, silent=True): + threading.Thread(target=self._check_thread, args=(silent,), daemon=True).start() + + def _check_thread(self, silent): + try: + url = f"https://api.github.com/repos/{self.repo_name}/releases/latest" + req = request.Request(url, headers={"User-Agent": "NVDA-Addon"}) + with request.urlopen(req, timeout=60) as response: + if response.status == 200: + data = json.loads(response.read().decode('utf-8')) + latest_tag = data.get("tag_name", "").lstrip("v") + if self._compare_versions(latest_tag, 
self.current_version) > 0: + download_url = None + for asset in data.get("assets", []): + if asset["name"].endswith(".nvda-addon"): + download_url = asset["browser_download_url"] + break + if download_url: + raw_changes = data.get("body", "") + + clean_changes = re.split(r'SHA256|Checklist|---', raw_changes, flags=re.I)[0].strip() + clean_changes = clean_markdown(clean_changes) + + wx.CallAfter(self._prompt_update, latest_tag, download_url, clean_changes) + elif not silent: + # Translators: Error message when an update is found but the addon file is missing from GitHub. + msg = _("Update found but no .nvda-addon file in release.") + show_error_dialog(msg) + elif not silent: + # Translators: Status message informing the user they are already on the latest version. + msg = _("You have the latest version.") + wx.CallAfter(ui.message, msg) + except Exception as e: + if not silent: + msg = _("Update check failed: {error}").format(error=e) + show_error_dialog(msg) + + def _compare_versions(self, v1, v2): + try: + parts1 = [int(x) for x in v1.split('.')] + parts2 = [int(x) for x in v2.split('.')] + return (parts1 > parts2) - (parts1 < parts2) + except Exception: + return 0 if v1 == v2 else 1 + + def _prompt_update(self, version, url, changes): + dlg = UpdateDialog(gui.mainFrame, version, ADDON_NAME, changes) + if dlg.ShowModal() == wx.ID_YES: + threading.Thread(target=self._download_install_worker, args=(url,), daemon=True).start() + dlg.Destroy() + + def _download_install_worker(self, url): + try: + # Translators: Message shown while downloading update + msg = _("Downloading update...") + wx.CallAfter(ui.message, msg) + temp_dir = tempfile.gettempdir() + file_path = os.path.join(temp_dir, "VisionAssistant_Update.nvda-addon") + with request.urlopen(url) as response, open(file_path, 'wb') as out_file: + out_file.write(response.read()) + wx.CallAfter(os.startfile, file_path) + except Exception as e: + # Translators: Error message for download failure + msg = _("Download 
failed: {error}").format(error=e) + show_error_dialog(msg) + +# --- UI Classes ---