diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 61196c0..68f6f65 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -4,7 +4,7 @@ on: [ push, pull_request ]
 
 env:
   SWOOLE_VERSION: '5.1.5'
-  SWOW_VERSION: 'v1.2.0'
+  SWOW_VERSION: 'v1.6.2'
 
 jobs:
   ci:
diff --git a/composer.json b/composer.json
index 75d7424..e24a6b7 100644
--- a/composer.json
+++ b/composer.json
@@ -16,6 +16,9 @@
         ],
         "exclude-from-classmap": [
             "vendor/aws/aws-sdk-php/src/Api/Validator.php"
+        ],
+        "files": [
+            "src/Api/Transport/SimpleCURLClient.php"
         ]
     },
     "autoload-dev": {
@@ -28,6 +31,7 @@
         "ext-bcmath": "*",
         "ext-mbstring": "*",
         "aws/aws-sdk-php": "^3.0",
+        "ext-curl": "*",
         "dtyq/php-mcp": "0.1.*",
         "guzzlehttp/guzzle": "^7.0|^6.0",
         "hyperf/cache": "~2.2.0 || 3.0.* || 3.1.*",
@@ -35,12 +39,14 @@
         "hyperf/di": "~2.2.0 || 3.0.* || 3.1.*",
         "hyperf/logger": "~2.2.0 || 3.0.* || 3.1.*",
         "hyperf/retry": "~2.2.0 || 3.0.* || 3.1.*",
+        "hyperf/event": "~2.2.0 || 3.0.* || 3.1.*",
         "hyperf/qdrant-client": "*",
         "justinrainbow/json-schema": "^6.3",
         "yethee/tiktoken": "^0.1.2"
     },
     "require-dev": {
         "friendsofphp/php-cs-fixer": "^3.0",
+        "hyperf/engine": "^2.0",
         "mockery/mockery": "^1.0",
         "phpstan/phpstan": "^1.0",
         "phpunit/phpunit": ">=7.0",
diff --git a/examples/aws/aws_chat.php b/examples/aws/aws_chat.php
index 7fb256a..1881bbe 100644
--- a/examples/aws/aws_chat.php
+++ b/examples/aws/aws_chat.php
@@ -73,3 +73,12 @@
 
 echo PHP_EOL;
 echo '耗时' . (microtime(true) - $start) . '秒' . PHP_EOL;
+
+// Print the token usage when the response reports one (skipped when getUsage() returns a falsy value)
+$usage = $response->getUsage();
+if ($usage) {
+    echo PHP_EOL . '=== Token 使用情况 ===' . PHP_EOL;
+    echo '输入 Tokens: ' . $usage->getPromptTokens() . PHP_EOL;
+    echo '输出 Tokens: ' . $usage->getCompletionTokens() . PHP_EOL;
+    echo '总计 Tokens: ' . $usage->getTotalTokens() . PHP_EOL;
+}
diff --git a/examples/aws/aws_chat_custom.php b/examples/aws/aws_chat_custom.php
new file mode 100644
index 0000000..00dac85
--- /dev/null
+++ b/examples/aws/aws_chat_custom.php
@@ -0,0 +1,89 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Api\Providers\AwsBedrock\AwsType;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Model\AwsBedrockModel;
+
+use function Hyperf\Support\env;
+
+ClassLoader::init();
+
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+echo '=== AWS Bedrock Custom Client Test (Without AWS SDK) ===' . PHP_EOL . PHP_EOL;
+
+// Create AWS Bedrock model instance with CONVERSE_CUSTOM type
+// This uses custom Guzzle + SigV4 implementation instead of AWS SDK
+$model = new AwsBedrockModel(
+    'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
+    [
+        'access_key' => env('AWS_ACCESS_KEY'),
+        'secret_key' => env('AWS_SECRET_KEY'),
+        'region' => env('AWS_REGION', 'us-east-1'),
+        'type' => AwsType::CONVERSE_CUSTOM, // Use custom client without AWS SDK
+    ],
+    new Logger(),
+);
+$model->setApiRequestOptions(new ApiOptions([
+    'proxy' => env('HTTP_CLIENT_PROXY'),
+    'http_handler' => env('ODIN_HTTP_HANDLER', 'auto'),
+]));
+
+$messages = [
+    new SystemMessage('You are a helpful AI assistant. Always include emoji in your responses.'),
+    new UserMessage('Explain quantum entanglement in simple terms.'),
+];
+
+$start = microtime(true);
+
+// Build a non-streaming request with thinking enabled (budget of 4000 tokens) and send it
+$request = new ChatCompletionRequest($messages);
+$request->setThinking([
+    'type' => 'enabled',
+    'budget_tokens' => 4000,
+]);
+$response = $model->chatWithRequest($request);
+
+// Print the reasoning content when it is present, otherwise fall back to the regular content
+$message = $response->getFirstChoice()->getMessage();
+if ($message instanceof AssistantMessage) {
+    echo 'Response: ' . ($message->getReasoningContent() ?? $message->getContent()) . PHP_EOL;
+}
+
+echo PHP_EOL . 'Duration: ' . round(microtime(true) - $start, 2) . ' seconds' . PHP_EOL;
+
+// Output usage information; skipped when the response reports none (same guard as aws_chat.php uses)
+if ($usage = $response->getUsage()) {
+    echo PHP_EOL . '=== Token Usage ===' . PHP_EOL;
+    echo 'Input Tokens: ' . $usage->getPromptTokens() . PHP_EOL;
+    echo 'Output Tokens: ' . $usage->getCompletionTokens() . PHP_EOL;
+    echo 'Total Tokens: ' . $usage->getTotalTokens() . PHP_EOL;
+    if ($usage->getCachedTokens() > 0) {
+        echo PHP_EOL . 'Cache Hit: ' . $usage->getCachedTokens() . ' tokens' . PHP_EOL;
+        echo 'Cache Hit Rate: ' . $usage->getCacheHitRatePercentage() . '%' . PHP_EOL;
+    }
+}
+
+echo PHP_EOL . '✅ Custom client (without AWS SDK) works perfectly!' . PHP_EOL;
diff --git a/examples/aws/aws_chat_stream.php b/examples/aws/aws_chat_stream.php
index 213e1f7..4c003e6 100644
--- a/examples/aws/aws_chat_stream.php
+++ b/examples/aws/aws_chat_stream.php
@@ -74,3 +74,12 @@
 }
 
 echo PHP_EOL . '耗时: ' . round(microtime(true) - $start, 2) . ' 秒' . PHP_EOL;
+
+// Print the token usage when the stream response reports one (skipped when getUsage() returns a falsy value)
+$usage = $streamResponse->getUsage();
+if ($usage) {
+    echo PHP_EOL . '=== Token 使用情况 ===' . PHP_EOL;
+    echo '输入 Tokens: ' . $usage->getPromptTokens() . PHP_EOL;
+    echo '输出 Tokens: ' . $usage->getCompletionTokens() . PHP_EOL;
+    echo '总计 Tokens: ' . $usage->getTotalTokens() . PHP_EOL;
+}
diff --git a/examples/aws/aws_tool_use_agent_cache.php b/examples/aws/aws_tool_use_agent_cache.php
new file mode 100644
index 0000000..f5777d9
--- /dev/null
+++ b/examples/aws/aws_tool_use_agent_cache.php
@@ -0,0 +1,330 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Agent\Tool\ToolUseAgent;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Factory\ModelFactory;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Memory\MemoryManager;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Model\AwsBedrockModel;
+use Hyperf\Odin\Model\ModelOptions;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+use function Hyperf\Support\env;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create the logger
+$logger = new Logger();
+
+// Initialize the model
+$model = ModelFactory::create(
+    implementation: AwsBedrockModel::class,
+    modelName: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
+    config: [
+        'access_key' => env('AWS_ACCESS_KEY'),
+        'secret_key' => env('AWS_SECRET_KEY'),
+        'region' => env('AWS_REGION', 'us-east-1'),
+        'auto_cache' => true,
+    ],
+    modelOptions: ModelOptions::fromArray([
+        'chat' => true,
+        'function_call' => true,
+        'embedding' => false,
+        'multi_modal' => true,
+        'vector_size' => 0,
+    ]),
+    apiOptions: ApiOptions::fromArray([
+        'timeout' => [
+            'connection' => 5.0,  // connection timeout (seconds)
+            'write' => 10.0,      // write timeout (seconds)
+            'read' => 300.0,      // read timeout (seconds)
+            'total' => 350.0,     // overall timeout (seconds)
+            'thinking' => 120.0,  // thinking timeout (seconds)
+            'stream_chunk' => 30.0, // timeout between stream chunks (seconds)
+            'stream_first' => 60.0, // timeout for the first stream chunk (seconds)
+        ],
+        'proxy' => env('HTTP_CLIENT_PROXY'),
+        'custom_error_mapping_rules' => [],
+    ]),
+    logger: $logger
+);
+$systemPrompt = '你是一个专业且智能的AI助手，具备丰富的知识库和强大的工具使用能力。你的主要职责是帮助用户解决各种问题，并在需要时合理使用可用的工具来提供准确、及时的信息和服务。
+
+## 工具使用原则
+
+### 1. 工具选择策略
+- 当用户的需求需要实时数据、精确计算或特定功能时，优先考虑使用相应的工具
+- 在使用工具前，先分析用户需求，选择最合适的工具组合
+- 对于复杂任务，可以按逻辑顺序使用多个工具
+- 如果某个工具无法满足需求，主动说明原因并提供替代方案
+
+### 2. 工具调用规范
+- 使用工具前，向用户清楚说明将要使用的工具及其作用
+- 调用工具时确保参数正确完整，避免错误调用
+- 工具返回结果后，对结果进行解读和总结
+- 如果工具返回错误，要向用户说明错误原因并提供解决建议
+
+### 3. 响应格式要求
+- 回复结构清晰，逻辑层次分明
+- 使用工具时采用以下格式：
+  1. 说明即将使用的工具和原因
+  2. 调用工具并展示结果
+  3. 对结果进行分析和解释
+  4. 根据结果给出最终答案或建议
+
+## 可用工具说明
+
+### 计算器工具 (calculator)
+功能：执行基本数学运算（加、减、乘、除）
+使用场景：需要进行精确数学计算时
+参数要求：
+- operation: 运算类型（add/subtract/multiply/divide）
+- a: 第一个操作数
+- b: 第二个操作数
+
+### 天气查询工具 (weather)
+功能：查询指定城市的天气信息
+使用场景：用户询问天气情况时
+参数要求：
+- city: 城市名称
+注意：当前支持北京、上海、广州、深圳等主要城市
+
+### 翻译工具 (translate)
+功能：将文本从一种语言翻译成另一种语言
+使用场景：用户需要翻译服务时
+参数要求：
+- text: 要翻译的文本内容
+- target_language: 目标语言
+
+## 交互指导原则
+
+### 1. 用户体验优先
+- 始终保持友好、专业的对话态度
+- 主动了解用户需求，提供个性化服务
+- 回复要简洁明了，避免冗余信息
+- 对于复杂问题，提供分步解决方案
+
+### 2. 准确性保证
+- 使用工具获得的数据要如实呈现
+- 对于无法确定的信息，明确说明不确定性
+- 区分事实信息和推测内容
+- 承认知识局限性，必要时建议用户咨询专业人士
+
+### 3. 安全和隐私
+- 保护用户隐私，不泄露敏感信息
+- 对于涉及安全的操作，提供必要的警告和建议
+- 拒绝执行可能造成危害的请求
+- 遵守相关法律法规和道德规范
+
+### 4. 持续学习
+- 从用户反馈中改进服务质量
+- 灵活应对各种场景和需求
+- 保持开放心态，接受新的挑战
+- 不断优化工具使用效率
+
+## 特殊情况处理
+
+### 工具故障处理
+- 如果工具调用失败，立即向用户说明情况
+- 提供人工替代方案或建议重试
+- 记录问题详情，便于后续改进
+
+### 多工具协作
+- 合理规划工具使用顺序
+- 确保前一个工具的输出能为下一个工具提供有效输入
+- 对整个工具链的执行过程进行监控和优化
+
+### 异常情况应对
+- 面对超出工具能力范围的需求，诚实说明限制
+- 提供可行的替代解决方案
+- 引导用户调整需求或寻求其他帮助渠道
+
+通过以上原则和规范，我将为你提供高质量、可靠的智能助手服务。请随时告诉我你的需求，我会选择最合适的方式来帮助你。';
+// Initialize the memory manager and register the system prompt with it
+$memory = new MemoryManager();
+$memory->addSystemMessage(new SystemMessage($systemPrompt));
+
+// Define several tools
+// Calculator tool
+$calculatorTool = new ToolDefinition(
+    name: 'calculator',
+    description: '用于执行基本数学运算的计算器工具',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'operation' => [
+                'type' => 'string',
+                'enum' => ['add', 'subtract', 'multiply', 'divide'],
+                'description' => '要执行的数学运算类型',
+            ],
+            'a' => [
+                'type' => 'number',
+                'description' => '第一个操作数',
+            ],
+            'b' => [
+                'type' => 'number',
+                'description' => '第二个操作数',
+            ],
+        ],
+        'required' => ['operation', 'a', 'b'],
+    ]),
+    toolHandler: function ($params) {
+        $a = $params['a'];
+        $b = $params['b'];
+        switch ($params['operation']) {
+            case 'add':
+                return ['result' => $a + $b];
+            case 'subtract':
+                return ['result' => $a - $b];
+            case 'multiply':
+                return ['result' => $a * $b];
+            case 'divide':
+                if ($b == 0) {
+                    return ['error' => '除数不能为零'];
+                }
+                return ['result' => $a / $b];
+            default:
+                return ['error' => '未知操作'];
+        }
+    }
+);
+
+// Weather lookup tool (mock)
+$weatherTool = new ToolDefinition(
+    name: 'weather',
+    description: '查询指定城市的天气信息',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'city' => [
+                'type' => 'string',
+                'description' => '要查询天气的城市名称',
+            ],
+        ],
+        'required' => ['city'],
+    ]),
+    toolHandler: function ($params) {
+        $city = $params['city'];
+        // Mock weather data, keyed by city name
+        $weatherData = [
+            '北京' => ['temperature' => '25°C', 'condition' => '晴朗', 'humidity' => '45%'],
+            '上海' => ['temperature' => '28°C', 'condition' => '多云', 'humidity' => '60%'],
+            '广州' => ['temperature' => '30°C', 'condition' => '阵雨', 'humidity' => '75%'],
+            '深圳' => ['temperature' => '29°C', 'condition' => '晴朗', 'humidity' => '65%'],
+        ];
+
+        if (isset($weatherData[$city])) {
+            return $weatherData[$city];
+        }
+        return ['error' => '没有找到该城市的天气信息'];
+    }
+);
+
+// Translation tool (mock)
+$translateTool = new ToolDefinition(
+    name: 'translate',
+    description: '将文本从一种语言翻译成另一种语言',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'text' => [
+                'type' => 'string',
+                'description' => '要翻译的文本',
+            ],
+            'target_language' => [
+                'type' => 'string',
+                'description' => '目标语言，例如：英语、中文、日语等',
+            ],
+        ],
+        'required' => ['text', 'target_language'],
+    ]),
+    toolHandler: function ($params) {
+        $text = $params['text'];
+        $targetLanguage = $params['target_language'];
+
+        // Mock translation results, keyed by source text and then by target language
+        $translations = [
+            '你好' => [
+                '英语' => 'Hello',
+                '日语' => 'こんにちは',
+                '法语' => 'Bonjour',
+            ],
+            'Hello' => [
+                '中文' => '你好',
+                '日语' => 'こんにちは',
+                '法语' => 'Bonjour',
+            ],
+        ];
+
+        if (isset($translations[$text][$targetLanguage])) {
+            return ['translated_text' => $translations[$text][$targetLanguage]];
+        }
+
+        // No preset translation: return the original text with a mock suffix appended
+        return ['translated_text' => $text . ' (已翻译为' . $targetLanguage . ')', 'note' => '这是模拟翻译'];
+    }
+);
+
+$taskTool = new ToolDefinition(
+    name: 'trigger_task',
+    description: '触发任务执行',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [],
+        'required' => [],
+    ]),
+    toolHandler: function () {
+        return ['status' => 'success', 'message' => '任务 已触发'];
+    }
+);
+
+// Create the agent with all of the tools, keyed by tool name
+$agent = new ToolUseAgent(
+    model: $model,
+    memory: $memory,
+    tools: [
+        $calculatorTool->getName() => $calculatorTool,
+        $weatherTool->getName() => $weatherTool,
+        $translateTool->getName() => $translateTool,
+        $taskTool->getName() => $taskTool,
+    ],
+    temperature: 0.6,
+    logger: $logger
+);
+
+// Sequential tool-call example
+echo "===== 顺序工具调用示例 =====\n";
+$start = microtime(true);
+
+$userMessage = new UserMessage('请计算 23 × 45，然后查询北京的天气，最后将"你好"翻译成英语，和触发任务。请详细说明每一步。');
+$response = $agent->chat($userMessage);
+
+$message = $response->getFirstChoice()->getMessage();
+if ($message instanceof AssistantMessage) {
+    echo $message->getContent();
+}
+
+echo "\n";
+echo '顺序调用耗时：' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/dashscope/dashscope_tool_use_agent.php b/examples/dashscope/dashscope_tool_use_agent.php
new file mode 100644
index 0000000..4d5bc44
--- /dev/null
+++ b/examples/dashscope/dashscope_tool_use_agent.php
@@ -0,0 +1,320 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Agent\Tool\ToolUseAgent;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Factory\ModelFactory;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Memory\MemoryManager;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Model\DashScopeModel;
+use Hyperf\Odin\Model\ModelOptions;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+use function Hyperf\Support\env;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create the logger
+$logger = new Logger();
+
+// Initialize the model
+$model = ModelFactory::create(
+    implementation: DashScopeModel::class,
+    modelName: env('QWEN3_CODER_PLUS_MODEL'),
+    config: [
+        'api_key' => env('QWEN_API_KEY'),
+        'base_url' => env('QWEN_API_BASE_URL'),
+        'auto_cache_config' => [
+            'auto_enabled' => true,  // enable automatic caching
+            'min_cache_tokens' => 1024,
+            'supported_models' => ['qwen3-coder-plus', 'qwen-max'],
+        ],
+    ],
+    modelOptions: ModelOptions::fromArray([
+        'chat' => true,
+        'function_call' => true,
+        'embedding' => false,
+        'multi_modal' => true,
+        'vector_size' => 0,
+    ]),
+    apiOptions: ApiOptions::fromArray([
+        'timeout' => [
+            'connection' => 5.0,  // connection timeout (seconds)
+            'write' => 10.0,      // write timeout (seconds)
+            'read' => 300.0,      // read timeout (seconds)
+            'total' => 350.0,     // overall timeout (seconds)
+            'thinking' => 120.0,  // thinking timeout (seconds)
+            'stream_chunk' => 30.0, // timeout between stream chunks (seconds)
+            'stream_first' => 60.0, // timeout for the first stream chunk (seconds)
+        ],
+        'custom_error_mapping_rules' => [],
+    ]),
+    logger: $logger
+);
+
+// Initialize the memory manager
+$memory = new MemoryManager();
+$systemPrompt = '你是一个专业且智能的AI助手，具备丰富的知识库和强大的工具使用能力。你的主要职责是帮助用户解决各种问题，并在需要时合理使用可用的工具来提供准确、及时的信息和服务。
+
+## 工具使用原则
+
+### 1. 工具选择策略
+- 当用户的需求需要实时数据、精确计算或特定功能时，优先考虑使用相应的工具
+- 在使用工具前，先分析用户需求，选择最合适的工具组合
+- 对于复杂任务，可以按逻辑顺序使用多个工具
+- 如果某个工具无法满足需求，主动说明原因并提供替代方案
+
+### 2. 工具调用规范
+- 使用工具前，向用户清楚说明将要使用的工具及其作用
+- 调用工具时确保参数正确完整，避免错误调用
+- 工具返回结果后，对结果进行解读和总结
+- 如果工具返回错误，要向用户说明错误原因并提供解决建议
+
+### 3. 响应格式要求
+- 回复结构清晰，逻辑层次分明
+- 使用工具时采用以下格式：
+  1. 说明即将使用的工具和原因
+  2. 调用工具并展示结果
+  3. 对结果进行分析和解释
+  4. 根据结果给出最终答案或建议
+
+## 可用工具说明
+
+### 计算器工具 (calculator)
+功能：执行基本数学运算（加、减、乘、除）
+使用场景：需要进行精确数学计算时
+参数要求：
+- operation: 运算类型（add/subtract/multiply/divide）
+- a: 第一个操作数
+- b: 第二个操作数
+
+### 天气查询工具 (weather)
+功能：查询指定城市的天气信息
+使用场景：用户询问天气情况时
+参数要求：
+- city: 城市名称
+注意：当前支持北京、上海、广州、深圳等主要城市
+
+### 翻译工具 (translate)
+功能：将文本从一种语言翻译成另一种语言
+使用场景：用户需要翻译服务时
+参数要求：
+- text: 要翻译的文本内容
+- target_language: 目标语言
+
+## 交互指导原则
+
+### 1. 用户体验优先
+- 始终保持友好、专业的对话态度
+- 主动了解用户需求，提供个性化服务
+- 回复要简洁明了，避免冗余信息
+- 对于复杂问题，提供分步解决方案
+
+### 2. 准确性保证
+- 使用工具获得的数据要如实呈现
+- 对于无法确定的信息，明确说明不确定性
+- 区分事实信息和推测内容
+- 承认知识局限性，必要时建议用户咨询专业人士
+
+### 3. 安全和隐私
+- 保护用户隐私，不泄露敏感信息
+- 对于涉及安全的操作，提供必要的警告和建议
+- 拒绝执行可能造成危害的请求
+- 遵守相关法律法规和道德规范
+
+### 4. 持续学习
+- 从用户反馈中改进服务质量
+- 灵活应对各种场景和需求
+- 保持开放心态，接受新的挑战
+- 不断优化工具使用效率
+
+## 特殊情况处理
+
+### 工具故障处理
+- 如果工具调用失败，立即向用户说明情况
+- 提供人工替代方案或建议重试
+- 记录问题详情，便于后续改进
+
+### 多工具协作
+- 合理规划工具使用顺序
+- 确保前一个工具的输出能为下一个工具提供有效输入
+- 对整个工具链的执行过程进行监控和优化
+
+### 异常情况应对
+- 面对超出工具能力范围的需求，诚实说明限制
+- 提供可行的替代解决方案
+- 引导用户调整需求或寻求其他帮助渠道
+
+通过以上原则和规范，我将为你提供高质量、可靠的智能助手服务。请随时告诉我你的需求，我会选择最合适的方式来帮助你。';
+
+$memory->addSystemMessage(new SystemMessage($systemPrompt));
+
+// Define several tools
+// Calculator tool
+$calculatorTool = new ToolDefinition(
+    name: 'calculator',
+    description: '用于执行基本数学运算的计算器工具',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'operation' => [
+                'type' => 'string',
+                'enum' => ['add', 'subtract', 'multiply', 'divide'],
+                'description' => '要执行的数学运算类型',
+            ],
+            'a' => [
+                'type' => 'number',
+                'description' => '第一个操作数',
+            ],
+            'b' => [
+                'type' => 'number',
+                'description' => '第二个操作数',
+            ],
+        ],
+        'required' => ['operation', 'a', 'b'],
+    ]),
+    toolHandler: function ($params) {
+        $a = $params['a'];
+        $b = $params['b'];
+        switch ($params['operation']) {
+            case 'add':
+                return ['result' => $a + $b];
+            case 'subtract':
+                return ['result' => $a - $b];
+            case 'multiply':
+                return ['result' => $a * $b];
+            case 'divide':
+                if ($b == 0) {
+                    return ['error' => '除数不能为零'];
+                }
+                return ['result' => $a / $b];
+            default:
+                return ['error' => '未知操作'];
+        }
+    }
+);
+
+// Weather lookup tool (mock)
+$weatherTool = new ToolDefinition(
+    name: 'weather',
+    description: '查询指定城市的天气信息',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'city' => [
+                'type' => 'string',
+                'description' => '要查询天气的城市名称',
+            ],
+        ],
+        'required' => ['city'],
+    ]),
+    toolHandler: function ($params) {
+        $city = $params['city'];
+        // Mock weather data, keyed by city name
+        $weatherData = [
+            '北京' => ['temperature' => '25°C', 'condition' => '晴朗', 'humidity' => '45%'],
+            '上海' => ['temperature' => '28°C', 'condition' => '多云', 'humidity' => '60%'],
+            '广州' => ['temperature' => '30°C', 'condition' => '阵雨', 'humidity' => '75%'],
+            '深圳' => ['temperature' => '29°C', 'condition' => '晴朗', 'humidity' => '65%'],
+        ];
+
+        if (isset($weatherData[$city])) {
+            return $weatherData[$city];
+        }
+        return ['error' => '没有找到该城市的天气信息'];
+    }
+);
+
+// Translation tool (mock)
+$translateTool = new ToolDefinition(
+    name: 'translate',
+    description: '将文本从一种语言翻译成另一种语言',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'text' => [
+                'type' => 'string',
+                'description' => '要翻译的文本',
+            ],
+            'target_language' => [
+                'type' => 'string',
+                'description' => '目标语言，例如：英语、中文、日语等',
+            ],
+        ],
+        'required' => ['text', 'target_language'],
+    ]),
+    toolHandler: function ($params) {
+        $text = $params['text'];
+        $targetLanguage = $params['target_language'];
+
+        // Mock translation results, keyed by source text and then by target language
+        $translations = [
+            '你好' => [
+                '英语' => 'Hello',
+                '日语' => 'こんにちは',
+                '法语' => 'Bonjour',
+            ],
+            'Hello' => [
+                '中文' => '你好',
+                '日语' => 'こんにちは',
+                '法语' => 'Bonjour',
+            ],
+        ];
+
+        if (isset($translations[$text][$targetLanguage])) {
+            return ['translated_text' => $translations[$text][$targetLanguage]];
+        }
+
+        // No preset translation: return the original text with a mock suffix appended
+        return ['translated_text' => $text . ' (已翻译为' . $targetLanguage . ')', 'note' => '这是模拟翻译'];
+    }
+);
+
+// Create the agent with all of the tools, keyed by tool name
+$agent = new ToolUseAgent(
+    model: $model,
+    memory: $memory,
+    tools: [
+        $calculatorTool->getName() => $calculatorTool,
+        $weatherTool->getName() => $weatherTool,
+        $translateTool->getName() => $translateTool,
+    ],
+    temperature: 0.6,
+    logger: $logger
+);
+
+// Sequential tool-call example
+echo "===== 顺序工具调用示例 =====\n";
+$start = microtime(true);
+
+$userMessage = new UserMessage('请计算 23 × 45，然后查询北京的天气，最后将"你好"翻译成英语。请详细说明每一步。');
+$response = $agent->chat($userMessage);
+
+$message = $response->getFirstChoice()->getMessage();
+if ($message instanceof AssistantMessage) {
+    echo $message->getContent();
+}
+
+echo "\n";
+echo '顺序调用耗时：' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/dashscope/dashscope_tool_use_agent_stream.php b/examples/dashscope/dashscope_tool_use_agent_stream.php
new file mode 100644
index 0000000..ec320ad
--- /dev/null
+++ b/examples/dashscope/dashscope_tool_use_agent_stream.php
@@ -0,0 +1,414 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Agent\Tool\ToolUseAgent;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Api\Response\ChatCompletionChoice;
+use Hyperf\Odin\Factory\ModelFactory;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Memory\MemoryManager;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Model\DashScopeModel;
+use Hyperf\Odin\Model\ModelOptions;
+use Hyperf\Odin\Tool\AbstractTool;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+use function Hyperf\Support\env;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create the logger
+$logger = new Logger();
+
+// Initialize the model
+$model = ModelFactory::create(
+    implementation: DashScopeModel::class,
+    modelName: env('QWEN3_CODER_PLUS_MODEL'),
+    config: [
+        'api_key' => env('QWEN_API_KEY'),
+        'base_url' => env('QWEN_API_BASE_URL'),
+        'auto_cache_config' => [
+            'auto_enabled' => true,  // enable automatic caching
+            'min_cache_tokens' => 1024,
+            'supported_models' => ['qwen3-coder-plus', 'qwen-max'],
+        ],
+    ],
+    modelOptions: ModelOptions::fromArray([
+        'chat' => true,
+        'function_call' => true,
+        'embedding' => false,
+        'multi_modal' => true,
+        'vector_size' => 0,
+    ]),
+    apiOptions: ApiOptions::fromArray([
+        'timeout' => [
+            'connection' => 5.0,  // connection timeout (seconds)
+            'write' => 10.0,      // write timeout (seconds)
+            'read' => 300.0,      // read timeout (seconds)
+            'total' => 350.0,     // overall timeout (seconds)
+            'thinking' => 120.0,  // thinking timeout (seconds)
+            'stream_chunk' => 30.0, // timeout between stream chunks (seconds)
+            'stream_first' => 60.0, // timeout for the first stream chunk (seconds)
+        ],
+        'custom_error_mapping_rules' => [],
+    ]),
+    logger: $logger
+);
+
+$systemPrompt = '你是一个专业且智能的AI助手，具备丰富的知识库和强大的工具使用能力。你的主要职责是帮助用户解决各种问题，并在需要时合理使用可用的工具来提供准确、及时的信息和服务。
+
+## 工具使用原则
+
+### 1. 工具选择策略
+- 当用户的需求需要实时数据、精确计算或特定功能时，优先考虑使用相应的工具
+- 在使用工具前，先分析用户需求，选择最合适的工具组合
+- 对于复杂任务，可以按逻辑顺序使用多个工具
+- 如果某个工具无法满足需求，主动说明原因并提供替代方案
+
+### 2. 工具调用规范
+- 使用工具前，向用户清楚说明将要使用的工具及其作用
+- 调用工具时确保参数正确完整，避免错误调用
+- 工具返回结果后，对结果进行解读和总结
+- 如果工具返回错误，要向用户说明错误原因并提供解决建议
+
+### 3. 响应格式要求
+- 回复结构清晰，逻辑层次分明
+- 使用工具时采用以下格式：
+  1. 说明即将使用的工具和原因
+  2. 调用工具并展示结果
+  3. 对结果进行分析和解释
+  4. 根据结果给出最终答案或建议
+
+## 可用工具说明
+
+### 计算器工具 (calculator)
+功能：执行基本数学运算（加、减、乘、除）
+使用场景：需要进行精确数学计算时
+参数要求：
+- operation: 运算类型（add/subtract/multiply/divide）
+- a: 第一个操作数
+- b: 第二个操作数
+
+### 天气查询工具 (weather)
+功能：查询指定城市的天气信息
+使用场景：用户询问天气情况时
+参数要求：
+- city: 城市名称
+注意：当前支持北京、上海、广州、深圳等主要城市
+
+### 翻译工具 (translate)
+功能：将文本从一种语言翻译成另一种语言
+使用场景：用户需要翻译服务时
+参数要求：
+- text: 要翻译的文本内容
+- target_language: 目标语言
+
+## 交互指导原则
+
+### 1. 用户体验优先
+- 始终保持友好、专业的对话态度
+- 主动了解用户需求，提供个性化服务
+- 回复要简洁明了，避免冗余信息
+- 对于复杂问题，提供分步解决方案
+
+### 2. 准确性保证
+- 使用工具获得的数据要如实呈现
+- 对于无法确定的信息，明确说明不确定性
+- 区分事实信息和推测内容
+- 承认知识局限性，必要时建议用户咨询专业人士
+
+### 3. 安全和隐私
+- 保护用户隐私，不泄露敏感信息
+- 对于涉及安全的操作，提供必要的警告和建议
+- 拒绝执行可能造成危害的请求
+- 遵守相关法律法规和道德规范
+
+### 4. 持续学习
+- 从用户反馈中改进服务质量
+- 灵活应对各种场景和需求
+- 保持开放心态，接受新的挑战
+- 不断优化工具使用效率
+
+## 特殊情况处理
+
+### 工具故障处理
+- 如果工具调用失败，立即向用户说明情况
+- 提供人工替代方案或建议重试
+- 记录问题详情，便于后续改进
+
+### 多工具协作
+- 合理规划工具使用顺序
+- 确保前一个工具的输出能为下一个工具提供有效输入
+- 对整个工具链的执行过程进行监控和优化
+
+### 异常情况应对
+- 面对超出工具能力范围的需求，诚实说明限制
+- 提供可行的替代解决方案
+- 引导用户调整需求或寻求其他帮助渠道
+
+通过以上原则和规范，我将为你提供高质量、可靠的智能助手服务。请随时告诉我你的需求，我会选择最合适的方式来帮助你。';
+
+// Initialize the memory manager and register the system prompt with it
+$memory = new MemoryManager();
+$memory->addSystemMessage(new SystemMessage($systemPrompt));
+
+// Define several tools
+// Calculator tool
+$calculatorTool = new ToolDefinition(
+    name: 'calculator',
+    description: '用于执行基本数学运算的计算器工具',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'operation' => [
+                'type' => 'string',
+                'enum' => ['add', 'subtract', 'multiply', 'divide', 'power'],
+                'description' => '要执行的数学运算类型',
+            ],
+            'a' => [
+                'type' => 'number',
+                'description' => '第一个操作数',
+            ],
+            'b' => [
+                'type' => 'number',
+                'description' => '第二个操作数',
+            ],
+        ],
+        'required' => ['operation', 'a', 'b'],
+    ]),
+    toolHandler: function ($params) {
+        $a = $params['a'];
+        $b = $params['b'];
+        switch ($params['operation']) {
+            case 'add':
+                return ['result' => $a + $b];
+            case 'subtract':
+                return ['result' => $a - $b];
+            case 'multiply':
+                return ['result' => $a * $b];
+            case 'divide':
+                if ($b == 0) {
+                    return ['error' => '除数不能为零'];
+                }
+                return ['result' => $a / $b];
+            case 'power':
+                return ['result' => pow($a, $b)];
+            default:
+                return ['error' => '未知操作'];
+        }
+    }
+);
+
+// Database query tool (mock)
+$databaseTool = new ToolDefinition(
+    name: 'database',
+    description: '查询数据库中的信息',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'table' => [
+                'type' => 'string',
+                'enum' => ['users', 'products', 'orders'],
+                'description' => '要查询的数据表',
+            ],
+            'id' => [
+                'type' => 'integer',
+                'description' => '记录ID',
+            ],
+        ],
+        'required' => ['table', 'id'],
+    ]),
+    toolHandler: function ($params) {
+        $table = $params['table'];
+        $id = $params['id'];
+
+        // Mock database tables, keyed by table name and then by record ID
+        $database = [
+            'users' => [
+                1 => ['name' => '张三', 'age' => 28, 'email' => 'zhangsan@example.com'],
+                2 => ['name' => '李四', 'age' => 32, 'email' => 'lisi@example.com'],
+                3 => ['name' => '王五', 'age' => 45, 'email' => 'wangwu@example.com'],
+            ],
+            'products' => [
+                1 => ['name' => '笔记本电脑', 'price' => 6999, 'stock' => 50],
+                2 => ['name' => '智能手机', 'price' => 3999, 'stock' => 100],
+                3 => ['name' => '平板电脑', 'price' => 2999, 'stock' => 75],
+            ],
+            'orders' => [
+                1 => ['user_id' => 1, 'product_id' => 2, 'quantity' => 1, 'total' => 3999],
+                2 => ['user_id' => 2, 'product_id' => 1, 'quantity' => 2, 'total' => 13998],
+                3 => ['user_id' => 3, 'product_id' => 3, 'quantity' => 1, 'total' => 2999],
+            ],
+        ];
+
+        if (isset($database[$table][$id])) {
+            return ['data' => $database[$table][$id]];
+        }
+
+        return ['error' => "在表 {$table} 中未找到ID为 {$id} 的记录"];
+    }
+);
+
+// Content recommendation tool (mock): picks titles from a hard-coded catalogue
+$recommendTool = new ToolDefinition(
+    name: 'recommend',
+    description: '根据用户偏好推荐内容',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'category' => [
+                'type' => 'string',
+                'enum' => ['电影', '书籍', '音乐', '餐厅'],
+                'description' => '推荐类别',
+            ],
+            'user_preference' => [
+                'type' => 'string',
+                'description' => '用户偏好关键词',
+            ],
+            'limit' => [
+                'type' => 'integer',
+                'description' => '返回推荐数量',
+                'default' => 3,
+            ],
+        ],
+        'required' => ['category', 'user_preference'],
+    ]),
+    toolHandler: function ($params) {
+        $category = $params['category'];
+        $preference = $params['user_preference'];
+        $limit = max(1, (int) ($params['limit'] ?? 3)); // model may send "3", 3.0, 0 or a negative value
+
+        // Mock catalogue: category => sub-genre => list of titles
+        $recommendations = [
+            '电影' => [
+                '科幻' => ['星际穿越', '银翼杀手2049', '头号玩家', '火星救援', '黑客帝国'],
+                '动作' => ['速度与激情', '碟中谍', '复仇者联盟', '黑暗骑士', '007:幽灵党'],
+                '剧情' => ['肖申克的救赎', '阿甘正传', '当幸福来敲门', '楚门的世界', '绿皮书'],
+            ],
+            '书籍' => [
+                '科幻' => ['三体', '基地', '沙丘', '神经漫游者', '火星救援'],
+                '小说' => ['百年孤独', '追风筝的人', '活着', '围城', '平凡的世界'],
+                '历史' => ['人类简史', '枪炮、病菌与钢铁', '第三帝国的兴亡', '明朝那些事', '万历十五年'],
+            ],
+            '音乐' => [
+                '流行' => ['Bad Guy - Billie Eilish', 'Blinding Lights - The Weeknd', '起风了 - 买辣椒也用券', '锦鲤 - 王俊凯', 'Dynamite - BTS'],
+                '摇滚' => ['Numb - Linkin Park', 'Yellow - Coldplay', '不再犹豫 - Beyond', '光辉岁月 - Beyond', 'Bohemian Rhapsody - Queen'],
+                '古典' => ['月光奏鸣曲 - 贝多芬', '四季 - 维瓦尔第', '土耳其进行曲 - 莫扎特', '命运交响曲 - 贝多芬', '天鹅湖 - 柴可夫斯基'],
+            ],
+            '餐厅' => [
+                '中餐' => ['鼎泰丰', '外婆家', '海底捞', '眉州东坡', '小龙坎'],
+                '西餐' => ['必胜客', '麦当劳', '汉堡王', '赛百味', 'KFC'],
+                '日料' => ['吉野家', '松屋', '味千拉面', '寿司郎', '大渔铁板烧'],
+            ],
+        ];
+
+        $result = [];
+        if (isset($recommendations[$category])) {
+            foreach ($recommendations[$category] as $key => $items) {
+                // Naive match: a sub-genre matches when either string contains the other
+                if (str_contains($key, $preference) || str_contains($preference, $key)) {
+                    $result = array_slice($items, 0, $limit);
+                    break;
+                }
+            }
+
+            // No sub-genre matched: fall back to the first sub-genre of the category
+            if (empty($result)) {
+                $firstCategory = array_key_first($recommendations[$category]);
+                $result = array_slice($recommendations[$category][$firstCategory], 0, $limit);
+            }
+
+            return ['recommendations' => $result];
+        }
+
+        return ['error' => "不支持的推荐类别: {$category}"];
+    }
+);
+
+class CurrentTimeTool extends AbstractTool // parameterless example tool reporting the system clock
+{
+    public function getName(): string
+    {
+        return 'current_time';
+    }
+
+    public function getDescription(): string
+    {
+        return '获取当前系统时间，不需要任何参数';
+    }
+
+    public function getParameters(): ?ToolParameters
+    {
+        return ToolParameters::fromArray([
+            'type' => 'object',
+            'properties' => [],
+            'required' => [],
+        ]);
+    }
+
+    protected function handle(array $parameters): array
+    {
+        $now = time(); // no parameters needed; read the clock once so both time fields agree
+        return [
+            'current_time' => date('Y-m-d H:i:s', $now),
+            'timezone' => date_default_timezone_get(),
+            'timestamp' => $now,
+        ];
+    }
+}
+
+// Example of a tool that takes no parameters
+$currentTimeTool = new CurrentTimeTool();
+
+// Build the agent with every tool registered, keyed by the tool's own name
+$agent = new ToolUseAgent(
+    model: $model,
+    memory: $memory,
+    tools: [
+        $calculatorTool->getName() => $calculatorTool,
+        $databaseTool->getName() => $databaseTool,
+        $recommendTool->getName() => $recommendTool,
+        $currentTimeTool->getName() => $currentTimeTool,
+    ],
+    temperature: 0.6,
+    logger: $logger
+);
+
+// Sequential streaming call example: print each streamed chunk and time the whole run
+echo "===== 顺序流式工具调用示例 =====\n";
+$start = microtime(true);
+
+$userMessage = new UserMessage('先获取当前系统时间，再计算 7 的 3 次方，然后查询用户ID为2的信息，最后根据查询结果推荐一些科幻电影。请详细说明每一步。');
+$response = $agent->chatStreamed($userMessage);
+
+$content = '';
+/** @var ChatCompletionChoice $choice */
+foreach ($response as $choice) {
+    $delta = $choice->getMessage()->getContent();
+    if ($delta !== null) {
+        echo $delta;
+        $content .= $delta;
+    }
+}
+
+echo "\n";
+echo '顺序流式调用耗时：' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/exception/chat_completion_image_validation_example.php b/examples/exception/chat_completion_image_validation_example.php
new file mode 100644
index 0000000..f89f33c
--- /dev/null
+++ b/examples/exception/chat_completion_image_validation_example.php
@@ -0,0 +1,149 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Exception\LLMException\Model\LLMUnsupportedImageFormatException;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Message\UserMessageContent;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+echo "=== ChatCompletionRequest Image Validation Example ===\n";
+echo "=== ChatCompletionRequest 图片验证示例 ===\n\n";
+
+// Test case 1: Valid image format in chat request
+echo "📝 Test Case 1: Valid image format / 有效的图片格式\n";
+try {
+    $validUserMessage = (new UserMessage('Please analyze this image'))
+        ->addContent(UserMessageContent::text('Please analyze this image'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/photo.jpg'));
+
+    $chatRequest = new ChatCompletionRequest(
+        messages: [
+            new SystemMessage('You are a helpful vision assistant.'),
+            $validUserMessage,
+        ],
+        model: 'gpt-4-vision-preview',
+        temperature: 0.7
+    );
+
+    $chatRequest->validate();
+    echo "✅ PASSED - Valid image format in chat request accepted\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo '❌ FAILED - ' . $e->getMessage() . "\n";
+    echo '  Extension: ' . $e->getFileExtension() . "\n";
+}
+echo "\n";
+
+// Test case 2: Invalid image format in chat request
+echo "📝 Test Case 2: Invalid image format / 无效的图片格式\n";
+try {
+    $invalidUserMessage = (new UserMessage('Please analyze this document'))
+        ->addContent(UserMessageContent::text('Please analyze this document'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/document.pdf'));
+
+    $chatRequest = new ChatCompletionRequest(
+        messages: [
+            new SystemMessage('You are a helpful vision assistant.'),
+            $invalidUserMessage,
+        ],
+        model: 'gpt-4-vision-preview',
+        temperature: 0.7
+    );
+
+    $chatRequest->validate();
+    echo "❌ FAILED - Should have rejected invalid image format\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo "✅ PASSED - Invalid image format correctly rejected in chat request\n";
+    echo '  Error: ' . $e->getMessage() . "\n";
+    echo '  Extension: ' . $e->getFileExtension() . "\n";
+}
+echo "\n";
+
+// Test case 3: URL without extension (should pass)
+echo "📝 Test Case 3: URL without extension / 无扩展名URL\n";
+try {
+    $noExtUserMessage = (new UserMessage('Analyze this image'))
+        ->addContent(UserMessageContent::text('Analyze this image'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/api/image/123'));
+
+    $chatRequest = new ChatCompletionRequest(
+        messages: [
+            new SystemMessage('You are a helpful vision assistant.'),
+            $noExtUserMessage,
+        ],
+        model: 'gpt-4-vision-preview',
+        temperature: 0.7
+    );
+
+    $chatRequest->validate();
+    echo "✅ PASSED - URL without extension accepted in chat request\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo '❌ FAILED - ' . $e->getMessage() . "\n";
+}
+echo "\n";
+
+// Test case 4: Multiple messages with mixed image formats
+echo "📝 Test Case 4: Multiple messages with mixed formats / 多消息混合格式\n";
+try {
+    $validMessage = (new UserMessage('First image'))
+        ->addContent(UserMessageContent::text('First image'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/image1.jpg'));
+
+    $invalidMessage = (new UserMessage('Second file'))
+        ->addContent(UserMessageContent::text('Second file'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/document.docx'));
+
+    $chatRequest = new ChatCompletionRequest(
+        messages: [
+            new SystemMessage('You are a helpful vision assistant.'),
+            $validMessage,
+            $invalidMessage,
+        ],
+        model: 'gpt-4-vision-preview',
+        temperature: 0.7
+    );
+
+    $chatRequest->validate();
+    echo "❌ FAILED - Should have rejected invalid format in multiple messages\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo "✅ PASSED - Invalid format detected in multiple messages\n";
+    echo '  Error: ' . $e->getMessage() . "\n";
+    echo '  Extension: ' . $e->getFileExtension() . "\n";
+}
+echo "\n";
+
+// Test case 5: Text-only chat request (should pass)
+echo "📝 Test Case 5: Text-only chat request / 纯文本聊天请求\n";
+try {
+    $chatRequest = new ChatCompletionRequest(
+        messages: [
+            new SystemMessage('You are a helpful assistant.'),
+            new UserMessage('What is the capital of France?'),
+        ],
+        model: 'gpt-3.5-turbo',
+        temperature: 0.7
+    );
+
+    $chatRequest->validate();
+    echo "✅ PASSED - Text-only chat request accepted\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo '❌ FAILED - ' . $e->getMessage() . "\n";
+}
+echo "\n";
+
+echo "🔧 Integration Summary / 集成总结:\n";
+echo "✅ 图片格式验证已成功集成到 ChatCompletionRequest::validate() 方法中\n";
+echo "✅ 只有URL带有不支持扩展名的图片才会被拒绝\n";
+echo "✅ 其他情况（无扩展名、Base64、支持格式）都能正常通过验证\n";
+echo "✅ 验证发生在消息序列验证之后，确保基础验证通过\n";
+echo "✅ 抛出的异常包含详细的错误信息和具体的不支持扩展名\n";
diff --git a/examples/exception/image_downloader_example.php b/examples/exception/image_downloader_example.php
new file mode 100644
index 0000000..cd39688
--- /dev/null
+++ b/examples/exception/image_downloader_example.php
@@ -0,0 +1,115 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+use Hyperf\Odin\Exception\LLMException\Api\LLMInvalidRequestException;
+use Hyperf\Odin\Utils\ImageDownloader;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+echo "=== ImageDownloader Utility Example ===\n";
+echo "=== 图片下载工具示例 ===\n\n";
+
+// Test URLs
+$testUrls = [
+    // Valid remote image URLs (using placeholder URLs for testing)
+    'https://via.placeholder.com/300x200.jpg' => '✅ 期望成功 (小图片)',
+    'https://httpbin.org/image/jpeg' => '✅ 期望成功 (JPEG)',
+    'https://httpbin.org/image/png' => '✅ 期望成功 (PNG)',
+
+    // Base64 data URL (should be recognized but not downloaded)
+    'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEA...' => '✅ 期望识别为Base64',
+
+    // Invalid URLs
+    'ftp://example.com/image.jpg' => '❌ 期望失败 (不支持的协议)',
+    'invalid-url' => '❌ 期望失败 (无效URL)',
+    'https://httpbin.org/status/404' => '❌ 期望失败 (404错误)',
+];
+
+echo "🔍 Testing ImageDownloader utility:\n";
+echo "🔍 测试ImageDownloader工具：\n";
+echo '文件大小限制: ' . ImageDownloader::getMaxFileSizeFormatted() . "\n\n";
+
+foreach ($testUrls as $url => $expected) {
+    $displayUrl = strlen($url) > 60 ? substr($url, 0, 57) . '...' : $url;
+    echo "Testing: {$displayUrl}\n";
+    echo "Expected: {$expected}\n";
+
+    try {
+        // Check URL type
+        if (ImageDownloader::isRemoteImageUrl($url)) {
+            echo "  Type: Remote URL\n";
+
+            // Try to download and convert
+            $base64Url = ImageDownloader::downloadAndConvertToBase64($url);
+
+            // Check result
+            if (ImageDownloader::isBase64DataUrl($base64Url)) {
+                echo "  Result: ✅ PASSED - Successfully downloaded and converted to base64\n";
+                echo '  Base64 URL length: ' . strlen($base64Url) . " chars\n";
+
+                // Show MIME type
+                preg_match('/data:(image\/[^;]+)/', $base64Url, $matches);
+                $mimeType = $matches[1] ?? 'unknown';
+                echo "  Detected MIME type: {$mimeType}\n";
+            } else {
+                echo "  Result: ❌ FAILED - Invalid base64 format returned\n";
+            }
+        } elseif (ImageDownloader::isBase64DataUrl($url)) {
+            echo "  Type: Base64 Data URL\n";
+            echo "  Result: ✅ PASSED - Already in base64 format\n";
+        } else {
+            echo "  Type: Invalid URL\n";
+            echo "  Result: ❌ FAILED - Invalid URL format\n";
+        }
+    } catch (LLMInvalidRequestException $e) {
+        echo '  Result: ❌ FAILED - ' . $e->getMessage() . "\n";
+    } catch (Exception $e) {
+        echo '  Result: ⚠️  ERROR - ' . $e->getMessage() . "\n";
+    }
+
+    echo "\n";
+}
+
+// Test image format detection
+echo "🧪 Testing image format detection:\n";
+echo "🧪 测试图片格式检测：\n\n";
+
+$testBinaryData = [
+    'JPEG header' => "\xFF\xD8\xFF\xE0\x00\x10JFIF\x00\x01",
+    'PNG header' => "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A\x00\x00\x00\x0D",
+    'GIF87a header' => "GIF87a\x01\x00\x01\x00\x00\x00\x00\x00",
+    'GIF89a header' => "GIF89a\x01\x00\x01\x00\x00\x00\x00\x00",
+    'WebP header' => "RIFF\x1A\x00\x00\x00WEBPVP8 \x0E\x00",
+    'BMP header' => "BM\x1A\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+    'TIFF LE header' => "II\x2A\x00\x08\x00\x00\x00",
+    'TIFF BE header' => "MM\x00\x2A\x00\x00\x00\x08",
+    'Invalid data' => 'This is not image data at all',
+];
+
+foreach ($testBinaryData as $name => $binaryData) {
+    $mimeType = ImageDownloader::detectImageMimeType($binaryData);
+    $result = $mimeType ? "✅ {$mimeType}" : '❌ Unknown format';
+    echo "  {$name}: {$result}\n";
+}
+
+echo "\n💡 Utility Features / 工具特性:\n";
+echo "  ✅ 支持HTTP/HTTPS图片URL下载\n";
+echo "  ✅ 自动检测图片格式 (JPEG, PNG, GIF, WebP, BMP, TIFF)\n";
+echo "  ✅ 转换为标准Base64 Data URL格式\n";
+echo '  ✅ 文件大小限制: ' . ImageDownloader::getMaxFileSizeFormatted() . "\n";
+echo "  ✅ 超时保护: 连接10秒，读取30秒\n";
+echo "  ✅ 完整的错误处理和验证\n\n";
+
+echo "🔧 Integration with AWS Bedrock:\n";
+echo "  1. 检测远程图片URL\n";
+echo "  2. 自动下载并转换为Base64格式\n";
+echo "  3. 继续使用原有的Base64处理逻辑\n";
+echo "  4. 无缝集成，保持向后兼容\n";
diff --git a/examples/exception/image_format_validation_example.php b/examples/exception/image_format_validation_example.php
new file mode 100644
index 0000000..a50ae7f
--- /dev/null
+++ b/examples/exception/image_format_validation_example.php
@@ -0,0 +1,81 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+use Hyperf\Odin\Exception\LLMException\Model\LLMUnsupportedImageFormatException;
+use Hyperf\Odin\Utils\ImageFormatValidator;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+echo "=== Simple Image Format Validation Example ===\n";
+echo "=== 简单图片格式验证示例 ===\n\n";
+
+// Test cases for URL validation
+$testUrls = [
+    // Valid formats
+    'https://example.com/image.jpg' => '✅ 期望成功 (有效扩展名)',
+    'https://example.com/image.png' => '✅ 期望成功 (有效扩展名)',
+    'https://example.com/image.webp' => '✅ 期望成功 (有效扩展名)',
+
+    // Invalid formats (have extension but not supported)
+    'https://example.com/document.pdf' => '❌ 期望失败 (不支持的扩展名)',
+    'https://example.com/video.mp4' => '❌ 期望失败 (不支持的扩展名)',
+    'https://example.com/document.docx' => '❌ 期望失败 (不支持的扩展名)',
+
+    // No extension - should pass
+    'https://example.com/image' => '✅ 期望成功 (无扩展名)',
+    'https://example.com/api/image/123' => '✅ 期望成功 (无扩展名)',
+    'https://cdn.example.com/images?id=123' => '✅ 期望成功 (无扩展名)',
+
+    // Base64 - should pass
+    'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEA...' => '✅ 期望成功 (Base64)',
+];
+
+echo "🔍 Testing simplified URL validation:\n";
+echo "🔍 测试简化的URL验证：\n";
+echo "规则：只有URL有扩展名且不在支持列表中时才报错\n\n";
+
+foreach ($testUrls as $url => $expected) {
+    $displayUrl = strlen($url) > 60 ? substr($url, 0, 57) . '...' : $url;
+    echo "Testing: {$displayUrl}\n";
+    echo "Expected: {$expected}\n";
+
+    try {
+        ImageFormatValidator::validateImageUrl($url);
+        echo "Result: ✅ PASSED - Validation passed\n";
+    } catch (LLMUnsupportedImageFormatException $e) {
+        echo 'Result: ❌ FAILED - ' . $e->getMessage() . "\n";
+        if ($e->getFileExtension()) {
+            echo '  Extension: ' . $e->getFileExtension() . "\n";
+        }
+    } catch (Exception $e) {
+        echo 'Result: ⚠️  ERROR - ' . $e->getMessage() . "\n";
+    }
+    echo "\n";
+}
+
+// Display supported formats
+echo "📋 Supported Image Extensions:\n";
+echo "📋 支持的图片扩展名：\n\n";
+
+$supportedExtensions = ImageFormatValidator::getSupportedExtensions();
+
+echo "支持的扩展名:\n";
+foreach (array_chunk($supportedExtensions, 8) as $chunk) {
+    echo '  ' . implode(', ', array_map(fn ($ext) => ".{$ext}", $chunk)) . "\n";
+}
+echo "\n";
+
+echo "💡 Validation Rules / 验证规则:\n";
+echo "  ✅ 无扩展名的URL → 通过验证\n";
+echo "  ✅ Base64格式(data:...) → 通过验证\n";
+echo "  ✅ 支持的扩展名 → 通过验证\n";
+echo "  ❌ 不支持的扩展名 → 验证失败\n";
+echo "  ✅ 无法解析的URL → 通过验证(不报错)\n"; // this rule describes a pass, so it carries the pass marker
diff --git a/examples/exception/oversize_image_error_example.php b/examples/exception/oversize_image_error_example.php
new file mode 100644
index 0000000..2a9d991
--- /dev/null
+++ b/examples/exception/oversize_image_error_example.php
@@ -0,0 +1,57 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+use GuzzleHttp\Exception\RequestException;
+use GuzzleHttp\Psr7\Request;
+use GuzzleHttp\Psr7\Response;
+use Hyperf\Odin\Exception\LLMException\Api\LLMInvalidRequestException;
+use Hyperf\Odin\Exception\LLMException\ErrorMappingManager;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+// Mock error response
+$errorResponseBody = [
+    'error' => [
+        'code' => 'InvalidParameter.OversizeImage',
+        'message' => 'The request failed because the size of the input image (222 MB) exceeds the limit (10 MB). Request id: mock-request-id-12345',
+        'param' => 'image_url',
+        'type' => 'BadRequest',
+    ],
+];
+
+$httpResponse = new Response(400, [], json_encode($errorResponseBody));
+$httpRequest = new Request('POST', 'https://api.example-llm-provider.com/v3/chat/completions');
+$requestException = new RequestException('Invalid parameter: image_url', $httpRequest, $httpResponse);
+
+try {
+    $errorMappingManager = new ErrorMappingManager();
+    $llmException = $errorMappingManager->mapException($requestException);
+
+    if ($llmException instanceof LLMInvalidRequestException) {
+        echo "✅ Test PASSED - Exception correctly mapped\n";
+        echo 'Error Message: ' . $llmException->getMessage() . "\n\n";
+
+        // Verify provider details are preserved; only `code` is required, the other keys are optional
+        $providerDetails = $llmException->getProviderErrorDetails();
+        if (($providerDetails['code'] ?? null) === 'InvalidParameter.OversizeImage') {
+            echo "✅ Test PASSED - Provider error details preserved\n";
+            echo 'Error Code: ' . $providerDetails['code'] . "\n";
+            echo 'Error Type: ' . ($providerDetails['type'] ?? 'N/A') . "\n";
+            echo 'Error Param: ' . ($providerDetails['param'] ?? 'N/A') . "\n";
+        } else {
+            echo "❌ Test FAILED - Provider error details missing or incomplete\n";
+        }
+    } else {
+        echo '❌ Test FAILED - Wrong exception type: ' . get_class($llmException) . "\n";
+    }
+} catch (Exception $e) {
+    echo '❌ Test FAILED - Exception during processing: ' . $e->getMessage() . "\n";
+}
diff --git a/examples/exception/proxy_error_handling_example.php b/examples/exception/proxy_error_handling_example.php
new file mode 100644
index 0000000..17fc5ff
--- /dev/null
+++ b/examples/exception/proxy_error_handling_example.php
@@ -0,0 +1,165 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+/**
+ * Example: Handling Errors in Proxy Scenarios.
+ *
+ * This example demonstrates how Odin properly handles errors when services
+ * are proxied through multiple layers. The error detection mechanism can
+ * recognize errors from downstream Odin services regardless of the response
+ * format (flat or nested).
+ *
+ * Supported Error Response Formats:
+ * 1. OpenAI format (nested): {"error": {"message": "...", "code": 4002}}
+ * 2. Flat format: {"code": 4002, "message": "..."}
+ *
+ * The system will:
+ * - Extract error messages from response body
+ * - Match Chinese and English error messages
+ * - Properly map errors to specific exception types
+ * - Preserve error details across proxy layers
+ */
+
+use GuzzleHttp\Exception\RequestException;
+use GuzzleHttp\Psr7\Request;
+use GuzzleHttp\Psr7\Response;
+use Hyperf\Odin\Exception\LLMException\LLMErrorHandler;
+use Hyperf\Odin\Exception\LLMException\Model\LLMContextLengthException;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+// Example 1: Handling OpenAI-style nested error response
+echo "Example 1: OpenAI-style nested error response\n";
+echo str_repeat('=', 60) . "\n";
+
+$nestedErrorResponse = json_encode([
+    'error' => [
+        'message' => 'Context length exceeds model limit',
+        'code' => 4002,
+        'request_id' => '838816451070042112',
+    ],
+]);
+
+$request = new Request('POST', 'https://proxy-service.example.com/v1/chat/completions');
+$response = new Response(400, [], $nestedErrorResponse);
+$exception = new RequestException('Client error', $request, $response);
+
+$errorHandler = new LLMErrorHandler();
+$mappedException = $errorHandler->handle($exception);
+
+echo 'Exception Type: ' . get_class($mappedException) . "\n";
+echo 'Error Message: ' . $mappedException->getMessage() . "\n";
+echo 'Error Code: ' . $mappedException->getErrorCode() . "\n";
+
+if ($mappedException instanceof LLMContextLengthException) {
+    echo 'Current Length: ' . ($mappedException->getCurrentLength() ?? 'N/A') . "\n";
+    echo 'Max Length: ' . ($mappedException->getMaxLength() ?? 'N/A') . "\n";
+}
+echo "\n";
+
+// Example 2: Handling flat error response
+echo "Example 2: Flat error response\n";
+echo str_repeat('=', 60) . "\n";
+
+$flatErrorResponse = json_encode([
+    'code' => 4002,
+    'message' => 'Context length exceeds model limit',
+]);
+
+$request = new Request('POST', 'https://proxy-service.example.com/v1/chat/completions');
+$response = new Response(400, [], $flatErrorResponse);
+$exception = new RequestException('Client error', $request, $response);
+
+$mappedException = $errorHandler->handle($exception);
+
+echo 'Exception Type: ' . get_class($mappedException) . "\n";
+echo 'Error Message: ' . $mappedException->getMessage() . "\n";
+echo 'Error Code: ' . $mappedException->getErrorCode() . "\n";
+echo "\n";
+
+// Example 3: Handling error with detailed context information
+echo "Example 3: Error with detailed context information\n";
+echo str_repeat('=', 60) . "\n";
+
+$detailedErrorResponse = json_encode([
+    'error' => [
+        'message' => 'Context length exceeds model limit, current length: 8000, max limit: 4096',
+        'code' => 4002,
+        'type' => 'context_length_exceeded',
+        'request_id' => '838816451070042116',
+    ],
+]);
+
+$request = new Request('POST', 'https://proxy-service.example.com/v1/chat/completions');
+$response = new Response(400, [], $detailedErrorResponse);
+$exception = new RequestException('Downstream error', $request, $response);
+
+$mappedException = $errorHandler->handle($exception);
+
+echo 'Exception Type: ' . get_class($mappedException) . "\n";
+echo 'Error Message: ' . $mappedException->getMessage() . "\n";
+echo 'Error Code: ' . $mappedException->getErrorCode() . "\n";
+
+if ($mappedException instanceof LLMContextLengthException) {
+    echo 'Current Length: ' . ($mappedException->getCurrentLength() ?? 'N/A') . "\n";
+    echo 'Max Length: ' . ($mappedException->getMaxLength() ?? 'N/A') . "\n";
+}
+echo "\n";
+
+// Example 4: Generating error report for logging/debugging
+echo "Example 4: Generating error report\n";
+echo str_repeat('=', 60) . "\n";
+
+$errorReport = $errorHandler->generateErrorReport($mappedException);
+echo "Error Report:\n";
+echo json_encode($errorReport, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE) . "\n";
+echo "\n";
+
+// Example 5: Demonstrating various error messages (English and Chinese)
+echo "Example 5: Various error messages (English and Chinese for backward compatibility)\n";
+echo str_repeat('=', 60) . "\n";
+
+$errorMessages = [
+    ['message' => 'API rate limit exceeded', 'status' => 429],
+    ['message' => 'Content filtered by safety system', 'status' => 400],
+    ['message' => 'Invalid or missing API key', 'status' => 401],
+    // Also test Chinese messages for backward compatibility
+    ['message' => 'API请求频率超出限制', 'status' => 429],
+    ['message' => '内容被系统安全过滤', 'status' => 400],
+    ['message' => 'API密钥无效或已过期', 'status' => 401],
+];
+
+foreach ($errorMessages as $error) {
+    $errorResponse = json_encode([
+        'error' => [
+            'message' => $error['message'],
+            'code' => 4000,
+        ],
+    ]);
+
+    $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+    $response = new Response($error['status'], [], $errorResponse);
+    $exception = new RequestException('Error', $request, $response);
+
+    $mappedException = $errorHandler->handle($exception);
+
+    echo "Message: {$error['message']}\n";
+    echo '  → Mapped to: ' . get_class($mappedException) . "\n";
+    echo '  → Error Code: ' . $mappedException->getErrorCode() . "\n\n";
+}
+
+echo "\nKey Features:\n";
+echo "- Supports both OpenAI-style nested and flat error formats\n";
+echo "- Recognizes English and Chinese error messages (backward compatibility)\n";
+echo "- Extracts detailed error information (lengths, retry times, etc.)\n";
+echo "- Works seamlessly with multiple proxy layers\n";
+echo "- Maintains error context across service boundaries\n";
+echo "- All default error messages are now in English for better internationalization\n";
diff --git a/examples/exception/vision_request_validation_example.php b/examples/exception/vision_request_validation_example.php
new file mode 100644
index 0000000..d3e7e50
--- /dev/null
+++ b/examples/exception/vision_request_validation_example.php
@@ -0,0 +1,102 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+use Hyperf\Odin\Exception\LLMException\Model\LLMUnsupportedImageFormatException;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Message\UserMessageContent;
+use Hyperf\Odin\Utils\VisionMessageValidator;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+echo "=== Simple Vision Request Validation Example ===\n";
+echo "=== 简单视觉理解请求验证示例 ===\n\n";
+
+// Test case 1: Valid vision message with supported image format
+echo "📝 Test Case 1: Valid image format / 有效的图片格式\n";
+try {
+    $validMessage = (new UserMessage('Please analyze this image'))
+        ->addContent(UserMessageContent::text('Please analyze this image'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/image.jpg'));
+
+    VisionMessageValidator::validateUserMessage($validMessage);
+    echo "✅ PASSED - Valid image format accepted\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo '❌ FAILED - ' . $e->getMessage() . "\n";
+}
+echo "\n";
+
+// Test case 2: Invalid vision message with unsupported image format
+echo "📝 Test Case 2: Invalid image format / 无效的图片格式\n";
+try {
+    $invalidMessage = (new UserMessage('Please analyze this document'))
+        ->addContent(UserMessageContent::text('Please analyze this document'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/document.pdf'));
+
+    VisionMessageValidator::validateUserMessage($invalidMessage);
+    echo "❌ FAILED - Should have rejected invalid format\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo "✅ PASSED - Invalid image format correctly rejected\n";
+    echo '  Error: ' . $e->getMessage() . "\n";
+    echo '  Extension: ' . $e->getFileExtension() . "\n";
+}
+echo "\n";
+
+// Test case 3: URL without extension (should pass)
+echo "📝 Test Case 3: URL without extension / 无扩展名URL\n";
+try {
+    $noExtMessage = (new UserMessage('Analyze this image'))
+        ->addContent(UserMessageContent::text('Analyze this image'))
+        ->addContent(UserMessageContent::imageUrl('https://example.com/api/image/123'));
+
+    VisionMessageValidator::validateUserMessage($noExtMessage);
+    echo "✅ PASSED - URL without extension accepted\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo '❌ FAILED - ' . $e->getMessage() . "\n";
+}
+echo "\n";
+
+// Test case 4: Base64 image (should pass)
+echo "📝 Test Case 4: Base64 image / Base64图片\n";
+try {
+    $base64Message = (new UserMessage('Analyze this Base64 image'))
+        ->addContent(UserMessageContent::text('Analyze this Base64 image'))
+        ->addContent(UserMessageContent::imageUrl('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=='));
+
+    VisionMessageValidator::validateUserMessage($base64Message);
+    echo "✅ PASSED - Base64 image accepted\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo '❌ FAILED - ' . $e->getMessage() . "\n";
+}
+echo "\n";
+
+// Test case 5: Text-only message (should pass)
+echo "📝 Test Case 5: Text-only message / 纯文本消息\n";
+try {
+    $textMessage = new UserMessage('This is just a text message without images');
+
+    VisionMessageValidator::validateUserMessage($textMessage);
+    echo "✅ PASSED - Text-only message accepted\n";
+} catch (LLMUnsupportedImageFormatException $e) {
+    echo '❌ FAILED - ' . $e->getMessage() . "\n";
+}
+echo "\n";
+
+echo "💡 Validation Rules / 验证规则:\n";
+echo "  ✅ 无扩展名的URL → 通过验证\n";
+echo "  ✅ Base64格式(data:...) → 通过验证\n";
+echo "  ✅ 支持的扩展名 → 通过验证\n";
+echo "  ❌ 不支持的扩展名 → 验证失败\n";
+echo "  ✅ 纯文本消息 → 通过验证\n\n";
+
+echo "🔧 Integration Tips / 集成建议:\n";
+echo "1. 在处理视觉理解请求前调用验证器\n";
+echo "2. 只有URL带有不支持的扩展名时才会报错\n";
+echo "3. 其他情况（无扩展名、Base64等）都会通过验证\n";
diff --git a/examples/gemini/gemini_tool.php b/examples/gemini/gemini_tool.php
new file mode 100644
index 0000000..2ca24b8
--- /dev/null
+++ b/examples/gemini/gemini_tool.php
@@ -0,0 +1,145 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\ToolMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Model\GeminiModel;
+use Hyperf\Odin\Model\ModelOptions;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+use function Hyperf\Support\env;
+
+ClassLoader::init();
+
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Build the gemini-2.5-flash model: the API key is read from GOOGLE_GEMINI_API_KEY and the endpoint from
+// GOOGLE_GEMINI_BASE_URL, with the public v1beta URL passed to env() as the default value.
+$model = new GeminiModel(
+    'gemini-2.5-flash',
+    [
+        'api_key' => env('GOOGLE_GEMINI_API_KEY'),
+        'base_url' => env('GOOGLE_GEMINI_BASE_URL', 'https://generativelanguage.googleapis.com/v1beta'),
+    ],
+    new Logger(),
+);
+$model->setModelOptions(new ModelOptions([
+    'function_call' => true,
+]));
+$model->setApiRequestOptions(new ApiOptions([
+    // Optional outbound proxy, read from HTTP_CLIENT_PROXY (env() is called without a default here)
+    'proxy' => env('HTTP_CLIENT_PROXY'),
+]));
+
+echo '=== Gemini 工具调用测试 ===' . PHP_EOL;
+echo '支持函数调用功能' . PHP_EOL . PHP_EOL;
+
+// Weather lookup tool offered to the model; the JSON schema below requires a single string argument, "city".
+$weatherTool = new ToolDefinition(
+    name: 'weather',
+    description: '查询指定城市的天气信息。当用户询问天气时，必须使用此工具来获取天气数据。',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'city' => [
+                'type' => 'string',
+                'description' => '要查询天气的城市名称，例如：北京、上海、广州、深圳',
+            ],
+        ],
+        'required' => ['city'],
+    ]),
+    toolHandler: function ($params) {
+        $city = $params['city'] ?? null; // model-generated arguments may omit "city" despite the schema
+        // Canned weather table keyed by city name; any other input falls through to the error payload
+        $weatherData = [
+            '北京' => ['temperature' => '25°C', 'condition' => '晴朗', 'humidity' => '45%'],
+            '上海' => ['temperature' => '28°C', 'condition' => '多云', 'humidity' => '60%'],
+            '广州' => ['temperature' => '30°C', 'condition' => '阵雨', 'humidity' => '75%'],
+            '深圳' => ['temperature' => '29°C', 'condition' => '晴朗', 'humidity' => '65%'],
+        ];
+
+        if (is_string($city) && isset($weatherData[$city])) {
+            return $weatherData[$city];
+        }
+        return ['error' => '没有找到该城市的天气信息'];
+    }
+);
+
+$toolMessages = [
+    new SystemMessage('你是一位有用的天气助手。当用户询问任何城市的天气信息时，你必须使用 weather 工具来查询天气数据，然后根据查询结果回答用户。'),
+    new UserMessage('请查询上海的天气。'),
+];
+
+$start = microtime(true);
+
+// First request: send the conversation together with the weather tool definition
+$response = $model->chat($toolMessages, 0.7, 0, [], [$weatherTool]);
+
+// Print the assistant's text, or a placeholder when the reply carries no content
+$message = $response->getFirstChoice()->getMessage();
+if ($message instanceof AssistantMessage) {
+    echo '响应内容: ' . ($message->getContent() ?? '无内容，可能是工具调用') . PHP_EOL;
+
+    // List any tool calls the model requested
+    $toolCalls = $message->getToolCalls();
+    if (! empty($toolCalls)) {
+        echo '工具调用信息:' . PHP_EOL;
+        foreach ($toolCalls as $toolCall) {
+            echo '- 工具名称: ' . $toolCall->getName() . PHP_EOL;
+            echo '- 参数: ' . json_encode($toolCall->getArguments(), JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) . PHP_EOL;
+        }
+
+        // The tool handler is not invoked here; a fixed result is sent back in its place
+        echo PHP_EOL . '模拟工具执行...' . PHP_EOL;
+
+        // Append the assistant message that requested the tool calls to the history
+        $toolMessages[] = $message;
+
+        // Answer every tool call with its own ToolMessage
+        foreach ($toolCalls as $toolCall) {
+            // Keep the payload as readable UTF-8 (no \uXXXX escapes), matching the argument dump above
+            $toolContent = json_encode([
+                'temperature' => '22°C',
+                'condition' => '晴天',
+                'humidity' => '65%',
+                'wind' => '东北风 3级',
+            ], JSON_UNESCAPED_UNICODE);
+
+            $toolResponseMessage = new ToolMessage($toolContent, $toolCall->getId(), $weatherTool->getName(), $toolCall->getArguments());
+            $toolMessages[] = $toolResponseMessage; // Add tool response
+        }
+
+        // Second request: the history now includes the tool results
+        $continueResponse = $model->chat($toolMessages, 0.7, 0, [], [$weatherTool]);
+        $continueMessage = $continueResponse->getFirstChoice()->getMessage();
+        if ($continueMessage instanceof AssistantMessage) {
+            echo PHP_EOL . '助手最终回复:' . PHP_EOL;
+            echo $continueMessage->getContent() . PHP_EOL;
+        }
+    } else {
+        echo PHP_EOL . '未检测到工具调用' . PHP_EOL;
+    }
+}
+
+echo '耗时' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/gemini/gemini_tool_stream.php b/examples/gemini/gemini_tool_stream.php
new file mode 100644
index 0000000..f5cd313
--- /dev/null
+++ b/examples/gemini/gemini_tool_stream.php
@@ -0,0 +1,186 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Api\Response\ChatCompletionChoice;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\ToolMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Model\GeminiModel;
+use Hyperf\Odin\Model\ModelOptions;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+use function Hyperf\Support\env;
+
+ClassLoader::init();
+
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Build the gemini-2.5-flash model: the API key is read from GOOGLE_GEMINI_API_KEY and the endpoint from
+// GOOGLE_GEMINI_BASE_URL, with the public v1beta URL passed to env() as the default value.
+$model = new GeminiModel(
+    'gemini-2.5-flash',
+    [
+        'api_key' => env('GOOGLE_GEMINI_API_KEY'),
+        'base_url' => env('GOOGLE_GEMINI_BASE_URL', 'https://generativelanguage.googleapis.com/v1beta'),
+    ],
+    new Logger(),
+);
+$model->setModelOptions(new ModelOptions([
+    'function_call' => true,
+]));
+$model->setApiRequestOptions(new ApiOptions([
+    // Optional outbound proxy, read from HTTP_CLIENT_PROXY (env() is called without a default here)
+    'proxy' => env('HTTP_CLIENT_PROXY'),
+]));
+
+echo '=== Gemini 流式工具调用测试 ===' . PHP_EOL;
+echo '支持流式函数调用功能' . PHP_EOL . PHP_EOL;
+
+// Weather lookup tool offered to the model; the JSON schema below requires a single string argument, "city".
+$weatherTool = new ToolDefinition(
+    name: 'weather',
+    description: '查询指定城市的天气信息。当用户询问天气时，必须使用此工具来获取天气数据。',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'city' => [
+                'type' => 'string',
+                'description' => '要查询天气的城市名称，例如：北京、上海、广州、深圳',
+            ],
+        ],
+        'required' => ['city'],
+    ]),
+    toolHandler: function ($params) {
+        $city = $params['city'] ?? null; // model-generated arguments may omit "city" despite the schema
+        // Canned weather table keyed by city name; any other input falls through to the error payload
+        $weatherData = [
+            '北京' => ['temperature' => '25°C', 'condition' => '晴朗', 'humidity' => '45%'],
+            '上海' => ['temperature' => '28°C', 'condition' => '多云', 'humidity' => '60%'],
+            '广州' => ['temperature' => '30°C', 'condition' => '阵雨', 'humidity' => '75%'],
+            '深圳' => ['temperature' => '29°C', 'condition' => '晴朗', 'humidity' => '65%'],
+        ];
+
+        if (is_string($city) && isset($weatherData[$city])) {
+            return $weatherData[$city];
+        }
+        return ['error' => '没有找到该城市的天气信息'];
+    }
+);
+
+$toolMessages = [
+    new SystemMessage('你是一位有用的天气助手。当用户询问任何城市的天气信息时，你必须使用 weather 工具来查询天气数据，然后根据查询结果回答用户。'),
+    new UserMessage('请查询上海的天气。'),
+];
+
+$start = microtime(true);
+
+// First request, streamed: send the conversation together with the weather tool definition
+echo '流式响应:' . PHP_EOL;
+$response = $model->chatStream($toolMessages, 0.7, 0, [], [$weatherTool]);
+
+$streamedContent = '';
+
+// Drain the stream, echoing each non-empty text chunk as it arrives
+/** @var ChatCompletionChoice $choice */
+foreach ($response->getStreamIterator() as $choice) {
+    $message = $choice->getMessage();
+    if ($message instanceof AssistantMessage) {
+        // Keep a copy of the text so it can be printed again if no tool call was made
+        $content = $message->getContent();
+        if ($content !== null && $content !== '') {
+            echo $content;
+            $streamedContent .= $content;
+        }
+    }
+}
+
+echo PHP_EOL . PHP_EOL;
+
+// Read the final message from the response object once the iterator is exhausted.
+// NOTE(review): presumably getChoices() holds the aggregated choices after streaming - confirm in the response class.
+$completeMessage = null;
+$allChoices = $response->getChoices();
+if (! empty($allChoices)) {
+    // Use the last element of the choices array as the complete message
+    $lastChoice = end($allChoices);
+    $completeMessage = $lastChoice->getMessage();
+}
+
+// Handle tool calls found on the complete message, if any
+if ($completeMessage instanceof AssistantMessage) {
+    $toolCalls = $completeMessage->getToolCalls();
+    if (! empty($toolCalls)) {
+        echo '工具调用信息:' . PHP_EOL;
+        foreach ($toolCalls as $toolCall) {
+            echo '- 工具名称: ' . $toolCall->getName() . PHP_EOL;
+            echo '- 参数: ' . json_encode($toolCall->getArguments(), JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) . PHP_EOL;
+        }
+
+        // The tool handler is not invoked here; a fixed result is sent back in its place
+        echo PHP_EOL . '模拟工具执行...' . PHP_EOL;
+
+        // Append the assistant message that requested the tool calls to the history
+        $toolMessages[] = $completeMessage;
+
+        // Answer every tool call with its own ToolMessage
+        foreach ($toolCalls as $toolCall) {
+            // Keep the payload as readable UTF-8 (no \uXXXX escapes), matching the argument dump above
+            $toolContent = json_encode([
+                'temperature' => '22°C',
+                'condition' => '晴天',
+                'humidity' => '65%',
+                'wind' => '东北风 3级',
+            ], JSON_UNESCAPED_UNICODE);
+
+            $toolResponseMessage = new ToolMessage($toolContent, $toolCall->getId(), $weatherTool->getName(), $toolCall->getArguments());
+            $toolMessages[] = $toolResponseMessage; // Add tool response
+        }
+
+        // Second request, also streamed: the history now includes the tool results
+        echo PHP_EOL . '助手最终回复（流式）:' . PHP_EOL;
+        $continueResponse = $model->chatStream($toolMessages, 0.7, 0, [], [$weatherTool]);
+
+        $finalContent = '';
+        /** @var ChatCompletionChoice $choice */
+        foreach ($continueResponse->getStreamIterator() as $choice) {
+            $message = $choice->getMessage();
+            if ($message instanceof AssistantMessage) {
+                $content = $message->getContent();
+                if ($content !== null && $content !== '') {
+                    echo $content;
+                    $finalContent .= $content;
+                }
+            }
+        }
+        echo PHP_EOL;
+    } else {
+        echo PHP_EOL . '未检测到工具调用' . PHP_EOL;
+        if (! empty($streamedContent)) {
+            echo '响应内容: ' . $streamedContent . PHP_EOL;
+        }
+    }
+} else {
+    echo PHP_EOL . '响应不是 AssistantMessage 类型' . PHP_EOL;
+}
+
+echo PHP_EOL . '耗时' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/mapper/long_conversation.php b/examples/mapper/long_conversation.php
new file mode 100644
index 0000000..ea601ee
--- /dev/null
+++ b/examples/mapper/long_conversation.php
@@ -0,0 +1,469 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Agent\Tool\ToolUseAgent;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Memory\MemoryManager;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\ModelMapper;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create the logger
+$logger = new Logger();
+
+// Resolve the model through ModelMapper, using the id from MODEL_MAPPER_TEST_MODEL_ID (model settings live in the config files)
+$modelId = \Hyperf\Support\env('MODEL_MAPPER_TEST_MODEL_ID', '');
+$modelMapper = $container->get(ModelMapper::class);
+$model = $modelMapper->getModel($modelId);
+
+// Define the system message: a realistic, detailed system prompt, long enough to reach the prompt-cache threshold
+$systemPrompt = '你是一位资深的AI技术顾问和问题解决专家，拥有超过10年的软件开发和人工智能领域经验。你的专业领域包括但不限于：机器学习、深度学习、自然语言处理、计算机视觉、软件架构设计、系统优化、性能调优、代码审查、技术选型、团队协作和项目管理。
+
+## 核心能力
+1. **技术咨询**：能够深入分析技术问题，提供多角度的解决方案，并评估各种方案的优缺点。
+2. **代码审查**：具备敏锐的代码嗅觉，能够识别潜在的性能问题、安全漏洞和设计缺陷。
+3. **架构设计**：擅长设计可扩展、可维护、高性能的系统架构，熟悉微服务、分布式系统、云原生架构等。
+4. **问题诊断**：能够快速定位复杂技术问题的根本原因，并提供系统性的解决方案。
+5. **知识传递**：善于用通俗易懂的语言解释复杂的技术概念，帮助团队成员提升技术水平。
+
+## 工作原则
+- **准确性优先**：确保提供的信息准确可靠，对于不确定的内容会明确说明。
+- **深入思考**：在回答问题前会充分思考，考虑各种可能性和边界情况。
+- **实用导向**：提供的建议和方案都基于实际项目经验，具有可操作性。
+- **持续学习**：保持对新技术和行业趋势的关注，不断更新知识库。
+- **用户友好**：用清晰、结构化的方式组织回答，便于理解和执行。
+
+## 回答风格
+- 使用结构化的格式（如列表、代码块、表格）来组织信息。
+- 提供具体的代码示例和最佳实践。
+- 解释技术决策背后的原因和考量。
+- 在适当的时候提供相关的参考资料和延伸阅读。
+- 对于复杂问题，会分步骤详细说明。
+
+## 专业领域深度
+在机器学习领域，你熟悉监督学习、无监督学习、强化学习等各类算法，了解神经网络、决策树、支持向量机、聚类算法等的原理和应用场景。在深度学习方面，你精通卷积神经网络、循环神经网络、Transformer架构、注意力机制等前沿技术。
+
+在软件工程方面，你熟悉敏捷开发、DevOps、CI/CD、容器化、Kubernetes、服务网格等现代软件开发实践。你了解各种编程语言的特性和适用场景，包括Python、Java、Go、Rust、JavaScript等。
+
+在系统设计方面，你能够设计高可用、高并发、低延迟的分布式系统，熟悉负载均衡、缓存策略、数据库优化、消息队列、分布式事务等技术。
+
+请始终以专业、负责、友好的态度回答用户的问题，帮助用户解决实际的技术挑战。当需要使用工具时，请明确指出工具的作用和使用步骤。';
+
+// Create the memory manager and register the system prompt as its system message
+$memory = new MemoryManager();
+$memory->addSystemMessage(new SystemMessage($systemPrompt));
+
+// Tool definition: code analyzer (mock implementation that returns canned findings)
+$codeAnalyzerTool = new ToolDefinition(
+    name: 'code_analyzer',
+    description: '分析代码质量，检测潜在的性能问题、安全漏洞和设计缺陷',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'code' => [
+                'type' => 'string',
+                'description' => '要分析的代码片段',
+            ],
+            'language' => [
+                'type' => 'string',
+                'enum' => ['python', 'java', 'javascript', 'php', 'go', 'rust'],
+                'description' => '编程语言',
+            ],
+            'analysis_type' => [
+                'type' => 'string',
+                'enum' => ['performance', 'security', 'design', 'all'],
+                'description' => '分析类型：性能、安全、设计或全部',
+                'default' => 'all',
+            ],
+        ],
+        'required' => ['code', 'language'],
+    ]),
+    toolHandler: function ($params) {
+        // The "code" argument is required by the schema but is not inspected by this mock handler.
+        $language = $params['language'];
+        $analysisType = $params['analysis_type'] ?? 'all';
+
+        // Build the simulated findings; each category is included when requested or when the type is "all"
+        $issues = [];
+
+        if ($analysisType === 'all' || $analysisType === 'performance') {
+            $issues[] = [
+                'type' => 'performance',
+                'severity' => 'medium',
+                'message' => '检测到可能的性能问题：循环中频繁字符串拼接',
+                'suggestion' => '考虑使用 StringBuilder 或类似机制优化',
+            ];
+        }
+
+        if ($analysisType === 'all' || $analysisType === 'security') {
+            $issues[] = [
+                'type' => 'security',
+                'severity' => 'high',
+                'message' => '检测到潜在的安全漏洞：SQL注入风险',
+                'suggestion' => '使用参数化查询或ORM框架',
+            ];
+        }
+
+        if ($analysisType === 'all' || $analysisType === 'design') {
+            $issues[] = [
+                'type' => 'design',
+                'severity' => 'low',
+                'message' => '设计建议：考虑使用设计模式提高代码可维护性',
+                'suggestion' => '可以引入策略模式或工厂模式',
+            ];
+        }
+
+        return [
+            'language' => $language,
+            'analysis_type' => $analysisType,
+            'issues_found' => count($issues),
+            'issues' => $issues,
+            'score' => 75,
+        ];
+    }
+);
+
+// Tool definition: technology selection advisor (mock implementation backed by a fixed lookup table)
+$techSelectionTool = new ToolDefinition(
+    name: 'tech_selection',
+    description: '根据项目需求提供技术选型建议，包括框架、库、工具等的推荐',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'project_type' => [
+                'type' => 'string',
+                'enum' => ['web', 'mobile', 'api', 'microservice', 'data_processing', 'ml'],
+                'description' => '项目类型',
+            ],
+            'requirements' => [
+                'type' => 'string',
+                'description' => '项目需求和约束条件，如性能要求、团队规模、预算等',
+            ],
+            'preferred_language' => [
+                'type' => 'string',
+                'enum' => ['python', 'java', 'javascript', 'php', 'go', 'rust', 'any'],
+                'description' => '首选编程语言，或 any 表示不限',
+                'default' => 'any',
+            ],
+        ],
+        'required' => ['project_type', 'requirements'],
+    ]),
+    toolHandler: function ($params) {
+        $projectType = $params['project_type'];
+        $requirements = $params['requirements'];
+        // "preferred_language" is declared in the schema but does not influence this mock's output.
+
+        // Simulated recommendations, keyed by project type; types without an entry get the generic fallback below
+        $recommendations = [
+            'web' => [
+                'framework' => 'React/Vue.js',
+                'backend' => 'Node.js/Express 或 Python/Django',
+                'database' => 'PostgreSQL + Redis',
+                'deployment' => 'Docker + Kubernetes',
+            ],
+            'api' => [
+                'framework' => 'FastAPI (Python) 或 Spring Boot (Java)',
+                'database' => 'PostgreSQL',
+                'cache' => 'Redis',
+                'message_queue' => 'RabbitMQ 或 Kafka',
+            ],
+            'microservice' => [
+                'framework' => 'Go/Gin 或 Java/Spring Cloud',
+                'service_mesh' => 'Istio',
+                'registry' => 'Consul 或 Eureka',
+                'gateway' => 'Kong 或 Zuul',
+            ],
+        ];
+
+        $baseRecommendations = $recommendations[$projectType] ?? [
+            'framework' => '根据具体需求选择',
+            'database' => 'PostgreSQL',
+        ];
+
+        return [
+            'project_type' => $projectType,
+            'recommendations' => $baseRecommendations,
+            'reasoning' => "基于项目类型 {$projectType} 和需求 {$requirements} 的推荐",
+            'alternatives' => [
+                '如果团队熟悉 Java，可以考虑 Spring Boot',
+                '如果追求极致性能，可以考虑 Go 或 Rust',
+            ],
+        ];
+    }
+);
+
+// Tool definition: performance optimization advisor (mock implementation backed by a fixed lookup table)
+$performanceOptimizerTool = new ToolDefinition(
+    name: 'performance_optimizer',
+    description: '提供系统性能优化建议，包括数据库优化、缓存策略、代码优化等',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'component' => [
+                'type' => 'string',
+                'enum' => ['database', 'cache', 'api', 'frontend', 'infrastructure'],
+                'description' => '需要优化的组件',
+            ],
+            'current_metrics' => [
+                'type' => 'string',
+                'description' => '当前性能指标，如响应时间、吞吐量、错误率等',
+            ],
+            'target_metrics' => [
+                'type' => 'string',
+                'description' => '目标性能指标',
+            ],
+        ],
+        'required' => ['component', 'current_metrics'],
+    ]),
+    toolHandler: function ($params) {
+        $component = $params['component'];
+        $currentMetrics = $params['current_metrics'];
+        $targetMetrics = $params['target_metrics'] ?? '';
+
+        // Simulated advice, keyed by component; components without an entry get a generic one-item list
+        $optimizations = [
+            'database' => [
+                '添加适当的索引',
+                '优化查询语句，避免全表扫描',
+                '考虑使用读写分离',
+                '实施连接池管理',
+                '定期进行数据库维护和清理',
+            ],
+            'cache' => [
+                '实施多级缓存策略（L1/L2/L3）',
+                '设置合理的缓存过期时间',
+                '使用缓存预热机制',
+                '监控缓存命中率',
+                '考虑使用分布式缓存',
+            ],
+            'api' => [
+                '实施请求限流和熔断',
+                '使用异步处理非关键路径',
+                '优化序列化/反序列化',
+                '实施API版本控制',
+                '使用CDN加速静态资源',
+            ],
+        ];
+
+        return [
+            'component' => $component,
+            'current_metrics' => $currentMetrics,
+            'target_metrics' => $targetMetrics,
+            'optimizations' => $optimizations[$component] ?? ['根据具体情况分析'],
+            'priority' => 'high',
+            'estimated_impact' => '预计可提升性能 30-50%',
+        ];
+    }
+);
+
+// Tool definition: architecture evaluator (mock implementation that returns fixed scores and advice)
+$architectureEvaluatorTool = new ToolDefinition(
+    name: 'architecture_evaluator',
+    description: '评估系统架构设计，提供可扩展性、可维护性、可靠性等方面的建议',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'architecture_type' => [
+                'type' => 'string',
+                'enum' => ['monolith', 'microservices', 'serverless', 'event_driven', 'layered'],
+                'description' => '架构类型',
+            ],
+            'scale_requirement' => [
+                'type' => 'string',
+                'description' => '规模要求，如用户量、并发量、数据量等',
+            ],
+            'team_size' => [
+                'type' => 'integer',
+                'description' => '团队规模',
+            ],
+        ],
+        'required' => ['architecture_type', 'scale_requirement'],
+    ]),
+    toolHandler: function ($params) {
+        $architectureType = $params['architecture_type'];
+        // "scale_requirement" and "team_size" are declared in the schema but are not used by this mock:
+        // the scores, recommendations and risks returned below are constants.
+
+        // Simulated evaluation result; only the architecture type is echoed back from the input
+        return [
+            'architecture_type' => $architectureType,
+            'scalability_score' => 85,
+            'maintainability_score' => 80,
+            'reliability_score' => 90,
+            'cost_score' => 75,
+            'recommendations' => [
+                '考虑引入服务网格以提高可观测性',
+                '实施完善的监控和告警机制',
+                '建立清晰的API契约和版本管理策略',
+                '考虑使用事件驱动架构提高解耦度',
+            ],
+            'risks' => [
+                '分布式事务管理复杂度较高',
+                '需要完善的DevOps基础设施',
+                '团队需要具备微服务开发经验',
+            ],
+        ];
+    }
+);
+
+// Create the agent with all four tools registered, each keyed by its own tool name
+$agent = new ToolUseAgent(
+    model: $model,
+    memory: $memory,
+    tools: [
+        $codeAnalyzerTool->getName() => $codeAnalyzerTool,
+        $techSelectionTool->getName() => $techSelectionTool,
+        $performanceOptimizerTool->getName() => $performanceOptimizerTool,
+        $architectureEvaluatorTool->getName() => $architectureEvaluatorTool,
+    ],
+    temperature: 0.6,
+    logger: $logger
+);
+
+// Round 1 - first request of the conversation (labelled "create cache" in the output)
+echo "===== 第一轮对话（创建缓存）=====\n";
+$start1 = microtime(true);
+
+$userMessage1 = new UserMessage('我需要构建一个高并发的API服务，预计日活用户100万，请帮我分析一下技术选型，并评估一下微服务架构是否适合。');
+$response1 = $agent->chat($userMessage1);
+$duration1 = microtime(true) - $start1;
+
+$message1 = $response1->getFirstChoice()->getMessage();
+if ($message1 instanceof AssistantMessage) {
+    echo '助手回复: ' . mb_substr($message1->getContent() ?? '', 0, 300) . "...\n"; // preview cut on characters, not bytes
+}
+$usage1 = $response1->getUsage();
+$inputTokens1 = $usage1?->getPromptTokens() ?? 0;
+$outputTokens1 = $usage1?->getCompletionTokens() ?? 0;
+$totalTokens1 = $usage1?->getTotalTokens() ?? 0;
+$promptDetails1 = $usage1?->getPromptTokensDetails() ?? [];
+
+echo "耗时: {$duration1} 秒\n";
+echo "Input Tokens: {$inputTokens1}, Output Tokens: {$outputTokens1}, Total Tokens: {$totalTokens1}\n\n";
+
+// Round 2 - follow-up in the same conversation (labelled "use cache" in the output)
+echo "===== 第二轮对话（使用缓存）=====\n";
+$start2 = microtime(true);
+
+$userMessage2 = new UserMessage('基于刚才的建议，如果选择微服务架构，那么数据库应该如何设计？请分析一下性能优化方案。');
+$response2 = $agent->chat($userMessage2);
+$duration2 = microtime(true) - $start2;
+
+$message2 = $response2->getFirstChoice()->getMessage();
+if ($message2 instanceof AssistantMessage) {
+    echo '助手回复: ' . mb_substr($message2->getContent() ?? '', 0, 300) . "...\n"; // preview cut on characters, not bytes
+}
+
+$usage2 = $response2->getUsage();
+$inputTokens2 = $usage2?->getPromptTokens() ?? 0;
+$outputTokens2 = $usage2?->getCompletionTokens() ?? 0;
+$totalTokens2 = $usage2?->getTotalTokens() ?? 0;
+$promptDetails2 = $usage2?->getPromptTokensDetails() ?? [];
+
+echo "耗时: {$duration2} 秒\n";
+echo "Input Tokens: {$inputTokens2}, Output Tokens: {$outputTokens2}, Total Tokens: {$totalTokens2}\n\n";
+
+// Round 3 - another follow-up in the same conversation (labelled "keep using cache" in the output)
+echo "===== 第三轮对话（继续使用缓存）=====\n";
+$start3 = microtime(true);
+
+$userMessage3 = new UserMessage('很好，现在请帮我分析一下这段代码的性能问题：function processData(data) { let result = ""; for (let i = 0; i < data.length; i++) { result += data[i]; } return result; }');
+$response3 = $agent->chat($userMessage3);
+$duration3 = microtime(true) - $start3;
+
+$message3 = $response3->getFirstChoice()->getMessage();
+if ($message3 instanceof AssistantMessage) {
+    echo '助手回复: ' . mb_substr($message3->getContent() ?? '', 0, 300) . "...\n"; // preview cut on characters, not bytes
+}
+
+$usage3 = $response3->getUsage();
+$inputTokens3 = $usage3?->getPromptTokens() ?? 0;
+$outputTokens3 = $usage3?->getCompletionTokens() ?? 0;
+$totalTokens3 = $usage3?->getTotalTokens() ?? 0;
+$promptDetails3 = $usage3?->getPromptTokensDetails() ?? [];
+
+echo "耗时: {$duration3} 秒\n";
+echo "Input Tokens: {$inputTokens3}, Output Tokens: {$outputTokens3}, Total Tokens: {$totalTokens3}\n\n";
+
+// Summary: duration and prompt-token count for each of the three rounds
+echo "===== 缓存效果总结 =====\n";
+echo "第一轮（创建缓存）: {$duration1} 秒, Input Tokens: {$inputTokens1}\n";
+echo "第二轮（使用缓存）: {$duration2} 秒, Input Tokens: {$inputTokens2}\n";
+echo "第三轮（使用缓存）: {$duration3} 秒, Input Tokens: {$inputTokens3}\n\n";
+
+// Cache-hit analysis for rounds 2 and 3
+echo "===== 缓存命中分析 =====\n";
+
+// Look for an explicit cache-read counter in the prompt token details (two key names are tried); null when neither exists
+$cacheReadTokens2 = $promptDetails2['cache_read_input_tokens'] ?? $promptDetails2['cached_tokens'] ?? null;
+$cacheReadTokens3 = $promptDetails3['cache_read_input_tokens'] ?? $promptDetails3['cached_tokens'] ?? null;
+
+if ($cacheReadTokens2 !== null || $cacheReadTokens3 !== null) {
+    // At least one round reported a counter: a positive value is a hit, anything else is printed as a miss
+    if ($cacheReadTokens2 !== null && $cacheReadTokens2 > 0) {
+        echo "第二轮缓存命中: {$cacheReadTokens2} tokens 从缓存读取\n";
+    } else {
+        echo "第二轮缓存命中: 未命中\n";
+    }
+
+    if ($cacheReadTokens3 !== null && $cacheReadTokens3 > 0) {
+        echo "第三轮缓存命中: {$cacheReadTokens3} tokens 从缓存读取\n";
+    } else {
+        echo "第三轮缓存命中: 未命中\n";
+    }
+} else {
+    // Fallback heuristic when no counter was reported: compare each round's prompt tokens with round 1.
+    // NOTE(review): the history grows every round, so prompt tokens may never drop below round 1 - confirm this is useful.
+    if ($inputTokens1 > 0) {
+        $reduction2 = (($inputTokens1 - $inputTokens2) / $inputTokens1) * 100;
+        $reduction3 = (($inputTokens1 - $inputTokens3) / $inputTokens1) * 100;
+
+        if ($inputTokens2 < $inputTokens1 * 0.8) {
+            // A reduction of more than 20% relative to round 1 is treated as a cache hit
+            $savedTokens2 = $inputTokens1 - $inputTokens2;
+            echo "第二轮缓存命中: 通过 Input Tokens 减少判断，节省了 {$savedTokens2} tokens (" . number_format($reduction2, 2) . "%)\n";
+        } else {
+            echo '第二轮缓存命中: 未命中（Input Tokens 变化: ' . number_format($reduction2, 2) . "%）\n";
+        }
+
+        if ($inputTokens3 < $inputTokens1 * 0.8) {
+            $savedTokens3 = $inputTokens1 - $inputTokens3;
+            echo "第三轮缓存命中: 通过 Input Tokens 减少判断，节省了 {$savedTokens3} tokens (" . number_format($reduction3, 2) . "%)\n";
+        } else {
+            echo '第三轮缓存命中: 未命中（Input Tokens 变化: ' . number_format($reduction3, 2) . "%）\n";
+        }
+    }
+}
+
+echo "\n";
+
+// Timing comparison: percentage by which rounds 2 and 3 were faster than round 1 (negative when slower)
+if ($duration1 > 0) {
+    $speedup2 = (($duration1 - $duration2) / $duration1) * 100;
+    $speedup3 = (($duration1 - $duration3) / $duration1) * 100;
+    echo "===== 性能对比 =====\n";
+    echo '第二轮相比第一轮加速: ' . number_format($speedup2, 2) . "%\n";
+    echo '第三轮相比第一轮加速: ' . number_format($speedup3, 2) . "%\n";
+}
diff --git a/examples/mapper/long_conversation_stream.php b/examples/mapper/long_conversation_stream.php
new file mode 100644
index 0000000..3c02f85
--- /dev/null
+++ b/examples/mapper/long_conversation_stream.php
@@ -0,0 +1,522 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Agent\Tool\ToolUseAgent;
+use Hyperf\Odin\Api\Response\ChatCompletionChoice;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Memory\MemoryManager;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\ModelMapper;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create the logger
+$logger = new Logger();
+
+// Initialize the model through ModelMapper (the model itself is configured in the config file)
+$modelId = \Hyperf\Support\env('MODEL_MAPPER_TEST_MODEL_ID', '');
+$modelMapper = $container->get(ModelMapper::class);
+$model = $modelMapper->getModel($modelId);
+
+// 定义系统消息（真实、详细的系统提示词，确保达到缓存阈值）
+$systemPrompt = '你是一位资深的AI技术顾问和问题解决专家，拥有超过10年的软件开发和人工智能领域经验。你的专业领域包括但不限于：机器学习、深度学习、自然语言处理、计算机视觉、软件架构设计、系统优化、性能调优、代码审查、技术选型、团队协作和项目管理。
+
+## 核心能力
+1. **技术咨询**：能够深入分析技术问题，提供多角度的解决方案，并评估各种方案的优缺点。
+2. **代码审查**：具备敏锐的代码嗅觉，能够识别潜在的性能问题、安全漏洞和设计缺陷。
+3. **架构设计**：擅长设计可扩展、可维护、高性能的系统架构，熟悉微服务、分布式系统、云原生架构等。
+4. **问题诊断**：能够快速定位复杂技术问题的根本原因，并提供系统性的解决方案。
+5. **知识传递**：善于用通俗易懂的语言解释复杂的技术概念，帮助团队成员提升技术水平。
+
+## 工作原则
+- **准确性优先**：确保提供的信息准确可靠，对于不确定的内容会明确说明。
+- **深入思考**：在回答问题前会充分思考，考虑各种可能性和边界情况。
+- **实用导向**：提供的建议和方案都基于实际项目经验，具有可操作性。
+- **持续学习**：保持对新技术和行业趋势的关注，不断更新知识库。
+- **用户友好**：用清晰、结构化的方式组织回答，便于理解和执行。
+
+## 回答风格
+- 使用结构化的格式（如列表、代码块、表格）来组织信息。
+- 提供具体的代码示例和最佳实践。
+- 解释技术决策背后的原因和考量。
+- 在适当的时候提供相关的参考资料和延伸阅读。
+- 对于复杂问题，会分步骤详细说明。
+
+## 专业领域深度
+在机器学习领域，你熟悉监督学习、无监督学习、强化学习等各类算法，了解神经网络、决策树、支持向量机、聚类算法等的原理和应用场景。在深度学习方面，你精通卷积神经网络、循环神经网络、Transformer架构、注意力机制等前沿技术。
+
+在软件工程方面，你熟悉敏捷开发、DevOps、CI/CD、容器化、Kubernetes、服务网格等现代软件开发实践。你了解各种编程语言的特性和适用场景，包括Python、Java、Go、Rust、JavaScript等。
+
+在系统设计方面，你能够设计高可用、高并发、低延迟的分布式系统，熟悉负载均衡、缓存策略、数据库优化、消息队列、分布式事务等技术。
+
+请始终以专业、负责、友好的态度回答用户的问题，帮助用户解决实际的技术挑战。当需要使用工具时，请明确指出工具的作用和使用步骤。';
+
+// 初始化内存管理器
+$memory = new MemoryManager();
+$memory->addSystemMessage(new SystemMessage($systemPrompt));
+
+// 定义工具 - 代码分析工具
+$codeAnalyzerTool = new ToolDefinition(
+    name: 'code_analyzer',
+    description: '分析代码质量，检测潜在的性能问题、安全漏洞和设计缺陷',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'code' => [
+                'type' => 'string',
+                'description' => '要分析的代码片段',
+            ],
+            'language' => [
+                'type' => 'string',
+                'enum' => ['python', 'java', 'javascript', 'php', 'go', 'rust'],
+                'description' => '编程语言',
+            ],
+            'analysis_type' => [
+                'type' => 'string',
+                'enum' => ['performance', 'security', 'design', 'all'],
+                'description' => '分析类型：性能、安全、设计或全部',
+                'default' => 'all',
+            ],
+        ],
+        'required' => ['code', 'language'],
+    ]),
+    toolHandler: function ($params) {
+        // Simulated analyzer: the submitted `code` is never inspected, so it is not read here.
+        $language = $params['language'];
+        $analysisType = $params['analysis_type'] ?? 'all';
+
+        // Canned findings, keyed by the analysis type that selects each of them.
+        $catalog = [
+            'performance' => [
+                'type' => 'performance',
+                'severity' => 'medium',
+                'message' => '检测到可能的性能问题：循环中频繁字符串拼接',
+                'suggestion' => '考虑使用 StringBuilder 或类似机制优化',
+            ],
+            'security' => [
+                'type' => 'security',
+                'severity' => 'high',
+                'message' => '检测到潜在的安全漏洞：SQL注入风险',
+                'suggestion' => '使用参数化查询或ORM框架',
+            ],
+            'design' => [
+                'type' => 'design',
+                'severity' => 'low',
+                'message' => '设计建议：考虑使用设计模式提高代码可维护性',
+                'suggestion' => '可以引入策略模式或工厂模式',
+            ],
+        ];
+
+        // 'all' keeps every finding; a specific type keeps only its own entry.
+        $issues = [];
+        foreach ($catalog as $type => $issue) {
+            if ($analysisType === 'all' || $analysisType === $type) {
+                $issues[] = $issue;
+            }
+        }
+
+        return [
+            'language' => $language,
+            'analysis_type' => $analysisType,
+            'issues_found' => count($issues),
+            'issues' => $issues,
+            'score' => 75,
+        ];
+    }
+);
+
+// 定义工具 - 技术选型建议工具
+$techSelectionTool = new ToolDefinition(
+    name: 'tech_selection',
+    description: '根据项目需求提供技术选型建议，包括框架、库、工具等的推荐',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'project_type' => [
+                'type' => 'string',
+                'enum' => ['web', 'mobile', 'api', 'microservice', 'data_processing', 'ml'],
+                'description' => '项目类型',
+            ],
+            'requirements' => [
+                'type' => 'string',
+                'description' => '项目需求和约束条件，如性能要求、团队规模、预算等',
+            ],
+            'preferred_language' => [
+                'type' => 'string',
+                'enum' => ['python', 'java', 'javascript', 'php', 'go', 'rust', 'any'],
+                'description' => '首选编程语言，或 any 表示不限',
+                'default' => 'any',
+            ],
+        ],
+        'required' => ['project_type', 'requirements'],
+    ]),
+    toolHandler: function ($params) {
+        // preferred_language is accepted by the schema but does not influence this simulated result.
+        $projectType = $params['project_type'];
+        $requirements = $params['requirements'];
+
+        // Simulated recommendations, keyed by project type.
+        $recommendations = [
+            'web' => [
+                'framework' => 'React/Vue.js',
+                'backend' => 'Node.js/Express 或 Python/Django',
+                'database' => 'PostgreSQL + Redis',
+                'deployment' => 'Docker + Kubernetes',
+            ],
+            'api' => [
+                'framework' => 'FastAPI (Python) 或 Spring Boot (Java)',
+                'database' => 'PostgreSQL',
+                'cache' => 'Redis',
+                'message_queue' => 'RabbitMQ 或 Kafka',
+            ],
+            'microservice' => [
+                'framework' => 'Go/Gin 或 Java/Spring Cloud',
+                'service_mesh' => 'Istio',
+                'registry' => 'Consul 或 Eureka',
+                'gateway' => 'Kong 或 Zuul',
+            ],
+        ];
+
+        $baseRecommendations = $recommendations[$projectType] ?? [
+            'framework' => '根据具体需求选择',
+            'database' => 'PostgreSQL',
+        ];
+
+        return [
+            'project_type' => $projectType,
+            'recommendations' => $baseRecommendations,
+            'reasoning' => "基于项目类型 {$projectType} 和需求 {$requirements} 的推荐",
+            'alternatives' => [
+                '如果团队熟悉 Java，可以考虑 Spring Boot',
+                '如果追求极致性能，可以考虑 Go 或 Rust',
+            ],
+        ];
+    }
+);
+
+// 定义工具 - 性能优化建议工具
+$performanceOptimizerTool = new ToolDefinition(
+    name: 'performance_optimizer',
+    description: '提供系统性能优化建议，包括数据库优化、缓存策略、代码优化等',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'component' => [
+                'type' => 'string',
+                'enum' => ['database', 'cache', 'api', 'frontend', 'infrastructure'],
+                'description' => '需要优化的组件',
+            ],
+            'current_metrics' => [
+                'type' => 'string',
+                'description' => '当前性能指标，如响应时间、吞吐量、错误率等',
+            ],
+            'target_metrics' => [
+                'type' => 'string',
+                'description' => '目标性能指标',
+            ],
+        ],
+        'required' => ['component', 'current_metrics'],
+    ]),
+    toolHandler: function ($params) {
+        // Simulated optimizer: canned advice lists, keyed by the component they apply to.
+        $adviceByComponent = [
+            'database' => [
+                '添加适当的索引',
+                '优化查询语句，避免全表扫描',
+                '考虑使用读写分离',
+                '实施连接池管理',
+                '定期进行数据库维护和清理',
+            ],
+            'cache' => [
+                '实施多级缓存策略（L1/L2/L3）',
+                '设置合理的缓存过期时间',
+                '使用缓存预热机制',
+                '监控缓存命中率',
+                '考虑使用分布式缓存',
+            ],
+            'api' => [
+                '实施请求限流和熔断',
+                '使用异步处理非关键路径',
+                '优化序列化/反序列化',
+                '实施API版本控制',
+                '使用CDN加速静态资源',
+            ],
+        ];
+
+        $component = $params['component'];
+        // Components without a canned list fall back to a single generic entry.
+        $advice = $adviceByComponent[$component] ?? ['根据具体情况分析'];
+
+        return [
+            'component' => $component,
+            'current_metrics' => $params['current_metrics'],
+            'target_metrics' => $params['target_metrics'] ?? '',
+            'optimizations' => $advice,
+            'priority' => 'high',
+            'estimated_impact' => '预计可提升性能 30-50%',
+        ];
+    }
+);
+
+// 定义工具 - 架构评估工具
+$architectureEvaluatorTool = new ToolDefinition(
+    name: 'architecture_evaluator',
+    description: '评估系统架构设计，提供可扩展性、可维护性、可靠性等方面的建议',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'architecture_type' => [
+                'type' => 'string',
+                'enum' => ['monolith', 'microservices', 'serverless', 'event_driven', 'layered'],
+                'description' => '架构类型',
+            ],
+            'scale_requirement' => [
+                'type' => 'string',
+                'description' => '规模要求，如用户量、并发量、数据量等',
+            ],
+            'team_size' => [
+                'type' => 'integer',
+                'description' => '团队规模',
+            ],
+        ],
+        'required' => ['architecture_type', 'scale_requirement'],
+    ]),
+    toolHandler: function ($params) {
+        // Simulated evaluation: the scores, recommendations and risks below are fixed sample data.
+        // scale_requirement and team_size are accepted by the schema but do not affect the result,
+        // so they are intentionally not read here.
+        $architectureType = $params['architecture_type'];
+
+        return [
+            'architecture_type' => $architectureType,
+            'scalability_score' => 85,
+            'maintainability_score' => 80,
+            'reliability_score' => 90,
+            'cost_score' => 75,
+            'recommendations' => [
+                '考虑引入服务网格以提高可观测性',
+                '实施完善的监控和告警机制',
+                '建立清晰的API契约和版本管理策略',
+                '考虑使用事件驱动架构提高解耦度',
+            ],
+            'risks' => [
+                '分布式事务管理复杂度较高',
+                '需要完善的DevOps基础设施',
+                '团队需要具备微服务开发经验',
+            ],
+        ];
+    }
+);
+
+// Create the agent with all four tools, each registered under its own tool name
+$agent = new ToolUseAgent(
+    model: $model,
+    memory: $memory,
+    tools: [
+        $codeAnalyzerTool->getName() => $codeAnalyzerTool,
+        $techSelectionTool->getName() => $techSelectionTool,
+        $performanceOptimizerTool->getName() => $performanceOptimizerTool,
+        $architectureEvaluatorTool->getName() => $architectureEvaluatorTool,
+    ],
+    temperature: 0.6,
+    logger: $logger
+);
+
+// Round 1 - creates the cache (streamed): echo each content delta as it arrives and time the whole round
+echo "===== 第一轮对话（创建缓存 - 流式）=====\n";
+$start1 = microtime(true);
+
+$userMessage1 = new UserMessage('我需要构建一个高并发的API服务，预计日活用户100万，请帮我分析一下技术选型，并评估一下微服务架构是否适合。');
+$response1 = $agent->chatStreamed($userMessage1);
+
+$content1 = '';
+/** @var ChatCompletionChoice $choice */
+foreach ($response1 as $choice) {
+    $delta = $choice->getMessage()->getContent();
+    if ($delta !== null) {
+        echo $delta;
+        $content1 .= $delta;
+    }
+}
+$duration1 = microtime(true) - $start1;
+
+// After the stream has finished, try to read usage info; every counter falls back to 0 when it is unavailable
+$usage1 = null;
+if (method_exists($response1, 'getUsage')) {
+    $usage1 = $response1->getUsage();
+}
+$inputTokens1 = $usage1?->getPromptTokens() ?? 0;
+$outputTokens1 = $usage1?->getCompletionTokens() ?? 0;
+$totalTokens1 = $usage1?->getTotalTokens() ?? 0;
+$promptDetails1 = $usage1?->getPromptTokensDetails() ?? [];
+
+echo "\n耗时: {$duration1} 秒\n";
+if ($inputTokens1 > 0) {
+    echo "Input Tokens: {$inputTokens1}, Output Tokens: {$outputTokens1}, Total Tokens: {$totalTokens1}\n";
+} else {
+    echo "Tokens: N/A (流式响应中 usage 信息可能不可用)\n";
+}
+echo "\n";
+
+// Round 2 - intended to reuse the cache (same conversation, streamed)
+echo "===== 第二轮对话（使用缓存 - 流式）=====\n";
+$start2 = microtime(true);
+
+$userMessage2 = new UserMessage('基于刚才的建议，如果选择微服务架构，那么数据库应该如何设计？请分析一下性能优化方案。');
+$response2 = $agent->chatStreamed($userMessage2);
+
+$content2 = '';
+/** @var ChatCompletionChoice $choice */
+foreach ($response2 as $choice) {
+    $delta = $choice->getMessage()->getContent();
+    if ($delta !== null) {
+        echo $delta;
+        $content2 .= $delta;
+    }
+}
+$duration2 = microtime(true) - $start2;
+
+$usage2 = null;
+if (method_exists($response2, 'getUsage')) {
+    $usage2 = $response2->getUsage();
+}
+$inputTokens2 = $usage2?->getPromptTokens() ?? 0;
+$outputTokens2 = $usage2?->getCompletionTokens() ?? 0;
+$totalTokens2 = $usage2?->getTotalTokens() ?? 0;
+$promptDetails2 = $usage2?->getPromptTokensDetails() ?? [];
+
+echo "\n耗时: {$duration2} 秒\n";
+if ($inputTokens2 > 0) {
+    echo "Input Tokens: {$inputTokens2}, Output Tokens: {$outputTokens2}, Total Tokens: {$totalTokens2}\n";
+} else {
+    echo "Tokens: N/A (流式响应中 usage 信息可能不可用)\n";
+}
+echo "\n";
+
+// Round 3 - intended to keep reusing the cache (same conversation, streamed)
+echo "===== 第三轮对话（继续使用缓存 - 流式）=====\n";
+$start3 = microtime(true);
+
+$userMessage3 = new UserMessage('很好，现在请帮我分析一下这段代码的性能问题：function processData(data) { let result = ""; for (let i = 0; i < data.length; i++) { result += data[i]; } return result; }');
+$response3 = $agent->chatStreamed($userMessage3);
+
+$content3 = '';
+/** @var ChatCompletionChoice $choice */
+foreach ($response3 as $choice) {
+    $delta = $choice->getMessage()->getContent();
+    if ($delta !== null) {
+        echo $delta;
+        $content3 .= $delta;
+    }
+}
+$duration3 = microtime(true) - $start3;
+
+$usage3 = null;
+if (method_exists($response3, 'getUsage')) {
+    $usage3 = $response3->getUsage();
+}
+$inputTokens3 = $usage3?->getPromptTokens() ?? 0;
+$outputTokens3 = $usage3?->getCompletionTokens() ?? 0;
+$totalTokens3 = $usage3?->getTotalTokens() ?? 0;
+$promptDetails3 = $usage3?->getPromptTokensDetails() ?? [];
+
+echo "\n耗时: {$duration3} 秒\n";
+if ($inputTokens3 > 0) {
+    echo "Input Tokens: {$inputTokens3}, Output Tokens: {$outputTokens3}, Total Tokens: {$totalTokens3}\n";
+} else {
+    echo "Tokens: N/A (流式响应中 usage 信息可能不可用)\n";
+}
+echo "\n";
+
+// Summary: one line per round with its duration, plus the input token count when usage info was available
+echo "===== 缓存效果总结 =====\n";
+echo "第一轮（创建缓存）: {$duration1} 秒";
+if ($inputTokens1 > 0) {
+    echo ", Input Tokens: {$inputTokens1}";
+}
+echo "\n";
+echo "第二轮（使用缓存）: {$duration2} 秒";
+if ($inputTokens2 > 0) {
+    echo ", Input Tokens: {$inputTokens2}";
+}
+echo "\n";
+echo "第三轮（使用缓存）: {$duration3} 秒";
+if ($inputTokens3 > 0) {
+    echo ", Input Tokens: {$inputTokens3}";
+}
+echo "\n\n";
+
+// Analyze cache hits (only when usage info is available for round 1 and at least one later round)
+if ($inputTokens1 > 0 && ($inputTokens2 > 0 || $inputTokens3 > 0)) {
+    echo "===== 缓存命中分析 =====\n";
+
+    // Look for explicit cache details in the prompt token details (either key name is accepted)
+    $cacheReadTokens2 = $promptDetails2['cache_read_input_tokens'] ?? $promptDetails2['cached_tokens'] ?? null;
+    $cacheReadTokens3 = $promptDetails3['cache_read_input_tokens'] ?? $promptDetails3['cached_tokens'] ?? null;
+
+    if ($cacheReadTokens2 !== null || $cacheReadTokens3 !== null) {
+        // Explicit cache-read counts are present: a positive count is reported as a hit
+        if ($cacheReadTokens2 !== null && $cacheReadTokens2 > 0) {
+            echo "第二轮缓存命中: {$cacheReadTokens2} tokens 从缓存读取\n";
+        } else {
+            echo "第二轮缓存命中: 未命中\n";
+        }
+
+        if ($cacheReadTokens3 !== null && $cacheReadTokens3 > 0) {
+            echo "第三轮缓存命中: {$cacheReadTokens3} tokens 从缓存读取\n";
+        } else {
+            echo "第三轮缓存命中: 未命中\n";
+        }
+    } else {
+        // Fallback heuristic: input tokens below 80% of round 1 are reported as a cache hit
+        if ($inputTokens1 > 0 && $inputTokens2 > 0) {
+            $reduction2 = (($inputTokens1 - $inputTokens2) / $inputTokens1) * 100;
+            if ($inputTokens2 < $inputTokens1 * 0.8) {
+                $savedTokens2 = $inputTokens1 - $inputTokens2;
+                echo "第二轮缓存命中: 通过 Input Tokens 减少判断，节省了 {$savedTokens2} tokens (" . number_format($reduction2, 2) . "%)\n";
+            } else {
+                echo '第二轮缓存命中: 未命中（Input Tokens 变化: ' . number_format($reduction2, 2) . "%）\n";
+            }
+        }
+
+        if ($inputTokens1 > 0 && $inputTokens3 > 0) {
+            $reduction3 = (($inputTokens1 - $inputTokens3) / $inputTokens1) * 100;
+            if ($inputTokens3 < $inputTokens1 * 0.8) {
+                $savedTokens3 = $inputTokens1 - $inputTokens3;
+                echo "第三轮缓存命中: 通过 Input Tokens 减少判断，节省了 {$savedTokens3} tokens (" . number_format($reduction3, 2) . "%)\n";
+            } else {
+                echo '第三轮缓存命中: 未命中（Input Tokens 变化: ' . number_format($reduction3, 2) . "%）\n";
+            }
+        }
+    }
+    echo "\n";
+}
+
+// Performance comparison: percentage by which rounds 2 and 3 were faster than round 1 (skipped when round 1 took no measurable time)
+if ($duration1 > 0) {
+    $speedup2 = (($duration1 - $duration2) / $duration1) * 100;
+    $speedup3 = (($duration1 - $duration3) / $duration1) * 100;
+    echo "===== 性能对比 =====\n";
+    echo '第二轮相比第一轮加速: ' . number_format($speedup2, 2) . "%\n";
+    echo '第三轮相比第一轮加速: ' . number_format($speedup3, 2) . "%\n";
+}
diff --git a/examples/mapper/tool_use_agent_stream.php b/examples/mapper/tool_use_agent_stream.php
index b9b4e97..dfa037e 100644
--- a/examples/mapper/tool_use_agent_stream.php
+++ b/examples/mapper/tool_use_agent_stream.php
@@ -269,7 +269,7 @@ protected function handle(array $parameters): array
 echo "===== 顺序流式工具调用示例 =====\n";
 $start = microtime(true);
 
-$userMessage = new UserMessage('先获取当前系统时间，再计算 7 的 3 次方，然后查询用户ID为2的信息，最后根据查询结果推荐一些科幻电影。请详细说明每一步。');
+$userMessage = new UserMessage('先获取当前系统时间，再计算 7 的 3 次方，然后查询用户ID为2的信息，最后根据查询结果推荐一些科幻电影。请详细说明每一步。在最后进行总结');
 $response = $agent->chatStreamed($userMessage);
 
 $content = '';
diff --git a/examples/mapper/vision.php b/examples/mapper/vision.php
new file mode 100644
index 0000000..16c0be8
--- /dev/null
+++ b/examples/mapper/vision.php
@@ -0,0 +1,53 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Message\UserMessageContent;
+use Hyperf\Odin\ModelMapper;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create the logger
+$logger = new Logger();
+
+// Initialize the model
+$modelId = \Hyperf\Support\env('MODEL_MAPPER_TEST_MODEL_ID', '');
+$modelMapper = $container->get(ModelMapper::class);
+$model = $modelMapper->getModel($modelId);
+
+$userMessage = new UserMessage();
+$userMessage->addContent(UserMessageContent::text('请分析下面图片中的内容，并描述其主要元素和可能的用途。'));
+$userMessage->addContent(UserMessageContent::imageUrl('https://tos-tools.tos-cn-beijing.volces.com/misc/sample1.jpg'));
+
+$start = microtime(true);
+
+// Call the non-streaming API
+$response = $model->chat([$userMessage]);
+
+// Print the response: the reasoning content when it is non-null, otherwise the regular content
+$message = $response->getFirstChoice()->getMessage();
+if ($message instanceof AssistantMessage) {
+    echo $message->getReasoningContent() ?? $message->getContent();
+}
+
+echo PHP_EOL;
+echo '耗时' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/mapper/vision_base64.php b/examples/mapper/vision_base64.php
new file mode 100644
index 0000000..2c0ff4a
--- /dev/null
+++ b/examples/mapper/vision_base64.php
@@ -0,0 +1,62 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Message\UserMessageContent;
+use Hyperf\Odin\ModelMapper;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create logger
+$logger = new Logger();
+
+// Initialize model
+$modelId = \Hyperf\Support\env('MODEL_MAPPER_TEST_MODEL_ID', '');
+$modelMapper = $container->get(ModelMapper::class);
+$model = $modelMapper->getModel($modelId);
+
+// Convert image URL to base64 format. file_get_contents() yields false when the download fails, and under
+// strict_types base64_encode(false) would be a TypeError, so stop here with an explicit error instead.
+$imageUrl = 'https://tos-tools.tos-cn-beijing.volces.com/misc/sample1.jpg';
+$imageData = file_get_contents($imageUrl) ?: throw new \RuntimeException("Failed to download image: {$imageUrl}");
+$imageType = 'image/jpeg'; // Default to jpeg, or detect from URL/headers if needed
+$dataUrl = "data:{$imageType};base64," . base64_encode($imageData);
+
+echo '已将图像转换为 base64 格式' . PHP_EOL;
+
+$userMessage = new UserMessage();
+$userMessage->addContent(UserMessageContent::text('请分析下面图片中的内容，并描述其主要元素和可能的用途。'));
+$userMessage->addContent(UserMessageContent::imageUrl($dataUrl));
+
+$start = microtime(true);
+
+// Use non-streaming API
+$response = $model->chat([$userMessage]);
+
+// Output complete response
+$message = $response->getFirstChoice()->getMessage();
+if ($message instanceof AssistantMessage) {
+    echo $message->getReasoningContent() ?? $message->getContent();
+}
+
+echo PHP_EOL;
+echo '耗时' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/mapper/vision_stream.php b/examples/mapper/vision_stream.php
new file mode 100644
index 0000000..c7f5338
--- /dev/null
+++ b/examples/mapper/vision_stream.php
@@ -0,0 +1,57 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Api\Response\ChatCompletionChoice;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Message\UserMessageContent;
+use Hyperf\Odin\ModelMapper;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create logger
+$logger = new Logger();
+
+// Initialize model
+$modelId = \Hyperf\Support\env('MODEL_MAPPER_TEST_MODEL_ID', '');
+$modelMapper = $container->get(ModelMapper::class);
+$model = $modelMapper->getModel($modelId);
+
+$userMessage = new UserMessage();
+$userMessage->addContent(UserMessageContent::text('请分析下面图片中的内容，并描述其主要元素和可能的用途。'));
+$userMessage->addContent(UserMessageContent::imageUrl('https://tos-tools.tos-cn-beijing.volces.com/misc/sample1.jpg'));
+
+$start = microtime(true);
+
+// Use streaming API
+$response = $model->chatStream([$userMessage]);
+
+// Output streaming response
+/** @var ChatCompletionChoice $choice */
+foreach ($response->getStreamIterator() as $choice) {
+    $message = $choice->getMessage();
+    if ($message instanceof AssistantMessage) {
+        echo $message->getReasoningContent() ?? $message->getContent();
+    }
+}
+
+echo PHP_EOL;
+echo '耗时' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/mapper/vision_stream_base64.php b/examples/mapper/vision_stream_base64.php
new file mode 100644
index 0000000..45936e8
--- /dev/null
+++ b/examples/mapper/vision_stream_base64.php
@@ -0,0 +1,66 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Api\Response\ChatCompletionChoice;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Message\UserMessageContent;
+use Hyperf\Odin\ModelMapper;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+
+// Create logger
+$logger = new Logger();
+
+// Initialize model
+$modelId = \Hyperf\Support\env('MODEL_MAPPER_TEST_MODEL_ID', '');
+$modelMapper = $container->get(ModelMapper::class);
+$model = $modelMapper->getModel($modelId);
+
+// Convert image URL to base64 format. file_get_contents() yields false when the download fails, and under
+// strict_types base64_encode(false) would be a TypeError, so stop here with an explicit error instead.
+$imageUrl = 'https://tos-tools.tos-cn-beijing.volces.com/misc/sample1.jpg';
+$imageData = file_get_contents($imageUrl) ?: throw new \RuntimeException("Failed to download image: {$imageUrl}");
+$imageType = 'image/jpeg'; // Default to jpeg, or detect from URL/headers if needed
+$dataUrl = "data:{$imageType};base64," . base64_encode($imageData);
+
+echo '已将图像转换为 base64 格式' . PHP_EOL;
+
+$userMessage = new UserMessage();
+$userMessage->addContent(UserMessageContent::text('请分析下面图片中的内容，并描述其主要元素和可能的用途。'));
+$userMessage->addContent(UserMessageContent::imageUrl($dataUrl));
+
+$start = microtime(true);
+
+// Use streaming API
+$response = $model->chatStream([$userMessage]);
+
+// Output streaming response
+/** @var ChatCompletionChoice $choice */
+foreach ($response->getStreamIterator() as $choice) {
+    $message = $choice->getMessage();
+    if ($message instanceof AssistantMessage) {
+        echo $message->getReasoningContent() ?? $message->getContent();
+    }
+}
+
+echo PHP_EOL;
+echo '耗时' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/examples/openai/openai_tool_use_agent.php b/examples/openai/openai_tool_use_agent.php
new file mode 100644
index 0000000..a9a1478
--- /dev/null
+++ b/examples/openai/openai_tool_use_agent.php
@@ -0,0 +1,315 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+! defined('BASE_PATH') && define('BASE_PATH', dirname(__DIR__, 2));
+
+require_once dirname(__FILE__, 3) . '/vendor/autoload.php';
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Di\ClassLoader;
+use Hyperf\Di\Container;
+use Hyperf\Di\Definition\DefinitionSourceFactory;
+use Hyperf\Odin\Agent\Tool\ToolUseAgent;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Factory\ModelFactory;
+use Hyperf\Odin\Logger;
+use Hyperf\Odin\Memory\MemoryManager;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Model\AzureOpenAIModel;
+use Hyperf\Odin\Model\ModelOptions;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Tool\Definition\ToolParameters;
+
+use function Hyperf\Support\env;
+
+ClassLoader::init();
+$container = ApplicationContext::setContainer(new Container((new DefinitionSourceFactory())()));
+$logger = new Logger();
+
+// Initialize the model (Azure OpenAI credentials and deployment are read from AZURE_OPENAI_GPT5_* environment variables)
+$model = ModelFactory::create(
+    implementation: AzureOpenAIModel::class,
+    modelName: 'gpt-5-global',
+    config: [
+        'api_key' => env('AZURE_OPENAI_GPT5_API_KEY'),
+        'api_base' => env('AZURE_OPENAI_GPT5_API_BASE'),
+        'api_version' => env('AZURE_OPENAI_GPT5_API_VERSION'),
+        'deployment_name' => env('AZURE_OPENAI_GPT5_DEPLOYMENT_NAME'),
+    ],
+    modelOptions: ModelOptions::fromArray([
+        'chat' => true,
+        'function_call' => true,
+        'embedding' => false,
+        'multi_modal' => true,
+        'vector_size' => 0,
+    ]),
+    apiOptions: ApiOptions::fromArray([
+        'timeout' => [
+            'connection' => 5.0,  // connection timeout (seconds)
+            'write' => 10.0,      // write timeout (seconds)
+            'read' => 300.0,      // read timeout (seconds)
+            'total' => 350.0,     // overall timeout (seconds)
+            'thinking' => 120.0,  // thinking timeout (seconds)
+            'stream_chunk' => 30.0, // timeout between stream chunks (seconds)
+            'stream_first' => 60.0, // timeout for the first stream chunk (seconds)
+        ],
+        'custom_error_mapping_rules' => [],
+    ]),
+    logger: $logger
+);
+
+// 初始化内存管理器
+$memory = new MemoryManager();
+$systemPrompt = '你是一个专业且智能的AI助手，具备丰富的知识库和强大的工具使用能力。你的主要职责是帮助用户解决各种问题，并在需要时合理使用可用的工具来提供准确、及时的信息和服务。
+
+## 工具使用原则
+
+### 1. 工具选择策略
+- 当用户的需求需要实时数据、精确计算或特定功能时，优先考虑使用相应的工具
+- 在使用工具前，先分析用户需求，选择最合适的工具组合
+- 对于复杂任务，可以按逻辑顺序使用多个工具
+- 如果某个工具无法满足需求，主动说明原因并提供替代方案
+
+### 2. 工具调用规范
+- 使用工具前，向用户清楚说明将要使用的工具及其作用
+- 调用工具时确保参数正确完整，避免错误调用
+- 工具返回结果后，对结果进行解读和总结
+- 如果工具返回错误，要向用户说明错误原因并提供解决建议
+
+### 3. 响应格式要求
+- 回复结构清晰，逻辑层次分明
+- 使用工具时采用以下格式：
+  1. 说明即将使用的工具和原因
+  2. 调用工具并展示结果
+  3. 对结果进行分析和解释
+  4. 根据结果给出最终答案或建议
+
+## 可用工具说明
+
+### 计算器工具 (calculator)
+功能：执行基本数学运算（加、减、乘、除）
+使用场景：需要进行精确数学计算时
+参数要求：
+- operation: 运算类型（add/subtract/multiply/divide）
+- a: 第一个操作数
+- b: 第二个操作数
+
+### 天气查询工具 (weather)
+功能：查询指定城市的天气信息
+使用场景：用户询问天气情况时
+参数要求：
+- city: 城市名称
+注意：当前支持北京、上海、广州、深圳等主要城市
+
+### 翻译工具 (translate)
+功能：将文本从一种语言翻译成另一种语言
+使用场景：用户需要翻译服务时
+参数要求：
+- text: 要翻译的文本内容
+- target_language: 目标语言
+
+## 交互指导原则
+
+### 1. 用户体验优先
+- 始终保持友好、专业的对话态度
+- 主动了解用户需求，提供个性化服务
+- 回复要简洁明了，避免冗余信息
+- 对于复杂问题，提供分步解决方案
+
+### 2. 准确性保证
+- 使用工具获得的数据要如实呈现
+- 对于无法确定的信息，明确说明不确定性
+- 区分事实信息和推测内容
+- 承认知识局限性，必要时建议用户咨询专业人士
+
+### 3. 安全和隐私
+- 保护用户隐私，不泄露敏感信息
+- 对于涉及安全的操作，提供必要的警告和建议
+- 拒绝执行可能造成危害的请求
+- 遵守相关法律法规和道德规范
+
+### 4. 持续学习
+- 从用户反馈中改进服务质量
+- 灵活应对各种场景和需求
+- 保持开放心态，接受新的挑战
+- 不断优化工具使用效率
+
+## 特殊情况处理
+
+### 工具故障处理
+- 如果工具调用失败，立即向用户说明情况
+- 提供人工替代方案或建议重试
+- 记录问题详情，便于后续改进
+
+### 多工具协作
+- 合理规划工具使用顺序
+- 确保前一个工具的输出能为下一个工具提供有效输入
+- 对整个工具链的执行过程进行监控和优化
+
+### 异常情况应对
+- 面对超出工具能力范围的需求，诚实说明限制
+- 提供可行的替代解决方案
+- 引导用户调整需求或寻求其他帮助渠道
+
+通过以上原则和规范，我将为你提供高质量、可靠的智能助手服务。请随时告诉我你的需求，我会选择最合适的方式来帮助你。';
+
+$memory->addSystemMessage(new SystemMessage($systemPrompt));
+
+// 定义多个工具
+// 计算器工具
+$calculatorTool = new ToolDefinition(
+    name: 'calculator',
+    description: '用于执行基本数学运算的计算器工具',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'operation' => [
+                'type' => 'string',
+                'enum' => ['add', 'subtract', 'multiply', 'divide'],
+                'description' => '要执行的数学运算类型',
+            ],
+            'a' => [
+                'type' => 'number',
+                'description' => '第一个操作数',
+            ],
+            'b' => [
+                'type' => 'number',
+                'description' => '第二个操作数',
+            ],
+        ],
+        'required' => ['operation', 'a', 'b'],
+    ]),
+    toolHandler: function ($params) {
+        // Performs one of the four basic operations on operands `a` and `b`.
+        // Returns ['result' => value], or ['error' => message] for an unknown
+        // operation or a division by zero.
+        $a = $params['a'];
+        $b = $params['b'];
+
+        // match compares strictly and yields a value, so no case can fall through.
+        return match ($params['operation']) {
+            'add' => ['result' => $a + $b],
+            'subtract' => ['result' => $a - $b],
+            'multiply' => ['result' => $a * $b],
+            // Loose == on purpose: the divisor may arrive as int 0 or float 0.0.
+            'divide' => $b == 0
+                ? ['error' => '除数不能为零']
+                : ['result' => $a / $b],
+            default => ['error' => '未知操作'],
+        };
+    }
+);
+
+// 天气查询工具 (模拟)
+$weatherTool = new ToolDefinition(
+    name: 'weather',
+    description: '查询指定城市的天气信息',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'city' => [
+                'type' => 'string',
+                'description' => '要查询天气的城市名称',
+            ],
+        ],
+        'required' => ['city'],
+    ]),
+    toolHandler: function ($params) {
+        // Simulated forecasts for the supported cities.
+        $forecasts = [
+            '北京' => ['temperature' => '25°C', 'condition' => '晴朗', 'humidity' => '45%'],
+            '上海' => ['temperature' => '28°C', 'condition' => '多云', 'humidity' => '60%'],
+            '广州' => ['temperature' => '30°C', 'condition' => '阵雨', 'humidity' => '75%'],
+            '深圳' => ['temperature' => '29°C', 'condition' => '晴朗', 'humidity' => '65%'],
+        ];
+        $requestedCity = $params['city'];
+
+        // A known city returns its forecast array; any other city returns
+        // an error payload instead.
+        return $forecasts[$requestedCity]
+            ?? ['error' => '没有找到该城市的天气信息'];
+    }
+);
+
+// 翻译工具 (模拟)
+$translateTool = new ToolDefinition(
+    name: 'translate',
+    description: '将文本从一种语言翻译成另一种语言',
+    parameters: ToolParameters::fromArray([
+        'type' => 'object',
+        'properties' => [
+            'text' => [
+                'type' => 'string',
+                'description' => '要翻译的文本',
+            ],
+            'target_language' => [
+                'type' => 'string',
+                'description' => '目标语言，例如：英语、中文、日语等',
+            ],
+        ],
+        'required' => ['text', 'target_language'],
+    ]),
+    toolHandler: function ($params) {
+        // Simulated dictionary: source text => target language => translation.
+        $dictionary = [
+            '你好' => [
+                '英语' => 'Hello',
+                '日语' => 'こんにちは',
+                '法语' => 'Bonjour',
+            ],
+            'Hello' => [
+                '中文' => '你好',
+                '日语' => 'こんにちは',
+                '法语' => 'Bonjour',
+            ],
+        ];
+        $source = $params['text'];
+        $language = $params['target_language'];
+
+        $known = $dictionary[$source][$language] ?? null;
+        if ($known === null) {
+            // No preset entry: return the source text with a simulated suffix and a note.
+            return ['translated_text' => $source . ' (已翻译为' . $language . ')', 'note' => '这是模拟翻译'];
+        }
+
+        return ['translated_text' => $known];
+    }
+);
+
+// Create the agent with all three tools, each registered under its own tool name
+$agent = new ToolUseAgent(
+    model: $model,
+    memory: $memory,
+    tools: [
+        $calculatorTool->getName() => $calculatorTool,
+        $weatherTool->getName() => $weatherTool,
+        $translateTool->getName() => $translateTool,
+    ],
+    temperature: 1,
+    logger: $logger
+);
+
+// Sequential tool-call example: one request that asks for all three tools in order
+echo "===== 顺序工具调用示例 =====\n";
+$start = microtime(true);
+
+$userMessage = new UserMessage('请计算 23 × 45，然后查询北京的天气，最后将"你好"翻译成英语。请详细说明每一步。');
+$response = $agent->chat($userMessage);
+
+$message = $response->getFirstChoice()->getMessage();
+if ($message instanceof AssistantMessage) {
+    echo $message->getContent();
+}
+
+echo "\n";
+echo '顺序调用耗时：' . (microtime(true) - $start) . '秒' . PHP_EOL;
diff --git a/publish/odin.php b/publish/odin.php
index 0448f18..fd84a04 100644
--- a/publish/odin.php
+++ b/publish/odin.php
@@ -36,6 +36,7 @@
                 'thinking' => 120.0,  // 思考超时（秒）
                 'stream_chunk' => 30.0, // 流式块间超时（秒）
                 'stream_first' => 60.0, // 首个流式块超时（秒）
+                'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
             ],
             'custom_error_mapping_rules' => [],
             /**
@@ -121,6 +122,8 @@
                 ],
                 // 是否启用字段白名单过滤，默认true（启用过滤）
                 'enable_whitelist' => env('ODIN_LOG_WHITELIST_ENABLED', true),
+                // 最大字符串长度限制，超过此长度的字符串将被替换为 [Long Text]，设置为 0 表示不限制
+                'max_text_length' => env('ODIN_LOG_MAX_TEXT_LENGTH', 2000),
             ],
             'network_retry_count' => 0,
         ],
@@ -149,6 +152,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -177,6 +181,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -203,6 +208,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -230,6 +236,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -257,6 +264,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -284,6 +292,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -311,6 +320,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 3600.0, // 流式总超时（秒，1小时）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -338,6 +348,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'custom_error_mapping_rules' => [],
                 ],
@@ -366,6 +377,7 @@
                         'thinking' => 120.0,  // 思考超时（秒）
                         'stream_chunk' => 30.0, // 流式块间超时（秒）
                         'stream_first' => 60.0, // 首个流式块超时（秒）
+                        'stream_total' => 600.0, // 流式总超时（秒，默认10分钟）
                     ],
                     'proxy' => env('HTTP_CLIENT_PROXY'),
                     'custom_error_mapping_rules' => [],
diff --git a/src/Agent/Tool/ToolUseAgent.php b/src/Agent/Tool/ToolUseAgent.php
index 13573c4..881a666 100644
--- a/src/Agent/Tool/ToolUseAgent.php
+++ b/src/Agent/Tool/ToolUseAgent.php
@@ -26,6 +26,7 @@
 use Hyperf\Odin\Message\ToolMessage;
 use Hyperf\Odin\Message\UserMessage;
 use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Utils\TimeUtil;
 use Hyperf\Odin\Utils\ToolUtil;
 use Psr\Log\LoggerInterface;
 use Throwable;
@@ -456,7 +457,7 @@ private function executeToolCalls(AssistantMessage $message): array
                     ], JSON_UNESCAPED_UNICODE);
                 } finally {
                     $usedTool = new UsedTool(
-                        elapsedTime: round((microtime(true) - $start) * 1000, 2),
+                        elapsedTime: TimeUtil::calculateDurationMs($start, 2),
                         success: $success,
                         id: $toolCall->getId(),
                         name: $tool->getName(),
diff --git a/src/Api/Providers/AbstractClient.php b/src/Api/Providers/AbstractClient.php
index 8de69e2..8cba806 100644
--- a/src/Api/Providers/AbstractClient.php
+++ b/src/Api/Providers/AbstractClient.php
@@ -14,6 +14,7 @@
 
 use GuzzleHttp\Client as GuzzleClient;
 use GuzzleHttp\RequestOptions;
+use Hyperf\Engine\Coroutine;
 use Hyperf\Odin\Api\Request\ChatCompletionRequest;
 use Hyperf\Odin\Api\Request\CompletionRequest;
 use Hyperf\Odin\Api\Request\EmbeddingRequest;
@@ -22,6 +23,7 @@
 use Hyperf\Odin\Api\Response\ChatCompletionStreamResponse;
 use Hyperf\Odin\Api\Response\EmbeddingResponse;
 use Hyperf\Odin\Api\Response\TextCompletionResponse;
+use Hyperf\Odin\Api\Transport\OdinSimpleCurl;
 use Hyperf\Odin\Api\Transport\SSEClient;
 use Hyperf\Odin\Contract\Api\ClientInterface;
 use Hyperf\Odin\Contract\Api\ConfigInterface;
@@ -35,6 +37,7 @@
 use Hyperf\Odin\Utils\EventUtil;
 use Hyperf\Odin\Utils\LoggingConfigHelper;
 use Hyperf\Odin\Utils\LogUtil;
+use Hyperf\Odin\Utils\TimeUtil;
 use Psr\Log\LoggerInterface;
 use Throwable;
 
@@ -88,6 +91,7 @@ public function chatCompletions(ChatCompletionRequest $chatRequest): ChatComplet
             $this->logResponse('ChatCompletionsResponse', $requestId, $duration, [
                 'content' => $chatCompletionResponse->getContent(),
                 'response_headers' => $response->getHeaders(),
+                'usage' => $chatCompletionResponse->getUsage()?->toArray(),
             ]);
 
             EventUtil::dispatch(new AfterChatCompletionsEvent($chatRequest, $chatCompletionResponse, $duration));
@@ -110,15 +114,31 @@ public function chatCompletionsStream(ChatCompletionRequest $chatRequest): ChatC
 
         $startTime = microtime(true);
         try {
+            // For streaming requests, use first chunk timeout to fail fast on network issues
             $options[RequestOptions::STREAM] = true;
-            $response = $this->client->post($url, $options);
+            $options[RequestOptions::TIMEOUT] = $this->requestOptions->getStreamFirstChunkTimeout();
+
+            if (Coroutine::id()) {
+                foreach ($this->getHeaders() as $key => $value) {
+                    $options['headers'][$key] = $value;
+                }
+                $options['connect_timeout'] = $this->requestOptions->getConnectionTimeout();
+                $options['stream_chunk'] = $this->requestOptions->getStreamChunkTimeout();
+                $options['header_timeout'] = $this->requestOptions->getStreamFirstChunkTimeout();
+                if ($proxy = $this->requestOptions->getProxy()) {
+                    $options['proxy'] = $proxy;
+                }
+                $response = OdinSimpleCurl::send($url, $options);
+            } else {
+                $response = $this->client->post($url, $options);
+            }
+
             $firstResponseDuration = $this->calculateDuration($startTime);
 
             $stream = $response->getBody()->detach();
             $sseClient = new SSEClient(
                 $stream,
                 true,
-                (int) $this->requestOptions->getTotalTimeout(),
                 $this->requestOptions->getTimeout(),
                 $this->logger
             );
@@ -354,13 +374,13 @@ protected function createExceptionContext(string $url, array $options, string $m
      */
     protected function calculateDuration(float $startTime): float
     {
-        return round((microtime(true) - $startTime) * 1000);
+        return TimeUtil::calculateDurationMs($startTime);
     }
 
     /**
      * 获取请求头.
      */
-    private function getHeaders(): array
+    protected function getHeaders(): array
     {
         $headers = [
             'User-Agent' => 'Hyperf-Odin/1.0',
diff --git a/src/Api/Providers/AwsBedrock/AwsBedrock.php b/src/Api/Providers/AwsBedrock/AwsBedrock.php
index f2db067..377579a 100644
--- a/src/Api/Providers/AwsBedrock/AwsBedrock.php
+++ b/src/Api/Providers/AwsBedrock/AwsBedrock.php
@@ -21,11 +21,11 @@
 class AwsBedrock extends AbstractApi
 {
     /**
-     * @var Client[]|ConverseClient[]
+     * @var Client[]|ConverseClient[]|ConverseCustomClient[]
      */
     protected array $clients = [];
 
-    public function getClient(AwsBedrockConfig $config, ?ApiOptions $requestOptions = null, ?LoggerInterface $logger = null): Client|ConverseClient
+    public function getClient(AwsBedrockConfig $config, ?ApiOptions $requestOptions = null, ?LoggerInterface $logger = null): Client|ConverseClient|ConverseCustomClient
     {
         // 检查AWS凭证，必须有访问密钥和密钥
         if (empty($config->accessKey) || empty($config->secretKey)) {
@@ -44,9 +44,14 @@ public function getClient(AwsBedrockConfig $config, ?ApiOptions $requestOptions
             return $this->clients[$key];
         }
 
-        if ($config->getType() === AwsType::CONVERSE) {
+        if ($config->getType() === AwsType::CONVERSE_CUSTOM) {
+            // Use custom Converse client without AWS SDK (manual Guzzle + SigV4)
+            $client = new ConverseCustomClient($config, $requestOptions, $logger);
+        } elseif ($config->getType() === AwsType::CONVERSE) {
+            // Use Converse API with AWS SDK
             $client = new ConverseClient($config, $requestOptions, $logger);
         } else {
+            // Use InvokeModel API with AWS SDK (default)
             $client = new Client($config, $requestOptions, $logger);
         }
 
diff --git a/src/Api/Providers/AwsBedrock/AwsBedrockConfig.php b/src/Api/Providers/AwsBedrock/AwsBedrockConfig.php
index 0056744..3bf9868 100644
--- a/src/Api/Providers/AwsBedrock/AwsBedrockConfig.php
+++ b/src/Api/Providers/AwsBedrock/AwsBedrockConfig.php
@@ -22,9 +22,14 @@ public function __construct(
         public string $secretKey,
         public string $region = 'us-east-1',
         /**
-         * @var string 类型 converse|invoke
+         * API type:
+         * - converse_custom: Converse API without AWS SDK (custom Guzzle + SigV4) [default]
+         * - converse: Converse API with AWS SDK
+         * - invoke: InvokeModel API with AWS SDK
+         *
+         * @var string
          */
-        public string $type = AwsType::CONVERSE,
+        public string $type = AwsType::CONVERSE_CUSTOM,
         public bool $autoCache = false,
         public ?AutoCacheConfig $autoCacheConfig = null,
     ) {
diff --git a/src/Api/Providers/AwsBedrock/AwsBedrockConverseFormatConverter.php b/src/Api/Providers/AwsBedrock/AwsBedrockConverseFormatConverter.php
index 784e421..d0f4279 100644
--- a/src/Api/Providers/AwsBedrock/AwsBedrockConverseFormatConverter.php
+++ b/src/Api/Providers/AwsBedrock/AwsBedrockConverseFormatConverter.php
@@ -78,11 +78,37 @@ public function getIterator(): Generator
         $created = time();
         $isFirstChunk = true;
         $toolCallIndex = 0;
+        $chunkIndex = 0;
+        $firstChunks = [];
+        $lastChunks = [];
+        $maxChunksToLog = 5;
 
         foreach ($this->responseStream as $chunk) {
             if (empty($chunk) || ! is_array($chunk)) {
                 continue;
             }
+
+            $timestamp = microtime(true);
+            $chunkWithTime = [
+                'index' => $chunkIndex,
+                'timestamp' => $timestamp,
+                'datetime' => date('Y-m-d H:i:s', (int) $timestamp) . '.' . substr((string) fmod($timestamp, 1), 2, 6),
+                'data' => $chunk,
+            ];
+
+            // Collect first 5 chunks
+            if ($chunkIndex < $maxChunksToLog) {
+                $firstChunks[] = $chunkWithTime;
+            }
+
+            // Keep a rolling window of last 5 chunks
+            $lastChunks[] = $chunkWithTime;
+            if (count($lastChunks) > $maxChunksToLog) {
+                array_shift($lastChunks);
+            }
+
+            ++$chunkIndex;
+
             foreach ($chunk as $eventType => $event) {
                 // 根据事件类型处理
                 switch ($eventType) {
@@ -141,6 +167,21 @@ public function getIterator(): Generator
                 }
             }
         }
+
+        // Log first 5 and last 5 chunks after all processing
+        if (! empty($firstChunks)) {
+            $this->log(LogLevel::INFO, 'FirstChunks', [
+                'total_chunks' => $chunkIndex,
+                'chunks' => $firstChunks,
+            ]);
+        }
+
+        if (! empty($lastChunks)) {
+            $this->log(LogLevel::INFO, 'LastChunks', [
+                'total_chunks' => $chunkIndex,
+                'chunks' => $lastChunks,
+            ]);
+        }
     }
 
     /**
@@ -161,6 +202,18 @@ public function getModel(): string
 
     private function formatUsageEvent(int $created, array $usage): string
     {
+        // 转换Claude的token统计方式为Qwen格式（与非流式保持一致）
+        // Claude: inputTokens=新输入, cacheReadInputTokens=缓存命中
+        // OpenAI: promptTokens=总输入(包括缓存), cachedTokens=缓存命中
+        $inputTokens = $usage['inputTokens'] ?? 0;
+        $cacheReadTokens = $usage['cacheReadInputTokens'] ?? 0;
+        $cacheWriteTokens = $usage['cacheWriteInputTokens'] ?? 0;
+
+        // 按照 OpenAI 的方式：promptTokens = 总处理的提示tokens（包括缓存）
+        $promptTokens = $inputTokens + $cacheReadTokens + $cacheWriteTokens;
+        $completionTokens = $usage['outputTokens'] ?? 0;
+        $totalTokens = $promptTokens + $completionTokens;
+
         return $this->formatOpenAiEvent([
             'id' => $this->messageId ?? ('bedrock-' . uniqid()),
             'object' => 'chat.completion.chunk',
@@ -168,15 +221,15 @@ private function formatUsageEvent(int $created, array $usage): string
             'model' => $this->model ?: 'aws.bedrock',
             'choices' => null,
             'usage' => [
-                'prompt_tokens' => $usage['inputTokens'] ?? 0,
-                'completion_tokens' => $usage['outputTokens'] ?? 0,
-                'total_tokens' => $usage['totalTokens'] ?? 0,
+                'prompt_tokens' => $promptTokens,
+                'completion_tokens' => $completionTokens,
+                'total_tokens' => $totalTokens,
                 'prompt_tokens_details' => [
-                    'cache_write_input_tokens' => $usage['cacheWriteInputTokens'] ?? 0,
-                    'cache_read_input_tokens' => $usage['cacheReadInputTokens'] ?? 0,
-                    // 兼容旧参数
+                    'cache_write_input_tokens' => $cacheWriteTokens,
+                    'cache_read_input_tokens' => $cacheReadTokens,
+                    // 兼容 OpenAI 格式：cached_tokens表示缓存命中
                     'audio_tokens' => 0,
-                    'cached_tokens' => $usage['cacheWriteInputTokens'] ?? 0,
+                    'cached_tokens' => $cacheReadTokens,
                 ],
                 'completion_tokens_details' => [
                     'reasoning_tokens' => 0,
diff --git a/src/Api/Providers/AwsBedrock/AwsBedrockFormatConverter.php b/src/Api/Providers/AwsBedrock/AwsBedrockFormatConverter.php
index e1c8e4e..beebdc9 100644
--- a/src/Api/Providers/AwsBedrock/AwsBedrockFormatConverter.php
+++ b/src/Api/Providers/AwsBedrock/AwsBedrockFormatConverter.php
@@ -328,7 +328,7 @@ private function formatMessageStopEvent(int $created): string
      * @param mixed $chunk AWS Bedrock 响应块
      * @return null|array|bool 解析后的事件数据，失败返回 null
      */
-    private function parseChunk(array $chunk): null|array|bool
+    private function parseChunk(array $chunk): array|bool|null
     {
         $rawData = $chunk['chunk']['bytes'] ?? null;
         if (! is_string($rawData) || empty($rawData)) {
diff --git a/src/Api/Providers/AwsBedrock/AwsEventStreamParser.php b/src/Api/Providers/AwsBedrock/AwsEventStreamParser.php
new file mode 100644
index 0000000..e38fc44
--- /dev/null
+++ b/src/Api/Providers/AwsBedrock/AwsEventStreamParser.php
@@ -0,0 +1,437 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\AwsBedrock;
+
+use Generator;
+use Hyperf\Odin\Api\Transport\SimpleCURLClient;
+use Hyperf\Odin\Utils\LogUtil;
+use InvalidArgumentException;
+use IteratorAggregate;
+use RuntimeException;
+use Throwable;
+
+/**
+ * AWS Event Stream Parser.
+ *
+ * Parses AWS event-stream format without depending on AWS SDK.
+ *
+ * AWS event-stream format:
+ * - Prelude (12 bytes): total_length (4) + headers_length (4) + prelude_crc (4)
+ * - Headers (variable): key-value pairs with type info
+ * - Payload (variable): the actual event data
+ * - Message CRC (4 bytes): checksum of the entire message
+ *
+ * @see https://docs.aws.amazon.com/AmazonS3/latest/API/RESTSelectObjectAppendix.html
+ */
+class AwsEventStreamParser implements IteratorAggregate
+{
+    /**
+     * Smallest legal frame: 12-byte prelude plus the 4-byte message CRC.
+     */
+    private const MIN_MESSAGE_LENGTH = 16;
+
+    /**
+     * @var resource open stream the frames are read from
+     */
+    private $stream;
+
+    private string $buffer = ''; // frame bytes read from the stream but not yet decoded
+
+    /**
+     * @param resource $stream PHP stream resource (anything else raises InvalidArgumentException)
+     */
+    public function __construct($stream)
+    {
+        if (! is_resource($stream)) {
+            throw new InvalidArgumentException('Stream must be a resource');
+        }
+
+        $this->stream = $stream;
+    }
+
+    /**
+     * Read frames until EOF and yield each message as ['headers' => array, 'payload' => string].
+     *
+     * @throws RuntimeException when a frame is truncated, malformed or fails a CRC check
+     */
+    public function getIterator(): Generator
+    {
+        $messageCount = 0;
+        $this->log('开始解析EventStream', [
+            'feof' => feof($this->stream),
+        ]);
+
+        try {
+            while (! feof($this->stream)) {
+                // Every frame starts with its own total length (4 bytes, big-endian).
+                $length = $this->readExactly(4);
+                if ($length === null) {
+                    // Normal EOF
+                    $this->log('流正常结束', [
+                        'total_messages' => $messageCount,
+                        'feof' => feof($this->stream),
+                    ]);
+                    break;
+                }
+
+                $totalLength = unpack('N', $length)[1];
+                if ($totalLength < self::MIN_MESSAGE_LENGTH) {
+                    // Otherwise the byte count below goes wrong and the parser desynchronises.
+                    throw new RuntimeException("Invalid event-stream message length: {$totalLength}");
+                }
+                $toRead = $totalLength - 4;
+
+                $body = $this->readExactly($toRead);
+                if ($body === null) {
+                    $this->log('读取消息体失败', [
+                        'message_count' => $messageCount,
+                        'to_read' => $toRead,
+                        'buffer_preview' => bin2hex(substr($this->buffer, 0, 200)),
+                    ]);
+                    throw new RuntimeException('Failed to read message body from stream');
+                }
+
+                $this->buffer .= $length . $body;
+
+                while (($message = $this->parseNextMessage()) !== null) {
+                    ++$messageCount;
+                    yield $message;
+                }
+            }
+        } finally {
+            $this->log('EventStream解析完成', [
+                'total_messages' => $messageCount,
+                'feof' => feof($this->stream),
+                'remaining_buffer' => strlen($this->buffer),
+            ]);
+
+            // Log last read chunks from SimpleCURLClient if available
+            $this->logLastReadChunks();
+        }
+    }
+
+    /**
+     * Read exactly N bytes from stream, retrying empty reads (up to 100 times, 10 ms apart).
+     *
+     * @param int $length Number of bytes to read
+     * @return null|string null when EOF is hit before any byte was read, otherwise exactly $length bytes
+     * @throws RuntimeException when fread fails, the retries run out or EOF cuts the data short
+     */
+    private function readExactly(int $length): ?string
+    {
+        $data = '';
+        $remaining = $length;
+        $maxAttempts = 100;
+        $attempt = 0;
+
+        while ($remaining > 0 && ! feof($this->stream)) {
+            $chunk = fread($this->stream, $remaining);
+
+            if ($chunk === false) {
+                $this->log('fread返回false', [
+                    'remaining' => $remaining,
+                    'data_read_so_far' => strlen($data),
+                    'data_preview' => bin2hex(substr($data, 0, 200)), // hex: frame bytes may not be valid UTF-8
+                ]);
+                throw new RuntimeException('Failed to read from stream');
+            }
+
+            if ($chunk === '') {
+                if (++$attempt > $maxAttempts) {
+                    $this->log('fread超过最大重试次数', [
+                        'total_attempts' => $attempt,
+                        'data_read_so_far' => strlen($data),
+                        'remaining' => $remaining,
+                        'requested_length' => $length,
+                        'data_preview' => bin2hex(substr($data, 0, 200)),
+                    ]);
+                    throw new RuntimeException("Failed to read {$length} bytes after {$maxAttempts} attempts");
+                }
+                usleep(10000); // 10 ms pause before retrying an empty read
+                continue;
+            }
+
+            $data .= $chunk;
+            $remaining -= strlen($chunk);
+            $attempt = 0; // data arrived, so the retry budget starts over
+        }
+
+        if ($remaining > 0) {
+            if ($data === '') {
+                // Normal EOF, no log needed
+                return null;
+            }
+            $this->log('意外的EOF，数据不完整', [
+                'data_read' => strlen($data),
+                'expected' => $length,
+                'remaining' => $remaining,
+                'data_preview' => bin2hex(substr($data, 0, 200)),
+            ]);
+            throw new RuntimeException('Unexpected EOF: read ' . strlen($data) . " bytes, expected {$length}");
+        }
+
+        return $data;
+    }
+
+    /**
+     * Decode the next complete frame held in the buffer and remove it from the buffer.
+     *
+     * @return null|array ['headers' => array, 'payload' => string], or null if no complete frame is buffered
+     * @throws RuntimeException when a CRC check fails or the prelude lengths are inconsistent
+     */
+    private function parseNextMessage(): ?array
+    {
+        // Need at least 12 bytes for prelude
+        if (strlen($this->buffer) < 12) {
+            return null;
+        }
+
+        // Prelude: total length, headers length, then the CRC of those first 8 bytes.
+        $prelude = unpack('Ntotal/Nheaders/Ncrc', substr($this->buffer, 0, 12));
+        if ($this->crc32(substr($this->buffer, 0, 8)) !== $prelude['crc']) {
+            throw new RuntimeException('Prelude CRC mismatch');
+        }
+
+        $totalLength = $prelude['total'];
+        $headersLength = $prelude['headers'];
+        if ($totalLength < self::MIN_MESSAGE_LENGTH || $headersLength > $totalLength - self::MIN_MESSAGE_LENGTH) {
+            throw new RuntimeException('Invalid event-stream prelude lengths');
+        }
+
+        // Check if we have the complete message
+        if (strlen($this->buffer) < $totalLength) {
+            return null;
+        }
+
+        // Extract the complete message
+        $messageBytes = substr($this->buffer, 0, $totalLength);
+        $this->buffer = substr($this->buffer, $totalLength);
+
+        // The message CRC covers every byte of the frame except the trailing 4 CRC bytes.
+        $messageCrc = unpack('N', substr($messageBytes, -4))[1];
+        if ($this->crc32(substr($messageBytes, 0, -4)) !== $messageCrc) {
+            throw new RuntimeException('Message CRC mismatch');
+        }
+
+        return [
+            'headers' => $this->parseHeaders(substr($messageBytes, 12, $headersLength)),
+            'payload' => substr($messageBytes, 12 + $headersLength, $totalLength - $headersLength - 16),
+        ];
+    }
+
+    /**
+     * Parse headers from header bytes (each: 1-byte name length, name, 1-byte value type, value).
+     *
+     * @param string $headersBytes Raw header bytes
+     * @return array Parsed headers as a name => value map
+     * @throws RuntimeException when the header block is truncated or uses an unknown value type
+     */
+    private function parseHeaders(string $headersBytes): array
+    {
+        $headers = [];
+        $offset = 0;
+        $length = strlen($headersBytes);
+
+        while ($offset < $length) {
+            // Read header name length (1 byte)
+            $nameLength = ord($headersBytes[$offset]);
+            ++$offset;
+
+            // The name must be followed by at least the 1-byte value type.
+            if ($offset + $nameLength >= $length) {
+                throw new RuntimeException('Truncated event-stream header');
+            }
+
+            // Read header name
+            $name = substr($headersBytes, $offset, $nameLength);
+            $offset += $nameLength;
+
+            // Read header value type (1 byte)
+            $valueType = ord($headersBytes[$offset]);
+            ++$offset;
+
+            // Measure the value first so one running past the block is rejected before decoding.
+            $valueLength = $this->getValueLength($headersBytes, $offset, $valueType);
+            if ($offset + $valueLength > $length) {
+                throw new RuntimeException('Truncated event-stream header');
+            }
+
+            $headers[$name] = $this->parseHeaderValue($headersBytes, $offset, $valueType);
+            $offset += $valueLength;
+        }
+
+        return $headers;
+    }
+
+    /**
+     * Parse header value based on type.
+     *
+     * @param string $data Header data
+     * @param int $offset Offset of the first value byte
+     * @param int $type Value type code
+     * @return mixed Parsed value, or null for a type code not listed below
+     */
+    private function parseHeaderValue(string $data, int $offset, int $type): mixed
+    {
+        return match ($type) {
+            0 => true,  // boolean true
+            1 => false, // boolean false
+            2 => ord($data[$offset]), // byte
+            3 => unpack('n', substr($data, $offset, 2))[1], // short
+            4 => unpack('N', substr($data, $offset, 4))[1], // integer
+            5, 8 => unpack('J', substr($data, $offset, 8))[1], // long, timestamp
+            6, 7 => $this->parseString($data, $offset), // byte array, string
+            9 => $this->parseUuid($data, $offset), // UUID
+            default => null,
+        };
+    }
+
+    /**
+     * Get the number of bytes the value of the given type occupies at $offset.
+     *
+     * @throws RuntimeException when the type code is unknown or a length prefix is cut off
+     */
+    private function getValueLength(string $data, int $offset, int $type): int
+    {
+        return match ($type) {
+            0, 1 => 0,  // boolean (no additional bytes)
+            2 => 1,     // byte
+            3 => 2,     // short
+            4 => 4,     // integer
+            5, 8 => 8,  // long, timestamp
+            6, 7 => $this->readLengthPrefix($data, $offset) + 2, // byte array, string (2-byte length + data)
+            9 => 16,    // UUID
+            // Guessing a size for an unknown type would misalign every following header.
+            default => throw new RuntimeException("Unknown event-stream header value type: {$type}"),
+        };
+    }
+
+    /**
+     * Read the 2-byte big-endian length that precedes byte-array and string values.
+     *
+     * @throws RuntimeException when fewer than 2 bytes remain at $offset
+     */
+    private function readLengthPrefix(string $data, int $offset): int
+    {
+        if ($offset + 2 > strlen($data)) {
+            throw new RuntimeException('Truncated event-stream header');
+        }
+
+        return unpack('n', substr($data, $offset, 2))[1];
+    }
+
+    /**
+     * Parse a length-prefixed value (string or byte array) and return its raw bytes.
+     */
+    private function parseString(string $data, int $offset): string
+    {
+        return substr($data, $offset + 2, $this->readLengthPrefix($data, $offset));
+    }
+
+    /**
+     * Parse UUID value: 16 raw bytes rendered as hex in 8-4-4-4-12 grouping.
+     */
+    private function parseUuid(string $data, int $offset): string
+    {
+        $hex = bin2hex(substr($data, $offset, 16));
+
+        return sprintf(
+            '%s-%s-%s-%s-%s',
+            substr($hex, 0, 8),
+            substr($hex, 8, 4),
+            substr($hex, 12, 4),
+            substr($hex, 16, 4),
+            substr($hex, 20, 12)
+        );
+    }
+
+    /**
+     * Calculate the checksum used by the event-stream framing.
+     *
+     * The framing uses the standard CRC-32 (IEEE 802.3, the gzip/zlib variant), which is
+     * exactly what PHP's crc32() computes - not CRC-32C (Castagnoli).
+     *
+     * @param string $data Data to checksum
+     * @return int CRC32 value
+     */
+    private function crc32(string $data): int
+    {
+        return crc32($data);
+    }
+
+    /**
+     * Log last read chunks from the underlying SimpleCURLClient stream.
+     */
+    private function logLastReadChunks(): void
+    {
+        try {
+            // Get stream metadata which includes wrapper_data
+            $metadata = stream_get_meta_data($this->stream);
+            $wrapper = $metadata['wrapper_data'] ?? null;
+
+            // Check if it's a SimpleCURLClient instance
+            if (! $wrapper instanceof SimpleCURLClient) {
+                return;
+            }
+
+            // Get custom metadata from SimpleCURLClient
+            $customMetadata = $wrapper->stream_metadata();
+            if (! isset($customMetadata['last_read']) || ! is_array($customMetadata['last_read'])) {
+                return;
+            }
+
+            // Format last read data for logging
+            $lastReadPreview = [];
+            foreach ($customMetadata['last_read'] as $data) {
+                // Keep original data as-is, but convert non-UTF-8 binary data to hex for JSON safety
+                if (is_string($data) && ! mb_check_encoding($data, 'UTF-8')) {
+                    $lastReadPreview[] = bin2hex($data);
+                } else {
+                    $lastReadPreview[] = $data;
+                }
+            }
+
+            LogUtil::getHyperfLogger()?->info('SimpleCURLClientStreamCompleted', [
+                'last_read_count' => count($customMetadata['last_read']),
+                'last_read_preview' => $lastReadPreview,
+            ]);
+        } catch (Throwable $e) {
+            // Best effort: report the failure as a warning instead of letting it disrupt parsing
+            $logger = LogUtil::getHyperfLogger();
+            $logger?->warning('Failed to log last read chunks', [
+                'error' => $e->getMessage(),
+            ]);
+        }
+    }
+
+    /**
+     * Log parser activity at info level; does nothing when no logger is available.
+     *
+     * @param string $message Log message
+     * @param array $context Additional context data
+     */
+    private function log(string $message, array $context = []): void
+    {
+        try {
+            $logger = LogUtil::getHyperfLogger();
+            if ($logger === null) {
+                return;
+            }
+
+            $context['parser_class'] = self::class;
+            $logger->info('[AwsEventStreamParser] ' . $message, $context);
+        } catch (Throwable $e) {
+            // Silently fail if logging fails to prevent disrupting parser operations
+        }
+    }
+}
diff --git a/src/Api/Providers/AwsBedrock/AwsSignatureV4.php b/src/Api/Providers/AwsBedrock/AwsSignatureV4.php
new file mode 100644
index 0000000..974e6df
--- /dev/null
+++ b/src/Api/Providers/AwsBedrock/AwsSignatureV4.php
@@ -0,0 +1,313 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\AwsBedrock;
+
+use Psr\Http\Message\RequestInterface;
+
+/**
+ * AWS Signature Version 4 implementation for signing HTTP requests.
+ *
+ * A signer is bound to one set of credentials and one region and always signs
+ * for the "bedrock" service. signRequest() is the only public entry point; the
+ * private helpers each build one piece of the SigV4 calculation (canonical
+ * request, string to sign, derived signing key).
+ */
+class AwsSignatureV4
+{
+    /** gmdate() format of the X-Amz-Date value, e.g. 20240131T235959Z. */
+    private const ISO8601_BASIC = 'Ymd\THis\Z';
+
+    private const ALGORITHM = 'AWS4-HMAC-SHA256';
+
+    /** Service name used in the credential scope and in key derivation. */
+    private const SERVICE = 'bedrock';
+
+    private const TERMINATOR = 'aws4_request';
+
+    private string $accessKey;
+
+    private string $secretKey;
+
+    private string $region;
+
+    private ?string $sessionToken;
+
+    /**
+     * Cache for derived signing keys.
+     */
+    private array $cache = [];
+
+    /** Number of keys derived since the cache was last cleared. */
+    private int $cacheSize = 0;
+
+    /**
+     * Headers that should not be signed.
+     */
+    private array $headerBlacklist = [
+        'cache-control',
+        'content-length',
+        'expect',
+        'max-forwards',
+        'pragma',
+        'range',
+        'te',
+        'if-match',
+        'if-none-match',
+        'if-modified-since',
+        'if-unmodified-since',
+        'if-range',
+        'accept',
+        'authorization',
+        'proxy-authorization',
+        'from',
+        'referer',
+        'user-agent',
+        'x-amz-user-agent',
+        'x-amzn-trace-id',
+        'aws-sdk-invocation-id',
+        'aws-sdk-retry',
+    ];
+
+    /**
+     * @param string $accessKey AWS access key id placed in the Credential field
+     * @param string $secretKey AWS secret key used to derive the signing key
+     * @param string $region Region used in the credential scope
+     * @param null|string $sessionToken Optional token sent as X-Amz-Security-Token
+     */
+    public function __construct(
+        string $accessKey,
+        string $secretKey,
+        string $region,
+        ?string $sessionToken = null
+    ) {
+        $this->accessKey = $accessKey;
+        $this->secretKey = $secretKey;
+        $this->region = $region;
+        $this->sessionToken = $sessionToken;
+    }
+
+    /**
+     * Sign a PSR-7 request with AWS Signature V4.
+     *
+     * Adds X-Amz-Date, Host and (when a session token is set) X-Amz-Security-Token,
+     * then computes the signature over the resulting request and attaches it
+     * as the Authorization header.
+     *
+     * @param RequestInterface $request Request to sign; its body is read in full to hash it
+     * @return RequestInterface New request instance carrying the signing headers
+     */
+    public function signRequest(RequestInterface $request): RequestInterface
+    {
+        // One timestamp feeds both X-Amz-Date and the credential scope date.
+        $timestamp = gmdate(self::ISO8601_BASIC);
+        $date = substr($timestamp, 0, 8); // YYYYMMDD
+
+        // getPort() is null for the scheme's default port; any other port is
+        // part of the Host header value and therefore has to be signed as well.
+        $uri = $request->getUri();
+        $host = $uri->getHost();
+        if ($uri->getPort() !== null) {
+            $host .= ':' . $uri->getPort();
+        }
+
+        $request = $request
+            ->withHeader('X-Amz-Date', $timestamp)
+            ->withHeader('Host', $host);
+
+        if ($this->sessionToken) {
+            $request = $request->withHeader('X-Amz-Security-Token', $this->sessionToken);
+        }
+
+        // Canonical request -> string to sign -> signature.
+        $credentialScope = $this->createCredentialScope($date);
+        $stringToSign = $this->createStringToSign(
+            $timestamp,
+            $credentialScope,
+            $this->createCanonicalRequest($request)
+        );
+        $signature = $this->calculateSignature($date, $stringToSign);
+
+        $authorizationHeader = sprintf(
+            '%s Credential=%s/%s, SignedHeaders=%s, Signature=%s',
+            self::ALGORITHM,
+            $this->accessKey,
+            $credentialScope,
+            $this->getSignedHeaders($request),
+            $signature
+        );
+
+        return $request->withHeader('Authorization', $authorizationHeader);
+    }
+
+    /**
+     * Create canonical request string.
+     *
+     * The six parts (method, URI, query, headers, signed header names, payload
+     * hash) are joined with newlines in that order.
+     */
+    private function createCanonicalRequest(RequestInterface $request): string
+    {
+        return implode("\n", [
+            $request->getMethod(),
+            $this->getCanonicalUri($request),
+            $this->getCanonicalQueryString($request),
+            $this->getCanonicalHeaders($request),
+            $this->getSignedHeaders($request),
+            $this->getPayloadHash($request),
+        ]);
+    }
+
+    /**
+     * Get canonical URI from request.
+     *
+     * The path reported by the URI object is percent-encoded once more, with
+     * forward slashes kept literal; an empty path becomes "/".
+     */
+    private function getCanonicalUri(RequestInterface $request): string
+    {
+        $path = $request->getUri()->getPath();
+        if ($path === '') {
+            return '/';
+        }
+
+        // Encode the path, but preserve forward slashes
+        $encoded = rawurlencode(ltrim($path, '/'));
+        return '/' . str_replace('%2F', '/', $encoded);
+    }
+
+    /**
+     * Get canonical query string from request.
+     *
+     * Each "name=value" pair of the raw query is decoded, re-encoded with
+     * rawurlencode() and the pairs are sorted by encoded name, then by encoded
+     * value, in byte order. The query is split by hand rather than with
+     * parse_str(), because parse_str() rewrites names (dots and spaces become
+     * underscores, "[]" suffixes are stripped) and keeps only the last of
+     * several same-named parameters, so the signed query would no longer
+     * match the query that is actually sent.
+     */
+    private function getCanonicalQueryString(RequestInterface $request): string
+    {
+        $query = $request->getUri()->getQuery();
+        if ($query === '') {
+            return '';
+        }
+
+        $pairs = [];
+        foreach (explode('&', $query) as $segment) {
+            if ($segment === '') {
+                continue;
+            }
+            // A parameter without "=" is signed with an empty value.
+            [$name, $value] = array_pad(explode('=', $segment, 2), 2, '');
+            $pairs[] = [rawurlencode(urldecode($name)), rawurlencode(urldecode($value))];
+        }
+
+        // strcmp() forces byte-order comparison; default sorting would compare
+        // numeric-looking names such as "10" and "9" as numbers.
+        usort(
+            $pairs,
+            static fn (array $a, array $b): int => strcmp($a[0], $b[0]) ?: strcmp($a[1], $b[1])
+        );
+
+        return implode('&', array_map(
+            static fn (array $pair): string => $pair[0] . '=' . $pair[1],
+            $pairs
+        ));
+    }
+
+    /**
+     * Get canonical headers string.
+     *
+     * One "name:value" line per signed header, names lower-cased and sorted,
+     * each line (including the last) terminated by a newline.
+     */
+    private function getCanonicalHeaders(RequestInterface $request): string
+    {
+        $lines = [];
+        foreach ($request->getHeaders() as $name => $values) {
+            $name = strtolower((string) $name);
+            if (! $this->shouldSignHeader($name)) {
+                continue;
+            }
+            // Multiple values are comma-joined, then whitespace runs collapse to one space.
+            $value = preg_replace('/\s+/', ' ', trim(implode(',', $values)));
+            $lines[$name] = $name . ':' . $value;
+        }
+
+        ksort($lines);
+        return implode("\n", $lines) . "\n";
+    }
+
+    /**
+     * Get signed headers list.
+     *
+     * Lower-cased names of all headers that are signed, sorted and joined with ";".
+     */
+    private function getSignedHeaders(RequestInterface $request): string
+    {
+        $names = [];
+        foreach (array_keys($request->getHeaders()) as $name) {
+            $name = strtolower((string) $name);
+            if ($this->shouldSignHeader($name)) {
+                $names[] = $name;
+            }
+        }
+
+        sort($names);
+        return implode(';', $names);
+    }
+
+    /**
+     * Check if header should be signed.
+     *
+     * @param string $headerName Lower-cased header name
+     */
+    private function shouldSignHeader(string $headerName): bool
+    {
+        return ! in_array($headerName, $this->headerBlacklist, true);
+    }
+
+    /**
+     * Get payload hash (SHA256 of request body).
+     *
+     * The body stream is read in full and rewound afterwards so it can still
+     * be sent; rewind() is called unconditionally, as before.
+     */
+    private function getPayloadHash(RequestInterface $request): string
+    {
+        $stream = $request->getBody();
+        $digest = hash('sha256', (string) $stream);
+        $stream->rewind();
+
+        return $digest;
+    }
+
+    /**
+     * Create credential scope.
+     *
+     * @param string $date Signing date as YYYYMMDD
+     * @return string "<date>/<region>/bedrock/aws4_request"
+     */
+    private function createCredentialScope(string $date): string
+    {
+        return implode('/', [$date, $this->region, self::SERVICE, self::TERMINATOR]);
+    }
+
+    /**
+     * Create string to sign.
+     *
+     * Algorithm, timestamp, credential scope and the SHA-256 hex digest of the
+     * canonical request, joined with newlines.
+     */
+    private function createStringToSign(
+        string $timestamp,
+        string $credentialScope,
+        string $canonicalRequest
+    ): string {
+        return implode("\n", [
+            self::ALGORITHM,
+            $timestamp,
+            $credentialScope,
+            hash('sha256', $canonicalRequest),
+        ]);
+    }
+
+    /**
+     * Calculate signature using derived signing key.
+     *
+     * @return string Hex-encoded HMAC-SHA256 of the string to sign
+     */
+    private function calculateSignature(string $date, string $stringToSign): string
+    {
+        return hash_hmac('sha256', $stringToSign, $this->getSigningKey($date));
+    }
+
+    /**
+     * Derive signing key with caching.
+     *
+     * The key is an HMAC-SHA256 chain over date, region, service and the
+     * "aws4_request" terminator, seeded with "AWS4" + secret key. Results are
+     * kept per cache key; the cache is emptied once more than 50 keys have
+     * been derived since the last reset.
+     *
+     * @param string $date Signing date as YYYYMMDD
+     * @return string Raw (binary) signing key
+     */
+    private function getSigningKey(string $date): string
+    {
+        $cacheKey = $date . '_' . $this->region . '_' . self::SERVICE . '_' . $this->secretKey;
+
+        if (isset($this->cache[$cacheKey])) {
+            return $this->cache[$cacheKey];
+        }
+
+        // Clear the cache when it reaches 50 entries
+        if (++$this->cacheSize > 50) {
+            $this->cache = [];
+            $this->cacheSize = 0;
+        }
+
+        $key = 'AWS4' . $this->secretKey;
+        foreach ([$date, $this->region, self::SERVICE, self::TERMINATOR] as $part) {
+            $key = hash_hmac('sha256', $part, $key, true);
+        }
+
+        return $this->cache[$cacheKey] = $key;
+    }
+}
diff --git a/src/Api/Providers/AwsBedrock/AwsType.php b/src/Api/Providers/AwsBedrock/AwsType.php
index 569b490..e85ff67 100644
--- a/src/Api/Providers/AwsBedrock/AwsType.php
+++ b/src/Api/Providers/AwsBedrock/AwsType.php
@@ -14,7 +14,18 @@
 
 class AwsType
 {
+    /**
+     * Converse API with AWS SDK.
+     */
     public const CONVERSE = 'converse';
 
+    /**
+     * Converse API without AWS SDK (custom Guzzle implementation).
+     */
+    public const CONVERSE_CUSTOM = 'converse_custom';
+
+    /**
+     * InvokeModel API with AWS SDK.
+     */
     public const INVOKE = 'invoke';
 }
diff --git a/src/Api/Providers/AwsBedrock/Client.php b/src/Api/Providers/AwsBedrock/Client.php
index cf60cc8..a3629f3 100644
--- a/src/Api/Providers/AwsBedrock/Client.php
+++ b/src/Api/Providers/AwsBedrock/Client.php
@@ -299,7 +299,17 @@ protected function convertException(Throwable $exception, array $context = []):
      */
     protected function getHttpArgs(bool $stream = false, ?string $proxy = null): array
     {
-        $http = [];
+        // For streaming requests, use first chunk timeout to fail fast on network issues
+        // For non-streaming requests, use total timeout
+        $timeout = $stream
+            ? $this->requestOptions->getStreamFirstChunkTimeout()
+            : $this->requestOptions->getTotalTimeout();
+
+        $http = [
+            'timeout' => $timeout,
+            'connect_timeout' => $this->requestOptions->getConnectionTimeout(),
+        ];
+
         if ($stream) {
             $http['stream'] = true;
         }
diff --git a/src/Api/Providers/AwsBedrock/ConverseClient.php b/src/Api/Providers/AwsBedrock/ConverseClient.php
index 08740f3..849049d 100644
--- a/src/Api/Providers/AwsBedrock/ConverseClient.php
+++ b/src/Api/Providers/AwsBedrock/ConverseClient.php
@@ -76,7 +76,9 @@ public function chatCompletions(ChatCompletionRequest $chatRequest): ChatComplet
                 'request_id' => $requestId,
                 'model_id' => $modelId,
                 'duration_ms' => $duration,
-                'usage' => $result['usage'] ?? [],
+                'usage' => $result['usage'] ?? [], // 原始Claude usage
+                'converted_usage' => $chatCompletionResponse->getUsage()->toArray(), // 转换后的usage
+                'cache_hit_rate' => $chatCompletionResponse->getUsage()->getCacheHitRatePercentage(), // 缓存命中率
                 'content' => $chatCompletionResponse->getContent(),
                 'response_headers' => $result['@metadata']['headers'] ?? [],
                 'performance_flag' => $performanceFlag,
@@ -140,7 +142,7 @@ public function chatCompletionsStream(ChatCompletionRequest $chatRequest): ChatC
                 'performance_flag' => $performanceFlag,
             ];
 
-            $this->logger?->info('AwsBedrockConverseStreamFirstResponse', LoggingConfigHelper::filterAndFormatLogData($logData, $this->requestOptions));
+            $this->logger?->info('AwsBedrockConverseStreamResponse', LoggingConfigHelper::filterAndFormatLogData($logData, $this->requestOptions));
 
             // 创建 AWS Bedrock 格式转换器，负责将 AWS Bedrock 格式转换为 OpenAI 格式
             $bedrockConverter = new AwsBedrockConverseFormatConverter($result, $this->logger, $modelId);
diff --git a/src/Api/Providers/AwsBedrock/ConverseConverter.php b/src/Api/Providers/AwsBedrock/ConverseConverter.php
index 5e5f94d..e975417 100644
--- a/src/Api/Providers/AwsBedrock/ConverseConverter.php
+++ b/src/Api/Providers/AwsBedrock/ConverseConverter.php
@@ -20,6 +20,7 @@
 use Hyperf\Odin\Message\ToolMessage;
 use Hyperf\Odin\Message\UserMessage;
 use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Utils\ImageDownloader;
 use stdClass;
 
 class ConverseConverter implements ConverterInterface
@@ -264,11 +265,16 @@ private function processMultiModalContents(UserMessage $message): array
     /**
      * 处理图像URL并转换为适合AWS Bedrock Claude格式的图像数据.
      *
-     * @param string $imageUrl 图像URL（必须是 data:image 格式的 base64 编码数据）
+     * @param string $imageUrl 图像URL（支持 data:image base64 格式或 HTTP(S) URL）
      * @return array Claude 格式的图像数据
      */
     private function processImageUrl(string $imageUrl): array
     {
+        // 如果是远程链接，先下载并转换为base64格式
+        if (ImageDownloader::isRemoteImageUrl($imageUrl)) {
+            $imageUrl = ImageDownloader::downloadAndConvertToBase64($imageUrl);
+        }
+
         // 检查是否为base64编码的Data URL
         if (str_starts_with($imageUrl, 'data:image/') && str_contains($imageUrl, ';base64,')) {
             // 提取MIME类型和base64数据
@@ -287,7 +293,7 @@ private function processImageUrl(string $imageUrl): array
             ];
         }
 
-        // 对于非 base64 编码的 URL，抛出异常
-        throw new LLMInvalidRequestException('图像URL必须是 base64 编码格式 (data:image/xxx;base64,...)');
+        // 不支持的URL格式
+        throw new LLMInvalidRequestException('图像URL必须是 base64 编码格式 (data:image/xxx;base64,...) 或 HTTP(S) URL');
     }
 }
diff --git a/src/Api/Providers/AwsBedrock/ConverseCustomClient.php b/src/Api/Providers/AwsBedrock/ConverseCustomClient.php
new file mode 100644
index 0000000..585362c
--- /dev/null
+++ b/src/Api/Providers/AwsBedrock/ConverseCustomClient.php
@@ -0,0 +1,650 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\AwsBedrock;
+
+use GuzzleHttp\Exception\BadResponseException;
+use GuzzleHttp\Exception\GuzzleException;
+use GuzzleHttp\Psr7\Request;
+use Hyperf\Engine\Coroutine;
+use Hyperf\Odin\Api\Providers\AbstractClient;
+use Hyperf\Odin\Api\Providers\AwsBedrock\Cache\AutoCacheConfig;
+use Hyperf\Odin\Api\Providers\AwsBedrock\Cache\AwsBedrockCachePointManager;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Api\Request\EmbeddingRequest;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Api\Response\ChatCompletionResponse;
+use Hyperf\Odin\Api\Response\ChatCompletionStreamResponse;
+use Hyperf\Odin\Api\Response\EmbeddingResponse;
+use Hyperf\Odin\Api\Transport\OdinSimpleCurl;
+use Hyperf\Odin\Contract\Message\MessageInterface;
+use Hyperf\Odin\Event\AfterChatCompletionsEvent;
+use Hyperf\Odin\Event\AfterChatCompletionsStreamEvent;
+use Hyperf\Odin\Exception\LLMException;
+use Hyperf\Odin\Exception\LLMException\Api\LLMInvalidRequestException;
+use Hyperf\Odin\Exception\LLMException\Api\LLMRateLimitException;
+use Hyperf\Odin\Exception\LLMException\LLMApiException;
+use Hyperf\Odin\Exception\LLMException\Network\LLMReadTimeoutException;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\ToolMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Utils\EventUtil;
+use Hyperf\Odin\Utils\LoggingConfigHelper;
+use Hyperf\Odin\Utils\LogUtil;
+use Psr\Log\LoggerInterface;
+use RuntimeException;
+use Throwable;
+
+/**
+ * Custom AWS Bedrock Converse Client using Guzzle HTTP without AWS SDK.
+ */
+class ConverseCustomClient extends AbstractClient
+{
+    protected AwsBedrockConfig $awsConfig;
+
+    protected AwsSignatureV4 $signer;
+
+    protected ConverterInterface $converter;
+
+    protected string $endpoint;
+
+    /**
+     * Set up the converter, endpoint and request signer, then hand over to the
+     * parent client constructor.
+     *
+     * @param AwsBedrockConfig $config Supplies region, access key and secret key
+     * @param null|ApiOptions $requestOptions Request options; a default ApiOptions is created when omitted
+     * @param null|LoggerInterface $logger Optional logger passed to the parent client
+     */
+    public function __construct(AwsBedrockConfig $config, ?ApiOptions $requestOptions = null, ?LoggerInterface $logger = null)
+    {
+        $requestOptions ??= new ApiOptions();
+
+        // buildEndpoint() reads $this->awsConfig, so the config is stored first.
+        $this->awsConfig = $config;
+        $this->converter = $this->createConverter();
+        $this->endpoint = $this->buildEndpoint();
+
+        // NOTE(review): no session token is passed to the signer here; confirm
+        // whether temporary credentials need to be supported.
+        $this->signer = new AwsSignatureV4($config->accessKey, $config->secretKey, $config->region);
+
+        parent::__construct($config, $requestOptions, $logger);
+    }
+
+    /**
+     * Chat completions (non-streaming).
+     *
+     * Builds the Converse request body, signs a POST to
+     * "{endpoint}/model/{modelId}/converse", sends it with the Guzzle client
+     * and converts the JSON answer into a ChatCompletionResponse. An
+     * AfterChatCompletionsEvent is dispatched before returning.
+     *
+     * JSON encoding and decoding use JSON_THROW_ON_ERROR: a body that cannot
+     * be encoded (e.g. malformed UTF-8) or a response that is not valid JSON
+     * now surfaces as an error instead of silently becoming an empty request
+     * body or an empty completion.
+     *
+     * @param ChatCompletionRequest $chatRequest Request to send; validated first
+     * @throws LLMException Any failure inside the try block is converted by convertGuzzleException()/convertException()
+     */
+    public function chatCompletions(ChatCompletionRequest $chatRequest): ChatCompletionResponse
+    {
+        $chatRequest->validate();
+        $startTime = microtime(true);
+
+        try {
+            // Get model ID and convert request parameters
+            $modelId = $chatRequest->getModel();
+            $requestBody = $this->prepareConverseRequestBody($chatRequest);
+
+            // Generate request ID
+            $requestId = $this->generateRequestId();
+
+            // Build URL with URL-encoded model ID to support ARNs with special characters
+            $encodedModelId = rawurlencode($modelId);
+            $url = "{$this->endpoint}/model/{$encodedModelId}/converse";
+
+            // Convert binary bytes to base64 for JSON encoding
+            $requestBodyForJson = $this->prepareBytesForJsonEncoding($requestBody);
+
+            // Fail loudly on unencodable data; without the flag json_encode()
+            // returns false and the request would go out with an empty body.
+            $bodyJson = json_encode($requestBodyForJson, JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR);
+
+            // Create PSR-7 request
+            $request = new Request(
+                'POST',
+                $url,
+                [
+                    'Content-Type' => 'application/json',
+                    'Accept' => 'application/json',
+                ],
+                $bodyJson
+            );
+
+            // Sign the request
+            $signedRequest = $this->signer->signRequest($request);
+
+            $this->logger?->info('AwsBedrockConverseCustomRequest', LoggingConfigHelper::filterAndFormatLogData([
+                'request_id' => $requestId,
+                'model_id' => $modelId,
+                'url' => $url,
+                'body' => $requestBody,
+                'token_estimate' => $chatRequest->getTokenEstimateDetail(),
+            ], $this->requestOptions));
+
+            // Send request with Guzzle
+            $response = $this->client->send($signedRequest, $this->getGuzzleOptions(false));
+
+            $endTime = microtime(true);
+            $duration = round(($endTime - $startTime) * 1000); // milliseconds
+
+            // Parse response; invalid JSON raises instead of yielding null.
+            $responseBody = json_decode($response->getBody()->getContents(), true, flags: JSON_THROW_ON_ERROR);
+
+            // Convert to PSR-7 standard Response
+            $psrResponse = ResponseHandler::convertConverseToPsrResponse(
+                $responseBody['output'] ?? [],
+                $responseBody['usage'] ?? [],
+                $chatRequest->getModel()
+            );
+            $chatCompletionResponse = new ChatCompletionResponse($psrResponse, $this->logger);
+
+            $performanceFlag = LogUtil::getPerformanceFlag($duration);
+
+            $this->logger?->info('AwsBedrockConverseCustomResponse', LoggingConfigHelper::filterAndFormatLogData([
+                'request_id' => $requestId,
+                'model_id' => $modelId,
+                'duration_ms' => $duration,
+                'usage' => $chatCompletionResponse->getUsage()->toArray(),
+                'performance_flag' => $performanceFlag,
+            ], $this->requestOptions));
+
+            EventUtil::dispatch(new AfterChatCompletionsEvent($chatRequest, $chatCompletionResponse, $duration));
+
+            return $chatCompletionResponse;
+        } catch (GuzzleException $e) {
+            throw $this->convertGuzzleException($e);
+        } catch (Throwable $e) {
+            throw $this->convertException($e);
+        }
+    }
+
+    /**
+     * Chat completions (streaming).
+     *
+     * Builds and signs a POST to "{endpoint}/model/{modelId}/converse-stream"
+     * and returns a ChatCompletionStreamResponse that iterates the event
+     * stream through CustomConverseStreamConverter. Inside a coroutine the
+     * request is sent with OdinSimpleCurl, otherwise with the Guzzle client.
+     *
+     * @param ChatCompletionRequest $chatRequest Request to send; validated first
+     * @throws LLMException Any failure inside the try block is converted by convertGuzzleException()/convertException()
+     */
+    public function chatCompletionsStream(ChatCompletionRequest $chatRequest): ChatCompletionStreamResponse
+    {
+        $chatRequest->validate();
+        $startTime = microtime(true);
+
+        try {
+            // Get model ID and convert request parameters
+            $modelId = $chatRequest->getModel();
+            $requestBody = $this->prepareConverseRequestBody($chatRequest);
+            $requestId = $this->generateRequestId();
+
+            // Build streaming URL with URL-encoded model ID to support ARNs with special characters
+            $encodedModelId = rawurlencode($modelId);
+            $url = "{$this->endpoint}/model/{$encodedModelId}/converse-stream";
+
+            // Convert binary bytes to base64 for JSON encoding
+            $requestBodyForJson = $this->prepareBytesForJsonEncoding($requestBody);
+
+            // Encode once and keep the string: the same bytes are signed and sent.
+            // JSON_THROW_ON_ERROR prevents an unencodable body from being sent as
+            // an empty payload (json_encode() would otherwise return false).
+            $bodyJson = json_encode($requestBodyForJson, JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR);
+
+            // Create PSR-7 request for streaming
+            $request = new Request(
+                'POST',
+                $url,
+                [
+                    'Content-Type' => 'application/json',
+                    'Accept' => 'application/vnd.amazon.eventstream',
+                ],
+                $bodyJson
+            );
+
+            // Sign the request
+            $signedRequest = $this->signer->signRequest($request);
+
+            $this->logger?->info('AwsBedrockConverseCustomStreamRequest', LoggingConfigHelper::filterAndFormatLogData([
+                'request_id' => $requestId,
+                'model_id' => $modelId,
+                'url' => $url,
+                'body' => $requestBody,
+                'token_estimate' => $chatRequest->getTokenEstimateDetail(),
+            ], $this->requestOptions));
+
+            // A coroutine id is positive; Swoole reports -1 outside a coroutine,
+            // which a plain truthiness test would mistake for "inside". The
+            // class_exists() guard covers installs without hyperf/engine.
+            $inCoroutine = class_exists(Coroutine::class) && Coroutine::id() > 0;
+
+            if ($inCoroutine) {
+                // OdinSimpleCurl takes flat "name => value" headers.
+                $headers = array_map(
+                    static fn (array $values): string => implode(', ', $values),
+                    $signedRequest->getHeaders()
+                );
+
+                $options = [
+                    'headers' => $headers,
+                    'body' => $bodyJson,  // Exactly the bytes that were signed
+                    'connect_timeout' => $this->requestOptions->getConnectionTimeout(),
+                    'stream_chunk' => $this->requestOptions->getStreamChunkTimeout(),
+                    'header_timeout' => $this->requestOptions->getStreamFirstChunkTimeout(),
+                    'verify' => true,
+                ];
+
+                if ($proxy = $this->requestOptions->getProxy()) {
+                    $options['proxy'] = $proxy;
+                }
+
+                // Use skipContentTypeCheck=true for AWS EventStream (not SSE format)
+                $response = OdinSimpleCurl::send($url, $options, true);
+            } else {
+                // In non-coroutine environment, use Guzzle
+                $response = $this->client->send($signedRequest, $this->getGuzzleOptions(true));
+            }
+
+            $firstResponseTime = microtime(true);
+            $firstResponseDuration = round(($firstResponseTime - $startTime) * 1000);
+
+            $performanceFlag = LogUtil::getPerformanceFlag($firstResponseDuration);
+            $this->logger?->info('AwsBedrockConverseCustomStreamFirstResponse', LoggingConfigHelper::filterAndFormatLogData([
+                'request_id' => $requestId,
+                'model_id' => $modelId,
+                'first_response_ms' => $firstResponseDuration,
+                'response_headers' => $response->getHeaders(),
+                'performance_flag' => $performanceFlag,
+            ], $this->requestOptions));
+
+            $streamConverter = new CustomConverseStreamConverter(
+                $response,
+                $this->logger,
+                $modelId,
+                $this->requestOptions->getStreamChunkTimeout()
+            );
+
+            $chatCompletionStreamResponse = new ChatCompletionStreamResponse(
+                logger: $this->logger,
+                streamIterator: $streamConverter
+            );
+            $chatCompletionStreamResponse->setAfterChatCompletionsStreamEvent(
+                new AfterChatCompletionsStreamEvent($chatRequest, $firstResponseDuration)
+            );
+
+            return $chatCompletionStreamResponse;
+        } catch (GuzzleException $e) {
+            // Must come before the generic handler: Guzzle's transfer exceptions
+            // extend RuntimeException, so a preceding RuntimeException catch would
+            // keep them from ever reaching the status-code aware conversion.
+            throw $this->convertGuzzleException($e);
+        } catch (Throwable $e) {
+            // Includes RuntimeException raised by OdinSimpleCurl.
+            throw $this->convertException($e);
+        }
+    }
+
+    /**
+     * Embeddings (not implemented for Bedrock Converse).
+     *
+     * @param EmbeddingRequest $embeddingRequest Ignored
+     * @throws RuntimeException Always; this client offers no embeddings support
+     */
+    public function embeddings(EmbeddingRequest $embeddingRequest): EmbeddingResponse
+    {
+        throw new RuntimeException('Embeddings are not supported by Bedrock Converse API');
+    }
+
+    /**
+     * Compose the regional Bedrock runtime base URL (scheme and host, no path)
+     * from the region held in the AWS config.
+     */
+    protected function buildEndpoint(): string
+    {
+        return 'https://bedrock-runtime.' . $this->awsConfig->region . '.amazonaws.com';
+    }
+
+    /**
+     * Build chat completions URL (required by AbstractClient).
+     *
+     * Returns only the base endpoint; chatCompletions() and
+     * chatCompletionsStream() append the model-specific Converse path themselves.
+     */
+    protected function buildChatCompletionsUrl(): string
+    {
+        return $this->endpoint;
+    }
+
+    /**
+     * Build embeddings URL (required by AbstractClient).
+     *
+     * Returns the base endpoint as a placeholder; embeddings() in this class
+     * always throws.
+     */
+    protected function buildEmbeddingsUrl(): string
+    {
+        return $this->endpoint;
+    }
+
+    /**
+     * Build completions URL (required by AbstractClient).
+     *
+     * Returns the base endpoint without any path.
+     */
+    protected function buildCompletionsUrl(): string
+    {
+        return $this->endpoint;
+    }
+
+    /**
+     * Get auth headers (not used as we use AWS Signature V4).
+     *
+     * Always empty: the Authorization header is added per request by
+     * AwsSignatureV4::signRequest().
+     */
+    protected function getAuthHeaders(): array
+    {
+        return [];
+    }
+
+    /**
+     * Create converter for message transformation.
+     *
+     * Called once from the constructor; returns a new ConverseConverter.
+     */
+    protected function createConverter(): ConverterInterface
+    {
+        return new ConverseConverter();
+    }
+
+    /**
+     * Assemble the Guzzle request options for one call.
+     *
+     * "timeout" is the first-chunk timeout for streaming calls (so a stalled
+     * stream fails fast) and the total timeout otherwise. HTTP error statuses
+     * raise exceptions and TLS certificates are always verified.
+     *
+     * @param bool $stream Whether the options are for a streaming request
+     * @return array Options array for the Guzzle client's send()
+     */
+    protected function getGuzzleOptions(bool $stream = false): array
+    {
+        if ($stream) {
+            $timeout = $this->requestOptions->getStreamFirstChunkTimeout();
+        } else {
+            $timeout = $this->requestOptions->getTotalTimeout();
+        }
+
+        $options = [
+            'timeout' => $timeout,
+            'connect_timeout' => $this->requestOptions->getConnectionTimeout(),
+            'http_errors' => true,  // 4xx and 5xx responses become exceptions
+        ];
+
+        if ($stream) {
+            $options['stream'] = true;
+        }
+
+        $proxy = $this->requestOptions->getProxy();
+        if ($proxy) {
+            $options['proxy'] = $proxy;
+        }
+
+        // Certificate verification stays on.
+        $options['verify'] = true;
+
+        return $options;
+    }
+
+    /**
+     * Convert Guzzle exception to LLM exception.
+     *
+     * For HTTP error responses the message is taken from the JSON error body
+     * when it carries a non-empty string under "message" or "Message", and
+     * the status code selects the exception type: 429 -> rate limit, other
+     * 4xx -> invalid request, 5xx -> API error. Anything else is a read
+     * timeout when the message contains "timed out", otherwise a generic API
+     * exception.
+     *
+     * @param GuzzleException $e Exception raised by the Guzzle client
+     * @return LLMException Converted exception, with $e as previous
+     */
+    protected function convertGuzzleException(GuzzleException $e): LLMException
+    {
+        $message = $e->getMessage();
+        $code = (int) $e->getCode();
+
+        // Get response body if available (for BadResponseException)
+        if ($e instanceof BadResponseException) {
+            $response = $e->getResponse();
+            $statusCode = $response->getStatusCode();
+
+            // json_decode() returns null for non-JSON bodies; only a decoded
+            // object with a usable string replaces Guzzle's own message.
+            $decoded = json_decode((string) $response->getBody(), true);
+            if (is_array($decoded)) {
+                $detail = $decoded['message'] ?? $decoded['Message'] ?? null;
+                if (is_string($detail) && $detail !== '') {
+                    $message = $detail;
+                }
+            }
+
+            // Map HTTP status codes to LLM exceptions
+            if ($statusCode === 429) {
+                return new LLMRateLimitException($message, $e, $statusCode);
+            }
+
+            if ($statusCode >= 400 && $statusCode < 500) {
+                return new LLMInvalidRequestException($message, $e, $statusCode);
+            }
+
+            if ($statusCode >= 500) {
+                return new LLMApiException($message, $statusCode, $e, 0, $statusCode);
+            }
+        }
+
+        // Check for timeout
+        if (str_contains($message, 'timed out')) {
+            return new LLMReadTimeoutException($message, $e);
+        }
+
+        return new LLMApiException($message, $code, $e);
+    }
+
+    /**
+     * Convert general exception to LLM exception.
+     *
+     * An exception that already is an LLMException is returned unchanged so
+     * its specific type is not lost by re-wrapping. Otherwise the type is
+     * chosen from the message ("timed out"/"timeout" -> read timeout,
+     * "rate limit"/"throttled" -> rate limit) and then from the exception
+     * code (4xx -> invalid request, 5xx and everything else -> API exception).
+     *
+     * @param Throwable $exception Exception to convert
+     * @param array $context Unused by this implementation
+     * @return LLMException Converted exception
+     */
+    protected function convertException(Throwable $exception, array $context = []): LLMException
+    {
+        // Already in the target hierarchy: keep type, message and code intact.
+        if ($exception instanceof LLMException) {
+            return $exception;
+        }
+
+        $message = $exception->getMessage();
+        $code = (int) $exception->getCode();
+
+        // Check for timeout-related errors (fallback, as OdinSimpleCurl should handle most cases)
+        if (str_contains($message, 'timed out') || str_contains($message, 'timeout')) {
+            return new LLMReadTimeoutException($message, $exception);
+        }
+
+        // Check for rate limit
+        if (str_contains($message, 'rate limit') || str_contains($message, 'throttled')) {
+            return new LLMRateLimitException($message, $exception, $code);
+        }
+
+        // Check for client errors
+        if ($code >= 400 && $code < 500) {
+            return new LLMInvalidRequestException($message, $exception, $code);
+        }
+
+        // Check for server errors
+        if ($code >= 500) {
+            return new LLMApiException($message, $code, $exception, 0, $code);
+        }
+
+        // Default to generic API exception
+        return new LLMApiException($message, $code, $exception);
+    }
+
+    /**
+     * Check if auto cache is enabled.
+     *
+     * Delegates to the AWS config; prepareConverseRequestBody() uses the result
+     * to decide whether cache points are configured on the request.
+     */
+    protected function isAutoCache(): bool
+    {
+        return $this->awsConfig->isAutoCache();
+    }
+
+    /**
+     * Get auto cache configuration.
+     *
+     * Delegates to the AWS config; the result is handed to
+     * AwsBedrockCachePointManager when auto cache is enabled.
+     */
+    protected function getAutoCacheConfig(): AutoCacheConfig
+    {
+        return $this->awsConfig->getAutoCacheConfig();
+    }
+
+    /**
+     * Prepare bytes fields for JSON encoding by converting binary data to base64.
+     * This is necessary because AWS Bedrock API expects base64-encoded strings for bytes fields,
+     * while the converter returns binary data (for AWS SDK compatibility).
+     *
+     * The array is walked recursively. A string stored under the key "bytes"
+     * is base64-encoded unless it consists solely of printable ASCII
+     * characters, in which case it is taken to be base64 already and left
+     * untouched. The former extra utf8_decode() length comparison was removed:
+     * it only ran for printable-ASCII strings, where it could never differ,
+     * and utf8_decode() is deprecated as of PHP 8.2.
+     *
+     * NOTE(review): raw content that happens to be a single line of printable
+     * ASCII is indistinguishable from base64 here and is passed through
+     * unencoded; confirm that callers never supply such data.
+     *
+     * @param array $data Request body data
+     * @return array Data with bytes fields converted to base64
+     */
+    private function prepareBytesForJsonEncoding(array $data): array
+    {
+        foreach ($data as $key => $value) {
+            if (is_array($value)) {
+                // Recursively process nested arrays
+                $data[$key] = $this->prepareBytesForJsonEncoding($value);
+                continue;
+            }
+
+            // ctype_print() is false for the empty string and for any byte
+            // outside 0x20-0x7E, i.e. for everything that looks binary.
+            if ($key === 'bytes' && is_string($value) && ! ctype_print($value)) {
+                $data[$key] = base64_encode($value);
+            }
+        }
+
+        return $data;
+    }
+
+    /**
+     * Build the request body for the Converse API from a chat completion request.
+     *
+     * When auto cache is enabled, cache points are configured on the request before
+     * its messages are read. Optional sections are only added when they have a value.
+     *
+     * @param ChatCompletionRequest $chatRequest Request to translate
+     * @return array Request body in Converse API format
+     */
+    private function prepareConverseRequestBody(ChatCompletionRequest $chatRequest): array
+    {
+        if ($this->isAutoCache()) {
+            $manager = new AwsBedrockCachePointManager($this->getAutoCacheConfig());
+            $manager->configureCachePoints($chatRequest);
+        }
+
+        $system = '';
+        $converted = [];
+
+        // Tool results that answer a multi-tool assistant message are merged before conversion
+        $grouped = $this->processMessagesWithToolGrouping($chatRequest->getMessages());
+        foreach ($grouped as $item) {
+            if (! $item instanceof MessageInterface) {
+                continue;
+            }
+            // There is no default arm: an unlisted message class raises UnhandledMatchError
+            match (true) {
+                $item instanceof SystemMessage => $system = $this->converter->convertSystemMessage($item),
+                $item instanceof ToolMessage => $converted[] = $this->converter->convertToolMessage($item),
+                $item instanceof AssistantMessage => $converted[] = $this->converter->convertAssistantMessage($item),
+                $item instanceof UserMessage => $converted[] = $this->converter->convertUserMessage($item),
+            };
+        }
+
+        $maxTokens = $chatRequest->getMaxTokens();
+        $temperature = $chatRequest->getTemperature();
+        $stopSequences = $chatRequest->getStop();
+
+        $body = [
+            'messages' => $converted,
+        ];
+
+        // The system prompt is optional
+        if (! empty($system)) {
+            $body['system'] = $system;
+        }
+
+        // The temperature key is always present, so inferenceConfig is never empty
+        $body['inferenceConfig'] = [
+            'temperature' => $temperature,
+        ];
+        if ($maxTokens > 0) {
+            $body['inferenceConfig']['maxTokens'] = $maxTokens;
+        }
+
+        // Stop sequences are passed through additionalModelRequestFields
+        if (! empty($stopSequences)) {
+            $body['additionalModelRequestFields'] = [
+                'stop_sequences' => $stopSequences,
+            ];
+        }
+
+        $thinking = $chatRequest->getThinking();
+        if (! empty($thinking)) {
+            $body['thinking'] = $thinking;
+        }
+
+        // Tools are attached only when the converter produced at least one entry
+        $requestedTools = $chatRequest->getTools();
+        if (empty($requestedTools)) {
+            return $body;
+        }
+
+        $tools = $this->converter->convertTools($requestedTools, $chatRequest->isToolsCache());
+        if (! empty($tools)) {
+            $body['toolConfig'] = [
+                'tools' => $tools,
+            ];
+        }
+
+        return $body;
+    }
+
+    /**
+     * Process messages and group tool results for multi-tool calls.
+     *
+     * When an AssistantMessage contains multiple tool calls, Claude's Converse API
+     * requires all corresponding tool results to be in the same user message.
+     * Only ToolMessages that directly follow the assistant message and whose tool call ID
+     * is one of its tool calls are grouped; grouping needs at least two such results.
+     *
+     * @param array $messages Original messages array (read by consecutive integer index)
+     * @return array Processed messages with grouped tool results
+     */
+    private function processMessagesWithToolGrouping(array $messages): array
+    {
+        $processedMessages = [];
+        $messageCount = count($messages);
+
+        for ($i = 0; $i < $messageCount; ++$i) {
+            $message = $messages[$i];
+
+            // Every message is kept as-is; only multi-tool assistant messages need more work
+            $processedMessages[] = $message;
+            if (! $message instanceof AssistantMessage) {
+                continue;
+            }
+
+            // Check if this assistant message has multiple tool calls
+            if (! $message->hasToolCalls() || count($message->getToolCalls()) <= 1) {
+                continue;
+            }
+
+            // Collect the expected tool call IDs
+            $expectedToolIds = [];
+            foreach ($message->getToolCalls() as $toolCall) {
+                $expectedToolIds[] = $toolCall->getId();
+            }
+
+            // Look for consecutive tool messages that match the expected tool IDs
+            $collectedToolMessages = [];
+            $j = $i + 1;
+
+            while ($j < $messageCount && $messages[$j] instanceof ToolMessage) {
+                $toolMessage = $messages[$j];
+
+                // Strict comparison: IDs are opaque, and loose == would treat numeric-looking
+                // strings such as "1e3" and "1000" (or null and "") as the same ID
+                if (! in_array($toolMessage->getToolCallId(), $expectedToolIds, true)) {
+                    // This tool message doesn't belong to current assistant message
+                    break;
+                }
+
+                $collectedToolMessages[] = $toolMessage;
+                ++$j;
+            }
+
+            // Merge only when at least two results were collected. A single result is left
+            // untouched here and is appended as-is by the next loop iteration.
+            if (count($collectedToolMessages) > 1) {
+                $mergedToolMessage = $this->createMergedToolMessage($collectedToolMessages);
+                $processedMessages[] = $mergedToolMessage;
+                // Skip the original tool messages since we've merged them
+                $i = $j - 1;
+            }
+        }
+
+        return $processedMessages;
+    }
+
+    /**
+     * Create a merged tool message by wrapping the given tool messages in a MergedToolMessage.
+     *
+     * @param array $toolMessages Array of ToolMessage instances
+     * @return ToolMessage Merged tool message
+     */
+    private function createMergedToolMessage(array $toolMessages): ToolMessage
+    {
+        return new MergedToolMessage($toolMessages);
+    }
+}
diff --git a/src/Api/Providers/AwsBedrock/CustomConverseStreamConverter.php b/src/Api/Providers/AwsBedrock/CustomConverseStreamConverter.php
new file mode 100644
index 0000000..b7e068d
--- /dev/null
+++ b/src/Api/Providers/AwsBedrock/CustomConverseStreamConverter.php
@@ -0,0 +1,252 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\AwsBedrock;
+
+use Generator;
+use IteratorAggregate;
+use Psr\Http\Message\ResponseInterface;
+use Psr\Log\LoggerInterface;
+use RuntimeException;
+
+/**
+ * Custom Converse Stream Converter.
+ *
+ * Converts AWS Bedrock Converse API streaming responses to OpenAI-compatible format
+ * WITHOUT depending on AWS SDK.
+ */
+class CustomConverseStreamConverter implements IteratorAggregate
+{
+    protected ?LoggerInterface $logger;
+
+    private AwsEventStreamParser $parser;
+
+    private ?string $messageId = null;
+
+    private string $model = '';
+
+    /**
+     * Constructor; throws RuntimeException when the body stream cannot be detached.
+     *
+     * @param ResponseInterface $response Guzzle HTTP response with event stream body
+     * @param null|LoggerInterface $logger Logger instance
+     * @param string $model Model ID
+     * @param float $chunkTimeout Maximum time to wait between chunks (seconds)
+     */
+    public function __construct(ResponseInterface $response, ?LoggerInterface $logger = null, string $model = '', float $chunkTimeout = 30.0)
+    {
+        // Detach the stream resource from the StreamInterface wrapper
+        // This allows direct access to the underlying resource for non-blocking I/O
+        $stream = $response->getBody()->detach();
+        if (! is_resource($stream)) {
+            throw new RuntimeException('Failed to detach stream resource from response body');
+        }
+
+        $this->parser = new AwsEventStreamParser($stream, $chunkTimeout);
+        $this->messageId = $response->getHeaderLine('x-amzn-requestid') ?: uniqid('bedrock-');
+        $this->model = $model;
+        $this->logger = $logger;
+    }
+
+    /**
+     * Iterate the event stream, yielding OpenAI-style chunk arrays followed by the '[DONE]' marker.
+     */
+    public function getIterator(): Generator
+    {
+        $created = time();
+        $isFirstChunk = true;
+        $toolCallIndex = 0;
+        $chunkIndex = 0;
+        $firstChunks = [];
+        $lastChunks = [];
+        $maxChunksToLog = 5;
+
+        try {
+            foreach ($this->parser as $message) {
+                if (empty($message) || ! isset($message['payload'])) {
+                    continue;
+                }
+
+                // Parse JSON payload
+                $chunk = json_decode($message['payload'], true);
+                if (empty($chunk) || ! is_array($chunk)) {
+                    continue;
+                }
+
+                $timestamp = microtime(true);
+                $chunkWithTime = [
+                    'index' => $chunkIndex,
+                    'timestamp' => $timestamp,
+                    // Zero-padded microseconds; slicing the float's string form breaks on exponent notation
+                    'datetime' => date('Y-m-d H:i:s', (int) $timestamp) . '.' . sprintf('%06d', (int) (fmod($timestamp, 1) * 1000000)),
+                    'data' => $chunk,
+                ];
+
+                // Collect first 5 chunks
+                if ($chunkIndex < $maxChunksToLog) {
+                    $firstChunks[] = $chunkWithTime;
+                }
+
+                // Keep last 5 chunks
+                if (count($lastChunks) >= $maxChunksToLog) {
+                    array_shift($lastChunks);
+                }
+                $lastChunks[] = $chunkWithTime;
+
+                ++$chunkIndex;
+
+                // Convert to OpenAI format
+                $openAiChunk = $this->convertChunkToOpenAiFormat($chunk, $created, $isFirstChunk, $toolCallIndex);
+
+                if ($openAiChunk !== null) {
+                    $isFirstChunk = false;
+                    // Yield raw data without SSE format (ChatCompletionStreamResponse will handle SSE formatting)
+                    yield $openAiChunk;
+                }
+            }
+
+            // Send [DONE] signal
+            yield '[DONE]';
+        } finally {
+            // Log streaming summary (always executed, even if generator is terminated early)
+            $this->logger?->info('AwsBedrockConverseCustomStreamSummary', [
+                'message_id' => $this->messageId,
+                'model' => $this->model,
+                'total_chunks' => $chunkIndex,
+                'first_chunks' => $firstChunks,
+                'last_chunks' => $lastChunks,
+            ]);
+        }
+    }
+
+    /**
+     * Convert AWS Bedrock chunk to OpenAI format.
+     *
+     * @param array $chunk AWS Bedrock event chunk
+     * @param int $created Timestamp
+     * @param bool $isFirstChunk Whether this is the first chunk (not read by the conversion)
+     * @param int $toolCallIndex Tool call index counter, incremented on every tool use start
+     * @return null|array OpenAI formatted chunk or null if should skip
+     */
+    private function convertChunkToOpenAiFormat(array $chunk, int $created, bool $isFirstChunk, int &$toolCallIndex): ?array
+    {
+        $openAiChunk = [
+            'id' => $this->messageId,
+            'object' => 'chat.completion.chunk',
+            'created' => $created,
+            'model' => $this->model,
+            'choices' => [],
+        ];
+
+        $delta = [];
+        $finishReason = null;
+
+        // Handle different event types based on the actual chunk structure
+        // AWS Bedrock sends event type in headers, and the payload contains the data directly
+        if (isset($chunk['role'])) {
+            // Message start event: {"role":"assistant", "p":"..."}
+            $delta['role'] = 'assistant';
+        } elseif (isset($chunk['start'])) {
+            // Content block start: {"start":{"toolUse":{...}}, "contentBlockIndex":0, "p":"..."}
+            if (isset($chunk['start']['toolUse'])) {
+                // Tool use start
+                $toolUse = $chunk['start']['toolUse'];
+                $delta['tool_calls'] = [[
+                    'index' => $toolCallIndex,
+                    'id' => $toolUse['toolUseId'] ?? uniqid('call_'),
+                    'type' => 'function',
+                    'function' => [
+                        'name' => $toolUse['name'] ?? '',
+                        'arguments' => '',
+                    ],
+                ]];
+                ++$toolCallIndex;
+            }
+        } elseif (isset($chunk['delta'], $chunk['contentBlockIndex'])) {
+            // Content delta: {"contentBlockIndex":0, "delta":{"text":"..."}, "p":"..."}
+            if (isset($chunk['delta']['text'])) {
+                // Text delta
+                $delta['content'] = $chunk['delta']['text'];
+            } elseif (isset($chunk['delta']['toolUse'])) {
+                // Tool use input delta
+                $toolUse = $chunk['delta']['toolUse'];
+                $delta['tool_calls'] = [[
+                    'index' => $toolCallIndex - 1,
+                    'function' => [
+                        'arguments' => $toolUse['input'] ?? '',
+                    ],
+                ]];
+            }
+        } elseif (isset($chunk['contentBlockIndex']) && ! isset($chunk['delta'])) {
+            // Content block stop: {"contentBlockIndex":0, "p":"..."}
+            return null;
+        } elseif (isset($chunk['stopReason'])) {
+            // Message stop: {"stopReason":"end_turn", "p":"..."}
+            $stopReason = $chunk['stopReason'] ?? 'stop';
+            $finishReason = match ($stopReason) {
+                'end_turn' => 'stop',
+                'tool_use' => 'tool_calls',
+                'max_tokens' => 'length',
+                'stop_sequence' => 'stop',
+                default => $stopReason,
+            };
+        } elseif (isset($chunk['usage'])) {
+            // Metadata event with usage: {"metrics":{...}, "usage":{...}, "p":"..."}
+            // Match the usage processing in ResponseHandler::convertConverseToPsrResponse
+            $usage = $chunk['usage'];
+            $inputTokens = $usage['inputTokens'] ?? 0;
+            $cacheReadTokens = $usage['cacheReadInputTokens'] ?? 0;
+            $cacheWriteTokens = $usage['cacheWriteInputTokens'] ?? 0;
+
+            // OpenAI convention: promptTokens = all processed prompt tokens (cached ones included)
+            $promptTokens = $inputTokens + $cacheReadTokens + $cacheWriteTokens;
+            $completionTokens = $usage['outputTokens'] ?? 0;
+            $totalTokens = $promptTokens + $completionTokens;
+
+            $openAiChunk['usage'] = [
+                'prompt_tokens' => $promptTokens,
+                'completion_tokens' => $completionTokens,
+                'total_tokens' => $totalTokens,
+                'prompt_tokens_details' => [
+                    'cache_write_input_tokens' => $cacheWriteTokens,
+                    'cache_read_input_tokens' => $cacheReadTokens,
+                    // OpenAI-compatible field: cached_tokens reports the cache hits
+                    'audio_tokens' => 0,
+                    'cached_tokens' => $cacheReadTokens,
+                ],
+                'completion_tokens_details' => [
+                    'reasoning_tokens' => 0,
+                ],
+            ];
+            return $openAiChunk;
+        } elseif (isset($chunk['metrics'])) {
+            // Metadata without usage - skip
+            return null;
+        }
+
+        // Build the single choice entry. finish_reason is always present: it is null on
+        // intermediate chunks and holds the mapped stop reason on the message stop event.
+        // The key order is index, delta, finish_reason.
+        $choice = [
+            'index' => 0,
+            'delta' => $delta,
+            'finish_reason' => $finishReason,
+        ];
+
+        // Payloads that match none of the branches above, as well as content block
+        // starts without toolUse data, reach this point and are emitted with an
+        // empty delta.
+        $openAiChunk['choices'][] = $choice;
+
+        return $openAiChunk;
+    }
+}
diff --git a/src/Api/Providers/AwsBedrock/ResponseHandler.php b/src/Api/Providers/AwsBedrock/ResponseHandler.php
index 25cf64b..cf7f4c4 100644
--- a/src/Api/Providers/AwsBedrock/ResponseHandler.php
+++ b/src/Api/Providers/AwsBedrock/ResponseHandler.php
@@ -81,7 +81,7 @@ public static function convertToPsrResponse(array $responseBody, string $model):
         // 创建使用量对象（如果有）
         if (isset($responseBody['usage'])) {
             $usage = Usage::fromArray([
-                'prompt_tokens' => $responseBody['usage']['input_tokens'] ?? 0,
+                'prompt_tokens' => $responseBody['usage']['prompt_tokens'] ?? $responseBody['usage']['input_tokens'] ?? 0,
                 'completion_tokens' => $responseBody['usage']['output_tokens'] ?? 0,
                 'total_tokens' => $responseBody['usage']['total_tokens'] ?? 0,
                 'prompt_tokens_details' => $responseBody['usage']['prompt_tokens_details'] ?? [],
@@ -115,17 +115,30 @@ public static function convertToPsrResponse(array $responseBody, string $model):
 
     public static function convertConverseToPsrResponse(array $output, array $usage, string $model): ResponseInterface
     {
+        // 转换Claude的token统计方式为Qwen格式
+        // Claude: inputTokens=新输入, cacheReadInputTokens=缓存命中
+        // OpenAI: promptTokens=总输入(包括缓存), cachedTokens=缓存命中
+        $inputTokens = $usage['inputTokens'] ?? 0;
+        $cacheReadTokens = $usage['cacheReadInputTokens'] ?? 0;
+        $cacheWriteTokens = $usage['cacheWriteInputTokens'] ?? 0;
+
+        // 按照 OpenAI 的方式：promptTokens = 总处理的提示tokens（包括缓存）
+        $promptTokens = $inputTokens + $cacheReadTokens + $cacheWriteTokens;
+        $completionTokens = $usage['outputTokens'] ?? 0;
+        $totalTokens = $promptTokens + $completionTokens;
+
         $responseBody = [
             'usage' => [
-                'input_tokens' => $usage['inputTokens'] ?? 0,
-                'output_tokens' => $usage['outputTokens'] ?? 0,
-                'total_tokens' => $usage['totalTokens'] ?? 0,
+                'prompt_tokens' => $promptTokens,
+                'input_tokens' => $inputTokens,
+                'output_tokens' => $completionTokens,
+                'total_tokens' => $totalTokens,
                 'prompt_tokens_details' => [
-                    'cache_write_input_tokens' => $usage['cacheWriteInputTokens'] ?? 0,
-                    'cache_read_input_tokens' => $usage['cacheReadInputTokens'] ?? 0,
-                    // 兼容旧参数
+                    'cache_write_input_tokens' => $cacheWriteTokens,
+                    'cache_read_input_tokens' => $cacheReadTokens,
+                    // 兼容 OpenAI 格式：cached_tokens表示缓存命中
                     'audio_tokens' => 0,
-                    'cached_tokens' => $usage['cacheWriteInputTokens'] ?? 0,
+                    'cached_tokens' => $cacheReadTokens,
                 ],
                 'completion_tokens_details' => [
                     'reasoning_tokens' => 0,
diff --git a/src/Api/Providers/DashScope/Cache/DashScopeAutoCacheConfig.php b/src/Api/Providers/DashScope/Cache/DashScopeAutoCacheConfig.php
new file mode 100644
index 0000000..d03b49f
--- /dev/null
+++ b/src/Api/Providers/DashScope/Cache/DashScopeAutoCacheConfig.php
@@ -0,0 +1,65 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope\Cache;
+
+/**
+ * Auto cache configuration for DashScope.
+ * Modelled on the AWS Bedrock AutoCacheConfig implementation.
+ */
+class DashScopeAutoCacheConfig
+{
+    /** Minimum token count a cache point needs to take effect. */
+    private int $minCacheTokens;
+
+    /** List of supported model names. */
+    private array $supportedModels;
+
+    /** Whether automatic caching is enabled. */
+    private bool $autoEnabled;
+
+    /** All arguments are optional; by default auto caching is disabled. */
+    public function __construct(
+        int $minCacheTokens = 1024,
+        array $supportedModels = ['qwen3-coder-plus'],
+        bool $autoEnabled = false
+    ) {
+        $this->minCacheTokens = $minCacheTokens;
+        $this->supportedModels = $supportedModels;
+        $this->autoEnabled = $autoEnabled;
+    }
+
+    public function getMinCacheTokens(): int
+    {
+        return $this->minCacheTokens;
+    }
+
+    public function getSupportedModels(): array
+    {
+        return $this->supportedModels;
+    }
+
+    public function isAutoEnabled(): bool
+    {
+        return $this->autoEnabled;
+    }
+
+    /**
+     * Whether $model is listed exactly in the supported models.
+     * The comparison is strict, so numeric-looking names such as "1e3" and "1000"
+     * are never treated as the same model.
+     */
+    public function isModelSupported(string $model): bool
+    {
+        return in_array($model, $this->supportedModels, true);
+    }
+}
diff --git a/src/Api/Providers/DashScope/Cache/DashScopeCachePointManager.php b/src/Api/Providers/DashScope/Cache/DashScopeCachePointManager.php
new file mode 100644
index 0000000..e957128
--- /dev/null
+++ b/src/Api/Providers/DashScope/Cache/DashScopeCachePointManager.php
@@ -0,0 +1,61 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope\Cache;
+
+use Hyperf\Odin\Api\Providers\DashScope\Cache\Strategy\AutoCacheStrategy;
+use Hyperf\Odin\Api\Providers\DashScope\Cache\Strategy\DashScopeCacheStrategyInterface;
+use Hyperf\Odin\Api\Providers\DashScope\Cache\Strategy\ManualCacheStrategy;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+
+/**
+ * Cache point manager for DashScope requests.
+ * Modelled on AwsBedrockCachePointManager: a cache strategy is chosen from the
+ * configuration and then applied to the request.
+ */
+class DashScopeCachePointManager
+{
+    public function __construct(private DashScopeAutoCacheConfig $autoCacheConfig)
+    {
+    }
+
+    /**
+     * Configure cache points on the given request.
+     *
+     * Token estimates are calculated on the request before the strategy runs; both
+     * strategies read token estimates when they are applied.
+     *
+     * @param ChatCompletionRequest $request Request to configure (this object is modified directly)
+     */
+    public function configureCachePoints(ChatCompletionRequest $request): void
+    {
+        // Estimate tokens with the request's own method before any strategy
+        // looks at the estimates
+        $request->calculateTokenEstimates();
+
+        // Choose the strategy and let it adjust the request
+        $this->selectStrategy()->apply($this->autoCacheConfig, $request);
+    }
+
+    /**
+     * Select the cache strategy.
+     *
+     * @return DashScopeCacheStrategyInterface AutoCacheStrategy when auto caching is enabled
+     *                                         in the configuration, ManualCacheStrategy otherwise
+     */
+    private function selectStrategy(): DashScopeCacheStrategyInterface
+    {
+        return $this->autoCacheConfig->isAutoEnabled()
+            ? new AutoCacheStrategy()
+            : new ManualCacheStrategy();
+    }
+}
diff --git a/src/Api/Providers/DashScope/Cache/Strategy/AutoCacheStrategy.php b/src/Api/Providers/DashScope/Cache/Strategy/AutoCacheStrategy.php
new file mode 100644
index 0000000..f3c3441
--- /dev/null
+++ b/src/Api/Providers/DashScope/Cache/Strategy/AutoCacheStrategy.php
@@ -0,0 +1,52 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope\Cache\Strategy;
+
+use Hyperf\Odin\Api\Providers\DashScope\Cache\DashScopeAutoCacheConfig;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Message\CachePoint;
+
+/**
+ * Automatic cache strategy for DashScope.
+ * Puts a single cache point on the last message of the request.
+ */
+class AutoCacheStrategy implements DashScopeCacheStrategyInterface
+{
+    /**
+     * Replace all existing cache points with one ephemeral cache point on the last message.
+     *
+     * The request is left untouched when the model is not supported, when the total
+     * token estimate is below the configured minimum, or when there are no messages.
+     */
+    public function apply(DashScopeAutoCacheConfig $config, ChatCompletionRequest $request): void
+    {
+        // Guard: unsupported model, or too few tokens for caching to apply
+        $modelSupported = $config->isModelSupported($request->getModel());
+        if (! $modelSupported || $request->getTotalTokenEstimate() < $config->getMinCacheTokens()) {
+            return;
+        }
+
+        $messages = $request->getMessages();
+        if (empty($messages)) {
+            return;
+        }
+
+        // Clear manually set cache points first so that only the automatic one remains
+        foreach ($messages as $current) {
+            $current->setCachePoint(null);
+        }
+
+        $tail = $messages[array_key_last($messages)];
+        $tail->setCachePoint(new CachePoint('ephemeral'));
+    }
+}
diff --git a/src/Api/Providers/DashScope/Cache/Strategy/DashScopeCacheStrategyInterface.php b/src/Api/Providers/DashScope/Cache/Strategy/DashScopeCacheStrategyInterface.php
new file mode 100644
index 0000000..b7d4fb9
--- /dev/null
+++ b/src/Api/Providers/DashScope/Cache/Strategy/DashScopeCacheStrategyInterface.php
@@ -0,0 +1,21 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope\Cache\Strategy;
+
+use Hyperf\Odin\Api\Providers\DashScope\Cache\DashScopeAutoCacheConfig;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+
+interface DashScopeCacheStrategyInterface
+{
+    public function apply(DashScopeAutoCacheConfig $config, ChatCompletionRequest $request): void; // implementations adjust the cache points of $request based on $config
+}
diff --git a/src/Api/Providers/DashScope/Cache/Strategy/ManualCacheStrategy.php b/src/Api/Providers/DashScope/Cache/Strategy/ManualCacheStrategy.php
new file mode 100644
index 0000000..783e1b0
--- /dev/null
+++ b/src/Api/Providers/DashScope/Cache/Strategy/ManualCacheStrategy.php
@@ -0,0 +1,64 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope\Cache\Strategy;
+
+use Hyperf\Odin\Api\Providers\DashScope\Cache\DashScopeAutoCacheConfig;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+
+/**
+ * Manual cache strategy for DashScope.
+ * Validates the cache points set by the user and keeps only the last one that qualifies.
+ */
+class ManualCacheStrategy implements DashScopeCacheStrategyInterface
+{
+    /**
+     * Keep at most one ephemeral cache point: the last one that is valid.
+     *
+     * An ephemeral cache point is valid when the request's model is supported and the
+     * token estimate of its own message (null counts as 0) reaches the configured minimum.
+     *
+     * Every other ephemeral cache point is removed; other cache point types are not touched.
+     */
+    public function apply(DashScopeAutoCacheConfig $config, ChatCompletionRequest $request): void
+    {
+        $messages = $request->getMessages();
+        $minTokens = $config->getMinCacheTokens();
+        $validCachePointIndex = null;
+
+        // Model support is the same for every message, so it is checked once up front.
+        // With an unsupported model no cache point can be valid and the search is skipped.
+        if ($config->isModelSupported($request->getModel())) {
+            // First pass: find the last ephemeral cache point whose message is large enough
+            foreach ($messages as $index => $message) {
+                if ($message->getCachePoint()?->getType() !== 'ephemeral') {
+                    continue;
+                }
+
+                if (($message->getTokenEstimate() ?? 0) >= $minTokens) {
+                    $validCachePointIndex = $index;
+                }
+            }
+        }
+
+        // Second pass: remove every ephemeral cache point except the one found above
+        foreach ($messages as $index => $message) {
+            if ($index === $validCachePointIndex) {
+                continue;
+            }
+
+            if ($message->getCachePoint()?->getType() === 'ephemeral') {
+                $message->setCachePoint(null);
+            }
+        }
+    }
+}
diff --git a/src/Api/Providers/DashScope/Client.php b/src/Api/Providers/DashScope/Client.php
new file mode 100644
index 0000000..966b4ce
--- /dev/null
+++ b/src/Api/Providers/DashScope/Client.php
@@ -0,0 +1,255 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope;
+
+use GuzzleHttp\RequestOptions;
+use Hyperf\Engine\Coroutine;
+use Hyperf\Odin\Api\Providers\AbstractClient;
+use Hyperf\Odin\Api\Providers\DashScope\Cache\DashScopeCachePointManager;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Api\Response\ChatCompletionResponse;
+use Hyperf\Odin\Api\Response\ChatCompletionStreamResponse;
+use Hyperf\Odin\Api\Transport\OdinSimpleCurl;
+use Hyperf\Odin\Api\Transport\SSEClient;
+use Hyperf\Odin\Event\AfterChatCompletionsEvent;
+use Hyperf\Odin\Event\AfterChatCompletionsStreamEvent;
+use Hyperf\Odin\Utils\EventUtil;
+use Psr\Log\LoggerInterface;
+use Throwable;
+
+class Client extends AbstractClient
+{
+    private ?DashScopeCachePointManager $cachePointManager = null;
+
+    public function __construct(
+        DashScopeConfig $config,
+        ?ApiOptions $requestOptions = null,
+        ?LoggerInterface $logger = null
+    ) {
+        parent::__construct($config, $requestOptions, $logger);
+        // The cache point manager is created unconditionally from the config's auto-cache settings.
+        $autoCacheConfig = $config->getAutoCacheConfig();
+        $this->cachePointManager = new DashScopeCachePointManager($autoCacheConfig);
+    }
+
+    /**
+     * Send a non-streaming chat completion request and return the parsed response.
+     *
+     * Failures raised inside the try block are rethrown via convertException().
+     */
+    public function chatCompletions(ChatCompletionRequest $chatRequest): ChatCompletionResponse
+    {
+        $chatRequest->validate();
+        $startTime = microtime(true);
+        try {
+            // Apply the cache point configuration (automatic, or validation of manual points).
+            $this->cachePointManager->configureCachePoints($chatRequest);
+            $options = $chatRequest->createOptions();
+            // Convert cache points and find out whether the cache-control header is needed.
+            $hasCachePoints = $this->processCachePoints($chatRequest, $options);
+            $url = $this->buildChatCompletionsUrl();
+            $requestId = $this->addRequestIdToOptions($options);
+
+            // Add the cache-control header only when processCachePoints() reported a cache point.
+            if ($hasCachePoints) {
+                $this->addCacheControlHeader($options);
+            }
+
+            $this->logRequest('DashScopeChatRequest', $url, $options, $requestId);
+
+            $response = $this->client->post($url, $options);
+            $duration = $this->calculateDuration($startTime);
+            // Convert the DashScope response format into the standard format.
+            $standardResponse = ResponseHandler::convertResponse($response);
+            $chatResponse = new ChatCompletionResponse($standardResponse, $this->logger);
+
+            $this->logResponse('DashScopeChatResponse', $requestId, $duration, [
+                'content' => $chatResponse->getContent(),
+                'usage' => $chatResponse->getUsage(),
+                'response_headers' => $response->getHeaders(),
+            ]);
+
+            EventUtil::dispatch(new AfterChatCompletionsEvent($chatRequest, $chatResponse, $duration));
+
+            return $chatResponse;
+        } catch (Throwable $e) {
+            // $url / $options may still be unset when the failure happened before they were assigned.
+            $context = $this->createExceptionContext($url ?? '', $options ?? [], 'completions');
+            throw $this->convertException($e, $context);
+        }
+    }
+
+    /**
+     * Send a streaming chat completion request and wrap the response body in an SSE client.
+     *
+     * Only the part inside the try block is routed through convertException().
+     */
+    public function chatCompletionsStream(ChatCompletionRequest $chatRequest): ChatCompletionStreamResponse
+    {
+        $chatRequest->validate();
+        $chatRequest->setStream(true);
+        $this->cachePointManager->configureCachePoints($chatRequest);
+
+        $options = $chatRequest->createOptions();
+        $hasCachePoints = $this->processCachePoints($chatRequest, $options);
+        $url = $this->buildChatCompletionsUrl();
+        $requestId = $this->addRequestIdToOptions($options);
+
+        // Add the cache-control header only when processCachePoints() reported a cache point.
+        if ($hasCachePoints) {
+            $this->addCacheControlHeader($options);
+        }
+
+        $this->logRequest('DashScopeChatStreamRequest', $url, $options, $requestId);
+        $startTime = microtime(true);
+        try {
+            $options[RequestOptions::STREAM] = true;
+            $options[RequestOptions::TIMEOUT] = $this->requestOptions->getStreamFirstChunkTimeout();
+
+            // Truthy Coroutine::id() (presumably: inside a coroutine) sends via OdinSimpleCurl instead.
+            if (Coroutine::id()) {
+                foreach ($this->getHeaders() as $key => $value) {
+                    $options['headers'][$key] = $value;
+                }
+                $options['connect_timeout'] = $this->requestOptions->getConnectionTimeout();
+                $options['stream_chunk'] = $this->requestOptions->getStreamChunkTimeout();
+                $options['header_timeout'] = $this->requestOptions->getStreamFirstChunkTimeout();
+                $response = OdinSimpleCurl::send($url, $options);
+            } else {
+                $response = $this->client->post($url, $options);
+            }
+
+            $firstResponseDuration = $this->calculateDuration($startTime);
+            $stream = $response->getBody()->detach();
+            $sseClient = new SSEClient(
+                $stream,
+                true,
+                $this->requestOptions->getTimeout(),
+                $this->logger
+            );
+
+            // For streaming responses, ResponseHandler's conversion is handled in the SSE events.
+            $chatCompletionStreamResponse = new ChatCompletionStreamResponse($response, $this->logger, $sseClient);
+            $chatCompletionStreamResponse->setAfterChatCompletionsStreamEvent(
+                new AfterChatCompletionsStreamEvent($chatRequest, $firstResponseDuration)
+            );
+
+            $this->logResponse('DashScopeChatStreamResponse', $requestId, $firstResponseDuration, [
+                'first_response_ms' => $firstResponseDuration,
+                'response_headers' => $response->getHeaders(),
+            ]);
+            return $chatCompletionStreamResponse;
+        } catch (Throwable $e) {
+            throw $this->convertException($e, $this->createExceptionContext($url, $options, 'stream'));
+        }
+    }
+
+    /**
+     * Build the Authorization header from the configured API key, if one is set.
+     */
+    protected function getAuthHeaders(): array
+    {
+        /** @var DashScopeConfig $dashScopeConfig */
+        $dashScopeConfig = $this->config;
+        $apiKey = $dashScopeConfig->getApiKey();
+
+        // A falsy key (e.g. an empty string) yields no header at all.
+        return $apiKey ? ['Authorization' => 'Bearer ' . $apiKey] : [];
+    }
+
+    /** Build the URL of the chat completions API. */
+    protected function buildChatCompletionsUrl(): string
+    {
+        $baseUri = $this->getBaseUri();
+
+        return $baseUri . '/chat/completions';
+    }
+
+    /** Build the URL of the embeddings API. */
+    protected function buildEmbeddingsUrl(): string
+    {
+        $baseUri = $this->getBaseUri();
+
+        return $baseUri . '/embeddings';
+    }
+
+    /** Build the URL of the text completions API. */
+    protected function buildCompletionsUrl(): string
+    {
+        $baseUri = $this->getBaseUri();
+
+        return $baseUri . '/completions';
+    }
+
+    /**
+     * Convert Odin cache points into DashScope's cache_control format.
+     *
+     * Messages are matched by index with $options['json']['messages']; an entry that is missing
+     * or not an array is skipped instead of being created as null through the by-reference call.
+     *
+     * @return bool whether at least one cache point was applied
+     */
+    private function processCachePoints(ChatCompletionRequest $request, array &$options): bool
+    {
+        if (! isset($options['json']['messages']) || ! is_array($options['json']['messages'])) {
+            return false;
+        }
+
+        $hasCachePoints = false;
+        foreach ($request->getMessages() as $index => $message) {
+            $cachePoint = $message->getCachePoint();
+            $isEphemeral = $cachePoint && $cachePoint->getType() === 'ephemeral';
+            if ($isEphemeral && is_array($options['json']['messages'][$index] ?? null)) {
+                $this->addCacheControlToMessage($options['json']['messages'][$index]);
+                $hasCachePoints = true;
+            }
+        }
+
+        return $hasCachePoints;
+    }
+
+    /**
+     * Mark a serialized message with DashScope's cache_control flag.
+     *
+     * String content is first wrapped into a single text part; the flag is set on the last part.
+     * A message whose content is missing, null, an empty list, or whose last part is not an
+     * array is left untouched.
+     */
+    private function addCacheControlToMessage(array &$message): void
+    {
+        $content = $message['content'] ?? null;
+        if (is_string($content)) {
+            $content = [['type' => 'text', 'text' => $content]];
+        }
+
+        // array_key_last() also copes with content lists whose keys are not 0..n-1.
+        $lastKey = is_array($content) ? array_key_last($content) : null;
+        if ($lastKey !== null && is_array($content[$lastKey])) {
+            $content[$lastKey]['cache_control'] = ['type' => 'ephemeral'];
+            $message['content'] = $content;
+        }
+    }
+
+    /**
+     * Set the X-DashScope-CacheControl request header to 'enable'.
+     *
+     * @param array $options request options, modified in place
+     */
+    private function addCacheControlHeader(array &$options): void
+    {
+        // Make sure the header bag exists before writing into it.
+        $options['headers'] ??= [];
+        $options['headers']['X-DashScope-CacheControl'] = 'enable';
+    }
+}
diff --git a/src/Api/Providers/DashScope/DashScope.php b/src/Api/Providers/DashScope/DashScope.php
new file mode 100644
index 0000000..2e1a1d8
--- /dev/null
+++ b/src/Api/Providers/DashScope/DashScope.php
@@ -0,0 +1,54 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope;
+
+use Hyperf\Odin\Api\Providers\AbstractApi;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Exception\LLMException\Configuration\LLMInvalidApiKeyException;
+use Hyperf\Odin\Exception\LLMException\Configuration\LLMInvalidEndpointException;
+use Psr\Log\LoggerInterface;
+
+class DashScope extends AbstractApi
+{
+    /**
+     * @var Client[] clients memoized by configuration hash
+     */
+    protected array $clients = [];
+
+    /**
+     * Return a client for the configuration, reusing an already built one when the key matches.
+     *
+     * @throws LLMInvalidApiKeyException when the API key is empty and validation is not skipped
+     * @throws LLMInvalidEndpointException when the base URL is empty
+     */
+    public function getClient(
+        DashScopeConfig $config,
+        ?ApiOptions $requestOptions = null,
+        ?LoggerInterface $logger = null
+    ): Client {
+        if (empty($config->getApiKey()) && ! $config->shouldSkipApiKeyValidation()) {
+            throw new LLMInvalidApiKeyException('DashScope API密钥不能为空', null, 'DashScope');
+        }
+        if (empty($config->getBaseUrl())) {
+            throw new LLMInvalidEndpointException('基础URL不能为空', null, $config->getBaseUrl());
+        }
+
+        $requestOptions ??= new ApiOptions();
+        // The client binds the auto-cache config at construction but toArray() leaves it out,
+        // so it is hashed separately; otherwise configs differing only there would share a client.
+        $cachePart = serialize($config->getAutoCacheConfig());
+        $key = md5(json_encode($config->toArray()) . json_encode($requestOptions->toArray()) . $cachePart);
+
+        return $this->clients[$key] ??= new Client($config, $requestOptions, $logger);
+    }
+}
diff --git a/src/Api/Providers/DashScope/DashScopeConfig.php b/src/Api/Providers/DashScope/DashScopeConfig.php
new file mode 100644
index 0000000..cb43147
--- /dev/null
+++ b/src/Api/Providers/DashScope/DashScopeConfig.php
@@ -0,0 +1,64 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope;
+
+use Hyperf\Odin\Api\Providers\DashScope\Cache\DashScopeAutoCacheConfig;
+use Hyperf\Odin\Contract\Api\ConfigInterface;
+
+class DashScopeConfig implements ConfigInterface
+{
+    private DashScopeAutoCacheConfig $autoCacheConfig;
+
+    public function __construct(
+        private readonly string $apiKey,
+        private readonly string $baseUrl = 'https://dashscope.aliyuncs.com',
+        private readonly bool $skipApiKeyValidation = false,
+        ?DashScopeAutoCacheConfig $autoCacheConfig = null
+    ) {
+        $this->autoCacheConfig = $autoCacheConfig ?? new DashScopeAutoCacheConfig();
+    }
+
+    public function getApiKey(): string
+    {
+        return $this->apiKey;
+    }
+
+    public function getBaseUrl(): string
+    {
+        return $this->baseUrl;
+    }
+
+    public function shouldSkipApiKeyValidation(): bool
+    {
+        return $this->skipApiKeyValidation;
+    }
+
+    public function getAutoCacheConfig(): DashScopeAutoCacheConfig
+    {
+        return $this->autoCacheConfig;
+    }
+
+    public function isAutoCache(): bool
+    {
+        return $this->autoCacheConfig->isAutoEnabled();
+    }
+
+    public function toArray(): array
+    {
+        $exported = ['api_key' => $this->apiKey];
+        $exported['base_url'] = $this->baseUrl;
+        $exported['skip_api_key_validation'] = $this->skipApiKeyValidation;
+        // The auto-cache config is not part of the exported array.
+        return $exported;
+    }
+}
diff --git a/src/Api/Providers/DashScope/ResponseHandler.php b/src/Api/Providers/DashScope/ResponseHandler.php
new file mode 100644
index 0000000..ca9dd8b
--- /dev/null
+++ b/src/Api/Providers/DashScope/ResponseHandler.php
@@ -0,0 +1,94 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\DashScope;
+
+use GuzzleHttp\Psr7\Response;
+use Psr\Http\Message\ResponseInterface;
+
+/**
+ * Helper for DashScope responses.
+ *
+ * Provides static methods that convert DashScope responses into the standard format.
+ */
+class ResponseHandler
+{
+    /**
+     * Convert a DashScope HTTP response into the standard format.
+     *
+     * Only the `usage` block is rewritten. A body that is not a JSON object/array, or that has
+     * no `usage` array, is passed through byte-for-byte instead of being re-encoded, so
+     * malformed payloads are not replaced by the literal "null".
+     *
+     * @param ResponseInterface $response raw HTTP response
+     * @return ResponseInterface new response carrying the converted body
+     */
+    public static function convertResponse(ResponseInterface $response): ResponseInterface
+    {
+        // The string cast reads the whole body, rewinding first when the stream is seekable.
+        $content = (string) $response->getBody();
+        $data = json_decode($content, true);
+
+        $newContent = $content;
+        if (is_array($data) && is_array($data['usage'] ?? null)) {
+            $data['usage'] = self::convertUsageFields($data['usage']);
+            // Fall back to the original body should re-encoding fail.
+            $encoded = json_encode($data);
+            $newContent = $encoded === false ? $content : $encoded;
+        }
+
+        return new Response($response->getStatusCode(), $response->getHeaders(), $newContent);
+    }
+
+    /**
+     * Convert DashScope's usage fields into the standard format.
+     *
+     * @param array $usage usage data as returned by DashScope
+     * @return array converted usage data
+     */
+    public static function convertUsageFields(array $usage): array
+    {
+        // prompt_tokens_details is the only part that needs converting; skip non-array values.
+        if (isset($usage['prompt_tokens_details']) && is_array($usage['prompt_tokens_details'])) {
+            $usage['prompt_tokens_details'] = self::convertPromptTokensDetails($usage['prompt_tokens_details']);
+        }
+
+        return $usage;
+    }
+
+    /**
+     * Map DashScope fields inside prompt_tokens_details to the standard field names.
+     *
+     * @param array $promptTokensDetails prompt_tokens_details as returned by DashScope
+     * @return array the same data with cache_write_input_tokens added when a source exists
+     */
+    private static function convertPromptTokensDetails(array $promptTokensDetails): array
+    {
+        $converted = $promptTokensDetails;
+
+        // 1. Prefer the top-level cache_creation_input_tokens -> cache_write_input_tokens.
+        if (isset($promptTokensDetails['cache_creation_input_tokens'])) {
+            $converted['cache_write_input_tokens'] = $promptTokensDetails['cache_creation_input_tokens'];
+        }
+        // 2. Otherwise fall back to the nested cache_creation.ephemeral_5m_input_tokens.
+        elseif (isset($promptTokensDetails['cache_creation']['ephemeral_5m_input_tokens'])) {
+            $converted['cache_write_input_tokens'] = $promptTokensDetails['cache_creation']['ephemeral_5m_input_tokens'];
+        }
+
+        // 3. cached_tokens (cache hits) already uses the standard name; nothing to convert.
+
+        // 4. Other DashScope fields (cache_type, cache_creation, ...) are kept as-is; they do
+        // not interfere with the standard fields.
+
+        return $converted;
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/CacheInfo.php b/src/Api/Providers/Gemini/Cache/CacheInfo.php
new file mode 100644
index 0000000..4a4ceb3
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/CacheInfo.php
@@ -0,0 +1,107 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache;
+
+/**
+ * Cache information object.
+ * Carries the cache details handed back by a cache strategy.
+ */
+class CacheInfo
+{
+    /**
+     * Create a cache information object.
+     *
+     * @param string $cacheName cache name (e.g., cachedContents/xxx)
+     * @param bool $isNewlyCreated whether this cache was newly created in this request
+     * @param int $cacheWriteTokens tokens written to cache (0 if using existing cache)
+     * @param array<string> $cachedMessageHashes hashes of cached messages, used to filter out
+     *                                           cached messages when applying cache
+     */
+    public function __construct(
+        private string $cacheName,
+        private bool $isNewlyCreated,
+        private int $cacheWriteTokens,
+        private array $cachedMessageHashes = []
+    ) {
+        // Properties are promoted; nothing else to initialise.
+    }
+
+    /**
+     * Cache name (e.g., cachedContents/xxx).
+     */
+    public function getCacheName(): string
+    {
+        return $this->cacheName;
+    }
+
+    /**
+     * Whether this cache was newly created in this request.
+     */
+    public function isNewlyCreated(): bool
+    {
+        return $this->isNewlyCreated;
+    }
+
+    /**
+     * Tokens written to cache (0 if using existing cache).
+     */
+    public function getCacheWriteTokens(): int
+    {
+        return $this->cacheWriteTokens;
+    }
+
+    /**
+     * Hashes of the cached messages.
+     * Used to filter out cached messages when applying cache.
+     *
+     * @return array<string>
+     */
+    public function getCachedMessageHashes(): array
+    {
+        return $this->cachedMessageHashes;
+    }
+
+    /**
+     * Export the object as an array (for logging or serialization).
+     *
+     * @return array<string, mixed> keys: cache_name, is_newly_created, cache_write_tokens,
+     *                              cached_message_hashes
+     */
+    public function toArray(): array
+    {
+        // Key order matches the constructor parameter order.
+        $exported = [];
+        $exported['cache_name'] = $this->cacheName;
+        $exported['is_newly_created'] = $this->isNewlyCreated;
+        $exported['cache_write_tokens'] = $this->cacheWriteTokens;
+        $exported['cached_message_hashes'] = $this->cachedMessageHashes;
+
+        return $exported;
+    }
+
+    /**
+     * Build an instance from an array shaped like the output of toArray().
+     *
+     * @param array $data missing keys fall back to '', false, 0 and [] respectively
+     */
+    public static function fromArray(array $data): self
+    {
+        // Each value is passed positionally to the constructor.
+        $cacheName = $data['cache_name'] ?? '';
+        $isNewlyCreated = $data['is_newly_created'] ?? false;
+        $cacheWriteTokens = $data['cache_write_tokens'] ?? 0;
+        $cachedMessageHashes = $data['cached_message_hashes'] ?? [];
+
+        return new self($cacheName, $isNewlyCreated, $cacheWriteTokens, $cachedMessageHashes);
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/GeminiCacheClient.php b/src/Api/Providers/Gemini/Cache/GeminiCacheClient.php
new file mode 100644
index 0000000..8ab78dc
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/GeminiCacheClient.php
@@ -0,0 +1,223 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache;
+
+use Exception;
+use GuzzleHttp\Client;
+use GuzzleHttp\RequestOptions;
+use Hyperf\Odin\Api\Providers\Gemini\GeminiConfig;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Psr\Log\LoggerInterface;
+use RuntimeException;
+use Throwable;
+
+/**
+ * Gemini 缓存 API 客户端.
+ * 封装缓存相关的 API 调用.
+ */
+class GeminiCacheClient
+{
+    private Client $client;
+
+    private GeminiConfig $config;
+
+    private ?LoggerInterface $logger;
+
+    /**
+     * Build the Guzzle client; without ApiOptions the timeouts fall back to 30.0 (total) and 5.0 (connect).
+     */
+    public function __construct(GeminiConfig $config, ?ApiOptions $apiOptions = null, ?LoggerInterface $logger = null)
+    {
+        $this->config = $config;
+        $this->logger = $logger;
+
+        $guzzleOptions = ['base_uri' => $config->getBaseUrl()];
+        $guzzleOptions['timeout'] = $apiOptions?->getTotalTimeout() ?? 30.0;
+        $guzzleOptions['connect_timeout'] = $apiOptions?->getConnectionTimeout() ?? 5.0;
+
+        // Forward a proxy only when the options report one.
+        if ($apiOptions?->hasProxy()) {
+            $guzzleOptions['proxy'] = $apiOptions->getProxy();
+        }
+
+        $this->client = new Client($guzzleOptions);
+    }
+
+    /**
+     * Create a cache.
+     *
+     * @param string $model model name
+     * @param array $config cache configuration, containing systemInstruction, tools, contents, ttl
+     * @return array cache response data, containing name and, when available, usageMetadata
+     * @throws Exception
+     */
+    public function createCache(string $model, array $config): array
+    {
+        $url = $this->getBaseUri() . '/cachedContents';
+
+        // Ensure model name has 'models/' prefix (required by Gemini Cache API)
+        if (! str_starts_with($model, 'models/')) {
+            $model = 'models/' . $model;
+        }
+
+        // Merge config fields into the body; a 'model' key inside $config overrides the one above.
+        $body = array_merge(
+            ['model' => $model],
+            $config
+        );
+
+        $options = [
+            RequestOptions::JSON => $body,
+            RequestOptions::HEADERS => $this->getHeaders(),
+        ];
+
+        try {
+            $this->logger?->debug('Creating Gemini cache', [
+                'model' => $model,
+                'url' => $url,
+                'request_body' => json_encode($body, JSON_UNESCAPED_UNICODE),
+            ]);
+
+            $response = $this->client->post($url, $options);
+            $responseData = json_decode($response->getBody()->getContents(), true);
+
+            if (! isset($responseData['name'])) {
+                throw new RuntimeException('Failed to create cache: missing name in response');
+            }
+
+            $cacheName = $responseData['name'];
+
+            // Extract token usage from response if available
+            // If not available in create response, fetch cache metadata
+            $cacheTokens = null;
+            if (isset($responseData['usageMetadata']['totalTokenCount'])) {
+                $cacheTokens = $responseData['usageMetadata']['totalTokenCount'];
+                $this->logger?->debug('Got cache tokens from create response', [
+                    'cache_tokens' => $cacheTokens,
+                ]);
+            } else {
+                // Fetch cache metadata to get usage information; a failure here is only logged
+                try {
+                    $metadata = $this->getCache($cacheName);
+                    if (isset($metadata['usageMetadata']['totalTokenCount'])) {
+                        $cacheTokens = $metadata['usageMetadata']['totalTokenCount'];
+                        $responseData['usageMetadata'] = $metadata['usageMetadata'];
+                        $this->logger?->debug('Got cache tokens from metadata API', [
+                            'cache_tokens' => $cacheTokens,
+                        ]);
+                    }
+                } catch (Throwable $e) {
+                    $this->logger?->warning('Failed to fetch cache metadata', [
+                        'error' => $e->getMessage(),
+                    ]);
+                }
+            }
+
+            $this->logger?->info('Gemini cache API response', [
+                'cache_name' => $cacheName,
+                'model' => $model,
+                'cache_tokens' => $cacheTokens,
+                'token_source' => $cacheTokens !== null ? 'api' : 'none',
+            ]);
+
+            return $responseData;
+        } catch (Throwable $e) {
+            $this->logger?->error('Failed to create Gemini cache', [
+                'error' => $e->getMessage(),
+                'model' => $model,
+            ]);
+            throw $e;
+        }
+    }
+
+    /**
+     * Delete a cache.
+     *
+     * The request goes to "<base uri>/<cache name>"; the outcome is logged either way.
+     *
+     * @param string $cacheName cache name (e.g. cachedContents/xxx)
+     * @throws Throwable any failure of the DELETE request, rethrown after being logged
+     */
+    public function deleteCache(string $cacheName): void
+    {
+        $endpoint = implode('/', [$this->getBaseUri(), $cacheName]);
+        $requestOptions = [RequestOptions::HEADERS => $this->getHeaders()];
+        $logContext = ['cache_name' => $cacheName];
+
+        try {
+            $this->logger?->debug(
+                'Deleting Gemini cache',
+                $logContext + ['url' => $endpoint]
+            );
+
+            $this->client->delete($endpoint, $requestOptions);
+
+            $this->logger?->info('Gemini cache deleted successfully', $logContext);
+        } catch (Throwable $failure) {
+            // Log with the same context keys, then let the caller deal with the failure.
+            $this->logger?->error(
+                'Failed to delete Gemini cache',
+                ['error' => $failure->getMessage()] + $logContext
+            );
+
+            throw $failure;
+        }
+    }
+
+    /**
+     * Fetch the metadata of a cache.
+     *
+     * @param string $cacheName cache name (e.g. cachedContents/xxx)
+     * @return array decoded cache metadata
+     * @throws RuntimeException when the response body does not decode to an array
+     */
+    public function getCache(string $cacheName): array
+    {
+        $url = $this->getBaseUri() . '/' . $cacheName;
+        $options = [RequestOptions::HEADERS => $this->getHeaders()];
+        try {
+            $response = $this->client->get($url, $options);
+            $cacheData = json_decode($response->getBody()->getContents(), true);
+            if (! is_array($cacheData)) {
+                throw new RuntimeException('Failed to get cache: response is not a JSON object');
+            }
+            return $cacheData;
+        } catch (Throwable $e) {
+            $this->logger?->error('Failed to get Gemini cache', [
+                'error' => $e->getMessage(),
+                'cache_name' => $cacheName,
+            ]);
+            throw $e;
+        }
+    }
+
+    /**
+     * Build the authentication headers.
+     *
+     * @return array the x-goog-api-key header, or an empty array when no key is configured
+     */
+    private function getHeaders(): array
+    {
+        $apiKey = $this->config->getApiKey();
+
+        return $apiKey ? ['x-goog-api-key' => $apiKey] : [];
+    }
+
+    /** Base URL from the config with trailing slashes removed. */
+    private function getBaseUri(): string
+    {
+        $configuredUrl = $this->config->getBaseUrl();
+
+        return rtrim($configuredUrl, '/');
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/GeminiCacheConfig.php b/src/Api/Providers/Gemini/Cache/GeminiCacheConfig.php
new file mode 100644
index 0000000..44e6f08
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/GeminiCacheConfig.php
@@ -0,0 +1,123 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache;
+
+/**
+ * Gemini cache configuration.
+ * Settings shared by the conversation caching strategy.
+ */
+class GeminiCacheConfig
+{
+    /**
+     * Master switch: whether caching is enabled.
+     */
+    private bool $enableCache;
+
+    /**
+     * Minimum token count required before a cache is created.
+     * For the initial cache (system + tools) this is the minimum.
+     * Constructor default: 4096 tokens.
+     */
+    private int $minCacheTokens;
+
+    /**
+     * Cache refresh threshold (tokens added since the last cache).
+     * When the conversation grows by this many tokens, the cache will be updated.
+     * Constructor default: 8000 tokens.
+     */
+    private int $refreshThreshold;
+
+    /**
+     * Cache TTL in seconds, clamped by the constructor to 60s - 86400s (24 hours).
+     * Constructor default: 600 seconds (10 minutes).
+     */
+    private int $cacheTtl;
+
+    /**
+     * Ratio applied to token estimations to bring them closer to actual values.
+     * Clamped by the constructor to 0.0 - 1.0 (e.g., 0.33 means actual tokens are
+     * typically 33% of the estimate).
+     *
+     * Based on real-world data: Gemini actual tokens are typically ~32% of estimated tokens.
+     * We use 0.33 as a slightly conservative value.
+     */
+    private float $estimationRatio;
+
+    public function __construct(
+        bool $enableCache = false,
+        int $minCacheTokens = 4096,
+        int $refreshThreshold = 8000,
+        int $cacheTtl = 600,
+        float $estimationRatio = 0.33
+    ) {
+        // Out-of-range values are pulled back to the nearest bound instead of being rejected.
+        $boundedTtl = min(86400, $cacheTtl);
+        $boundedRatio = min(1.0, $estimationRatio);
+
+        $this->enableCache = $enableCache;
+        $this->minCacheTokens = $minCacheTokens;
+        $this->refreshThreshold = $refreshThreshold;
+        $this->cacheTtl = max(60, $boundedTtl);
+        $this->estimationRatio = max(0.0, $boundedRatio);
+    }
+
+    public function isEnableCache(): bool
+    {
+        return $this->enableCache;
+    }
+
+    public function getMinCacheTokens(): int
+    {
+        return $this->minCacheTokens;
+    }
+
+    public function getRefreshThreshold(): int
+    {
+        return $this->refreshThreshold;
+    }
+
+    public function getCacheTtl(): int
+    {
+        return $this->cacheTtl;
+    }
+
+    public function getEstimationRatio(): float
+    {
+        return $this->estimationRatio;
+    }
+
+    /**
+     * Get the minimum cache token count for a model name (matched case-insensitively).
+     *
+     * Names containing one of the Flash markers below map to 2048 tokens; every other
+     * name, including the Pro families (gemini-2.5-pro, gemini-2-pro, gemini-3-pro,
+     * gemini-3.0-pro), maps to 4096, the higher and therefore safer threshold.
+     *
+     * NOTE(review): ids written as "gemini-2.0-..." do not contain "gemini-2-flash" and so
+     * get 4096 - confirm whether 2.0 Flash was meant to be listed.
+     */
+    public static function getMinCacheTokensByModel(string $model): int
+    {
+        $normalized = strtolower($model);
+        $flashMarkers = ['gemini-2.5-flash', 'gemini-2-flash', 'gemini-3-flash', 'gemini-3.0-flash'];
+
+        foreach ($flashMarkers as $marker) {
+            if (str_contains($normalized, $marker)) {
+                return 2048;
+            }
+        }
+
+        // Pro models and unrecognised names share the same value.
+        return 4096;
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/GeminiCacheManager.php b/src/Api/Providers/Gemini/Cache/GeminiCacheManager.php
new file mode 100644
index 0000000..d616cc2
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/GeminiCacheManager.php
@@ -0,0 +1,80 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache;
+
+use Hyperf\Odin\Api\Providers\Gemini\Cache\Strategy\CacheStrategyInterface;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\Strategy\ConversationCacheStrategy;
+use Hyperf\Odin\Api\Providers\Gemini\GeminiConfig;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Psr\Log\LoggerInterface;
+
+/**
+ * Gemini cache manager.
+ * Manages conversation caching using a unified progressive cache strategy.
+ */
+class GeminiCacheManager
+{
+    private GeminiCacheConfig $config;
+
+    private ?ApiOptions $apiOptions;
+
+    private ?GeminiConfig $geminiConfig;
+
+    private ?LoggerInterface $logger;
+
+    public function __construct(
+        GeminiCacheConfig $config,
+        ?ApiOptions $apiOptions = null,
+        ?GeminiConfig $geminiConfig = null,
+        ?LoggerInterface $logger = null,
+    ) {
+        $this->config = $config;
+        $this->apiOptions = $apiOptions;
+        $this->geminiConfig = $geminiConfig;
+        $this->logger = $logger;
+    }
+
+    /**
+     * Check or create cache (called before request).
+     *
+     * @param ChatCompletionRequest $request Request object
+     * @return null|CacheInfo Cache information, or null when no cache applies or no GeminiConfig was given
+     */
+    public function checkCache(ChatCompletionRequest $request): ?CacheInfo
+    {
+        // GeminiCacheClient requires a non-null config; skip caching rather than fail with a TypeError.
+        if ($this->geminiConfig === null) {
+            $this->logger?->warning('Gemini cache skipped: no GeminiConfig available');
+            return null;
+        }
+
+        $cacheInfo = $this->createStrategy()->apply($this->config, $request);
+        if ($cacheInfo) {
+            $this->logger?->info('Cache applied', [
+                'cache_name' => $cacheInfo->getCacheName(),
+                'is_newly_created' => $cacheInfo->isNewlyCreated(),
+                'cache_write_tokens' => $cacheInfo->getCacheWriteTokens(),
+            ]);
+        }
+
+        return $cacheInfo;
+    }
+
+    /** Create the conversation cache strategy, currently the only strategy available. */
+    private function createStrategy(): CacheStrategyInterface
+    {
+        $cacheClient = new GeminiCacheClient($this->geminiConfig, $this->apiOptions, $this->logger);
+        return new ConversationCacheStrategy($cacheClient, $this->logger);
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/Strategy/CachePointMessage.php b/src/Api/Providers/Gemini/Cache/Strategy/CachePointMessage.php
new file mode 100644
index 0000000..b528304
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/Strategy/CachePointMessage.php
@@ -0,0 +1,55 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache\Strategy;
+
+use Hyperf\Odin\Contract\Message\MessageInterface;
+
+class CachePointMessage
+{
+    /** Memoized hash of the wrapped value; written by getHash(). */
+    private string $hash;
+
+    /**
+     * @param mixed $originMessage Wrapped value: a message object or any other serializable value
+     * @param int $tokens Token count attached to this cache point
+     */
+    public function __construct(
+        private mixed $originMessage,
+        private int $tokens,
+    ) {
+        $this->getHash();
+    }
+
+    public function getOriginMessage(): mixed
+    {
+        return $this->originMessage;
+    }
+
+    /**
+     * Hash of the wrapped value: the message's own hash, or md5 of its serialized form.
+     */
+    public function getHash(): string
+    {
+        // empty() is also true while the typed property is still uninitialized.
+        if (empty($this->hash)) {
+            $origin = $this->originMessage;
+            $this->hash = $origin instanceof MessageInterface ? $origin->getHash() : md5(serialize($origin));
+        }
+        return $this->hash;
+    }
+
+    public function getTokens(): int
+    {
+        return $this->tokens;
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/Strategy/CacheStrategyInterface.php b/src/Api/Providers/Gemini/Cache/Strategy/CacheStrategyInterface.php
new file mode 100644
index 0000000..00bd7d6
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/Strategy/CacheStrategyInterface.php
@@ -0,0 +1,30 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache\Strategy;
+
+use Hyperf\Odin\Api\Providers\Gemini\Cache\CacheInfo;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\GeminiCacheConfig;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+
+interface CacheStrategyInterface
+{
+    /**
+     * Apply cache strategy to the request (called before request).
+     * Implementations decide whether an existing cache is used or a new one is created.
+     *
+     * @param GeminiCacheConfig $config Cache configuration
+     * @param ChatCompletionRequest $request Request object
+     * @return null|CacheInfo Cache information object, or null when no cache applies to this request
+     */
+    public function apply(GeminiCacheConfig $config, ChatCompletionRequest $request): ?CacheInfo;
+}
diff --git a/src/Api/Providers/Gemini/Cache/Strategy/ConversationCacheStrategy.php b/src/Api/Providers/Gemini/Cache/Strategy/ConversationCacheStrategy.php
new file mode 100644
index 0000000..46b52f2
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/Strategy/ConversationCacheStrategy.php
@@ -0,0 +1,481 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache\Strategy;
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\CacheInfo;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\GeminiCacheClient;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\GeminiCacheConfig;
+use Hyperf\Odin\Api\Providers\Gemini\RequestHandler;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Contract\Message\MessageInterface;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Utils\ToolUtil;
+use Psr\Log\LoggerInterface;
+use Psr\SimpleCache\CacheInterface;
+use Throwable;
+
+/**
+ * Conversation cache strategy - unified caching for conversations.
+ * Implements progressive caching:
+ * - Cached content: system instruction + tools + all messages except the last one
+ * - Initial: created once the estimated cached tokens reach the minimum
+ * - Growth: recreated when the estimate grows by the refresh threshold.
+ */
+class ConversationCacheStrategy implements CacheStrategyInterface
+{
+    private CacheInterface $cache;
+
+    private GeminiCacheClient $cacheClient;
+
+    private ?LoggerInterface $logger;
+
+    public function __construct(
+        GeminiCacheClient $cacheClient,
+        ?LoggerInterface $logger = null,
+    ) {
+        $this->cache = ApplicationContext::getContainer()->get(CacheInterface::class);
+        $this->cacheClient = $cacheClient;
+        $this->logger = $logger;
+    }
+
+    /**
+     * Apply cache strategy to request.
+     *
+     * Logic:
+     * 1. Return null when caching is disabled or the request has no messages
+     * 2. Return null when there are fewer than 4 cache points
+     * 3. Build the cache key and look up the local cache record
+     * 4. No record: try to create the initial cache
+     * 5. Record found: refresh it when token growth reaches the threshold
+     * 6. Otherwise reuse the recorded cache
+     * 7. NOTE(review): cached hashes are not compared with the request here
+     */
+    public function apply(GeminiCacheConfig $config, ChatCompletionRequest $request): ?CacheInfo
+    {
+        if (! $config->isEnableCache()) {
+            return null;
+        }
+        $messages = $request->getMessages();
+        if (empty($messages)) {
+            return null;
+        }
+        $messageCacheManager = $this->createMessageCacheManager($request);
+
+        // At least 4 cache points (tools + system + messages) are required; all but the last message are cached.
+        if (count($messageCacheManager->getCachePointMessages()) < 4) {
+            $this->logger?->debug('Not enough message points for caching');
+            return null;
+        }
+
+        // Get cache key
+        $cacheKey = $messageCacheManager->getCacheKey($request->getModel());
+
+        // Try to get from local cache
+        $cachedData = $this->getLocalCachedData($cacheKey);
+
+        // No existing cache, create initial cache
+        if ($cachedData === null) {
+            return $this->createInitialCache($config, $request, $cacheKey);
+        }
+
+        // Check if you should update cache
+        if ($this->shouldUpdateCache($config, $cachedData, $request)) {
+            return $this->updateCache($config, $cachedData, $request, $cacheKey);
+        }
+
+        // Use existing cache
+        $this->logger?->info('Using existing cache', [
+            'cache_name' => $cachedData->getCacheName(),
+        ]);
+
+        return new CacheInfo(
+            cacheName: $cachedData->getCacheName(),
+            isNewlyCreated: false,
+            cacheWriteTokens: 0,
+            cachedMessageHashes: $cachedData->getCachedMessageHashes()
+        );
+    }
+
+    private function createMessageCacheManager(ChatCompletionRequest $request): GeminiMessageCacheManager
+    {
+        $index = 2;
+        // Tools count as a cache point of their own (index 0); the system message takes index 1.
+        $toolsArray = ToolUtil::filter($request->getTools());
+        $cachePointMessages = [0 => new CachePointMessage($toolsArray, $request->getToolsTokenEstimate() ?? 0)];
+        foreach ($request->getMessages() as $message) {
+            if ($message instanceof SystemMessage) {
+                $cachePointMessages[1] = new CachePointMessage($message, $message->getTokenEstimate() ?? 0);
+            } else {
+                $cachePointMessages[$index] = new CachePointMessage($message, $message->getTokenEstimate() ?? 0);
+                ++$index;
+            }
+        }
+
+        return new GeminiMessageCacheManager($cachePointMessages);
+    }
+
+    /**
+     * Create the initial cache for a conversation that has no local cache record.
+     * The cache is only created when the estimated cached tokens reach the larger of
+     * the configured minimum and the model's minimum; otherwise null is returned.
+     * Creation failures are logged and also yield null.
+     */
+    private function createInitialCache(
+        GeminiCacheConfig $config,
+        ChatCompletionRequest $request,
+        string $cacheKey
+    ): ?CacheInfo {
+        $estimatedCachedTokens = $this->calculateEstimatedCachedTokens($config, $request);
+
+        // Check minimum threshold
+        $minTokens = max(
+            $config->getMinCacheTokens(),
+            GeminiCacheConfig::getMinCacheTokensByModel($request->getModel())
+        );
+
+        if ($estimatedCachedTokens < $minTokens) {
+            $this->logger?->debug('Cache not created: below minimum tokens', [
+                'estimated_cached_tokens' => $estimatedCachedTokens,
+                'min_tokens' => $minTokens,
+            ]);
+            return null;
+        }
+
+        try {
+            $this->logger?->info('Creating initial cache', [
+                'model' => $request->getModel(),
+                'estimated_cached_tokens' => $estimatedCachedTokens,
+            ]);
+
+            return $this->performCacheCreation($config, $request, $cacheKey, $estimatedCachedTokens, 'Initial');
+        } catch (Throwable $e) {
+            $this->logger?->warning('Failed to create initial cache', [
+                'error' => $e->getMessage(),
+            ]);
+            return null;
+        }
+    }
+
+    /**
+     * Check if cache should be updated.
+     * Update when: incremental tokens reach refresh threshold.
+     */
+    private function shouldUpdateCache(
+        GeminiCacheConfig $config,
+        LocalCachedData $cachedData,
+        ChatCompletionRequest $request
+    ): bool {
+        $currentEstimatedCachedTokens = $this->calculateEstimatedCachedTokens($config, $request);
+
+        // Get last cached tokens
+        $lastActualTokens = $cachedData->getActualCachedTokens();
+        $lastEstimatedTokens = $cachedData->getEstimatedCachedTokens();
+
+        if ($lastEstimatedTokens === 0 && $lastActualTokens === null) {
+            $this->logger?->info('Cache should update: no last cached tokens record');
+            return true;
+        }
+
+        // Use estimated vs estimated for comparison (most fair)
+        $lastTokens = $lastEstimatedTokens ?: ($lastActualTokens ?? 0);
+        $incrementalTokens = $currentEstimatedCachedTokens - $lastTokens;
+
+        if ($incrementalTokens <= 0) {
+            $this->logger?->debug('Cache should NOT update: no token growth', [
+                'current_tokens' => $currentEstimatedCachedTokens,
+                'last_tokens' => $lastTokens,
+            ]);
+            return false;
+        }
+
+        $threshold = $config->getRefreshThreshold();
+        $shouldUpdate = $incrementalTokens >= $threshold;
+
+        if ($shouldUpdate) {
+            $this->logger?->info('Cache should update: threshold reached', [
+                'cache_name' => $cachedData->getCacheName(),
+                'current_estimated_tokens' => $currentEstimatedCachedTokens,
+                'last_tokens' => $lastTokens,
+                'incremental_tokens' => $incrementalTokens,
+                'threshold' => $threshold,
+            ]);
+        } else {
+            $this->logger?->debug('Cache should NOT update: below threshold', [
+                'current_tokens' => $currentEstimatedCachedTokens,
+                'last_tokens' => $lastTokens,
+                'incremental_tokens' => $incrementalTokens,
+                'threshold' => $threshold,
+            ]);
+        }
+
+        return $shouldUpdate;
+    }
+
+    /**
+     * Update cache (create new, delete old); falls back to the old cache when creation fails.
+     */
+    private function updateCache(
+        GeminiCacheConfig $config,
+        LocalCachedData $oldCachedData,
+        ChatCompletionRequest $request,
+        string $cacheKey
+    ): CacheInfo {
+        try {
+            $this->logger?->info('Updating cache', [
+                'model' => $request->getModel(),
+                'old_cache_name' => $oldCachedData->getCacheName(),
+            ]);
+
+            $estimatedCachedTokens = $this->calculateEstimatedCachedTokens($config, $request);
+            $cacheInfo = $this->performCacheCreation($config, $request, $cacheKey, $estimatedCachedTokens, 'Cache updated');
+
+            // Delete the superseded cache (synchronous, best effort: failures are only logged)
+            $oldCacheName = $oldCachedData->getCacheName();
+            if ($oldCacheName && $oldCacheName !== $cacheInfo->getCacheName()) {
+                $this->deleteOldCache($oldCacheName);
+            }
+
+            return $cacheInfo;
+        } catch (Throwable $e) {
+            $this->logger?->warning('Failed to update cache, using old cache', [
+                'error' => $e->getMessage(),
+            ]);
+
+            // Update failed, use old cache with 0 write tokens
+            return new CacheInfo(
+                cacheName: $oldCachedData->getCacheName(),
+                isNewlyCreated: false,
+                cacheWriteTokens: 0,
+                cachedMessageHashes: $oldCachedData->getCachedMessageHashes()
+            );
+        }
+    }
+
+    /**
+     * Build cache config for API.
+     * Cache content: systemInstruction + tools + historical messages (exclude last).
+     */
+    private function buildCacheConfig(GeminiCacheConfig $config, ChatCompletionRequest $request): array
+    {
+        $cacheConfig = [];
+
+        // 1. Add systemInstruction
+        $systemMessage = $this->getSystemMessage($request);
+        if ($systemMessage) {
+            $systemText = $systemMessage->getContent();
+            if (! empty($systemText)) {
+                $cacheConfig['systemInstruction'] = [
+                    'parts' => [
+                        ['text' => $systemText],
+                    ],
+                ];
+            }
+        }
+
+        // 2. Add tools
+        $tools = $request->getTools();
+        if (! empty($tools)) {
+            $convertedTools = RequestHandler::convertTools($tools);
+            if (! empty($convertedTools)) {
+                $cacheConfig['tools'] = $convertedTools;
+            }
+        }
+
+        // 3. Add historical messages: everything except the last one is handed to the converter
+        $messages = $request->getMessages();
+        $historicalMessages = array_slice($messages, 0, -1); // Exclude last message
+
+        if (! empty($historicalMessages)) {
+            $result = RequestHandler::convertMessages($historicalMessages);
+            if (! empty($result['contents'])) {
+                $cacheConfig['contents'] = $result['contents'];
+            }
+        }
+
+        // 4. Set TTL
+        $ttl = $config->getCacheTtl();
+        $cacheConfig['ttl'] = $ttl . 's';
+
+        return $cacheConfig;
+    }
+
+    /**
+     * Hashes of the cached messages: all but the last one, skipping empty hashes.
+     *
+     * @param array<MessageInterface> $messages
+     */
+    private function calculateCachedMessageHashes(array $messages): array
+    {
+        $hashes = [];
+
+        // Exclude last message (current user message, not cached)
+        $messagesToCache = array_slice($messages, 0, -1);
+
+        foreach ($messagesToCache as $message) {
+            $hash = $message->getHash();
+            if ($hash) {
+                $hashes[] = $hash;
+            }
+        }
+
+        return $hashes;
+    }
+
+    /**
+     * Get the first system message from the request, or null when there is none.
+     */
+    private function getSystemMessage(ChatCompletionRequest $request): ?SystemMessage
+    {
+        foreach ($request->getMessages() as $message) {
+            if ($message instanceof SystemMessage) {
+                return $message;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Get local cached data from cache storage.
+     * Returns LocalCachedData object if an array is stored under the key, null otherwise.
+     */
+    private function getLocalCachedData(string $cacheKey): ?LocalCachedData
+    {
+        $cachedDataArray = $this->cache->get($cacheKey);
+
+        if (! is_array($cachedDataArray)) {
+            return null;
+        }
+
+        return LocalCachedData::fromArray($cachedDataArray);
+    }
+
+    /**
+     * Calculate estimated cached tokens.
+     * Formula: (totalTokens - lastMessageTokens) * estimationRatio.
+     */
+    private function calculateEstimatedCachedTokens(
+        GeminiCacheConfig $config,
+        ChatCompletionRequest $request
+    ): int {
+        $messages = $request->getMessages();
+        $totalEstimate = $request->getTotalTokenEstimate() ?? 0;
+        $lastMessage = end($messages);
+        $lastMessageTokens = $lastMessage->getTokenEstimate() ?? 0;
+        $rawEstimate = $totalEstimate - $lastMessageTokens;
+
+        return (int) round($rawEstimate * $config->getEstimationRatio());
+    }
+
+    /**
+     * Perform cache creation (shared logic for initial and update) and record it locally.
+     * Throws RuntimeException when the API response carries no cache name; callers catch Throwable.
+     */
+    private function performCacheCreation(
+        GeminiCacheConfig $config,
+        ChatCompletionRequest $request,
+        string $cacheKey,
+        int $estimatedCachedTokens,
+        string $logPrefix
+    ): CacheInfo {
+        $cacheConfig = $this->buildCacheConfig($config, $request);
+        $cacheResponse = $this->cacheClient->createCache($request->getModel(), $cacheConfig);
+        $cacheName = $cacheResponse['name'] ?? '';
+        // Never record a nameless cache: it would be stored locally and returned for later requests.
+        if (! is_string($cacheName) || $cacheName === '') {
+            throw new \RuntimeException('Gemini cache creation response did not contain a cache name');
+        }
+
+        // Get actual tokens from API response
+        $actualCacheTokens = $cacheResponse['usageMetadata']['totalTokenCount'] ?? null;
+        $finalTokens = $actualCacheTokens ?? $estimatedCachedTokens;
+
+        $messages = $request->getMessages();
+        $cachedMessageHashes = $this->calculateCachedMessageHashes($messages);
+
+        $localCachedData = new LocalCachedData(
+            cacheName: $cacheName,
+            model: $request->getModel(),
+            actualCachedTokens: $actualCacheTokens,
+            estimatedCachedTokens: $estimatedCachedTokens,
+            cachedMessageHashes: $cachedMessageHashes,
+            createdAt: time()
+        );
+
+        $this->saveCacheToLocalStorage($cacheKey, $localCachedData, $config->getCacheTtl());
+
+        $this->logCacheOperationSuccess(
+            $logPrefix,
+            $cacheName,
+            $estimatedCachedTokens,
+            $actualCacheTokens,
+            $finalTokens,
+            count($cachedMessageHashes)
+        );
+
+        return new CacheInfo(
+            cacheName: $cacheName,
+            isNewlyCreated: true,
+            cacheWriteTokens: $finalTokens,
+            cachedMessageHashes: $cachedMessageHashes
+        );
+    }
+
+    /**
+     * Save cache data to local storage under the cache key with the given TTL.
+     */
+    private function saveCacheToLocalStorage(
+        string $cacheKey,
+        LocalCachedData $localCachedData,
+        int $ttl
+    ): void {
+        $this->cache->set($cacheKey, $localCachedData->toArray(), $ttl);
+    }
+
+    /**
+     * Log cache operation success.
+     */
+    private function logCacheOperationSuccess(
+        string $prefix,
+        string $cacheName,
+        int $estimatedTokens,
+        ?int $actualTokens,
+        int $finalTokens,
+        int $cachedMessageCount
+    ): void {
+        $this->logger?->info($prefix . ' successfully', [
+            'cache_name' => $cacheName,
+            'estimated_tokens' => $estimatedTokens,
+            'actual_tokens' => $actualTokens,
+            'final_tokens' => $finalTokens,
+            'cached_message_count' => $cachedMessageCount,
+            'source' => $actualTokens !== null ? 'api' : 'estimated',
+        ]);
+    }
+
+    /**
+     * Delete old cache synchronously; failures are logged as warnings and never rethrown.
+     */
+    private function deleteOldCache(string $oldCacheName): void
+    {
+        try {
+            $this->cacheClient->deleteCache($oldCacheName);
+            $this->logger?->debug('Deleted old cache', ['cache_name' => $oldCacheName]);
+        } catch (Throwable $e) {
+            $this->logger?->warning('Failed to delete old cache', [
+                'cache_name' => $oldCacheName,
+                'error' => $e->getMessage(),
+            ]);
+        }
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/Strategy/GeminiMessageCacheManager.php b/src/Api/Providers/Gemini/Cache/Strategy/GeminiMessageCacheManager.php
new file mode 100644
index 0000000..73993c9
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/Strategy/GeminiMessageCacheManager.php
@@ -0,0 +1,78 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache\Strategy;
+
+/**
+ * Message cache manager for Gemini caching.
+ * Holds the ordered cache point messages (tools, system, conversation messages)
+ * and exposes their hashes. ConversationCacheStrategy uses it to:
+ * - build the local cache key from model, tools, system and first message hashes
+ * - count the available cache points.
+ */
+class GeminiMessageCacheManager
+{
+    /**
+     * Cache points sorted by index.
+     * Index layout:
+     * - 0: tools
+     * - 1: system message
+     * - 2+: user/assistant/tool messages.
+     *
+     * @var array<int, CachePointMessage>
+     */
+    private array $cachePointMessages;
+
+    public function __construct(array $cachePointMessages)
+    {
+        ksort($cachePointMessages);
+        $this->cachePointMessages = $cachePointMessages;
+    }
+
+    public function getCacheKey(string $model): string
+    {
+        $fingerprint = $model . $this->getToolsHash() . $this->getSystemMessageHash() . $this->getFirstUserMessageHash();
+        return 'gemini_cache:' . md5($fingerprint);
+    }
+
+    public function getToolsHash(): string
+    {
+        return $this->hashAt(0);
+    }
+
+    public function getSystemMessageHash(): string
+    {
+        return $this->hashAt(1);
+    }
+
+    /**
+     * Hash of the cache point at index 2, i.e. the first conversation message.
+     */
+    public function getFirstUserMessageHash(): string
+    {
+        return $this->hashAt(2);
+    }
+
+    public function getCachePointMessages(): array
+    {
+        return $this->cachePointMessages;
+    }
+
+    /**
+     * Hash of the cache point stored at $index, or '' when that slot is empty.
+     */
+    private function hashAt(int $index): string
+    {
+        $point = $this->cachePointMessages[$index] ?? null;
+        return $point?->getHash() ?? '';
+    }
+}
diff --git a/src/Api/Providers/Gemini/Cache/Strategy/LocalCachedData.php b/src/Api/Providers/Gemini/Cache/Strategy/LocalCachedData.php
new file mode 100644
index 0000000..b9ac01b
--- /dev/null
+++ b/src/Api/Providers/Gemini/Cache/Strategy/LocalCachedData.php
@@ -0,0 +1,104 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini\Cache\Strategy;
+
+/**
+ * Local cached data object.
+ * Record describing a created Gemini cache; converts to and from a plain array for cache storage.
+ */
+class LocalCachedData
+{
+    /**
+     * @param array<string> $cachedMessageHashes
+     */
+    public function __construct(
+        private string $cacheName,
+        private string $model,
+        private ?int $actualCachedTokens,
+        private int $estimatedCachedTokens,
+        private array $cachedMessageHashes,
+        private int $createdAt
+    ) {}
+
+    public function getCacheName(): string
+    {
+        return $this->cacheName;
+    }
+
+    public function getModel(): string
+    {
+        return $this->model;
+    }
+
+    public function getActualCachedTokens(): ?int
+    {
+        return $this->actualCachedTokens;
+    }
+
+    public function getEstimatedCachedTokens(): int
+    {
+        return $this->estimatedCachedTokens;
+    }
+
+    /**
+     * @return array<string>
+     */
+    public function getCachedMessageHashes(): array
+    {
+        return $this->cachedMessageHashes;
+    }
+
+    public function getCreatedAt(): int
+    {
+        return $this->createdAt;
+    }
+
+    /**
+     * Convert to array for storage.
+     */
+    public function toArray(): array
+    {
+        return [
+            'cache_name' => $this->cacheName,
+            'model' => $this->model,
+            'actual_cached_tokens' => $this->actualCachedTokens,
+            'estimated_cached_tokens' => $this->estimatedCachedTokens,
+            'cached_message_hashes' => $this->cachedMessageHashes,
+            'created_at' => $this->createdAt,
+        ];
+    }
+
+    /**
+     * Create from array retrieved from cache; missing keys fall back to empty defaults.
+     */
+    public static function fromArray(array $data): self
+    {
+        return new self(
+            cacheName: $data['cache_name'] ?? '',
+            model: $data['model'] ?? '',
+            actualCachedTokens: $data['actual_cached_tokens'] ?? null,
+            estimatedCachedTokens: $data['estimated_cached_tokens'] ?? 0,
+            cachedMessageHashes: $data['cached_message_hashes'] ?? [],
+            createdAt: $data['created_at'] ?? time()
+        );
+    }
+
+    /**
+     * Get the last cached tokens: the estimate when one was recorded (non-zero),
+     * otherwise the actual token count, otherwise 0.
+     */
+    public function getLastCachedTokens(): int
+    {
+        return $this->estimatedCachedTokens ?: ($this->actualCachedTokens ?? 0);
+    }
+}
diff --git a/src/Api/Providers/Gemini/Client.php b/src/Api/Providers/Gemini/Client.php
new file mode 100644
index 0000000..8009e5d
--- /dev/null
+++ b/src/Api/Providers/Gemini/Client.php
@@ -0,0 +1,375 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini;
+
+use GuzzleHttp\RequestOptions;
+use Hyperf\Engine\Coroutine;
+use Hyperf\Odin\Api\Providers\AbstractClient;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\CacheInfo;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\GeminiCacheManager;
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Api\Response\ChatCompletionResponse;
+use Hyperf\Odin\Api\Response\ChatCompletionStreamResponse;
+use Hyperf\Odin\Api\Transport\OdinSimpleCurl;
+use Hyperf\Odin\Event\AfterChatCompletionsEvent;
+use Hyperf\Odin\Event\AfterChatCompletionsStreamEvent;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Utils\EventUtil;
+use Psr\Log\LoggerInterface;
+use Throwable;
+
+class Client extends AbstractClient
+{
+    /**
+     * Create the Gemini client.
+     *
+     * A fresh ApiOptions instance is passed to the parent when no request options are given.
+     */
+    public function __construct(GeminiConfig $config, ?ApiOptions $requestOptions = null, ?LoggerInterface $logger = null)
+    {
+        parent::__construct($config, $requestOptions ?? new ApiOptions(), $logger);
+    }
+
+    /**
+     * Run a non-streaming chat completion against the Gemini native generateContent endpoint.
+     *
+     * The request is validated, converted to Gemini format (with cache handling) and posted
+     * through the HTTP client; the decoded reply is converted into a ChatCompletionResponse.
+     * Any Throwable raised along the way is rethrown via convertException().
+     */
+    public function chatCompletions(ChatCompletionRequest $chatRequest): ChatCompletionResponse
+    {
+        $chatRequest->validate();
+        $startTime = microtime(true);
+
+        try {
+            $model = $chatRequest->getModel();
+
+            // Gemini payload plus the tokens written to a newly created cache (0 otherwise)
+            $prepared = $this->prepareRequestWithCache($chatRequest);
+
+            $url = $this->buildGeminiUrl($model, false);
+            $options = [
+                RequestOptions::JSON => $prepared['geminiRequest'],
+                RequestOptions::HEADERS => $this->getHeaders(),
+            ];
+            $requestId = $this->addRequestIdToOptions($options);
+
+            $this->logRequest('GeminiChatRequest', $url, $options, $requestId);
+
+            $httpResponse = $this->client->post($url, $options);
+            $duration = $this->calculateDuration($startTime);
+
+            // Decode the native reply and wrap it in the OpenAI-style response object
+            $decoded = json_decode($httpResponse->getBody()->getContents(), true);
+            $chatResponse = new ChatCompletionResponse(
+                ResponseHandler::convertResponse($decoded, $model, $prepared['cacheWriteTokens']),
+                $this->logger
+            );
+
+            // Remember thought signatures attached to tool calls
+            $this->cacheThoughtSignatures($chatResponse);
+
+            $this->logResponse('GeminiChatResponse', $requestId, $duration, [
+                'content' => $chatResponse->getFirstChoice()?->getMessage()?->toArray(),
+                'usage' => $chatResponse->getUsage()?->toArray(),
+                'response_headers' => $httpResponse->getHeaders(),
+                'original_response_usage' => $decoded['usageMetadata'] ?? [],
+            ]);
+
+            // Any cache creation has already happened synchronously at this point
+            EventUtil::dispatch(new AfterChatCompletionsEvent($chatRequest, $chatResponse, $duration));
+
+            return $chatResponse;
+        } catch (Throwable $e) {
+            throw $this->convertException($e, $this->createExceptionContext($url ?? '', $options ?? [], 'completions'));
+        }
+    }
+
+    /**
+     * Chat completions streaming using Gemini native API.
+     *
+     * Validates the request, forces stream mode, converts it to Gemini format (with cache
+     * handling) and posts it to the streamGenerateContent endpoint. When Coroutine::id() is
+     * truthy the request goes through OdinSimpleCurl with the connection/chunk timeouts and
+     * optional proxy from the request options; otherwise the HTTP client is used.
+     *
+     * The headers from getHeaders() (presumably including the auth headers) are attached
+     * for both transports, matching what chatCompletions() sends on the HTTP client path.
+     * Any Throwable is rethrown via convertException().
+     */
+    public function chatCompletionsStream(ChatCompletionRequest $chatRequest): ChatCompletionStreamResponse
+    {
+        $chatRequest->validate();
+        $chatRequest->setStream(true);
+        $startTime = microtime(true);
+
+        try {
+            $model = $chatRequest->getModel();
+
+            // Prepare request with cache handling
+            ['geminiRequest' => $geminiRequest, 'cacheWriteTokens' => $cacheWriteTokens] = $this->prepareRequestWithCache($chatRequest);
+
+            // Build URL for Gemini streaming API
+            $url = $this->buildGeminiUrl($model, true);
+
+            // Prepare request options
+            $options = [
+                RequestOptions::JSON => $geminiRequest,
+                RequestOptions::STREAM => true,
+                RequestOptions::TIMEOUT => $this->requestOptions->getStreamFirstChunkTimeout(),
+            ];
+
+            $requestId = $this->addRequestIdToOptions($options);
+
+            $this->logRequest('GeminiChatStreamRequest', $url, $options, $requestId);
+
+            // Attach the headers after logging and before choosing a transport, so the
+            // HTTP client branch does not go out without them.
+            foreach ($this->getHeaders() as $key => $value) {
+                $options[RequestOptions::HEADERS][$key] = $value;
+            }
+
+            // Send streaming request
+            if (Coroutine::id()) {
+                $options['connect_timeout'] = $this->requestOptions->getConnectionTimeout();
+                $options['stream_chunk'] = $this->requestOptions->getStreamChunkTimeout();
+                $options['header_timeout'] = $this->requestOptions->getStreamFirstChunkTimeout();
+                if ($proxy = $this->requestOptions->getProxy()) {
+                    $options['proxy'] = $proxy;
+                }
+                $response = OdinSimpleCurl::send($url, $options);
+            } else {
+                $response = $this->client->post($url, $options);
+            }
+
+            $firstResponseDuration = $this->calculateDuration($startTime);
+
+            // Create stream converter with cache write tokens
+            $streamConverter = new StreamConverter($response, $this->logger, $model, $cacheWriteTokens);
+
+            $chatCompletionStreamResponse = new ChatCompletionStreamResponse(
+                logger: $this->logger,
+                streamIterator: $streamConverter
+            );
+
+            // Register the event on the stream response (cache has already been created synchronously if needed)
+            $chatCompletionStreamResponse->setAfterChatCompletionsStreamEvent(
+                new AfterChatCompletionsStreamEvent($chatRequest, $firstResponseDuration)
+            );
+
+            $this->logResponse('GeminiChatStreamResponse', $requestId, $firstResponseDuration, [
+                'first_response_ms' => $firstResponseDuration,
+                'response_headers' => $response->getHeaders(),
+            ]);
+
+            return $chatCompletionStreamResponse;
+        } catch (Throwable $e) {
+            throw $this->convertException($e, $this->createExceptionContext($url ?? '', $options ?? [], 'stream'));
+        }
+    }
+
+    /**
+     * Build the OpenAI-style chat completions URL ('/chat/completions' under the base URI).
+     * Kept for compatibility; the chat methods of this class use buildGeminiUrl() instead.
+     */
+    protected function buildChatCompletionsUrl(): string
+    {
+        return "{$this->getBaseUri()}/chat/completions";
+    }
+
+    /**
+     * Build the embeddings URL ('/embeddings' under the base URI).
+     */
+    protected function buildEmbeddingsUrl(): string
+    {
+        return "{$this->getBaseUri()}/embeddings";
+    }
+
+    /**
+     * Build the text completions URL ('/completions' under the base URI).
+     */
+    protected function buildCompletionsUrl(): string
+    {
+        return "{$this->getBaseUri()}/completions";
+    }
+
+    /**
+     * Get authentication headers for Gemini API.
+     *
+     * @return array ['x-goog-api-key' => key] when the configured API key is truthy,
+     *               otherwise an empty array
+     */
+    protected function getAuthHeaders(): array
+    {
+        /** @var GeminiConfig $config */
+        $config = $this->config;
+        $apiKey = $config->getApiKey();
+
+        // Gemini uses x-goog-api-key header instead of Authorization
+        return $apiKey ? ['x-goog-api-key' => $apiKey] : [];
+    }
+
+    /**
+     * Look up (and, via GeminiCacheManager, possibly create) a cache for the request.
+     * The request itself is not modified here.
+     *
+     * Returns null when auto cache is disabled, when no cache config is set, when the
+     * manager reports no cache, or when the lookup throws; failures are logged as a
+     * warning and never propagate.
+     *
+     * @param ChatCompletionRequest $chatRequest Original request
+     * @return null|CacheInfo Cache information if cache is used/created, null otherwise
+     */
+    protected function checkCache(ChatCompletionRequest $chatRequest): ?CacheInfo
+    {
+        /** @var GeminiConfig $config */
+        $config = $this->config;
+
+        // getCacheConfig() is only consulted when auto cache is switched on
+        $cacheConfig = $config->isAutoCache() ? $config->getCacheConfig() : null;
+        if (! $cacheConfig) {
+            return null;
+        }
+
+        try {
+            $manager = new GeminiCacheManager(
+                $cacheConfig,
+                $this->getRequestOptions(),
+                $config,
+                $this->logger,
+            );
+            $cacheInfo = $manager->checkCache($chatRequest);
+            if (! $cacheInfo) {
+                return null;
+            }
+
+            $this->logger?->info('Gemini cache available', [
+                'cache_name' => $cacheInfo->getCacheName(),
+                'is_newly_created' => $cacheInfo->isNewlyCreated(),
+                'cache_write_tokens' => $cacheInfo->getCacheWriteTokens(),
+                'cached_message_count' => count($cacheInfo->getCachedMessageHashes()),
+            ]);
+
+            return $cacheInfo;
+        } catch (Throwable $e) {
+            // Best effort: a cache failure must not fail the chat request
+            $this->logger?->warning('Failed to check or create Gemini cache', [
+                'error' => $e->getMessage(),
+            ]);
+
+            return null;
+        }
+    }
+
+    /**
+     * Strip already-cached parts from the request before it is converted.
+     *
+     * The request is modified in place: messages whose hash is listed in the cache info
+     * are removed (both the filtered and the regular message list are replaced) and the
+     * tools are cleared. Nothing happens when there is no cache info.
+     *
+     * @param ChatCompletionRequest $chatRequest Request to adjust
+     * @param null|CacheInfo $cacheInfo Cache information
+     */
+    protected function prepareRequestForCache(ChatCompletionRequest $chatRequest, ?CacheInfo $cacheInfo): void
+    {
+        if ($cacheInfo === null) {
+            return;
+        }
+
+        // Drop messages whose hash is already cached; having a cache means the
+        // system prompt and tools are already part of it.
+        $cachedHashes = $cacheInfo->getCachedMessageHashes();
+        $uncachedMessages = array_values(array_filter(
+            $chatRequest->getMessages(),
+            static fn ($message): bool => ! in_array($message->getHash(), $cachedHashes, true),
+        ));
+
+        $chatRequest->setFilterMessages($uncachedMessages);
+        $chatRequest->setMessages($uncachedMessages);
+        $chatRequest->setTools([]);
+    }
+
+    /**
+     * Build the Gemini request payload, applying the cache when one is available.
+     *
+     * Token estimates are calculated first, then the cache is checked, cached messages
+     * are filtered out of the request, the request is converted to Gemini native format
+     * and, if a cache exists, its name is set as 'cachedContent'.
+     *
+     * @param ChatCompletionRequest $chatRequest Original request (mutated when a cache is used)
+     * @return array{'geminiRequest': array, 'cacheWriteTokens': int} cacheWriteTokens is
+     *                                                                 non-zero only for a newly created cache
+     */
+    private function prepareRequestWithCache(ChatCompletionRequest $chatRequest): array
+    {
+        $chatRequest->calculateTokenEstimates();
+
+        $cacheInfo = $this->checkCache($chatRequest);
+
+        // Write tokens are only reported when this call created the cache
+        $cacheWriteTokens = $cacheInfo?->isNewlyCreated() ? $cacheInfo->getCacheWriteTokens() : 0;
+
+        $this->prepareRequestForCache($chatRequest, $cacheInfo);
+
+        $geminiRequest = RequestHandler::convertRequest($chatRequest);
+        if ($cacheInfo !== null) {
+            $geminiRequest['cachedContent'] = $cacheInfo->getCacheName();
+        }
+
+        return [
+            'geminiRequest' => $geminiRequest,
+            'cacheWriteTokens' => $cacheWriteTokens,
+        ];
+    }
+
+    /**
+     * Build the Gemini native API URL for a model.
+     *
+     * Format: {baseUri}/models/{model}:{endpoint}. Streaming requests use the
+     * streamGenerateContent endpoint with '?alt=sse' (SSE format); all others use
+     * generateContent.
+     */
+    private function buildGeminiUrl(string $model, bool $stream): string
+    {
+        $modelUrl = "{$this->getBaseUri()}/models/{$model}";
+
+        if ($stream) {
+            return "{$modelUrl}:streamGenerateContent?alt=sse";
+        }
+
+        return "{$modelUrl}:generateContent";
+    }
+
+    /**
+     * Store the thought signature of every tool call in the first choice of the response.
+     *
+     * Nothing is stored when there is no first choice, when its message is not an
+     * AssistantMessage, when it has no tool calls, or for tool calls whose
+     * 'thought_signature' metadata is null.
+     */
+    private function cacheThoughtSignatures(ChatCompletionResponse $response): void
+    {
+        $message = $response->getFirstChoice()?->getMessage();
+        if (! $message instanceof AssistantMessage) {
+            return;
+        }
+
+        $toolCalls = $message->getToolCalls();
+        if (empty($toolCalls)) {
+            return;
+        }
+
+        foreach ($toolCalls as $toolCall) {
+            $signature = $toolCall->getMetadata('thought_signature');
+            if ($signature === null) {
+                continue;
+            }
+            ThoughtSignatureCache::store($toolCall->getId(), $signature);
+        }
+    }
+}
diff --git a/src/Api/Providers/Gemini/Gemini.php b/src/Api/Providers/Gemini/Gemini.php
new file mode 100644
index 0000000..c7d40b8
--- /dev/null
+++ b/src/Api/Providers/Gemini/Gemini.php
@@ -0,0 +1,50 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini;
+
+use Hyperf\Odin\Api\Providers\AbstractApi;
+use Hyperf\Odin\Api\RequestOptions\ApiOptions;
+use Hyperf\Odin\Exception\LLMException\Configuration\LLMInvalidApiKeyException;
+use Hyperf\Odin\Exception\LLMException\Configuration\LLMInvalidEndpointException;
+use Psr\Log\LoggerInterface;
+
+class Gemini extends AbstractApi
+{
+    /**
+     * Clients built so far, keyed by an MD5 of the serialized config and request options.
+     *
+     * @var Client[]
+     */
+    protected array $clients = [];
+
+    /**
+     * Return a client for the given configuration.
+     *
+     * A client built earlier is reused when the JSON form of the config array and of the
+     * request options array match; in that case the $logger passed to this call is not used.
+     *
+     * @throws LLMInvalidApiKeyException when the API key is empty and validation is not skipped
+     * @throws LLMInvalidEndpointException when the base URL is empty
+     */
+    public function getClient(GeminiConfig $config, ?ApiOptions $requestOptions = null, ?LoggerInterface $logger = null): Client
+    {
+        // The API key is mandatory unless the config explicitly opts out of validation
+        if (empty($config->getApiKey()) && ! $config->shouldSkipApiKeyValidation()) {
+            throw new LLMInvalidApiKeyException('API密钥不能为空', null, 'Gemini');
+        }
+
+        $baseUrl = $config->getBaseUrl();
+        if (empty($baseUrl)) {
+            throw new LLMInvalidEndpointException('基础URL不能为空', null, $baseUrl);
+        }
+
+        $requestOptions ??= new ApiOptions();
+
+        $key = md5(json_encode($config->toArray()) . json_encode($requestOptions->toArray()));
+
+        $client = $this->clients[$key] ?? null;
+        if (! $client instanceof Client) {
+            $client = new Client($config, $requestOptions, $logger);
+            $this->clients[$key] = $client;
+        }
+
+        return $client;
+    }
+}
diff --git a/src/Api/Providers/Gemini/GeminiConfig.php b/src/Api/Providers/Gemini/GeminiConfig.php
new file mode 100644
index 0000000..abacaa5
--- /dev/null
+++ b/src/Api/Providers/Gemini/GeminiConfig.php
@@ -0,0 +1,91 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini;
+
+use Hyperf\Odin\Api\Providers\Gemini\Cache\GeminiCacheConfig;
+use Hyperf\Odin\Contract\Api\ConfigInterface;
+
+class GeminiConfig implements ConfigInterface
+{
+    /**
+     * Base URL used when none is supplied.
+     */
+    private const DEFAULT_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta';
+
+    /**
+     * Base URL of the Gemini API.
+     */
+    public string $baseUrl;
+
+    /**
+     * API key for the Gemini API.
+     */
+    public string $apiKey;
+
+    /**
+     * Whether to skip API Key validation.
+     */
+    protected bool $skipApiKeyValidation = false;
+
+    /**
+     * Cache configuration; null until setCacheConfig() is called.
+     */
+    protected ?GeminiCacheConfig $cacheConfig = null;
+
+    /**
+     * @param string $apiKey API key
+     * @param string $baseUrl API base URL
+     * @param bool $skipApiKeyValidation whether the API key check should be skipped
+     */
+    public function __construct(
+        string $apiKey,
+        string $baseUrl = self::DEFAULT_BASE_URL,
+        bool $skipApiKeyValidation = false,
+    ) {
+        $this->apiKey = $apiKey;
+        $this->baseUrl = $baseUrl;
+        $this->skipApiKeyValidation = $skipApiKeyValidation;
+    }
+
+    /**
+     * Get the configured API key.
+     */
+    public function getApiKey(): string
+    {
+        return $this->apiKey;
+    }
+
+    /**
+     * Get the configured base URL.
+     */
+    public function getBaseUrl(): string
+    {
+        return $this->baseUrl;
+    }
+
+    /**
+     * Whether API key validation should be skipped.
+     */
+    public function shouldSkipApiKeyValidation(): bool
+    {
+        return $this->skipApiKeyValidation;
+    }
+
+    /**
+     * Build a config from an array with the keys 'api_key', 'base_url' and
+     * 'skip_api_key_validation'; missing keys fall back to '', the default base URL
+     * and false respectively. No cache configuration is set.
+     */
+    public static function fromArray(array $config): self
+    {
+        $apiKey = $config['api_key'] ?? '';
+        $baseUrl = $config['base_url'] ?? self::DEFAULT_BASE_URL;
+        $skipValidation = $config['skip_api_key_validation'] ?? false;
+
+        return new self($apiKey, $baseUrl, $skipValidation);
+    }
+
+    /**
+     * Export the API key, base URL and validation flag; the cache configuration is
+     * not part of the array.
+     */
+    public function toArray(): array
+    {
+        return [
+            'api_key' => $this->apiKey,
+            'base_url' => $this->baseUrl,
+            'skip_api_key_validation' => $this->skipApiKeyValidation,
+        ];
+    }
+
+    /**
+     * Whether a cache configuration is set and reports caching as enabled.
+     */
+    public function isAutoCache(): bool
+    {
+        return (bool) $this->cacheConfig?->isEnableCache();
+    }
+
+    /**
+     * Get the cache configuration, or null when none was set.
+     */
+    public function getCacheConfig(): ?GeminiCacheConfig
+    {
+        return $this->cacheConfig;
+    }
+
+    /**
+     * Set the cache configuration.
+     */
+    public function setCacheConfig(GeminiCacheConfig $cacheConfig): void
+    {
+        $this->cacheConfig = $cacheConfig;
+    }
+}
diff --git a/src/Api/Providers/Gemini/RequestHandler.php b/src/Api/Providers/Gemini/RequestHandler.php
new file mode 100644
index 0000000..8f22a43
--- /dev/null
+++ b/src/Api/Providers/Gemini/RequestHandler.php
@@ -0,0 +1,422 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini;
+
+use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Contract\Message\MessageInterface;
+use Hyperf\Odin\Contract\Tool\ToolInterface;
+use Hyperf\Odin\Message\AssistantMessage;
+use Hyperf\Odin\Message\Role;
+use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\ToolMessage;
+use Hyperf\Odin\Message\UserMessage;
+use Hyperf\Odin\Message\UserMessageContent;
+use Hyperf\Odin\Tool\Definition\ToolDefinition;
+use Hyperf\Odin\Utils\ImageDownloader;
+use stdClass;
+
+/**
+ * Request Handler for converting OpenAI format to Gemini native format.
+ */
+class RequestHandler
+{
+    /**
+     * Convert a ChatCompletionRequest into a Gemini native request array.
+     *
+     * 'contents' is always present; 'system_instruction', 'generationConfig' and 'tools'
+     * are added only when they are non-empty.
+     */
+    public static function convertRequest(ChatCompletionRequest $request): array
+    {
+        // Messages become contents; system messages are pulled out separately
+        $converted = self::convertMessages($request->getMessages());
+
+        $geminiRequest = ['contents' => $converted['contents']];
+
+        if (! empty($converted['system_instruction'])) {
+            $geminiRequest['system_instruction'] = $converted['system_instruction'];
+        }
+
+        // Generation config also carries the thinking config
+        $generationConfig = self::buildGenerationConfig($request);
+        if ($generationConfig !== []) {
+            $geminiRequest['generationConfig'] = $generationConfig;
+        }
+
+        $tools = $request->getTools();
+        $convertedTools = empty($tools) ? [] : self::convertTools($tools);
+        if ($convertedTools !== []) {
+            $geminiRequest['tools'] = $convertedTools;
+        }
+
+        return $geminiRequest;
+    }
+
+    /**
+     * Convert a UserMessage into a Gemini 'user' content.
+     * Public so that GeminiCacheManager can use it.
+     *
+     * When the message has multimodal contents, text items become text parts and image
+     * URL items are converted by convertImageUrl(); items of any other type are dropped.
+     * Otherwise the plain message content becomes a single text part.
+     *
+     * @return array{role: string, parts: array}
+     */
+    public static function convertUserMessage(UserMessage $message): array
+    {
+        $contents = $message->getContents();
+
+        if ($contents === null) {
+            // Simple text content
+            return [
+                'role' => 'user',
+                'parts' => [['text' => $message->getContent()]],
+            ];
+        }
+
+        $parts = [];
+        foreach ($contents as $item) {
+            $part = match ($item->getType()) {
+                UserMessageContent::TEXT => ['text' => $item->getText()],
+                UserMessageContent::IMAGE_URL => self::convertImageUrl($item->getImageUrl()),
+                default => null,
+            };
+
+            if ($part !== null) {
+                $parts[] = $part;
+            }
+        }
+
+        return [
+            'role' => 'user',
+            'parts' => $parts,
+        ];
+    }
+
+    /**
+     * Convert tools into Gemini's functionDeclarations structure.
+     * Public so that GeminiCacheManager can use it.
+     *
+     * ToolInterface instances are first turned into ToolDefinition; entries that are not
+     * a ToolDefinition afterwards are ignored. A tool without parameters gets an empty
+     * object schema.
+     *
+     * @return array empty when no tool could be converted, otherwise a one-element list
+     *               holding ['functionDeclarations' => [...]]
+     */
+    public static function convertTools(array $tools): array
+    {
+        $declarations = [];
+
+        foreach ($tools as $tool) {
+            $definition = $tool instanceof ToolInterface ? $tool->toToolDefinition() : $tool;
+            if (! $definition instanceof ToolDefinition) {
+                continue;
+            }
+
+            $declaration = [
+                'name' => $definition->getName(),
+                'description' => $definition->getDescription(),
+            ];
+
+            $parameters = $definition->getParameters();
+            $declaration['parameters'] = $parameters === null
+                // stdClass so the empty properties map encodes as a JSON object
+                ? ['type' => 'object', 'properties' => new stdClass()]
+                : $parameters->toArray();
+
+            $declarations[] = $declaration;
+        }
+
+        // Gemini expects a tools array whose entry carries functionDeclarations
+        return $declarations === [] ? [] : [['functionDeclarations' => $declarations]];
+    }
+
+    /**
+     * Convert messages array from OpenAI format to Gemini contents format.
+     * Made public for use in cache strategies (GlobalCacheStrategy, UserCacheStrategy).
+     *
+     * Entries that are not a MessageInterface are skipped. Non-empty system messages are
+     * collected and joined with a blank line into a single system instruction. User,
+     * assistant and tool messages become contents in their original order.
+     *
+     * Consecutive tool messages are merged into one 'user' content with one
+     * functionResponse part each: for parallel function calls Gemini expects all
+     * responses of a function-call turn together in a single content.
+     *
+     * @return array{contents: array, system_instruction: null|array}
+     */
+    public static function convertMessages(array $messages): array
+    {
+        $contents = [];
+        $systemInstructions = [];
+
+        // Track tool_call_id to function name mapping
+        // This is needed because OpenAI ToolMessage only has tool_call_id,
+        // but Gemini functionResponse requires the function name
+        $toolCallIdToName = [];
+
+        // True while the most recently appended content was produced by a ToolMessage
+        $lastWasToolResponse = false;
+
+        foreach ($messages as $message) {
+            if (! $message instanceof MessageInterface) {
+                continue;
+            }
+
+            // Handle system messages separately - extract to system_instruction
+            if ($message instanceof SystemMessage) {
+                if ($message->getContent() !== '') {
+                    $systemInstructions[] = $message->getContent();
+                }
+                continue;
+            }
+
+            // Track tool calls from assistant messages
+            if ($message instanceof AssistantMessage && $message->hasToolCalls()) {
+                foreach ($message->getToolCalls() as $toolCall) {
+                    $toolCallIdToName[$toolCall->getId()] = $toolCall->getName();
+                }
+            }
+
+            $content = match (true) {
+                $message instanceof UserMessage => self::convertUserMessage($message),
+                $message instanceof AssistantMessage => self::convertAssistantMessage($message),
+                $message instanceof ToolMessage => self::convertToolMessage($message, $toolCallIdToName),
+                default => null,
+            };
+
+            if ($content === null) {
+                continue;
+            }
+
+            $isToolResponse = $message instanceof ToolMessage;
+            if ($isToolResponse && $lastWasToolResponse) {
+                // Same function-call turn: append the parts to the previous content
+                $lastIndex = array_key_last($contents);
+                foreach ($content['parts'] as $part) {
+                    $contents[$lastIndex]['parts'][] = $part;
+                }
+            } else {
+                $contents[] = $content;
+            }
+            $lastWasToolResponse = $isToolResponse;
+        }
+
+        // Build system instruction in Gemini format
+        $systemInstruction = null;
+        if (! empty($systemInstructions)) {
+            $systemInstruction = [
+                'parts' => [
+                    ['text' => implode("\n\n", $systemInstructions)],
+                ],
+            ];
+        }
+
+        return [
+            'contents' => $contents,
+            'system_instruction' => $systemInstruction,
+        ];
+    }
+
+    /**
+     * Convert an AssistantMessage into a Gemini 'model' content.
+     *
+     * Truthy text content becomes a text part; every tool call becomes a functionCall
+     * part. When a tool call has no thought signature of its own, ThoughtSignatureCache
+     * is consulted and the result is written back to the tool call.
+     *
+     * @return array{role: string, parts: array}
+     */
+    private static function convertAssistantMessage(AssistantMessage $message): array
+    {
+        $parts = [];
+
+        $text = $message->getContent();
+        if ($text) {
+            $parts[] = ['text' => $text];
+        }
+
+        $toolCalls = $message->hasToolCalls() ? $message->getToolCalls() : [];
+        foreach ($toolCalls as $toolCall) {
+            $arguments = $toolCall->getArguments();
+            $functionCall = ['name' => $toolCall->getName()];
+
+            // Gemini API doesn't accept an empty args field, so args is only set for a
+            // non-empty associative array, cast to object for JSON encoding
+            if (! (empty($arguments) || array_is_list($arguments))) {
+                $functionCall['args'] = (object) $arguments;
+            }
+
+            // thought_signature is only available for Gemini 3 and 2.5 models with
+            // thinking mode. Priority: ToolCall object -> Cache
+            $signature = $toolCall->getThoughtSignature();
+            if (! $signature) {
+                $signature = ThoughtSignatureCache::get($toolCall->getId());
+                $toolCall->setThoughtSignature($signature);
+            }
+
+            $part = ['functionCall' => $functionCall];
+            if ($signature) {
+                $part['thoughtSignature'] = $signature;
+            }
+
+            $parts[] = $part;
+        }
+
+        return [
+            'role' => 'model', // Gemini uses 'model' instead of 'assistant'
+            'parts' => $parts,
+        ];
+    }
+
+    /**
+     * Convert ToolMessage to Gemini format.
+     *
+     * functionResponse.response has to be a JSON object. Content that decodes to a
+     * non-empty associative array is passed through; anything else (invalid JSON, a
+     * JSON scalar, a JSON list or an empty structure) is wrapped as ['result' => ...]
+     * so the encoded value is always an object.
+     *
+     * @param ToolMessage $message The tool message to convert
+     * @param array $toolCallIdToName Mapping of tool_call_id to function name
+     * @return array{role: string, parts: array}
+     */
+    private static function convertToolMessage(ToolMessage $message, array $toolCallIdToName = []): array
+    {
+        $content = $message->getContent();
+        $decoded = json_decode($content, true);
+
+        if (is_array($decoded) && $decoded !== [] && ! array_is_list($decoded)) {
+            // Already an object-shaped payload
+            $result = $decoded;
+        } elseif ($decoded === null) {
+            // Not valid JSON (or the literal null): keep the raw text
+            $result = ['result' => $content];
+        } else {
+            // Scalar, list or empty structure: wrap so it encodes as an object
+            $result = ['result' => $decoded];
+        }
+
+        // Get tool name - Gemini requires it to be non-empty
+        // Priority: 1) message.name 2) lookup by tool_call_id 3) fallback
+        $toolName = $message->getName();
+
+        if (empty($toolName)) {
+            // Try to find name by tool_call_id from previous assistant message
+            $toolCallId = $message->getToolCallId();
+            $toolName = $toolCallIdToName[$toolCallId] ?? null;
+
+            if (empty($toolName)) {
+                // Use tool_call_id as last resort fallback
+                $toolName = $toolCallId ?: 'function_response';
+            }
+        }
+
+        return [
+            'role' => 'user', // Tool responses come back as user role in Gemini
+            'parts' => [
+                [
+                    'functionResponse' => [
+                        'name' => $toolName,
+                        'response' => $result,
+                    ],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * Convert an image URL into a Gemini part.
+     *
+     * Remote URLs are first replaced by the result of
+     * ImageDownloader::downloadAndConvertToBase64(). A base64 data URL with a supported
+     * image MIME type becomes an inline_data part. Everything else becomes a text
+     * placeholder: other URLs are quoted as-is, while a data URL that cannot be inlined
+     * is reduced to its header so the base64 payload is never sent as prompt text.
+     *
+     * @return array an ['inline_data' => ...] or ['text' => ...] part
+     */
+    private static function convertImageUrl(string $imageUrl): array
+    {
+        // If it's a remote URL, download and convert to base64 first
+        if (ImageDownloader::isRemoteImageUrl($imageUrl)) {
+            $imageUrl = ImageDownloader::downloadAndConvertToBase64($imageUrl);
+        }
+
+        if (! str_starts_with($imageUrl, 'data:')) {
+            // Not a data URL: reference it as text
+            return [
+                'text' => "[Image: {$imageUrl}]",
+            ];
+        }
+
+        // Extract mime type and base64 data; only supported image types are inlined
+        if (preg_match('/^data:([^;]+);base64,(.+)$/', $imageUrl, $matches) && self::isImageMimeType($matches[1])) {
+            return [
+                'inline_data' => [
+                    'mime_type' => $matches[1],
+                    'data' => $matches[2],
+                ],
+            ];
+        }
+
+        // Unsupported or malformed data URL: keep only the part before the payload
+        $header = strstr($imageUrl, ',', true);
+        if ($header === false || strlen($header) > 128) {
+            $header = 'data:';
+        }
+
+        return [
+            'text' => "[Image: {$header} (payload omitted)]",
+        ];
+    }
+
+    /**
+     * Tell whether a MIME type is one of the image types accepted here for Gemini:
+     * image/png, image/jpeg, image/webp, image/heic, image/heif (case-insensitive).
+     */
+    private static function isImageMimeType(string $mimeType): bool
+    {
+        return match (strtolower($mimeType)) {
+            'image/png', 'image/jpeg', 'image/webp', 'image/heic', 'image/heif' => true,
+            default => false,
+        };
+    }
+
+    /**
+     * Build Gemini's generationConfig from the request parameters.
+     *
+     * Only values that differ from their "unset" state are emitted: a temperature other
+     * than 0.5, a positive max token count, non-empty stop sequences and a non-empty
+     * converted thinking config.
+     */
+    private static function buildGenerationConfig(ChatCompletionRequest $request): array
+    {
+        $generationConfig = [];
+
+        // 0.5 is treated as the default temperature and left out
+        $temperature = $request->getTemperature();
+        if ($temperature !== 0.5) {
+            $generationConfig['temperature'] = $temperature;
+        }
+
+        $maxTokens = $request->getMaxTokens();
+        if ($maxTokens > 0) {
+            $generationConfig['maxOutputTokens'] = $maxTokens;
+        }
+
+        $stopSequences = $request->getStop();
+        if (! empty($stopSequences)) {
+            $generationConfig['stopSequences'] = $stopSequences;
+        }
+
+        // According to API docs, thinkingConfig should be inside generationConfig
+        $thinking = $request->getThinking();
+        $thinkingConfig = empty($thinking) ? [] : self::convertThinkingConfig($thinking);
+        if ($thinkingConfig !== []) {
+            $generationConfig['thinkingConfig'] = $thinkingConfig;
+        }
+
+        return $generationConfig;
+    }
+
+    /**
+     * Convert thinking config to Gemini format.
+     *
+     * Only 'thinking_budget' is mapped (to 'thinkingBudget'); an empty array is returned
+     * when it is not set.
+     */
+    private static function convertThinkingConfig(array $thinking): array
+    {
+        return isset($thinking['thinking_budget'])
+            ? ['thinkingBudget' => $thinking['thinking_budget']]
+            : [];
+    }
+}
diff --git a/src/Api/Providers/Gemini/ResponseHandler.php b/src/Api/Providers/Gemini/ResponseHandler.php
new file mode 100644
index 0000000..66345be
--- /dev/null
+++ b/src/Api/Providers/Gemini/ResponseHandler.php
@@ -0,0 +1,274 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini;
+
+use GuzzleHttp\Psr7\Response;
+use Hyperf\Odin\Api\Response\Usage;
+use Psr\Http\Message\ResponseInterface;
+use stdClass;
+
+/**
+ * Response Handler for converting Gemini native format to OpenAI format.
+ */
+class ResponseHandler
+{
+    /**
+     * Convert Gemini response to PSR-7 Response in OpenAI format.
+     *
+     * @param array $geminiResponse Gemini native response
+     * @param string $model Model name
+     * @param int $cacheWriteTokens Tokens written to cache (0 if no cache created)
+     * @throws \JsonException When the converted payload cannot be JSON-encoded
+     */
+    public static function convertResponse(array $geminiResponse, string $model, int $cacheWriteTokens = 0): ResponseInterface
+    {
+        $openAIResponse = [
+            'id' => self::generateId(),
+            'object' => 'chat.completion',
+            'created' => time(),
+            'model' => $model,
+            'choices' => self::convertCandidates($geminiResponse['candidates'] ?? []),
+            'usage' => self::convertUsage($geminiResponse['usageMetadata'] ?? [], $cacheWriteTokens),
+        ];
+        $jsonResponse = json_encode($openAIResponse, JSON_THROW_ON_ERROR);
+
+        return new Response(
+            200,
+            ['Content-Type' => 'application/json'],
+            $jsonResponse
+        );
+    }
+
+    /**
+     * Convert Gemini candidates to OpenAI choices format.
+     */
+    private static function convertCandidates(array $candidates): array
+    {
+        $choices = [];
+
+        foreach ($candidates as $index => $candidate) {
+            $content = $candidate['content'] ?? [];
+            $message = self::convertContent($content);
+
+            // Add reasoning content if present (from thinking)
+            if (isset($candidate['thinkingTrace'])) {
+                $message['reasoning_content'] = self::extractThinkingContent($candidate['thinkingTrace']);
+            }
+
+            // Determine finish reason
+            // If there are tool calls, finish_reason should be 'tool_calls'
+            $finishReason = $candidate['finishReason'] ?? 'STOP';
+
+            // Check for tool calls first
+            $hasToolCalls = ! empty($message['tool_calls']);
+
+            // Log warning if finishMessage is present and it's not the expected tool call message
+            // Note: "Model generated function call(s)." is a normal message when tool calls are present
+            if (isset($candidate['finishMessage'])) {
+                $isNormalToolCallMessage = $hasToolCalls
+                    && $candidate['finishMessage'] === 'Model generated function call(s).';
+
+                if (! $isNormalToolCallMessage) {
+                    // Only log if it's an unexpected finish message
+                    error_log(sprintf(
+                        'Gemini response warning [finish_reason=%s, index=%d]: %s',
+                        $finishReason,
+                        $index,
+                        $candidate['finishMessage']
+                    ));
+                }
+            }
+
+            if ($hasToolCalls) {
+                $finishReason = 'tool_calls';
+            } else {
+                $finishReason = self::convertFinishReason($finishReason);
+            }
+
+            $choices[] = [
+                'index' => $index,
+                'message' => $message,
+                'finish_reason' => $finishReason,
+            ];
+        }
+
+        return $choices;
+    }
+
+    /**
+     * Convert Gemini content to OpenAI message format.
+     */
+    private static function convertContent(array $content): array
+    {
+        $message = [
+            'role' => 'assistant', // Gemini uses 'model', convert to 'assistant'
+        ];
+
+        $parts = $content['parts'] ?? [];
+        $textParts = [];
+        $toolCalls = [];
+
+        foreach ($parts as $part) {
+            // Handle text parts
+            if (isset($part['text'])) {
+                $textParts[] = $part['text'];
+            }
+
+            // Handle function calls (tool calls)
+            if (isset($part['functionCall'])) {
+                $functionCall = $part['functionCall'];
+                // Missing or empty args must serialize as a JSON object ("{}"), never as "[]"
+                $args = empty($functionCall['args']) ? new stdClass() : $functionCall['args'];
+                // Convert args to JSON string (OpenAI format)
+                $argumentsJson = json_encode($args, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
+
+                $toolCall = [
+                    'id' => self::generateToolCallId(),
+                    'type' => 'function',
+                    'function' => [
+                        'name' => $functionCall['name'] ?? '',
+                        'arguments' => $argumentsJson,
+                    ],
+                ];
+
+                // Preserve thought signature if present (required for Gemini 3 Pro multi-turn function
+                // calling). It sits on the part beside functionCall; the nested key is a fallback.
+                $thoughtSignature = $part['thoughtSignature'] ?? $functionCall['thoughtSignature'] ?? null;
+                if ($thoughtSignature !== null) {
+                    $toolCall['thought_signature'] = $thoughtSignature;
+                }
+                $toolCalls[] = $toolCall;
+            }
+        }
+
+        // Combine text parts
+        $message['content'] = implode('', $textParts);
+
+        // Add tool calls if present
+        if (! empty($toolCalls)) {
+            $message['tool_calls'] = $toolCalls;
+        }
+
+        return $message;
+    }
+
+    /**
+     * Convert Gemini usage metadata to OpenAI usage format.
+     *
+     * @param array $usageMetadata Gemini usage metadata
+     * @param int $cacheWriteTokens Tokens written to cache in this request (0 if no cache created)
+     */
+    private static function convertUsage(array $usageMetadata, int $cacheWriteTokens = 0): array
+    {
+        // Gemini fields: promptTokenCount (prompt tokens), cachedContentTokenCount (tokens read from cache).
+        // NOTE(review): the sum below assumes promptTokenCount excludes cached tokens; Google's
+        // UsageMetadata reference describes it as including them — confirm to avoid double counting.
+        $inputTokens = $usageMetadata['promptTokenCount'] ?? 0;
+        $cacheReadTokens = $usageMetadata['cachedContentTokenCount'] ?? 0;
+
+        // OpenAI format: prompt_tokens = total prompt tokens (including cache)
+        // Following AWS Bedrock's implementation for consistency
+        $promptTokens = $inputTokens + $cacheReadTokens + $cacheWriteTokens;
+
+        $candidatesTokens = $usageMetadata['candidatesTokenCount'] ?? 0;
+        $thoughtsTokens = $usageMetadata['thoughtsTokenCount'] ?? 0;
+
+        // completion_tokens includes both candidates tokens and thoughts tokens for billing
+        $completionTokens = $candidatesTokens + $thoughtsTokens;
+
+        // total_tokens = prompt_tokens + completion_tokens
+        $totalTokens = $promptTokens + $completionTokens;
+
+        $usage = [
+            'prompt_tokens' => $promptTokens,
+            'completion_tokens' => $completionTokens,
+            'total_tokens' => $totalTokens,
+        ];
+
+        // Build prompt_tokens_details
+        $promptTokensDetails = [];
+
+        // Add cached tokens if present (Gemini Context Caching - cache read)
+        if ($cacheReadTokens > 0) {
+            $promptTokensDetails['cached_tokens'] = $cacheReadTokens;
+            $promptTokensDetails['cache_read_input_tokens'] = $cacheReadTokens;
+        }
+
+        // Add cache write tokens if present (cache created in this request)
+        if ($cacheWriteTokens > 0) {
+            $promptTokensDetails['cache_write_input_tokens'] = $cacheWriteTokens;
+        }
+
+        // Add prompt_tokens_details if not empty
+        if (! empty($promptTokensDetails)) {
+            $usage['prompt_tokens_details'] = $promptTokensDetails;
+        }
+
+        // Build completion_tokens_details if thoughts tokens are present
+        // Record reasoning tokens separately for transparency (but already included in completion_tokens)
+        if ($thoughtsTokens > 0) {
+            $usage['completion_tokens_details'] = [
+                'reasoning_tokens' => $thoughtsTokens,
+            ];
+        }
+
+        return $usage;
+    }
+
+    /**
+     * Convert Gemini finish reason to OpenAI format.
+     */
+    private static function convertFinishReason(string $finishReason): string
+    {
+        return match ($finishReason) {
+            'MAX_TOKENS' => 'length',
+            // Reasons meaning the output was blocked or withheld by a filter
+            'SAFETY', 'RECITATION', 'BLOCKLIST', 'PROHIBITED_CONTENT', 'SPII', 'IMAGE_SAFETY' => 'content_filter',
+            // STOP, OTHER and unrecognized reasons read as a normal stop; MALFORMED_FUNCTION_CALL
+            // (tool call format error) is also reported as stop
+            default => 'stop',
+        };
+    }
+
+    /**
+     * Extract thinking content from thinkingTrace.
+     */
+    private static function extractThinkingContent(array $thinkingTrace): string
+    {
+        $thoughts = [];
+
+        foreach ($thinkingTrace as $trace) {
+            if (isset($trace['thought'])) {
+                $thoughts[] = $trace['thought'];
+            }
+        }
+
+        return implode("\n", $thoughts);
+    }
+
+    /**
+     * Generate a unique ID for the response.
+     */
+    private static function generateId(): string
+    {
+        return 'chatcmpl-' . bin2hex(random_bytes(12));
+    }
+
+    /**
+     * Generate a unique tool call ID.
+     */
+    private static function generateToolCallId(): string
+    {
+        return 'call_' . bin2hex(random_bytes(12));
+    }
+}
diff --git a/src/Api/Providers/Gemini/StreamConverter.php b/src/Api/Providers/Gemini/StreamConverter.php
new file mode 100644
index 0000000..9d47a61
--- /dev/null
+++ b/src/Api/Providers/Gemini/StreamConverter.php
@@ -0,0 +1,639 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini;
+
+use Generator;
+use IteratorAggregate;
+use JsonException;
+use Psr\Http\Message\ResponseInterface;
+use Psr\Log\LoggerInterface;
+use stdClass;
+use Traversable;
+
+/**
+ * Stream Converter for converting Gemini streaming response to OpenAI format.
+ */
+class StreamConverter implements IteratorAggregate
+{
+    private ResponseInterface $response;
+
+    private ?LoggerInterface $logger;
+
+    private string $model;
+
+    /**
+     * Track tool calls by candidate index and tool call index.
+     * Structure: [candidateIndex => [toolCallIndex => [
+     *   'id' => string,
+     *   'name' => string,
+     *   'args' => string,
+     *   'args_array' => array,
+     *   'is_complete' => bool,
+     *   'chunk_count' => int
+     * ]]].
+     */
+    private array $toolCallTracker = [];
+
+    /**
+     * Track whether each candidate has had tool calls.
+     * Used to determine correct finish_reason when finishReason arrives.
+     * Structure: [candidateIndex => bool].
+     */
+    private array $candidateHasToolCalls = [];
+
+    /**
+     * Strategy for handling function call arguments in streaming mode.
+     * - 'complete': Each chunk contains complete args (Gemini's current behavior)
+     * - 'incremental': Each chunk contains partial args that need to be merged
+     * - 'auto': Automatically detect based on args changes.
+     */
+    private string $argsStrategy = 'auto';
+
+    private int $cacheWriteTokens;
+
+    /** Completion id shared by all chunks of this stream; generated on first use. */
+    private ?string $streamId = null;
+
+    /** Creation timestamp shared by all chunks of this stream; set on first use. */
+    private ?int $createdAt = null;
+
+    public function __construct(
+        ResponseInterface $response,
+        ?LoggerInterface $logger,
+        string $model,
+        int $cacheWriteTokens = 0
+    ) {
+        $this->response = $response;
+        $this->logger = $logger;
+        $this->model = $model;
+        $this->cacheWriteTokens = $cacheWriteTokens;
+    }
+
+    /**
+     * Get iterator for streaming chunks.
+     */
+    public function getIterator(): Traversable
+    {
+        return $this->parseStream();
+    }
+
+    /**
+     * Parse streaming response and convert to OpenAI format.
+     */
+    private function parseStream(): Generator
+    {
+        $stream = $this->response->getBody();
+        $buffer = '';
+        $chunkCount = 0;
+
+        $this->logger?->info('GeminiStreamProcessingStarted', [
+            'model' => $this->model,
+        ]);
+
+        while (! $stream->eof()) {
+            $chunk = $stream->read(8192);
+            if ($chunk === '') {
+                continue;
+            }
+
+            $buffer .= $chunk;
+
+            // Process every complete (newline-terminated) line in the buffer
+            while (($pos = strpos($buffer, "\n")) !== false) {
+                $openAIChunk = $this->convertLine(substr($buffer, 0, $pos));
+                $buffer = substr($buffer, $pos + 1);
+
+                if ($openAIChunk === false) {
+                    $this->logger?->info('GeminiStreamCompleted', [
+                        'total_chunks' => $chunkCount,
+                    ]);
+                    $buffer = ''; // nothing after the done marker is processed
+                    break 2;
+                }
+
+                if ($openAIChunk !== null) {
+                    ++$chunkCount;
+                    yield $openAIChunk;
+                }
+            }
+        }
+
+        // The last event may arrive without a trailing newline; convert it instead of dropping it
+        $openAIChunk = $buffer === '' ? null : $this->convertLine($buffer);
+        if (is_array($openAIChunk)) {
+            ++$chunkCount;
+            yield $openAIChunk;
+        }
+
+        $this->logger?->info('GeminiStreamFinished', [
+            'total_chunks' => $chunkCount,
+        ]);
+
+        // Cache thought signatures from completed tool calls
+        $this->cacheThoughtSignatures();
+    }
+
+    /**
+     * Convert one raw stream line into an OpenAI chunk.
+     *
+     * @return null|array|false Chunk, null when the line produces nothing, false on the [DONE] marker
+     */
+    private function convertLine(string $line): array|false|null
+    {
+        $line = trim($line);
+
+        // Strip the SSE field name; the space after "data:" is optional
+        if (str_starts_with($line, 'data:')) {
+            $line = ltrim(substr($line, 5));
+        }
+
+        if ($line === '') {
+            return null;
+        }
+        if ($line === '[DONE]') {
+            return false;
+        }
+
+        try {
+            $geminiChunk = json_decode($line, true, 512, JSON_THROW_ON_ERROR);
+        } catch (JsonException $e) {
+            $this->logger?->warning('GeminiStreamJsonDecodeError', [
+                'error' => $e->getMessage(),
+                'line' => substr($line, 0, 200),
+            ]);
+            return null;
+        }
+
+        return is_array($geminiChunk) ? $this->convertStreamChunk($geminiChunk) : null;
+    }
+
+    /**
+     * Convert a single Gemini stream chunk to OpenAI format.
+     */
+    private function convertStreamChunk(array $geminiChunk): ?array
+    {
+        $candidates = $geminiChunk['candidates'] ?? [];
+
+        if (empty($candidates)) {
+            return null;
+        }
+
+        $choices = [];
+        foreach ($candidates as $index => $candidate) {
+            $delta = $this->convertDelta($candidate['content'] ?? [], $index);
+
+            $choice = [
+                'index' => $index,
+                'delta' => $delta,
+                'finish_reason' => null,
+            ];
+
+            // Add finish reason if present
+            if (isset($candidate['finishReason'])) {
+                $finishReason = $candidate['finishReason'];
+
+                // Check if this candidate has tool calls
+                $hasToolCalls = ! empty($delta['tool_calls']) || ! empty($this->candidateHasToolCalls[$index]);
+
+                // Log warning if finishMessage is present, and it's not the expected tool call message
+                // Note: "Model generated function call(s)." is a normal message when tool calls are present
+                if (isset($candidate['finishMessage'])) {
+                    $isNormalToolCallMessage = $hasToolCalls
+                        && $candidate['finishMessage'] === 'Model generated function call(s).';
+
+                    if (! $isNormalToolCallMessage) {
+                        // Only log if it's an unexpected finish message
+                        $this->logger?->warning('GeminiStreamFinishWithError', [
+                            'finish_reason' => $finishReason,
+                            'finish_message' => $candidate['finishMessage'],
+                            'candidate_index' => $index,
+                        ]);
+                    }
+                }
+
+                // If there are tool calls in current delta OR this candidate has had tool calls before,
+                // finish_reason should be 'tool_calls'
+                $choice['finish_reason'] = $hasToolCalls
+                    ? 'tool_calls'
+                    : $this->convertFinishReason($finishReason);
+            }
+
+            $choices[] = $choice;
+        }
+
+        $chunk = [
+            'id' => $this->streamId ??= 'chatcmpl-' . bin2hex(random_bytes(12)),
+            'object' => 'chat.completion.chunk',
+            'created' => $this->createdAt ??= time(),
+            'model' => $this->model,
+            'choices' => $choices,
+        ];
+
+        // Add usage if present (final chunk)
+        if (isset($geminiChunk['usageMetadata'])) {
+            $chunk['usage'] = $this->convertUsage($geminiChunk['usageMetadata']);
+        }
+
+        return $chunk;
+    }
+
+    /**
+     * Convert Gemini content to OpenAI delta format.
+     *
+     * @param array $content Gemini content
+     * @param int $candidateIndex Candidate index for tracking tool calls
+     */
+    private function convertDelta(array $content, int $candidateIndex): array
+    {
+        $delta = [];
+        $parts = $content['parts'] ?? [];
+
+        // Make sure the per-candidate tracking entries exist
+        $this->toolCallTracker[$candidateIndex] ??= [];
+        $this->candidateHasToolCalls[$candidateIndex] ??= false;
+
+        foreach ($parts as $part) {
+            // Handle text delta
+            if (isset($part['text'])) {
+                $delta['content'] = ($delta['content'] ?? '') . $part['text'];
+            }
+
+            // Handle function call delta
+            if (isset($part['functionCall'])) {
+                // Pass the entire part (which includes thoughtSignature if present)
+                $toolCallDelta = $this->processFunctionCall($part, $candidateIndex);
+
+                // A nameless call yields null and is skipped, so tool_calls never appears empty
+                if ($toolCallDelta !== null) {
+                    $delta['tool_calls'][] = $toolCallDelta;
+                    // Mark that this candidate has tool calls
+                    $this->candidateHasToolCalls[$candidateIndex] = true;
+                }
+            }
+        }
+
+        // A chunk with neither text nor tool calls still carries the assistant role
+        if (empty($delta)) {
+            $delta['role'] = 'assistant';
+        }
+
+        return $delta;
+    }
+
+    /**
+     * Convert Gemini usage metadata to OpenAI usage format.
+     */
+    private function convertUsage(array $usageMetadata): array
+    {
+        // Gemini fields: promptTokenCount (prompt tokens), cachedContentTokenCount (tokens read from cache).
+        // NOTE(review): the sum below assumes promptTokenCount excludes cached tokens; Google's
+        // UsageMetadata reference describes it as including them — confirm to avoid double counting.
+        $inputTokens = $usageMetadata['promptTokenCount'] ?? 0;
+        $cacheReadTokens = $usageMetadata['cachedContentTokenCount'] ?? 0;
+
+        // OpenAI format: prompt_tokens = total prompt tokens (including cache)
+        // Following AWS Bedrock's implementation for consistency
+        $promptTokens = $inputTokens + $cacheReadTokens + $this->cacheWriteTokens;
+
+        $candidatesTokens = $usageMetadata['candidatesTokenCount'] ?? 0;
+        $thoughtsTokens = $usageMetadata['thoughtsTokenCount'] ?? 0;
+
+        // completion_tokens includes both candidates tokens and thoughts tokens for billing
+        $completionTokens = $candidatesTokens + $thoughtsTokens;
+
+        // total_tokens = prompt_tokens + completion_tokens
+        $totalTokens = $promptTokens + $completionTokens;
+
+        $usage = [
+            'prompt_tokens' => $promptTokens,
+            'completion_tokens' => $completionTokens,
+            'total_tokens' => $totalTokens,
+        ];
+
+        // Build prompt_tokens_details
+        $promptTokensDetails = [];
+
+        // Add cached tokens if present (Gemini Context Caching - cache read)
+        if ($cacheReadTokens > 0) {
+            $promptTokensDetails['cached_tokens'] = $cacheReadTokens;
+            $promptTokensDetails['cache_read_input_tokens'] = $cacheReadTokens;
+        }
+
+        // Add cache write tokens if present (cache created in this request)
+        if ($this->cacheWriteTokens > 0) {
+            $promptTokensDetails['cache_write_input_tokens'] = $this->cacheWriteTokens;
+        }
+
+        // Add prompt_tokens_details if not empty
+        if (! empty($promptTokensDetails)) {
+            $usage['prompt_tokens_details'] = $promptTokensDetails;
+        }
+
+        // Build completion_tokens_details if thoughts tokens are present
+        // Record reasoning tokens separately for transparency (but already included in completion_tokens)
+        if ($thoughtsTokens > 0) {
+            $usage['completion_tokens_details'] = [
+                'reasoning_tokens' => $thoughtsTokens,
+            ];
+        }
+
+        return $usage;
+    }
+
+    /**
+     * Convert Gemini finish reason to OpenAI format.
+     */
+    private function convertFinishReason(string $finishReason): string
+    {
+        return match ($finishReason) {
+            'MAX_TOKENS' => 'length',
+            // Reasons meaning the output was blocked or withheld by a filter
+            'SAFETY', 'RECITATION', 'BLOCKLIST', 'PROHIBITED_CONTENT', 'SPII', 'IMAGE_SAFETY' => 'content_filter',
+            // STOP, OTHER, MALFORMED_FUNCTION_CALL and unrecognized reasons read as a normal stop
+            default => 'stop',
+        };
+    }
+
+    /**
+     * Process a function call from Gemini stream chunk.
+     * Handles both complete and incremental argument updates intelligently.
+     *
+     * @param int $candidateIndex Candidate index for tracking
+     * @return null|array The tool call delta in OpenAI format, or null if invalid
+     */
+    private function processFunctionCall(array $part, int $candidateIndex): ?array
+    {
+        // Extract functionCall from part
+        $functionCall = $part['functionCall'] ?? [];
+        $functionName = $functionCall['name'] ?? '';
+        if ($functionName === '') {
+            $this->logger?->warning('GeminiStreamFunctionCallMissingName', [
+                'part' => $part,
+            ]);
+            return null;
+        }
+
+        $functionArgs = $functionCall['args'] ?? new stdClass();
+
+        // Find or create tool call tracker
+        $toolCallIndex = $this->findOrCreateToolCall($candidateIndex, $functionName);
+
+        // Process and merge arguments based on strategy
+        $mergedArgs = $this->mergeArguments($candidateIndex, $toolCallIndex, $functionArgs);
+
+        // Extract thoughtSignature from part (it's at the same level as functionCall in Gemini response)
+        $thoughtSignature = $part['thoughtSignature'] ?? null;
+
+        // Store thought signature in tracker if present (for caching later)
+        if ($thoughtSignature !== null) {
+            $this->toolCallTracker[$candidateIndex][$toolCallIndex]['thought_signature'] = $thoughtSignature;
+        }
+
+        // Build tool call delta
+        $toolCallDelta = [
+            'index' => $toolCallIndex,
+            'id' => $this->toolCallTracker[$candidateIndex][$toolCallIndex]['id'],
+            'type' => 'function',
+            'function' => [
+                'name' => $functionName,
+                'arguments' => $mergedArgs,
+            ],
+        ];
+
+        // Preserve thought signature if present (Gemini-specific)
+        // Required for Gemini 3 Pro multi-turn function calling
+        if ($thoughtSignature !== null) {
+            $toolCallDelta['thought_signature'] = $thoughtSignature;
+        }
+
+        return $toolCallDelta;
+    }
+
+    /**
+     * Find existing tool call or create a new one.
+     *
+     * @param int $candidateIndex Candidate index
+     * @param string $functionName Function name
+     * @return int Tool call index
+     */
+    private function findOrCreateToolCall(int $candidateIndex, string $functionName): int
+    {
+        // Find existing tool call by name.
+        // NOTE(review): two distinct calls to the same function share one entry here — confirm intended.
+        foreach ($this->toolCallTracker[$candidateIndex] as $idx => $tracked) {
+            if ($tracked['name'] === $functionName) {
+                return $idx;
+            }
+        }
+
+        // Create new tool call
+        $toolCallIndex = count($this->toolCallTracker[$candidateIndex]);
+        $this->toolCallTracker[$candidateIndex][$toolCallIndex] = [
+            'id' => 'call_' . bin2hex(random_bytes(12)),
+            'name' => $functionName,
+            'args' => '{}',
+            'args_array' => [],
+            'is_complete' => false,
+            'chunk_count' => 0,
+        ];
+
+        $this->logger?->debug('GeminiStreamNewToolCall', [
+            'candidate_index' => $candidateIndex,
+            'tool_call_index' => $toolCallIndex,
+            'function_name' => $functionName,
+        ]);
+
+        return $toolCallIndex;
+    }
+
+    /**
+     * Merge arguments intelligently based on strategy.
+     * Supports both complete replacement and incremental merging.
+     *
+     * @param int $candidateIndex Candidate index
+     * @param int $toolCallIndex Tool call index
+     * @param mixed $newArgs New arguments from current chunk
+     * @return string JSON string of merged arguments
+     */
+    private function mergeArguments(int $candidateIndex, int $toolCallIndex, mixed $newArgs): string
+    {
+        $tracker = &$this->toolCallTracker[$candidateIndex][$toolCallIndex];
+        ++$tracker['chunk_count'];
+
+        // Convert new args to array
+        $newArgsArray = is_object($newArgs) ? (array) $newArgs : (is_array($newArgs) ? $newArgs : []);
+
+        // Empty args handling
+        if (empty($newArgsArray)) {
+            $this->logger?->debug('GeminiStreamEmptyArgs', [
+                'candidate_index' => $candidateIndex,
+                'tool_call_index' => $toolCallIndex,
+                'chunk_count' => $tracker['chunk_count'],
+            ]);
+            return $tracker['args'];
+        }
+
+        $previousArgsArray = $tracker['args_array'];
+
+        // Strategy: auto-detect or use configured strategy
+        $strategy = $this->detectStrategy($previousArgsArray, $newArgsArray, $tracker['chunk_count']);
+
+        $mergedArgsArray = match ($strategy) {
+            'incremental' => $this->mergeIncremental($previousArgsArray, $newArgsArray, $candidateIndex, $toolCallIndex),
+            default => $this->mergeComplete($previousArgsArray, $newArgsArray, $candidateIndex, $toolCallIndex),
+        };
+
+        // Update tracker
+        $tracker['args_array'] = $mergedArgsArray;
+        $tracker['args'] = json_encode($mergedArgsArray, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
+
+        // Check if args look complete (heuristic: no empty required fields)
+        $tracker['is_complete'] = ! empty($mergedArgsArray);
+
+        return $tracker['args'];
+    }
+
+    /**
+     * Detect the best strategy for merging arguments.
+     *
+     * @param array $previousArgs Previous arguments
+     * @param array $newArgs New arguments
+     * @param int $chunkCount Number of chunks received
+     * @return string Strategy: 'complete' or 'incremental'
+     */
+    private function detectStrategy(array $previousArgs, array $newArgs, int $chunkCount): string
+    {
+        // If strategy is explicitly set, use it
+        if ($this->argsStrategy !== 'auto') {
+            return $this->argsStrategy;
+        }
+
+        // First chunk: always use complete strategy
+        if ($chunkCount === 1) {
+            return 'complete';
+        }
+
+        // If new args have fewer keys than previous, likely complete replacement
+        if (count($newArgs) < count($previousArgs)) {
+            return 'complete';
+        }
+
+        // If new args have all the keys from previous args plus more, likely incremental
+        $previousKeys = array_keys($previousArgs);
+        $newKeys = array_keys($newArgs);
+        $hasAllPreviousKeys = empty(array_diff($previousKeys, $newKeys));
+
+        if ($hasAllPreviousKeys && count($newKeys) > count($previousKeys)) {
+            $this->logger?->debug('GeminiStreamDetectedIncremental', [
+                'previous_keys' => $previousKeys,
+                'new_keys' => $newKeys,
+            ]);
+            return 'incremental';
+        }
+
+        // Default to complete (Gemini's observed behavior)
+        return 'complete';
+    }
+
+    /**
+     * Merge arguments using complete replacement strategy.
+     * The new arguments completely replace the old ones.
+     *
+     * @param array $previousArgs Previous arguments
+     * @param array $newArgs New arguments
+     * @param int $candidateIndex Candidate index for logging
+     * @param int $toolCallIndex Tool call index for logging
+     * @return array Merged arguments
+     */
+    private function mergeComplete(array $previousArgs, array $newArgs, int $candidateIndex, int $toolCallIndex): array
+    {
+        // Only log when the arguments actually changed
+        if ($previousArgs !== $newArgs) {
+            $this->logger?->debug('GeminiStreamArgsReplaced', [
+                'candidate_index' => $candidateIndex,
+                'tool_call_index' => $toolCallIndex,
+                'previous_args' => $previousArgs,
+                'new_args' => $newArgs,
+                'strategy' => 'complete',
+            ]);
+        }
+
+        // Complete replacement: use new args entirely
+        return $newArgs;
+    }
+
+    /**
+     * Merge arguments using incremental strategy.
+     * New arguments are merged into existing ones (deep merge).
+     *
+     * @param array $previousArgs Previous arguments
+     * @param array $newArgs New arguments to merge in
+     * @param int $candidateIndex Candidate index for logging
+     * @param int $toolCallIndex Tool call index for logging
+     * @return array Merged arguments
+     */
+    private function mergeIncremental(array $previousArgs, array $newArgs, int $candidateIndex, int $toolCallIndex): array
+    {
+        $merged = $this->deepMergeArrays($previousArgs, $newArgs);
+
+        $this->logger?->debug('GeminiStreamArgsIncremented', [
+            'candidate_index' => $candidateIndex,
+            'tool_call_index' => $toolCallIndex,
+            'previous_args' => $previousArgs,
+            'new_args' => $newArgs,
+            'merged_args' => $merged,
+            'strategy' => 'incremental',
+        ]);
+
+        return $merged;
+    }
+
+    /**
+     * Deep merge two arrays recursively.
+     * New values override old values at the same path.
+     *
+     * @param array $array1 First array
+     * @param array $array2 Second array (takes precedence)
+     * @return array Merged array
+     */
+    private function deepMergeArrays(array $array1, array $array2): array
+    {
+        $merged = $array1;
+
+        foreach ($array2 as $key => $value) {
+            if (is_array($value) && isset($merged[$key]) && is_array($merged[$key])) {
+                // Recursively merge arrays
+                $merged[$key] = $this->deepMergeArrays($merged[$key], $value);
+            } else {
+                // Override with new value
+                $merged[$key] = $value;
+            }
+        }
+
+        return $merged;
+    }
+
+    /**
+     * Cache thought signatures from all tool calls tracked during streaming.
+     */
+    private function cacheThoughtSignatures(): void
+    {
+        foreach ($this->toolCallTracker as $toolCalls) {
+            foreach ($toolCalls as $toolCall) {
+                if (isset($toolCall['thought_signature'])) {
+                    ThoughtSignatureCache::store($toolCall['id'], $toolCall['thought_signature']);
+                }
+            }
+        }
+    }
+}
diff --git a/src/Api/Providers/Gemini/ThoughtSignatureCache.php b/src/Api/Providers/Gemini/ThoughtSignatureCache.php
new file mode 100644
index 0000000..ef473f7
--- /dev/null
+++ b/src/Api/Providers/Gemini/ThoughtSignatureCache.php
@@ -0,0 +1,96 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Providers\Gemini;
+
+use Hyperf\Context\ApplicationContext;
+use Hyperf\Odin\Exception\RuntimeException;
+use Psr\SimpleCache\CacheInterface;
+
+/**
+ * Manager for Gemini thought signatures.
+ *
+ * Thought signatures are cryptographic representations of the model's internal thinking process,
+ * used to preserve reasoning context across multi-turn interactions.
+ *
+ * @see https://ai.google.dev/gemini-api/docs/thought-signatures
+ */
+class ThoughtSignatureCache
+{
+    private const CACHE_PREFIX = 'gemini:thought_signature:';
+
+    private const CACHE_TTL = 3600; // passed as the PSR-16 TTL, i.e. seconds
+
+    /**
+     * Store a thought signature for a tool call.
+     *
+     * @param string $toolCallId The tool call ID
+     * @param string $thoughtSignature The thought signature from Gemini response
+     */
+    public static function store(string $toolCallId, string $thoughtSignature): void
+    {
+        self::getCacheDriver()->set(self::getCacheKey($toolCallId), $thoughtSignature, self::CACHE_TTL);
+    }
+
+    /**
+     * Retrieve a thought signature for a tool call.
+     *
+     * @param string $toolCallId The tool call ID
+     * @return null|string The thought signature, or null if not found
+     */
+    public static function get(string $toolCallId): ?string
+    {
+        $signature = self::getCacheDriver()->get(self::getCacheKey($toolCallId));
+        return is_string($signature) ? $signature : null;
+    }
+
+    /**
+     * Delete a thought signature for a tool call.
+     *
+     * @param string $toolCallId The tool call ID
+     */
+    public static function delete(string $toolCallId): void
+    {
+        self::getCacheDriver()->delete(self::getCacheKey($toolCallId));
+    }
+
+    /**
+     * Check if cache is available.
+     * Returns false rather than throwing when no cache driver can be resolved.
+     */
+    public static function isAvailable(): bool
+    {
+        try {
+            self::getCacheDriver();
+        } catch (\Throwable) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Get cache key for a tool call ID.
+     */
+    private static function getCacheKey(string $toolCallId): string
+    {
+        return self::CACHE_PREFIX . $toolCallId;
+    }
+
+    private static function getCacheDriver(): CacheInterface
+    {
+        $cache = ApplicationContext::getContainer()->get(CacheInterface::class);
+        if (! $cache instanceof CacheInterface) {
+            throw new RuntimeException('CacheInterface must have a valid cache driver instance.');
+        }
+        return $cache;
+    }
+}
diff --git a/src/Api/Request/ChatCompletionRequest.php b/src/Api/Request/ChatCompletionRequest.php
index 0c45b29..f1ad332 100644
--- a/src/Api/Request/ChatCompletionRequest.php
+++ b/src/Api/Request/ChatCompletionRequest.php
@@ -19,10 +19,12 @@
 use Hyperf\Odin\Exception\LLMException\LLMModelException;
 use Hyperf\Odin\Message\Role;
 use Hyperf\Odin\Message\SystemMessage;
+use Hyperf\Odin\Message\UserMessage;
 use Hyperf\Odin\Tool\Definition\ToolDefinition;
 use Hyperf\Odin\Utils\MessageUtil;
 use Hyperf\Odin\Utils\TokenEstimator;
 use Hyperf\Odin\Utils\ToolUtil;
+use Hyperf\Odin\Utils\VisionMessageValidator;
 
 class ChatCompletionRequest implements RequestInterface
 {
@@ -95,6 +97,9 @@ public function validate(): void
 
         // 验证消息序列是否符合API规范
         $this->validateMessageSequence();
+
+        // 验证视觉理解消息中的图片格式
+        $this->validateImageFormats();
     }
 
     public function createOptions(): array
@@ -147,14 +152,18 @@ public function createOptions(): array
     /**
      * 为所有消息和工具计算token估算
      * 对于已经有估算的消息不会重新计算.
+     * 优先使用实际返回的 tokens（如果已设置），否则使用估算值.
      *
      * @return int 所有消息和工具的总token数量
      */
     public function calculateTokenEstimates(): int
     {
-        if ($this->totalTokenEstimate) {
+        // 如果已经有实际的 tokens（从 usage 中获取），直接返回
+        if ($this->totalTokenEstimate !== null) {
             return $this->totalTokenEstimate;
         }
+
+        // 否则进行估算
         $estimator = new TokenEstimator($this->model);
         $totalTokens = 0;
 
@@ -185,6 +194,34 @@ public function calculateTokenEstimates(): int
         return $totalTokens;
     }
 
+    /**
+     * Replace the token estimate with the actual counts reported in the API usage data.
+     * Actual counts take priority because they are more accurate than the estimate.
+     *
+     * @param int $promptTokens Actual prompt (input) token count
+     * @param null|int $toolsTokens Actual tools token count, when it is reported separately
+     */
+    public function updateTokenEstimateFromUsage(int $promptTokens, ?int $toolsTokens = null): void
+    {
+        // The actual prompt token count becomes the total estimate
+        $this->totalTokenEstimate = $promptTokens;
+
+        // Only overwrite the tools estimate when a tools token count was provided
+        if ($toolsTokens !== null) {
+            $this->toolsTokenEstimate = $toolsTokens;
+        }
+    }
+
+    public function setFilterMessages(?array $filterMessages): void
+    {
+        $this->filterMessages = $filterMessages;
+    }
+
+    public function setMessages(array $messages): void
+    {
+        $this->messages = $messages;
+    }
+
     public function setModel(string $model): void
     {
         $this->model = $model;
@@ -346,6 +383,11 @@ public function getTokenEstimateDetail(): array
         ];
     }
 
+    public function setTools(array $tools): void
+    {
+        $this->tools = $tools;
+    }
+
     public function toArray(): array
     {
         return [
@@ -549,4 +591,19 @@ private function truncateContent(string $content, int $maxLength = 100): string
 
         return mb_substr($content, 0, $maxLength - 3) . '...';
     }
+
+    /**
+     * Validate the image formats used in vision messages.
+     *
+     * Every UserMessage in the request is passed to VisionMessageValidator::validateUserMessage().
+     * NOTE(review): the original note says only URLs with an unsupported file extension throw — confirm there.
+     */
+    private function validateImageFormats(): void
+    {
+        foreach ($this->messages as $message) {
+            if ($message instanceof UserMessage) {
+                VisionMessageValidator::validateUserMessage($message);
+            }
+        }
+    }
 }
diff --git a/src/Api/RequestOptions/ApiOptions.php b/src/Api/RequestOptions/ApiOptions.php
index f5d40b4..f065824 100644
--- a/src/Api/RequestOptions/ApiOptions.php
+++ b/src/Api/RequestOptions/ApiOptions.php
@@ -27,8 +27,9 @@ class ApiOptions
         'read' => 300.0,      // 读取超时
         'total' => 350.0,     // 总体超时
         'thinking' => 120.0,  // 思考超时（初始响应前的时间）
-        'stream_chunk' => 30.0, // 流式响应块间超时
+        'stream_chunk' => 60.0, // 流式响应块间超时
         'stream_first' => 60.0, // 流式响应首个块超时
+        'stream_total' => 600.0, // 流式总超时
     ];
 
     /**
@@ -52,6 +53,7 @@ class ApiOptions
     protected array $logging = [
         'enable_whitelist' => false,
         'whitelist_fields' => [],
+        'max_text_length' => 2000,
     ];
 
     protected int $networkRetryCount = 0;
@@ -167,6 +169,12 @@ public function getStreamChunkTimeout(): float
         return $this->timeout['stream_chunk'];
     }
 
+    public function setStreamChunkTimeout(float $timeout): self
+    {
+        $this->timeout['stream_chunk'] = $timeout;
+        return $this;
+    }
+
     /**
      * 获取流式响应首个块超时.
      */
@@ -175,6 +183,20 @@ public function getStreamFirstChunkTimeout(): float
         return $this->timeout['stream_first'];
     }
 
+    public function setStreamFirstChunkTimeout(float $timeout): self
+    {
+        $this->timeout['stream_first'] = $timeout;
+        return $this;
+    }
+
+    /**
+     * Get the overall timeout for a streaming response.
+     */
+    public function getStreamTotalTimeout(): float
+    {
+        return $this->timeout['stream_total'];
+    }
+
     /**
      * 获取自定义错误映射规则.
      */
@@ -240,6 +262,14 @@ public function isLoggingWhitelistEnabled(): bool
         return (bool) ($this->logging['enable_whitelist'] ?? false);
     }
 
+    /**
+     * Get the maximum text length allowed in log output (falls back to 2000 when unset).
+     */
+    public function getLoggingMaxTextLength(): int
+    {
+        return (int) ($this->logging['max_text_length'] ?? 2000);
+    }
+
     /**
      * 获取网络重试次数.
      */
diff --git a/src/Api/Response/ChatCompletionResponse.php b/src/Api/Response/ChatCompletionResponse.php
index dd5b42f..2460996 100644
--- a/src/Api/Response/ChatCompletionResponse.php
+++ b/src/Api/Response/ChatCompletionResponse.php
@@ -65,7 +65,7 @@ public function getCreated(): ?int
         return $this->created;
     }
 
-    public function setCreated(null|int|string $created): self
+    public function setCreated(int|string|null $created): self
     {
         $this->created = (int) $created;
         return $this;
diff --git a/src/Api/Response/ChatCompletionStreamResponse.php b/src/Api/Response/ChatCompletionStreamResponse.php
index 9ee7536..3e91207 100644
--- a/src/Api/Response/ChatCompletionStreamResponse.php
+++ b/src/Api/Response/ChatCompletionStreamResponse.php
@@ -20,6 +20,8 @@
 use Hyperf\Odin\Exception\LLMException;
 use Hyperf\Odin\Message\AssistantMessage;
 use Hyperf\Odin\Utils\EventUtil;
+use Hyperf\Odin\Utils\LoggingConfigHelper;
+use Hyperf\Odin\Utils\TimeUtil;
 use IteratorAggregate;
 use JsonException;
 use Psr\Http\Message\ResponseInterface as PsrResponseInterface;
@@ -133,7 +135,7 @@ public function getCreated(): ?int
         return $this->created;
     }
 
-    public function setCreated(null|int|string $created): self
+    public function setCreated(int|string|null $created): self
     {
         $this->created = (int) $created;
         return $this;
@@ -166,17 +168,49 @@ protected function parseContent(): self
         return $this;
     }
 
+    /**
+     * Get the number of chunks between stream-processing checkpoints.
+     */
+    protected function getCheckpointInterval(): int
+    {
+        return 200;
+    }
+
+    /**
+     * Decide whether a checkpoint log entry is due for the given chunk count.
+     */
+    protected function shouldLogCheckpoint(int $chunkCount): bool
+    {
+        // Chunk counts up to 5 are always logged.
+        $isEarlyChunk = $chunkCount <= 5;
+
+        // Past that point only every getCheckpointInterval()-th chunk is logged;
+        // the short-circuit keeps the modulo from running for early chunks.
+        return $isEarlyChunk
+            || $chunkCount % $this->getCheckpointInterval() === 0;
+    }
+
     /**
      * 使用自定义迭代器（IteratorAggregate）处理流数据.
      */
     private function iterateWithCustomIterator(): Generator
     {
+        $startTime = microtime(true);
+        $chunkCount = 0;
+        $lastLogTime = $startTime;
+        $lastChunkData = null;
+
         try {
-            $startTime = microtime(true);
+            $this->logger?->info('StreamProcessingStartedWithCustomIterator', [
+                'iterator_class' => get_class($this->iterator),
+                'start_time' => $startTime,
+            ]);
+
             foreach ($this->iterator->getIterator() as $data) {
+                ++$chunkCount;
                 // 处理结束标记
                 if ($data === '[DONE]' || $data === json_encode('[DONE]')) {
-                    $this->logger?->debug('Stream completed');
+                    $this->logger?->debug('StreamCompleted');
                     break;
                 }
 
@@ -185,33 +219,81 @@ private function iterateWithCustomIterator(): Generator
                     try {
                         $data = json_decode($data, true, 512, JSON_THROW_ON_ERROR);
                     } catch (JsonException $e) {
-                        $this->logger?->warning('Invalid JSON in stream', ['data' => $data, 'error' => $e->getMessage()]);
+                        $this->logger?->warning('InvalidJsonInStream', ['data' => $data, 'error' => $e->getMessage()]);
                         continue;
                     }
                 }
 
                 // 确保数据是有效的数组
                 if (! is_array($data)) {
-                    $this->logger?->warning('Invalid data format', ['data' => $data]);
+                    $this->logger?->warning('InvalidDataFormat', ['data' => $data, 'chunk_count' => $chunkCount]);
                     continue;
                 }
 
+                // Store last valid chunk data
+                $lastChunkData = $data;
+
+                // Log checkpoint (first 5 chunks and every 200 chunks)
+                if ($this->shouldLogCheckpoint($chunkCount)) {
+                    $currentTime = microtime(true);
+
+                    if ($chunkCount === 1) {
+                        // First chunk gets detailed information
+                        $this->logger?->info('FirstChunkReceivedFromCustomIterator', [
+                            'chunk_count' => $chunkCount,
+                            'id' => $data['id'] ?? null,
+                            'model' => $data['model'] ?? null,
+                            'choices_count' => count($data['choices'] ?? []),
+                            'time_since_start_ms' => TimeUtil::calculateIntervalMs($startTime, $currentTime, 2),
+                        ]);
+                        $lastLogTime = $currentTime;
+                    } else {
+                        // Regular checkpoint
+                        $this->logger?->info('StreamProcessingCheckpoint', [
+                            'chunks_processed' => $chunkCount,
+                            'interval_time_ms' => TimeUtil::calculateIntervalMs($lastLogTime, $currentTime, 2),
+                            'total_time_ms' => TimeUtil::calculateDurationMs($startTime, 2),
+                            'choices_accumulated' => count($this->choices),
+                        ]);
+                        $lastLogTime = $currentTime;
+                    }
+                }
+
                 // 更新响应元数据
                 $this->updateMetadata($data);
 
                 // 生成ChatCompletionChoice对象
                 yield from $this->yieldChoices($data['choices'] ?? []);
             }
-
-            // Set duration and create completion response
-            $this->handleStreamCompletion($startTime);
         } catch (Throwable $e) {
-            $this->logger?->error('Error processing custom iterator', [
+            $this->logger?->error('ErrorProcessingCustomIterator', [
                 'exception' => get_class($e),
                 'message' => $e->getMessage(),
                 'trace' => $e->getTraceAsString(),
             ]);
             throw $e; // 重新抛出异常，让调用方可以处理
+        } finally {
+            // Log last chunk content if available
+            if ($lastChunkData !== null) {
+                $this->logger?->info('LastChunkReceivedFromCustomIterator', [
+                    'chunk_count' => $chunkCount,
+                    'id' => $lastChunkData['id'] ?? null,
+                    'model' => $lastChunkData['model'] ?? null,
+                    'choices' => $lastChunkData['choices'] ?? [],
+                    'usage' => $lastChunkData['usage'] ?? null,
+                    'finish_reason' => $lastChunkData['choices'][0]['finish_reason'] ?? null,
+                ]);
+            }
+
+            // Log completion summary (always executed)
+            $this->logger?->info('CustomIteratorStreamCompleted', [
+                'total_chunks' => $chunkCount,
+                'total_time_ms' => TimeUtil::calculateDurationMs($startTime, 2),
+                'total_choices' => count($this->choices),
+            ]);
+
+            // Set duration and create completion response
+            $this->handleStreamCompletion($startTime);
         }
     }
 
@@ -220,46 +302,110 @@ private function iterateWithCustomIterator(): Generator
      */
     private function iterateWithSSEClient(): Generator
     {
+        $startTime = microtime(true);
+        $chunkCount = 0;
+        $lastLogTime = $startTime;
+        $lastChunkData = null;
+
         try {
-            $startTime = microtime(true);
+            $this->logger?->info('StreamProcessingStartedWithSseClient', [
+                'client_class' => get_class($this->sseClient),
+                'start_time' => $startTime,
+            ]);
+
             /** @var SSEEvent $event */
             foreach ($this->sseClient->getIterator() as $event) {
                 $data = $event->getData();
 
                 // 处理结束标记
-                if ($data === '[DONE]') {
-                    $this->logger?->debug('SSE stream completed');
+                if ($data === '[DONE]' || $event->getEvent() === 'done') {
+                    $this->logger?->debug('SseStreamCompleted', [
+                        'event_type' => $event->getEvent(),
+                        'data' => $data,
+                    ]);
+                    // Signal the SSE client to close early to prevent waiting for more data
+                    $this->sseClient->closeEarly();
                     break;
                 }
 
                 // 只处理数据事件
                 if ($event->getEvent() !== 'message') {
-                    $this->logger?->debug('Skipping non-message event', ['event' => $event->getEvent()]);
+                    $this->logger?->debug('SkippingNonMessageEvent', ['event' => $event->getEvent()]);
                     continue;
                 }
 
+                ++$chunkCount;
+
                 // 确保数据是有效的数组
                 if (! is_array($data)) {
-                    $this->logger?->warning('Invalid data format', ['data' => $data]);
+                    $this->logger?->warning('InvalidDataFormat', ['data' => $data, 'chunk_count' => $chunkCount]);
                     continue;
                 }
 
+                // Store last valid chunk data
+                $lastChunkData = $data;
+
+                // Log checkpoint (first 5 chunks and every 200 chunks)
+                if ($this->shouldLogCheckpoint($chunkCount)) {
+                    $currentTime = microtime(true);
+
+                    if ($chunkCount === 1) {
+                        // First chunk gets detailed information
+                        $this->logger?->info('FirstChunkReceivedFromSseClient', [
+                            'chunk_count' => $chunkCount,
+                            'id' => $data['id'] ?? null,
+                            'model' => $data['model'] ?? null,
+                            'choices_count' => count($data['choices'] ?? []),
+                            'time_since_start_ms' => TimeUtil::calculateIntervalMs($startTime, $currentTime, 2),
+                        ]);
+                        $lastLogTime = $currentTime;
+                    } else {
+                        // Regular checkpoint
+                        $this->logger?->info('SseStreamProcessingCheckpoint', [
+                            'chunks_processed' => $chunkCount,
+                            'interval_time_ms' => TimeUtil::calculateIntervalMs($lastLogTime, $currentTime, 2),
+                            'total_time_ms' => TimeUtil::calculateDurationMs($startTime, 2),
+                            'choices_accumulated' => count($this->choices),
+                        ]);
+                        $lastLogTime = $currentTime;
+                    }
+                }
+
                 // 更新响应元数据
                 $this->updateMetadata($data);
 
                 // 生成ChatCompletionChoice对象
                 yield from $this->yieldChoices($data['choices'] ?? []);
             }
-
-            // Set duration and create completion response
-            $this->handleStreamCompletion($startTime);
         } catch (Throwable $e) {
-            $this->logger?->error('Error processing SSE stream', [
+            $this->logger?->error('ErrorProcessingSseStream', [
                 'exception' => get_class($e),
                 'message' => $e->getMessage(),
                 'trace' => $e->getTraceAsString(),
             ]);
             throw $e; // 重新抛出异常，让调用方可以处理
+        } finally {
+            // Log last chunk content if available
+            if ($lastChunkData !== null) {
+                $this->logger?->info('LastChunkReceivedFromSseClient', [
+                    'chunk_count' => $chunkCount,
+                    'id' => $lastChunkData['id'] ?? null,
+                    'model' => $lastChunkData['model'] ?? null,
+                    'choices' => $lastChunkData['choices'] ?? [],
+                    'usage' => $lastChunkData['usage'] ?? null,
+                    'finish_reason' => $lastChunkData['choices'][0]['finish_reason'] ?? null,
+                ]);
+            }
+
+            // Log completion summary (always executed)
+            $this->logger?->info('SseClientStreamCompleted', [
+                'total_chunks' => $chunkCount,
+                'total_time_ms' => TimeUtil::calculateDurationMs($startTime, 2),
+                'total_choices' => count($this->choices),
+            ]);
+
+            // Set duration and create completion response
+            $this->handleStreamCompletion($startTime);
         }
     }
 
@@ -273,10 +419,46 @@ private function updateMetadata(array $data): void
         $this->setCreated($data['created'] ?? null);
         $this->setModel($data['model'] ?? null);
         if (! empty($data['usage'])) {
-            $this->setUsage(Usage::fromArray($data['usage']));
+            $usage = $data['usage'];
+            // 检测并转换DashScope格式的字段
+            if ($this->isDashScopeUsage($usage)) {
+                $usage = $this->convertDashScopeUsage($usage);
+            }
+            $this->setUsage(Usage::fromArray($usage));
         }
     }
 
+    /**
+     * Detect whether the usage data is in DashScope format (cache-creation keys in prompt_tokens_details).
+     */
+    private function isDashScopeUsage(array $usage): bool
+    {
+        return isset($usage['prompt_tokens_details']['cache_creation_input_tokens'])
+            || isset($usage['prompt_tokens_details']['cache_type'])
+            || isset($usage['prompt_tokens_details']['cache_creation']);
+    }
+
+    /**
+     * Convert DashScope usage data to the standard format (cache_write_input_tokens).
+     */
+    private function convertDashScopeUsage(array $usage): array
+    {
+        $details = $usage['prompt_tokens_details'] ?? null;
+
+        // Prefer the outer cache_creation_input_tokens; fall back to the inner
+        // cache_creation.ephemeral_5m_input_tokens only when the outer one is absent.
+        $cacheWriteTokens = $details['cache_creation_input_tokens']
+            ?? $details['cache_creation']['ephemeral_5m_input_tokens']
+            ?? null;
+
+        if ($cacheWriteTokens === null) {
+            return $usage;
+        }
+
+        $usage['prompt_tokens_details']['cache_write_input_tokens'] = $cacheWriteTokens;
+        return $usage;
+    }
+
     /**
      * 生成选择对象
      */
@@ -284,7 +466,7 @@ private function yieldChoices(array $choices): Generator
     {
         foreach ($choices as $choice) {
             if (! is_array($choice)) {
-                $this->logger?->warning('Invalid choice format', ['choice' => $choice]);
+                $this->logger?->warning('InvalidChoiceFormat', ['choice' => $choice]);
                 continue;
             }
             $chatCompletionChoice = ChatCompletionChoice::fromArray($choice);
@@ -300,8 +482,17 @@ private function iterateWithLegacyMethod(): Generator
     {
         // 保留原有的实现作为后备
         $startTime = microtime(true);
+        $chunkCount = 0;
+        $lastLogTime = $startTime;
+        $lastChunkData = null;
         $body = $this->originResponse->getBody();
 
+        $this->logger?->info('StreamProcessingStartedWithLegacyMethod', [
+            'response_status' => $this->originResponse->getStatusCode(),
+            'content_type' => $this->originResponse->getHeaderLine('Content-Type'),
+            'start_time' => $startTime,
+        ]);
+
         $buffer = '';
         while (! $body->eof()) {
             $chunk = $body->read(4096);
@@ -329,15 +520,67 @@ private function iterateWithLegacyMethod(): Generator
 
                 try {
                     $data = json_decode(trim($line), true, 512, JSON_THROW_ON_ERROR);
+                    ++$chunkCount;
+
+                    // Store last valid chunk data
+                    $lastChunkData = $data;
+
+                    // Log checkpoint (first 5 chunks and every 200 chunks)
+                    if ($this->shouldLogCheckpoint($chunkCount)) {
+                        $currentTime = microtime(true);
+
+                        if ($chunkCount === 1) {
+                            // First chunk gets detailed information
+                            $this->logger?->info('FirstChunkReceivedFromLegacyMethod', [
+                                'chunk_count' => $chunkCount,
+                                'id' => $data['id'] ?? null,
+                                'model' => $data['model'] ?? null,
+                                'choices_count' => count($data['choices'] ?? []),
+                                'time_since_start_ms' => TimeUtil::calculateIntervalMs($startTime, $currentTime, 2),
+                                'raw_line_length' => strlen(trim($line)),
+                            ]);
+                            $lastLogTime = $currentTime;
+                        } else {
+                            // Regular checkpoint
+                            $this->logger?->info('LegacyStreamProcessingCheckpoint', [
+                                'chunks_processed' => $chunkCount,
+                                'interval_time_ms' => TimeUtil::calculateIntervalMs($lastLogTime, $currentTime, 2),
+                                'total_time_ms' => TimeUtil::calculateDurationMs($startTime, 2),
+                                'choices_accumulated' => count($this->choices),
+                                'buffer_size' => strlen($buffer),
+                            ]);
+                            $lastLogTime = $currentTime;
+                        }
+                    }
+
                     $this->updateMetadata($data);
                     yield from $this->yieldChoices($data['choices'] ?? []);
                 } catch (JsonException $e) {
-                    $this->logger?->warning('InvalidJsonResponse', ['line' => $line, 'error' => $e->getMessage()]);
+                    $this->logger?->warning('InvalidJsonResponse', ['line' => $line, 'error' => $e->getMessage(), 'chunk_count' => $chunkCount]);
                     continue;
                 }
             }
         }
 
+        // Log last chunk content if available
+        if ($lastChunkData !== null) {
+            $this->logger?->info('LastChunkReceivedFromLegacyMethod', [
+                'chunk_count' => $chunkCount,
+                'id' => $lastChunkData['id'] ?? null,
+                'model' => $lastChunkData['model'] ?? null,
+                'choices' => $lastChunkData['choices'] ?? [],
+                'usage' => $lastChunkData['usage'] ?? null,
+                'finish_reason' => $lastChunkData['choices'][0]['finish_reason'] ?? null,
+            ]);
+        }
+
+        // Log completion summary
+        $this->logger?->info('LegacyMethodStreamCompleted', [
+            'total_chunks' => $chunkCount,
+            'total_time_ms' => TimeUtil::calculateDurationMs($startTime, 2),
+            'total_choices' => count($this->choices),
+        ]);
+
         // Set duration and create completion response
         $this->handleStreamCompletion($startTime);
     }
@@ -352,12 +595,19 @@ private function handleStreamCompletion(float $startTime): void
         }
 
         // Set duration and create completion response
-        $this->afterChatCompletionsStreamEvent->setDuration(microtime(true) - $startTime);
+        $this->afterChatCompletionsStreamEvent->setDuration(TimeUtil::calculateDurationMs($startTime));
 
         // Create and set the completed ChatCompletionResponse
         $completionResponse = $this->createChatCompletionResponse();
         $this->afterChatCompletionsStreamEvent->setCompletionResponse($completionResponse);
 
+        $logData = [
+            'content' => $completionResponse->getFirstChoice()?->getMessage()?->toArray(),
+            'usage' => $completionResponse->getUsage()?->toArray(),
+        ];
+        $this->logger?->info('ChatCompletionsStreamResponse', LoggingConfigHelper::filterAndFormatLogData($logData));
+
+        // Event listener will execute callbacks
         EventUtil::dispatch($this->afterChatCompletionsStreamEvent);
     }
 
diff --git a/src/Api/Response/TextCompletionResponse.php b/src/Api/Response/TextCompletionResponse.php
index 6dec8d5..0f50ad2 100644
--- a/src/Api/Response/TextCompletionResponse.php
+++ b/src/Api/Response/TextCompletionResponse.php
@@ -90,7 +90,7 @@ public function getCreated(): ?int
         return $this->created;
     }
 
-    public function setCreated(null|int|string $created): self
+    public function setCreated(int|string|null $created): self
     {
         $this->created = (int) $created;
         return $this;
diff --git a/src/Api/Response/ToolCall.php b/src/Api/Response/ToolCall.php
index 4994c02..bf6e011 100644
--- a/src/Api/Response/ToolCall.php
+++ b/src/Api/Response/ToolCall.php
@@ -16,6 +16,11 @@
 
 class ToolCall implements Arrayable
 {
+    /**
+     * Metadata for provider-specific extensions (e.g., Gemini thought signatures).
+     */
+    protected array $metadata = [];
+
     public function __construct(
         protected string $name,
         protected array $arguments,
@@ -43,8 +48,14 @@ public static function fromArray(array $toolCalls): array
             $name = $function['name'] ?? '';
             $id = $toolCall['id'] ?? '';
             $type = $toolCall['type'] ?? 'function';
-            $static = new self($name, $arguments, $id, $type, $function['arguments']);
-            $toolCallsResult[] = $static;
+            $instance = new self($name, $arguments, $id, $type, $function['arguments']);
+
+            // Preserve thought signature if present (Gemini-specific)
+            if (isset($toolCall['thought_signature'])) {
+                $instance->setThoughtSignature($toolCall['thought_signature']);
+            }
+
+            $toolCallsResult[] = $instance;
         }
         return $toolCallsResult;
     }
@@ -147,4 +158,48 @@ public function appendStreamArguments(string $arguments): void
     {
         $this->streamArguments .= $arguments;
     }
+
+    /**
+     * Get metadata value.
+     */
+    public function getMetadata(string $key): mixed
+    {
+        return $this->metadata[$key] ?? null;
+    }
+
+    /**
+     * Set metadata value.
+     */
+    public function setMetadata(string $key, mixed $value): self
+    {
+        $this->metadata[$key] = $value;
+        return $this;
+    }
+
+    /**
+     * Get all metadata.
+     */
+    public function getAllMetadata(): array
+    {
+        return $this->metadata;
+    }
+
+    /**
+     * Get thought signature (Gemini-specific); null when absent or not stored as a string.
+     * Thought signatures are used to preserve reasoning context across multi-turn interactions.
+     * @see https://ai.google.dev/gemini-api/docs/thought-signatures
+     */
+    public function getThoughtSignature(): ?string
+    {
+        $signature = $this->getMetadata('thought_signature');
+        return is_string($signature) ? $signature : null;
+    }
+
+    /**
+     * Set thought signature (Gemini-specific).
+     */
+    public function setThoughtSignature(?string $thoughtSignature): self
+    {
+        return $this->setMetadata('thought_signature', $thoughtSignature);
+    }
 }
diff --git a/src/Api/Response/Usage.php b/src/Api/Response/Usage.php
index a4806af..9e62063 100644
--- a/src/Api/Response/Usage.php
+++ b/src/Api/Response/Usage.php
@@ -14,6 +14,16 @@
 
 class Usage
 {
+    /**
+     * @param int $promptTokens 提示词的令牌数量
+     * @param int $completionTokens 完成内容的令牌数量
+     * @param int $totalTokens 使用的总令牌数量
+     * @param array $completionTokensDetails 完成令牌的详细信息
+     * @param array $promptTokensDetails 提示令牌的详细信息，可能包含：
+     *                                   - cache_write_input_tokens: 写入缓存的令牌数量
+     *                                   - cache_read_input_tokens: 从缓存读取的令牌数量（命中的缓存）
+     *                                   - cached_tokens: 从缓存读取的令牌数量（命中的缓存）
+     */
     public function __construct(
         public int $promptTokens,
         public int $completionTokens,
@@ -58,6 +68,61 @@ public function getPromptTokensDetails(): array
         return $this->promptTokensDetails;
     }
 
+    /** Number of tokens written to the cache; 0 when the prompt details carry no such entry. */
+    public function getCacheWriteInputTokens(): int
+    {
+        $written = $this->promptTokensDetails['cache_write_input_tokens'] ?? 0;
+
+        return (int) $written;
+    }
+
+    /** Number of tokens read from the cache (cache hits); 0 when the entry is missing. */
+    public function getCacheReadInputTokens(): int
+    {
+        $read = $this->promptTokensDetails['cache_read_input_tokens'] ?? 0;
+
+        return (int) $read;
+    }
+
+    /** Value of the 'cached_tokens' prompt detail (cache hits); 0 when the entry is missing. */
+    public function getCachedTokens(): int
+    {
+        $cached = $this->promptTokensDetails['cached_tokens'] ?? 0;
+
+        return (int) $cached;
+    }
+
+    /**
+     * Whether any tokens were served from the cache, according to either of the two read counters.
+     */
+    public function hasCacheHit(): bool
+    {
+        return max($this->getCacheReadInputTokens(), $this->getCachedTokens()) > 0;
+    }
+
+    /**
+     * Cache hit rate as a fraction of the prompt tokens, rounded to four decimals.
+     *
+     * Every provider is computed the Qwen way: cached_tokens / prompt_tokens. Only the
+     * cached_tokens detail is consulted (the original note states that Claude and Qwen now
+     * share this format).
+     * Returns 0.0 when there are no prompt tokens.
+     */
+    public function getCacheHitRate(): float
+    {
+        $promptTokens = $this->promptTokens;
+
+        return $promptTokens === 0 ? 0.0 : round($this->getCachedTokens() / $promptTokens, 4);
+    }
+
+    /** The cache hit rate scaled to a percentage and rounded to two decimals. */
+    public function getCacheHitRatePercentage(): float
+    {
+        $rate = $this->getCacheHitRate();
+
+        return round($rate * 100, 2);
+    }
+
     public function toArray(): array
     {
         $data = [
diff --git a/src/Api/Transport/OdinSimpleCurl.php b/src/Api/Transport/OdinSimpleCurl.php
new file mode 100644
index 0000000..45870ea
--- /dev/null
+++ b/src/Api/Transport/OdinSimpleCurl.php
@@ -0,0 +1,152 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Transport;
+
+use GuzzleHttp\Psr7\Response;
+use Hyperf\Odin\Exception\LLMException\Api\LLMInvalidRequestException;
+use Hyperf\Odin\Exception\LLMException\LLMApiException;
+use Hyperf\Odin\Exception\LLMException\LLMConfigurationException;
+use Hyperf\Odin\Exception\LLMException\LLMNetworkException;
+use Hyperf\Odin\Exception\LLMException\Network\LLMConnectionTimeoutException;
+use Hyperf\Odin\Exception\LLMException\Network\LLMReadTimeoutException;
+use Hyperf\Odin\Exception\RuntimeException;
+
+class OdinSimpleCurl
+{
+    /**
+     * POST $options to $url through the "OdinSimpleCurl://" stream wrapper and return a PSR-7
+     * response whose body is the still-open stream, so the caller can read it incrementally.
+     *
+     * @param array $options request options; $url is added under "url" and the array is JSON-encoded
+     * @param bool $skipContentTypeCheck when false, a non-empty Content-Type other than SSE is rejected
+     *
+     * @throws LLMNetworkException when the stream cannot be opened or cURL reports an error
+     * @throws LLMReadTimeoutException when cURL reports error code 28
+     * @throws LLMConfigurationException when the stream is not backed by a SimpleCURLClient
+     * @throws LLMConnectionTimeoutException when no HTTP status code was received
+     * @throws LLMApiException when the HTTP status is 500 or above
+     * @throws LLMInvalidRequestException when the status is 400-499 or the Content-Type check fails
+     */
+    public static function send(string $url, array $options, bool $skipContentTypeCheck = false): Response
+    {
+        $options['url'] = $url;
+
+        $payload = json_encode($options);
+        if ($payload === false) {
+            throw new LLMNetworkException(
+                'Failed to open SimpleCURL stream: options are not JSON-encodable: ' . json_last_error_msg()
+            );
+        }
+
+        $stream = @fopen('OdinSimpleCurl://' . $payload, 'r', false);
+
+        if ($stream === false) {
+            $error = error_get_last();
+            throw new LLMNetworkException(
+                'Failed to open SimpleCURL stream: ' . ($error['message'] ?? 'Unknown error')
+            );
+        }
+
+        $metadata = stream_get_meta_data($stream);
+        $wrapper = $metadata['wrapper_data'] ?? null;
+
+        if (! $wrapper instanceof SimpleCURLClient) {
+            fclose($stream);
+            throw new LLMConfigurationException('Invalid stream wrapper: expected SimpleCURLClient instance');
+        }
+
+        $metadataInfo = $wrapper->stream_metadata();
+        $statusCode = $metadataInfo['http_code'] ?? 0;
+        $responseHeaders = $metadataInfo['headers'] ?? [];
+
+        if (isset($metadataInfo['error'])) {
+            fclose($stream);
+            $curlCode = $metadataInfo['error_code'] ?? 0;
+            $errorMessage = $metadataInfo['error'];
+            $previous = new RuntimeException($errorMessage, $curlCode);
+
+            if ($curlCode === 28) {
+                throw new LLMReadTimeoutException("Connection timeout: {$errorMessage}", $previous);
+            }
+
+            // Only the message prefix depends on the cURL code; the exception type is the same.
+            $message = match (true) {
+                in_array($curlCode, [6, 7, 52, 56], true) => "Network connection error: {$errorMessage}",
+                $curlCode === 35 => "SSL/TLS error: {$errorMessage}",
+                default => "HTTP request failed: {$errorMessage} (code: {$curlCode})",
+            };
+
+            throw new LLMNetworkException($message, $curlCode, $previous);
+        }
+
+        if ($statusCode === 0) {
+            fclose($stream);
+            throw new LLMConnectionTimeoutException(
+                'Connection error: No valid HTTP response received from server',
+                new RuntimeException('Invalid HTTP status code: 0')
+            );
+        }
+
+        if ($statusCode >= 400) {
+            $errorBody = stream_get_contents($stream);
+            fclose($stream);
+
+            $errorMessage = "HTTP {$statusCode} error" . self::describeErrorBody($errorBody);
+            $previous = new RuntimeException($errorMessage, $statusCode);
+
+            if ($statusCode >= 500) {
+                throw new LLMApiException($errorMessage, $statusCode, $previous, 0, $statusCode);
+            }
+
+            throw new LLMInvalidRequestException($errorMessage, $previous, $statusCode);
+        }
+
+        if (! $skipContentTypeCheck) {
+            $contentType = $responseHeaders['content-type'] ?? '';
+            // Media type names are case-insensitive, so match against the lower-cased value.
+            if ($contentType !== '' && ! str_contains(strtolower($contentType), 'text/event-stream')) {
+                $body = (string) stream_get_contents($stream);
+                fclose($stream);
+
+                $errorMessage = "Expected 'text/event-stream' response but got '{$contentType}'. Response: "
+                    . self::truncate($body);
+
+                throw new LLMInvalidRequestException($errorMessage, new RuntimeException($errorMessage), 400);
+            }
+        }
+
+        return new Response($statusCode, $responseHeaders, $stream);
+    }
+
+    /**
+     * Build the ": detail" suffix of an HTTP error message from the response body: the JSON
+     * field error.message (or a scalar error) when present, otherwise the truncated raw body.
+     */
+    private static function describeErrorBody(false|string $body): string
+    {
+        if ($body === false || $body === '') {
+            return '';
+        }
+
+        $decoded = json_decode($body, true);
+        $detail = is_array($decoded) ? ($decoded['error']['message'] ?? $decoded['error'] ?? null) : null;
+
+        return ': ' . (is_scalar($detail) ? $detail : self::truncate($body));
+    }
+
+    /** Cut $text down to 200 bytes, appending "..." when something was removed. */
+    private static function truncate(string $text): string
+    {
+        return strlen($text) > 200 ? substr($text, 0, 200) . '...' : $text;
+    }
+}
diff --git a/src/Api/Transport/SSEClient.php b/src/Api/Transport/SSEClient.php
index ef4f027..2df5612 100644
--- a/src/Api/Transport/SSEClient.php
+++ b/src/Api/Transport/SSEClient.php
@@ -14,7 +14,6 @@
 
 use Generator;
 use Hyperf\Odin\Exception\InvalidArgumentException;
-use Hyperf\Odin\Exception\RuntimeException;
 use IteratorAggregate;
 use JsonException;
 use Psr\Log\LoggerInterface;
@@ -27,33 +26,19 @@ class SSEClient implements IteratorAggregate
 
     private const BUFFER_SIZE = 8192;
 
-    private const DEFAULT_RETRY = 3000; // 默认重试时间，单位毫秒
-
-    private ?int $timeout = null;
-
-    private ?float $connectionStartTime = null;
+    private const DEFAULT_RETRY = 3000;
 
     private int $retryTimeout = self::DEFAULT_RETRY;
 
     private ?string $lastEventId = null;
 
-    /**
-     * 流式异常检测器.
-     */
     private ?StreamExceptionDetector $exceptionDetector = null;
 
-    /**
-     * 日志记录器.
-     */
-    private ?LoggerInterface $logger = null;
+    private bool $shouldClose = false;
 
-    /**
-     * @param resource $stream
-     */
     public function __construct(
         private $stream,
         private bool $autoClose = true,
-        ?int $timeout = null,
         ?array $timeoutConfig = null,
         ?LoggerInterface $logger = null
     ) {
@@ -61,22 +46,11 @@ public function __construct(
             throw new InvalidArgumentException('Stream must be a resource');
         }
 
-        $this->timeout = $timeout;
-        $this->connectionStartTime = microtime(true);
-        $this->logger = $logger;
-
-        // 如果提供了超时配置，初始化流异常检测器
         if ($timeoutConfig !== null) {
             $this->exceptionDetector = new StreamExceptionDetector($timeoutConfig, $logger);
-            $this->logger?->debug('Stream exception detector initialized', [
-                'timeout_config' => $timeoutConfig,
-            ]);
         }
     }
 
-    /**
-     * 确保流资源在对象销毁时被释放.
-     */
     public function __destruct()
     {
         if ($this->autoClose && is_resource($this->stream)) {
@@ -88,39 +62,23 @@ public function getIterator(): Generator
     {
         try {
             $lastCheckTime = microtime(true);
+            $chunkCounter = 0;
 
-            while (! feof($this->stream)) {
-                // 定期检查超时状态，每1秒检查一次
+            while (! feof($this->stream) && ! $this->shouldClose) {
                 $now = microtime(true);
                 if ($now - $lastCheckTime > 1.0) {
                     $lastCheckTime = $now;
-
-                    // 使用标准超时检查
-                    if ($this->isTimedOut()) {
-                        throw new RuntimeException('Periodic check timeout - Connection exceeds wait time limit');
-                    }
-
-                    // 如果启用了更复杂的超时检测，使用流异常检测器
                     $this->exceptionDetector?->checkTimeout();
                 }
 
                 $chunk = stream_get_line($this->stream, self::BUFFER_SIZE, self::EVENT_END);
 
                 if ($chunk === false) {
-                    // 使用标准超时检查
-                    if ($this->isTimedOut()) {
-                        throw new RuntimeException('Read operation failed timeout - Stream read returned false and exceeded timeout limit');
-                    }
-
-                    // 如果启用了更复杂的超时检测，使用流异常检测器
                     $this->exceptionDetector?->checkTimeout();
-
                     continue;
                 }
-                // 检查流是否仍然有效
-                if (! is_resource($this->stream) || feof($this->stream)) {
-                    break;
-                }
+
+                ++$chunkCounter;
 
                 $eventData = $this->parseEvent($chunk);
                 $event = SSEEvent::fromArray($eventData);
@@ -131,21 +89,30 @@ public function getIterator(): Generator
 
                 if ($event->getRetry() !== null) {
                     $retryInt = (int) $event->getRetry();
-                    // 设置合理的上下限，避免极端值
-                    if ($retryInt > 0 && $retryInt <= 600000) { // 最大10分钟
+                    if ($retryInt > 0 && $retryInt <= 600000) {
                         $this->retryTimeout = $retryInt;
                     }
                 }
 
-                // 如果是注释或空行，则跳过
                 if ($event->isEmpty()) {
                     continue;
                 }
 
-                // 通知流异常检测器已接收到块
-                $this->exceptionDetector?->onChunkReceived();
+                $chunkInfo = [
+                    'event_type' => $event->getEvent(),
+                    'event_id' => $event->getId(),
+                    'data_preview' => is_string($event->getData())
+                        ? substr($event->getData(), 0, 200)
+                        : (is_array($event->getData()) ? json_encode($event->getData()) : 'non-string-data'),
+                    'raw_chunk_size' => strlen($chunk),
+                ];
+                $this->exceptionDetector?->onChunkReceived($chunkInfo);
 
                 yield $event;
+
+                if (! is_resource($this->stream) || feof($this->stream)) {
+                    break;
+                }
             }
         } finally {
             if ($this->autoClose && is_resource($this->stream)) {
@@ -154,31 +121,21 @@ public function getIterator(): Generator
         }
     }
 
-    /**
-     * 获取最后一个事件 ID.
-     */
     public function getLastEventId(): ?string
     {
         return $this->lastEventId;
     }
 
-    /**
-     * 获取重试超时时间（毫秒）.
-     */
     public function getRetryTimeout(): int
     {
         return $this->retryTimeout;
     }
 
-    /**
-     * 解析 SSE 事件.
-     *
-     * SSE 格式规范：
-     * - event: 事件类型
-     * - data: 事件数据
-     * - id: 事件 ID
-     * - retry: 重连等待时间
-     */
+    public function closeEarly(): void
+    {
+        $this->shouldClose = true; // getIterator()'s loop condition stops once this flag is set
+    }
+
     protected function parseEvent(string $chunk): array
     {
         $result = [
@@ -188,19 +145,14 @@ protected function parseEvent(string $chunk): array
             'retry' => null,
         ];
 
-        // 移除 UTF-8 BOM
         $chunk = preg_replace('/^\xEF\xBB\xBF/', '', $chunk);
-
-        // 按行分割
         $lines = preg_split('/' . self::EOL . '/', $chunk);
 
         foreach ($lines as $line) {
-            // 忽略注释和空行
             if (empty($line) || str_starts_with($line, ':')) {
                 continue;
             }
 
-            // 解析字段
             if (str_contains($line, ':')) {
                 [$field, $value] = explode(':', $line, 2);
                 $value = ltrim($value, ' ');
@@ -218,23 +170,20 @@ protected function parseEvent(string $chunk): array
                     case 'retry':
                         if (is_numeric($value)) {
                             $retry = (int) $value;
-                            if ($retry > 0) {  // 只接受正整数
+                            if ($retry > 0) {
                                 $result['retry'] = $retry;
                             }
                         }
                         break;
                 }
             } else {
-                // 如果行中没有冒号，则视为字段名，值为空
                 if ($line === 'data') {
                     $result['data'] = $result['data'] ? $result['data'] . "\n" : '';
                 }
             }
         }
 
-        // 尝试解析 JSON 数据
         if (! empty($result['data'])) {
-            // 特殊处理 [DONE] 标记，这通常表示流结束
             if ($result['data'] === '[DONE]') {
                 $result['event'] = 'done';
             } else {
@@ -242,28 +191,10 @@ protected function parseEvent(string $chunk): array
                     $jsonData = json_decode($result['data'], true, 512, JSON_THROW_ON_ERROR);
                     $result['data'] = $jsonData;
                 } catch (JsonException $e) {
-                    // 保持原始字符串数据，不进行转换
-                    // 可以选择记录错误，但不影响处理流程
-                    $this->logger?->debug('Failed to parse JSON data in SSE event', [
-                        'error' => $e->getMessage(),
-                        'data' => $result['data'],
-                    ]);
                 }
             }
         }
 
         return $result;
     }
-
-    /**
-     * 检查连接是否超时.
-     */
-    private function isTimedOut(): bool
-    {
-        if ($this->timeout === null || $this->connectionStartTime === null) {
-            return false;
-        }
-
-        return (microtime(true) - $this->connectionStartTime) > $this->timeout;
-    }
 }
diff --git a/src/Api/Transport/SSEEvent.php b/src/Api/Transport/SSEEvent.php
index b9fb6cd..73edff7 100644
--- a/src/Api/Transport/SSEEvent.php
+++ b/src/Api/Transport/SSEEvent.php
@@ -14,34 +14,16 @@
 
 use JsonSerializable;
 
-/**
- * SSE 事件封装类.
- */
 class SSEEvent implements JsonSerializable
 {
-    /**
-     * 事件类型.
-     */
     private string $event;
 
-    /**
-     * 事件数据.
-     */
     private mixed $data;
 
-    /**
-     * 事件 ID.
-     */
     private ?string $id;
 
-    /**
-     * 重连等待时间（毫秒）.
-     */
     private ?int $retry;
 
-    /**
-     * 创建一个新的 SSE 事件.
-     */
     public function __construct(
         mixed $data = '',
         string $event = 'message',
@@ -54,9 +36,6 @@ public function __construct(
         $this->retry = $retry;
     }
 
-    /**
-     * 从数组创建 SSE 事件.
-     */
     public static function fromArray(array $data): self
     {
         return new self(
@@ -67,77 +46,50 @@ public static function fromArray(array $data): self
         );
     }
 
-    /**
-     * 获取事件类型.
-     */
     public function getEvent(): string
     {
         return $this->event;
     }
 
-    /**
-     * 设置事件类型.
-     */
     public function setEvent(string $event): self
     {
         $this->event = $event;
         return $this;
     }
 
-    /**
-     * 获取事件数据.
-     */
     public function getData(): mixed
     {
         return $this->data;
     }
 
-    /**
-     * 设置事件数据.
-     */
     public function setData(mixed $data): self
     {
         $this->data = $data;
         return $this;
     }
 
-    /**
-     * 获取事件 ID.
-     */
     public function getId(): ?string
     {
         return $this->id;
     }
 
-    /**
-     * 设置事件 ID.
-     */
     public function setId(?string $id): self
     {
         $this->id = $id;
         return $this;
     }
 
-    /**
-     * 获取重连等待时间.
-     */
     public function getRetry(): ?int
     {
         return $this->retry;
     }
 
-    /**
-     * 设置重连等待时间.
-     */
     public function setRetry(?int $retry): self
     {
         $this->retry = $retry;
         return $this;
     }
 
-    /**
-     * 转换为数组.
-     */
     public function toArray(): array
     {
         return [
@@ -148,25 +100,16 @@ public function toArray(): array
         ];
     }
 
-    /**
-     * 检查事件是否为空.
-     */
     public function isEmpty(): bool
     {
         return empty($this->data);
     }
 
-    /**
-     * 实现 JsonSerializable 接口.
-     */
     public function jsonSerialize(): array
     {
         return $this->toArray();
     }
 
-    /**
-     * 格式化为 SSE 文本格式.
-     */
     public function format(): string
     {
         $result = '';
@@ -175,14 +118,12 @@ public function format(): string
             $result .= "event: {$this->event}\n";
         }
 
-        // 处理多行数据
         $data = $this->data;
         if (is_array($data) || is_object($data)) {
             $data = json_encode($data, JSON_UNESCAPED_UNICODE);
         }
 
         if (is_string($data)) {
-            // 处理多行数据，每行前面加上 "data: "
             $dataLines = explode("\n", $data);
             foreach ($dataLines as $line) {
                 $result .= "data: {$line}\n";
diff --git a/src/Api/Transport/SimpleCURLClient.php b/src/Api/Transport/SimpleCURLClient.php
new file mode 100644
index 0000000..cf8d95a
--- /dev/null
+++ b/src/Api/Transport/SimpleCURLClient.php
@@ -0,0 +1,393 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Api\Transport;
+
+use CurlHandle;
+use Hyperf\Engine\Channel;
+use Hyperf\Engine\Coroutine;
+use Hyperf\Odin\Exception\LLMException\LLMNetworkException;
+use Hyperf\Odin\Exception\LLMException\Network\LLMConnectionTimeoutException;
+use Hyperf\Odin\Exception\LLMException\Network\LLMReadTimeoutException;
+use Hyperf\Odin\Exception\RuntimeException;
+use Hyperf\Odin\Utils\LogUtil;
+use Throwable;
+
+if (! in_array('OdinSimpleCurl', stream_get_wrappers(), true)) { // strict match; register the scheme only once
+    stream_wrapper_register('OdinSimpleCurl', SimpleCURLClient::class);
+}
+
+/**
+ * Stream wrapper for "OdinSimpleCurl://": runs a cURL POST and streams the response body via channels.
+ */
+class SimpleCURLClient
+{
+    private const MAX_BUFFER_SIZE = 1024 * 1024;
+
+    public $context;
+
+    private CurlHandle $ch;
+
+    /** Body chunks pushed by writeFunction(); a null item marks the end of the transfer. */
+    private Channel $writeChannel;
+
+    /** Receives true once response headers are complete, false if the transfer fails first. */
+    private Channel $headerChannel;
+
+    /** Bytes already popped from the write channel but not yet returned by stream_read(). */
+    private string $remaining = '';
+
+    private bool $eof = false;
+
+    private array $options = [];
+
+    /** Response headers keyed by lower-cased name; a repeated header keeps its last value. */
+    private array $responseHeaders = [];
+
+    private int $statusCode = 0;
+
+    private ?string $curlError = null;
+
+    private int $curlErrorCode = 0;
+
+    private bool $headersReceived = false;
+
+    public function __construct()
+    {
+        $this->writeChannel = new Channel(100);
+        $this->headerChannel = new Channel(1);
+    }
+
+    public function __destruct()
+    {
+        $this->stream_close();
+    }
+
+    /**
+     * Open the wrapper: decode the JSON options in $path, start the POST and wait for the headers.
+     *
+     * @throws LLMReadTimeoutException|LLMConnectionTimeoutException when the headers never arrive
+     */
+    public function stream_open(string $path, string $mode, int $options, ?string &$opened_path): bool
+    {
+        $decoded = json_decode(substr($path, strlen('OdinSimpleCurl://')), true);
+        if (! is_array($decoded)) {
+            // Report "cannot open" instead of a TypeError on the array-typed property.
+            return false;
+        }
+        $this->options = $decoded;
+
+        $this->ch = curl_init($this->options['url']);
+
+        $headers = [];
+        $hasContentType = false;
+        if (isset($this->options['headers']) && is_array($this->options['headers'])) {
+            foreach ($this->options['headers'] as $key => $value) {
+                $headers[] = $key . ': ' . $value;
+                if (strtolower($key) === 'content-type') {
+                    $hasContentType = true;
+                }
+            }
+        }
+
+        if (! $hasContentType) {
+            $headers[] = 'Content-Type: application/json';
+        }
+
+        $postData = $this->options['body']
+            ?? (isset($this->options['json']) ? json_encode($this->options['json']) : '');
+
+        // VERIFYHOST accepts only 0 or 2, so map the "verify" switch instead of passing it through.
+        $verify = (bool) ($this->options['verify'] ?? true);
+
+        curl_setopt_array($this->ch, [
+            CURLOPT_POST => 1,
+            CURLOPT_HTTPHEADER => $headers,
+            CURLOPT_BUFFERSIZE => 0,
+            CURLOPT_HEADERFUNCTION => [$this, 'headerFunction'],
+            CURLOPT_WRITEFUNCTION => [$this, 'writeFunction'],
+            CURLOPT_POSTFIELDS => $postData,
+
+            CURLOPT_CONNECTTIMEOUT => $this->options['connect_timeout'] ?? 30,
+            CURLOPT_TIMEOUT => 0,
+            CURLOPT_LOW_SPEED_LIMIT => 1,
+            CURLOPT_LOW_SPEED_TIME => $this->options['stream_chunk'] ?? 120,
+
+            CURLOPT_SSL_VERIFYPEER => $verify,
+            CURLOPT_SSL_VERIFYHOST => $verify ? 2 : 0,
+        ]);
+
+        if (isset($this->options['proxy'])) {
+            curl_setopt($this->ch, CURLOPT_PROXY, $this->options['proxy']);
+        }
+
+        // Runs the transfer, then signals the outcome on the header channel and the write channel.
+        $curlExecutor = function () {
+            try {
+                $startTime = microtime(true);
+                $result = curl_exec($this->ch);
+                $elapsed = microtime(true) - $startTime;
+
+                if ($result === false) {
+                    $this->curlError = curl_error($this->ch);
+                    $this->curlErrorCode = curl_errno($this->ch);
+                    $this->log('curl_exec执行失败', [
+                        'error' => $this->curlError,
+                        'error_code' => $this->curlErrorCode,
+                        'elapsed' => $elapsed,
+                    ]);
+                } elseif (! $this->headersReceived) {
+                    $this->curlError = 'No HTTP response received (headers incomplete)';
+                    $this->curlErrorCode = 0;
+                    $this->log('curl_exec成功但响应头不完整', ['elapsed' => $elapsed]);
+                }
+
+                if (! $this->headersReceived) {
+                    $this->headerChannel->push(false);
+                }
+                $this->writeChannel->push(null);
+            } catch (Throwable $e) {
+                $this->curlError = $e->getMessage();
+                // getCode() may return a non-int in Exception subclasses; the property is int-typed.
+                $this->curlErrorCode = (int) $e->getCode();
+                $this->log('curl_exec协程异常', [
+                    'error' => $e->getMessage(),
+                    'code' => $e->getCode(),
+                    'trace' => $e->getTraceAsString(),
+                ]);
+                if (! $this->headersReceived) {
+                    $this->headerChannel->push(false);
+                }
+                $this->writeChannel->push(null);
+            } finally {
+                if (isset($this->ch)) {
+                    curl_close($this->ch);
+                }
+            }
+        };
+
+        if ($this->isCoroutineAvailable()) {
+            Coroutine::create($curlExecutor);
+        } else {
+            call_user_func($curlExecutor);
+        }
+
+        $headerTimeout = $this->options['header_timeout'] ?? 60;
+        $headerReceived = $this->headerChannel->pop($headerTimeout);
+
+        if ($headerReceived === false) {
+            $this->stream_close();
+            if ($this->curlError) {
+                $previous = new RuntimeException($this->curlError, $this->curlErrorCode);
+
+                if ($this->curlErrorCode === 28) {
+                    throw new LLMReadTimeoutException("Connection timeout: {$this->curlError}", $previous);
+                }
+
+                throw new LLMConnectionTimeoutException(
+                    "cURL error ({$this->curlErrorCode}): {$this->curlError}",
+                    $previous
+                );
+            }
+
+            throw new LLMConnectionTimeoutException(
+                "Connection timeout: Failed to receive HTTP headers within {$headerTimeout} seconds",
+                new RuntimeException('Failed to receive HTTP headers within timeout'),
+                (float) $headerTimeout
+            );
+        }
+
+        return true;
+    }
+
+    /**
+     * Return up to $length bytes: buffered leftovers first, else the next chunk from the write channel.
+     *
+     * @return false|string false when the pop yields no chunk, '' once the end marker arrives
+     * @throws LLMNetworkException when a single chunk is larger than MAX_BUFFER_SIZE
+     */
+    public function stream_read(int $length): false|string
+    {
+        // Test against '' explicitly: a leftover of exactly "0" is falsy and would be dropped.
+        if ($this->remaining !== '') {
+            $ret = substr($this->remaining, 0, $length);
+            $this->remaining = substr($this->remaining, $length);
+            return $ret;
+        }
+
+        $chunkTimeout = $this->options['stream_chunk'] ?? 120;
+        $startTime = microtime(true);
+        $data = $this->writeChannel->pop(timeout: $chunkTimeout);
+        $elapsed = microtime(true) - $startTime;
+
+        if ($data === false) {
+            $this->log('Channel读取超时', [
+                'requested_length' => $length,
+                'timeout' => $chunkTimeout,
+                'elapsed' => $elapsed,
+                'eof' => $this->eof,
+                'remaining_buffer' => substr($this->remaining, 0, 200),
+            ]);
+            return false;
+        }
+
+        if ($data === null) {
+            $this->eof = true;
+            return '';
+        }
+
+        $dataLength = strlen($data);
+        if ($dataLength > self::MAX_BUFFER_SIZE) {
+            $this->log('缓冲区溢出', [
+                'received_length' => $dataLength,
+                'max_buffer_size' => self::MAX_BUFFER_SIZE,
+                'data_preview' => substr($data, 0, 500),
+            ]);
+            throw new LLMNetworkException('Buffer overflow: received chunk larger than MAX_BUFFER_SIZE');
+        }
+
+        $ret = substr($data, 0, $length);
+        $this->remaining = substr($data, $length);
+        return $ret;
+    }
+
+    public function stream_eof(): bool
+    {
+        return $this->eof;
+    }
+
+    /** Close both channels; also called on a failed open and from the destructor. */
+    public function stream_close(): void
+    {
+        if (isset($this->writeChannel)) {
+            $this->writeChannel->close();
+        }
+        if (isset($this->headerChannel)) {
+            $this->headerChannel->close();
+        }
+    }
+
+    /**
+     * CURLOPT_WRITEFUNCTION callback: push $data to the write channel; returns its length, or 0 on failure.
+     */
+    public function writeFunction(CurlHandle $ch, $data): int
+    {
+        $dataLength = strlen($data);
+
+        try {
+            $result = $this->writeChannel->push($data, timeout: 60);
+
+            if ($result === false) {
+                $this->curlError = 'Channel push timeout: consumer not reading data';
+                $this->curlErrorCode = CURLE_WRITE_ERROR;
+                $this->log('推送数据到Channel超时', [
+                    'data_length' => $dataLength,
+                    'data_preview' => substr($data, 0, 200),
+                ]);
+                return 0;
+            }
+
+            return $dataLength;
+        } catch (Throwable $e) {
+            $this->curlError = 'Channel push error: ' . $e->getMessage();
+            $this->curlErrorCode = CURLE_WRITE_ERROR;
+            $this->log('推送数据到Channel异常', [
+                'data_length' => $dataLength,
+                'data_preview' => substr($data, 0, 200),
+                'error' => $e->getMessage(),
+                'code' => $e->getCode(),
+            ]);
+            return 0;
+        }
+    }
+
+    /**
+     * CURLOPT_HEADERFUNCTION callback: collect header lines and, at the blank line that ends a
+     * header block, record the status code and signal stream_open() once a final status is known.
+     */
+    public function headerFunction(CurlHandle $ch, $header): int
+    {
+        $len = strlen($header);
+
+        if (trim($header) !== '') {
+            $headerParts = explode(':', $header, 2);
+            if (count($headerParts) === 2) {
+                $this->responseHeaders[strtolower(trim($headerParts[0]))] = trim($headerParts[1]);
+            }
+            return $len;
+        }
+
+        $this->statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+        $interim = $this->statusCode >= 100 && $this->statusCode < 200 && $this->statusCode !== 101;
+
+        if ($this->statusCode <= 0 || $interim) {
+            // No final response yet (e.g. "100 Continue"): discard this block and keep waiting.
+            $this->responseHeaders = [];
+        } elseif (! $this->headersReceived) {
+            // Signal only once per transfer; later blank lines must not push again.
+            $this->headersReceived = true;
+            $this->headerChannel->push(true);
+        }
+
+        return $len;
+    }
+
+    /** Constant stat array: mode 33206, blksize and blocks -1, every other field 0. */
+    public function stream_stat(): array|false
+    {
+        return [
+            'dev' => 0, 'ino' => 0, 'mode' => 33206, 'nlink' => 0, 'uid' => 0, 'gid' => 0, 'rdev' => 0,
+            'size' => 0, 'atime' => 0, 'mtime' => 0, 'ctime' => 0, 'blksize' => -1, 'blocks' => -1,
+        ];
+    }
+
+    /** Response snapshot: "headers" and "http_code", plus "error"/"error_code" once an error is recorded. */
+    public function stream_metadata(): array
+    {
+        $metadata = [
+            'headers' => $this->responseHeaders,
+            'http_code' => $this->statusCode,
+        ];
+
+        if ($this->curlError) {
+            $metadata['error'] = $this->curlError;
+            $metadata['error_code'] = $this->curlErrorCode;
+        }
+
+        return $metadata;
+    }
+
+    /** Write an info-level entry tagged with the coroutine id; does nothing when no logger is returned. */
+    private function log(string $message, array $context = []): void
+    {
+        $logger = LogUtil::getHyperfLogger();
+        if (! $logger) {
+            return;
+        }
+
+        $context['coroutine_id'] = $this->getCurrentCoroutineId();
+        $logger->info('[SimpleCURLClient] ' . $message, $context);
+    }
+
+    /** True when the Coroutine class and its create() method exist. */
+    private function isCoroutineAvailable(): bool
+    {
+        return class_exists(Coroutine::class) && method_exists(Coroutine::class, 'create');
+    }
+
+    /** Current coroutine id, or -1 when the Coroutine class or its id() method is missing. */
+    private function getCurrentCoroutineId(): int
+    {
+        $hasId = class_exists(Coroutine::class) && method_exists(Coroutine::class, 'id');
+        return $hasId ? Coroutine::id() : -1;
+    }
+}
diff --git a/src/Api/Transport/StreamExceptionDetector.php b/src/Api/Transport/StreamExceptionDetector.php
index de7f895..4671f2f 100644
--- a/src/Api/Transport/StreamExceptionDetector.php
+++ b/src/Api/Transport/StreamExceptionDetector.php
@@ -16,39 +16,22 @@
 use Hyperf\Odin\Exception\LLMException\Network\LLMThinkingStreamTimeoutException;
 use Psr\Log\LoggerInterface;
 
-/**
- * 流式响应异常检测器.
- */
 class StreamExceptionDetector
 {
-    /**
-     * 初始化时间戳.
-     */
     private float $startTime;
 
-    /**
-     * 上一个块接收时间戳.
-     */
     private float $lastChunkTime;
 
-    /**
-     * 是否已接收第一个块.
-     */
     private bool $firstChunkReceived = false;
 
-    /**
-     * 超时配置.
-     */
     private array $timeoutConfig;
 
-    /**
-     * 日志记录器.
-     */
     private ?LoggerInterface $logger;
 
-    /**
-     * 构造函数.
-     */
+    private ?array $lastChunkInfo = null;
+
+    private int $totalChunksReceived = 0;
+
     public function __construct(array $timeoutConfig, ?LoggerInterface $logger = null)
     {
         $this->startTime = microtime(true);
@@ -57,54 +40,68 @@ public function __construct(array $timeoutConfig, ?LoggerInterface $logger = nul
         $this->logger = $logger;
     }
 
-    /**
-     * 检测超时情况.
-     *
-     * @throws LLMStreamTimeoutException 流式响应超时
-     * @throws LLMThinkingStreamTimeoutException 思考阶段超时
-     */
     public function checkTimeout(): void
     {
         $now = microtime(true);
         $elapsedTotal = $now - $this->startTime;
 
-        // 检查总体超时
         if ($elapsedTotal > $this->timeoutConfig['total']) {
-            $this->logger?->warning('Stream total timeout detected', [
+            $debugInfo = [
                 'elapsed' => $elapsedTotal,
                 'timeout' => $this->timeoutConfig['total'],
-            ]);
+                'total_chunks_received' => $this->totalChunksReceived,
+                'time_since_last_chunk' => $this->firstChunkReceived ? $now - $this->lastChunkTime : null,
+                'last_chunk_info' => $this->lastChunkInfo,
+            ];
+
+            $this->logger?->warning('检测到流式响应总体超时', $debugInfo);
+
+            $message = sprintf('流式响应总体超时，已经等待 %.2f 秒', $elapsedTotal);
+
             throw new LLMStreamTimeoutException(
-                sprintf('流式响应总体超时，已经等待 %.2f 秒', $elapsedTotal),
+                $message,
                 null,
                 'total',
                 $elapsedTotal
             );
         }
 
-        // 如果尚未收到第一个块，检查思考超时
         if (! $this->firstChunkReceived) {
             if ($elapsedTotal > $this->timeoutConfig['stream_first']) {
-                $this->logger?->warning('Stream first chunk timeout detected', [
+                $debugInfo = [
                     'elapsed' => $elapsedTotal,
                     'timeout' => $this->timeoutConfig['stream_first'],
-                ]);
+                    'total_chunks_received' => $this->totalChunksReceived,
+                    'waiting_for_first_chunk' => true,
+                ];
+
+                $this->logger?->warning('检测到等待首个流式响应块超时', $debugInfo);
+
+                $message = sprintf('等待首个流式响应块超时，已经等待 %.2f 秒', $elapsedTotal);
+
                 throw new LLMThinkingStreamTimeoutException(
-                    sprintf('等待首个流式响应块超时，已经等待 %.2f 秒', $elapsedTotal),
+                    $message,
                     null,
                     $elapsedTotal
                 );
             }
         } else {
-            // 如果已收到第一个块，检查块间超时
             $elapsedSinceLastChunk = $now - $this->lastChunkTime;
             if ($elapsedSinceLastChunk > $this->timeoutConfig['stream_chunk']) {
-                $this->logger?->warning('Stream chunk interval timeout detected', [
+                $debugInfo = [
                     'elapsed_since_last' => $elapsedSinceLastChunk,
                     'timeout' => $this->timeoutConfig['stream_chunk'],
-                ]);
+                    'total_chunks_received' => $this->totalChunksReceived,
+                    'total_elapsed_time' => $now - $this->startTime,
+                    'last_chunk_info' => $this->lastChunkInfo,
+                ];
+
+                $this->logger?->warning('检测到流式响应块间隔超时', $debugInfo);
+
+                $message = sprintf('流式响应块间超时，已经等待 %.2f 秒', $elapsedSinceLastChunk);
+
                 throw new LLMStreamTimeoutException(
-                    sprintf('流式响应块间超时，已经等待 %.2f 秒', $elapsedSinceLastChunk),
+                    $message,
                     null,
                     'chunk_interval',
                     $elapsedSinceLastChunk
@@ -113,28 +110,27 @@ public function checkTimeout(): void
         }
     }
 
-    /**
-     * 接收到块后调用此方法更新时间戳.
-     */
-    public function onChunkReceived(): void
+    public function onChunkReceived(array $chunkInfo = []): void
     {
         $this->lastChunkTime = microtime(true);
+        ++$this->totalChunksReceived;
+
+        $this->lastChunkInfo = [
+            'chunk_number' => $this->totalChunksReceived,
+            'timestamp' => $this->lastChunkTime,
+            'time_since_start' => $this->lastChunkTime - $this->startTime,
+            'chunk_data' => $chunkInfo,
+        ];
+
         if (! $this->firstChunkReceived) {
             $this->firstChunkReceived = true;
-            $initialResponseTime = $this->lastChunkTime - $this->startTime;
-            $this->logger?->debug('First chunk received', [
-                'initial_response_time' => $initialResponseTime,
-            ]);
         }
     }
 
-    /**
-     * 规范化超时配置，设置默认值.
-     */
     private function normalizeTimeoutConfig(array $config): array
     {
         return [
-            'total' => $config['total'] ?? 300.0,
+            'total' => $config['stream_total'] ?? $config['total'] ?? 600.0,
             'stream_first' => $config['stream_first'] ?? 60.0,
             'stream_chunk' => $config['stream_chunk'] ?? 30.0,
         ];
diff --git a/src/ConfigProvider.php b/src/ConfigProvider.php
index ae7fbeb..49265ec 100644
--- a/src/ConfigProvider.php
+++ b/src/ConfigProvider.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin;
 
+use Hyperf\Odin\Event\EventCallbackListener;
 use Hyperf\Odin\VectorStore\Qdrant\Qdrant;
 use Hyperf\Odin\VectorStore\Qdrant\QdrantFactory;
 
@@ -31,6 +32,9 @@ public function __invoke(): array
             'dependencies' => [
                 Qdrant::class => QdrantFactory::class,
             ],
+            'listeners' => [
+                EventCallbackListener::class,
+            ],
         ];
     }
 }
diff --git a/src/Event/AfterChatCompletionsEvent.php b/src/Event/AfterChatCompletionsEvent.php
index 96c68c7..8d8bf8c 100644
--- a/src/Event/AfterChatCompletionsEvent.php
+++ b/src/Event/AfterChatCompletionsEvent.php
@@ -23,6 +23,11 @@ class AfterChatCompletionsEvent
 
     public float $duration;
 
+    /**
+     * @var callable[]
+     */
+    private array $callbacks = [];
+
     public function __construct(
         ChatCompletionRequest $completionRequest,
         ?ChatCompletionResponse $completionResponse,
@@ -33,6 +38,29 @@ public function __construct(
         $this->duration = $duration;
     }
 
+    /**
+     * Append a callback to this event's callback list.
+     */
+    public function addCallback(callable $callback): void
+    {
+        $this->callbacks[] = $callback;
+    }
+
+    /**
+     * Get all callbacks currently registered on this event.
+     *
+     * @return callable[]
+     */
+    public function getCallbacks(): array
+    {
+        return $this->callbacks;
+    }
+
+    public function clearCallbacks(): void // Resets the callback list to an empty array.
+    {
+        $this->callbacks = [];
+    }
+
     public function getCompletionRequest(): ChatCompletionRequest
     {
         return $this->completionRequest;
diff --git a/src/Event/EventCallbackListener.php b/src/Event/EventCallbackListener.php
new file mode 100644
index 0000000..1eb8950
--- /dev/null
+++ b/src/Event/EventCallbackListener.php
@@ -0,0 +1,70 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Event;
+
+use Hyperf\Event\Annotation\Listener;
+use Hyperf\Event\Contract\ListenerInterface;
+use Psr\Container\ContainerInterface;
+use Psr\Log\LoggerInterface;
+use Throwable;
+
+/**
+ * Event callback listener.
+ * Listens for request-completed events and runs the callbacks registered on the event.
+ * Supports feature extensions for all providers (caching, statistics, etc.).
+ */
+#[Listener(priority: 1000)]
+class EventCallbackListener implements ListenerInterface
+{
+    protected LoggerInterface $logger;
+
+    public function __construct(protected ContainerInterface $container)
+    {
+        $this->logger = $this->container->get(LoggerInterface::class);
+    }
+
+    public function listen(): array
+    {
+        return [
+            AfterChatCompletionsEvent::class,
+            AfterChatCompletionsStreamEvent::class,
+        ];
+    }
+
+    public function process(object $event): void
+    {
+        if ($event instanceof AfterChatCompletionsEvent) { // NOTE(review): the stream event is handled only if it is an instance of AfterChatCompletionsEvent (presumably a subclass) — confirm
+            $this->handleCallbacks($event);
+        }
+    }
+
+    /**
+     * Run the callbacks registered on the event, then clear them from the event.
+     */
+    public function handleCallbacks(AfterChatCompletionsEvent $event): void
+    {
+        // Invoke each registered callback with the event; a failure is logged and the remaining callbacks still run
+        foreach ($event->getCallbacks() as $callback) {
+            try {
+                $callback($event);
+            } catch (Throwable $e) {
+                $this->logger->error('Event callback execution failed: ' . $e->getMessage(), [
+                    'exception' => $e,
+                ]);
+                continue;
+            }
+        }
+        // Clean up: remove the callbacks from the event once all of them have been attempted
+        $event->clearCallbacks();
+    }
+}
diff --git a/src/Exception/LLMException/Api/LLMInvalidRequestException.php b/src/Exception/LLMException/Api/LLMInvalidRequestException.php
index 2acb9d4..8a0b8b2 100644
--- a/src/Exception/LLMException/Api/LLMInvalidRequestException.php
+++ b/src/Exception/LLMException/Api/LLMInvalidRequestException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Api;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMApiException;
 use Throwable;
 
@@ -30,23 +31,28 @@ class LLMInvalidRequestException extends LLMApiException
      */
     protected ?array $invalidFields = null;
 
+    /**
+     * Raw error details returned by the provider.
+     */
+    protected ?array $providerErrorDetails = null;
+
     /**
      * 创建一个新的无效请求异常实例.
      */
     public function __construct(
-        string $message = '无效的API请求',
+        string $message = ErrorMessage::INVALID_REQUEST,
         ?Throwable $previous = null,
         ?int $statusCode = 400,
-        ?array $invalidFields = null
+        ?array $invalidFields = null,
+        ?array $providerErrorDetails = null
     ) {
         $this->invalidFields = $invalidFields;
+        $this->providerErrorDetails = $providerErrorDetails;
 
-        if (! empty($invalidFields)) {
-            $fieldsStr = implode(', ', array_keys($invalidFields));
-            $message = sprintf('%s，问题字段: %s', $message, $fieldsStr);
-        }
+        // Build the detailed error message
+        $detailedMessage = $this->buildDetailedMessage($message, $invalidFields, $providerErrorDetails);
 
-        parent::__construct($message, self::ERROR_CODE, $previous, 0, $statusCode);
+        parent::__construct($detailedMessage, self::ERROR_CODE, $previous, 0, $statusCode);
     }
 
     /**
@@ -56,4 +62,49 @@ public function getInvalidFields(): ?array
     {
         return $this->invalidFields;
     }
+
+    /**
+     * Get the raw error details returned by the provider.
+     */
+    public function getProviderErrorDetails(): ?array
+    {
+        return $this->providerErrorDetails;
+    }
+
+    /**
+     * Build the detailed error message from the base message, invalid fields and provider details.
+     */
+    private function buildDetailedMessage(string $baseMessage, ?array $invalidFields, ?array $providerErrorDetails): string
+    {
+        $message = $baseMessage;
+
+        // If there are invalid fields, append their names (the array keys) to the message
+        if (! empty($invalidFields)) {
+            $fieldsStr = implode(', ', array_keys($invalidFields));
+            $message = sprintf('%s, invalid fields: %s', $message, $fieldsStr);
+        }
+
+        // If provider error details were supplied, append their code / message / type to the message
+        if (! empty($providerErrorDetails)) {
+            $providerDetails = [];
+
+            if (isset($providerErrorDetails['code'])) {
+                $providerDetails[] = sprintf('code: %s', $providerErrorDetails['code']);
+            }
+
+            if (isset($providerErrorDetails['message'])) {
+                $providerDetails[] = sprintf('message: %s', $providerErrorDetails['message']);
+            }
+
+            if (isset($providerErrorDetails['type'])) {
+                $providerDetails[] = sprintf('type: %s', $providerErrorDetails['type']);
+            }
+
+            if (! empty($providerDetails)) {
+                $message .= ', error details: [' . implode(', ', $providerDetails) . ']';
+            }
+        }
+
+        return $message;
+    }
 }
diff --git a/src/Exception/LLMException/Api/LLMRateLimitException.php b/src/Exception/LLMException/Api/LLMRateLimitException.php
index 8b8ab7e..0ae2b5a 100644
--- a/src/Exception/LLMException/Api/LLMRateLimitException.php
+++ b/src/Exception/LLMException/Api/LLMRateLimitException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Api;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMApiException;
 use Throwable;
 
@@ -34,7 +35,7 @@ class LLMRateLimitException extends LLMApiException
      * 创建一个新的速率限制异常实例.
      */
     public function __construct(
-        string $message = 'API请求频率超出限制',
+        string $message = ErrorMessage::RATE_LIMIT,
         ?Throwable $previous = null,
         ?int $statusCode = 429,
         ?int $retryAfter = null
@@ -42,7 +43,7 @@ public function __construct(
         $this->retryAfter = $retryAfter;
 
         if ($retryAfter !== null) {
-            $message = sprintf('%s，建议 %d 秒后重试', $message, $retryAfter);
+            $message = sprintf('%s, retry after %d seconds', $message, $retryAfter);
         }
 
         parent::__construct($message, self::ERROR_CODE, $previous, 0, $statusCode);
diff --git a/src/Exception/LLMException/Configuration/LLMInvalidApiKeyException.php b/src/Exception/LLMException/Configuration/LLMInvalidApiKeyException.php
index 92c9bf0..544ed82 100644
--- a/src/Exception/LLMException/Configuration/LLMInvalidApiKeyException.php
+++ b/src/Exception/LLMException/Configuration/LLMInvalidApiKeyException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Configuration;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMConfigurationException;
 use Throwable;
 
@@ -28,7 +29,7 @@ class LLMInvalidApiKeyException extends LLMConfigurationException
     /**
      * 创建一个新的无效API密钥异常实例.
      */
-    public function __construct(string $message = '无效的API密钥或API密钥缺失', ?Throwable $previous = null, string $provider = '')
+    public function __construct(string $message = ErrorMessage::INVALID_API_KEY, ?Throwable $previous = null, string $provider = '')
     {
         $message = $provider ? sprintf('[%s] %s', $provider, $message) : $message;
         parent::__construct($message, self::ERROR_CODE, $previous, 0, 401);
diff --git a/src/Exception/LLMException/Configuration/LLMInvalidEndpointException.php b/src/Exception/LLMException/Configuration/LLMInvalidEndpointException.php
index 61df0ee..dba49dd 100644
--- a/src/Exception/LLMException/Configuration/LLMInvalidEndpointException.php
+++ b/src/Exception/LLMException/Configuration/LLMInvalidEndpointException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Configuration;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMConfigurationException;
 use Throwable;
 
@@ -33,7 +34,7 @@ class LLMInvalidEndpointException extends LLMConfigurationException
     /**
      * 创建一个新的无效终端点异常实例.
      */
-    public function __construct(string $message = '无效的API终端点URL', ?Throwable $previous = null, ?string $endpoint = null)
+    public function __construct(string $message = ErrorMessage::INVALID_ENDPOINT, ?Throwable $previous = null, ?string $endpoint = null, int $statusCode = 400)
     {
         $this->endpoint = $endpoint;
 
@@ -41,7 +42,7 @@ public function __construct(string $message = '无效的API终端点URL', ?Throw
             $message = sprintf('%s: %s', $message, $endpoint);
         }
 
-        parent::__construct($message, self::ERROR_CODE, $previous);
+        parent::__construct($message, self::ERROR_CODE, $previous, 0, $statusCode);
     }
 
     /**
diff --git a/src/Exception/LLMException/ErrorCode.php b/src/Exception/LLMException/ErrorCode.php
index 82e404b..002b1ad 100644
--- a/src/Exception/LLMException/ErrorCode.php
+++ b/src/Exception/LLMException/ErrorCode.php
@@ -13,12 +13,12 @@
 namespace Hyperf\Odin\Exception\LLMException;
 
 /**
- * LLM错误码定义.
+ * LLM error code definitions.
  */
 class ErrorCode
 {
     /**
-     * 错误类型基数.
+     * Error type base values.
      */
     public const CONFIG_ERROR_BASE = 1000;
 
@@ -85,78 +85,78 @@ class ErrorCode
     public const MODEL_EMBEDDING_INPUT_TOO_LARGE = self::MODEL_ERROR_BASE + 7;
 
     /**
-     * 错误码映射表.
+     * Error code mapping table.
      */
     public static function getErrorMessages(): array
     {
         return [
-            // 配置错误
-            self::CONFIG_INVALID_API_KEY => '无效的API密钥或API密钥缺失',
-            self::CONFIG_INVALID_ENDPOINT => '无效的API终端点URL',
-            self::CONFIG_INVALID_MODEL => '无效的模型名称或模型不可用',
-            self::CONFIG_INVALID_PARAMETER => '无效的配置参数',
-
-            // 网络错误
-            self::NETWORK_CONNECTION_TIMEOUT => '连接LLM服务超时',
-            self::NETWORK_READ_TIMEOUT => '从LLM服务读取响应超时',
-            self::NETWORK_WRITE_TIMEOUT => '向LLM服务发送请求超时',
-            self::NETWORK_CONNECTION_ERROR => '连接LLM服务失败',
-            self::NETWORK_SSL_ERROR => 'SSL/TLS连接错误',
-
-            // API错误
-            self::API_RATE_LIMIT => 'API请求频率超出限制',
-            self::API_INVALID_REQUEST => '无效的API请求',
-            self::API_SERVER_ERROR => 'LLM服务端错误',
-            self::API_AUTHENTICATION_ERROR => 'API认证失败',
-            self::API_PERMISSION_DENIED => 'API权限不足',
-            self::API_QUOTA_EXCEEDED => 'API配额已用尽',
-
-            // 模型错误
-            self::MODEL_CONTENT_FILTER => '内容被系统安全过滤',
-            self::MODEL_CONTEXT_LENGTH => '上下文长度超出模型限制',
-            self::MODEL_FUNCTION_CALL_NOT_SUPPORTED => '模型不支持函数调用功能',
-            self::MODEL_MULTI_MODAL_NOT_SUPPORTED => '模型不支持多模态输入',
-            self::MODEL_EMBEDDING_NOT_SUPPORTED => '模型不支持嵌入向量生成',
-            self::MODEL_IMAGE_URL_ACCESS_ERROR => '多模态图片URL不可访问',
-            self::MODEL_EMBEDDING_INPUT_TOO_LARGE => '嵌入请求输入内容过大，超出模型处理限制',
+            // Configuration errors
+            self::CONFIG_INVALID_API_KEY => ErrorMessage::INVALID_API_KEY,
+            self::CONFIG_INVALID_ENDPOINT => ErrorMessage::INVALID_ENDPOINT,
+            self::CONFIG_INVALID_MODEL => ErrorMessage::INVALID_MODEL,
+            self::CONFIG_INVALID_PARAMETER => ErrorMessage::INVALID_PARAMETER,
+
+            // Network errors
+            self::NETWORK_CONNECTION_TIMEOUT => ErrorMessage::CONNECTION_TIMEOUT,
+            self::NETWORK_READ_TIMEOUT => ErrorMessage::READ_TIMEOUT,
+            self::NETWORK_WRITE_TIMEOUT => ErrorMessage::WRITE_TIMEOUT,
+            self::NETWORK_CONNECTION_ERROR => ErrorMessage::CONNECTION_ERROR,
+            self::NETWORK_SSL_ERROR => ErrorMessage::SSL_ERROR,
+
+            // API errors
+            self::API_RATE_LIMIT => ErrorMessage::RATE_LIMIT,
+            self::API_INVALID_REQUEST => ErrorMessage::INVALID_REQUEST,
+            self::API_SERVER_ERROR => ErrorMessage::SERVER_ERROR,
+            self::API_AUTHENTICATION_ERROR => ErrorMessage::AUTHENTICATION_ERROR,
+            self::API_PERMISSION_DENIED => ErrorMessage::PERMISSION_DENIED,
+            self::API_QUOTA_EXCEEDED => ErrorMessage::QUOTA_EXCEEDED,
+
+            // Model errors
+            self::MODEL_CONTENT_FILTER => ErrorMessage::CONTENT_FILTER,
+            self::MODEL_CONTEXT_LENGTH => ErrorMessage::CONTEXT_LENGTH,
+            self::MODEL_FUNCTION_CALL_NOT_SUPPORTED => ErrorMessage::FUNCTION_NOT_SUPPORTED,
+            self::MODEL_MULTI_MODAL_NOT_SUPPORTED => ErrorMessage::MULTIMODAL_NOT_SUPPORTED,
+            self::MODEL_EMBEDDING_NOT_SUPPORTED => ErrorMessage::EMBEDDING_NOT_SUPPORTED,
+            self::MODEL_IMAGE_URL_ACCESS_ERROR => ErrorMessage::IMAGE_URL_ACCESS,
+            self::MODEL_EMBEDDING_INPUT_TOO_LARGE => ErrorMessage::EMBEDDING_INPUT_TOO_LARGE,
         ];
     }
 
     /**
-     * 获取错误提示消息.
+     * Get error message.
      */
     public static function getMessage(int $code): string
     {
         $messages = self::getErrorMessages();
-        return $messages[$code] ?? '未知错误';
+        return $messages[$code] ?? ErrorMessage::UNKNOWN_ERROR;
     }
 
     /**
-     * 获取错误建议.
+     * Get error suggestion.
      */
     public static function getSuggestion(int $code): string
     {
         $suggestions = [
-            // 配置错误建议
-            self::CONFIG_INVALID_API_KEY => '请检查API密钥是否正确配置，或联系服务提供商获取有效的API密钥',
-            self::CONFIG_INVALID_ENDPOINT => '请检查API终端点URL是否正确，确保包含协议前缀(http/https)',
-            self::CONFIG_INVALID_MODEL => '请检查模型名称是否正确，或查询可用的模型列表',
-
-            // 网络错误建议
-            self::NETWORK_CONNECTION_TIMEOUT => '请检查网络连接或增加连接超时时间，稍后重试',
-            self::NETWORK_READ_TIMEOUT => '请增加读取超时时间或减少请求复杂度，稍后重试',
-
-            // API错误建议
-            self::API_RATE_LIMIT => '请降低请求频率，实现请求节流或等待后重试',
-            self::API_QUOTA_EXCEEDED => '请检查账户额度或升级账户计划',
-
-            // 模型错误建议
-            self::MODEL_CONTEXT_LENGTH => '请减少输入内容长度，或使用支持更长上下文的模型',
-            self::MODEL_FUNCTION_CALL_NOT_SUPPORTED => '请选择支持函数调用功能的模型',
-            self::MODEL_MULTI_MODAL_NOT_SUPPORTED => '请选择支持多模态输入的模型',
-            self::MODEL_IMAGE_URL_ACCESS_ERROR => '请检查图片URL是否正确、可公开访问，并确保图片格式受支持',
+            // Configuration error suggestions
+            self::CONFIG_INVALID_API_KEY => 'Please check your API key configuration or contact the service provider for a valid API key',
+            self::CONFIG_INVALID_ENDPOINT => 'Please verify the API endpoint URL is correct and includes the protocol prefix (http/https)',
+            self::CONFIG_INVALID_MODEL => 'Please verify the model name is correct or check the list of available models',
+
+            // Network error suggestions
+            self::NETWORK_CONNECTION_TIMEOUT => 'Please check your network connection or increase the connection timeout, then retry',
+            self::NETWORK_READ_TIMEOUT => 'Please increase the read timeout or reduce request complexity, then retry',
+
+            // API error suggestions
+            self::API_RATE_LIMIT => 'Please reduce request frequency, implement rate limiting, or wait before retrying',
+            self::API_QUOTA_EXCEEDED => 'Please check your account quota or upgrade your account plan',
+
+            // Model error suggestions
+            self::MODEL_CONTEXT_LENGTH => 'Please reduce input length or use a model that supports longer context',
+            self::MODEL_FUNCTION_CALL_NOT_SUPPORTED => 'Please select a model that supports function calling',
+            self::MODEL_MULTI_MODAL_NOT_SUPPORTED => 'Please select a model that supports multimodal input',
+            self::MODEL_IMAGE_URL_ACCESS_ERROR => 'Please verify the image URL is correct, publicly accessible, and in a supported format',
         ];
 
-        return $suggestions[$code] ?? '请检查输入参数和配置，如问题持续存在请联系技术支持';
+        return $suggestions[$code] ?? 'Please check input parameters and configuration. If the issue persists, contact technical support';
     }
 }
diff --git a/src/Exception/LLMException/ErrorMapping.php b/src/Exception/LLMException/ErrorMapping.php
index c2d2949..c81a2d5 100644
--- a/src/Exception/LLMException/ErrorMapping.php
+++ b/src/Exception/LLMException/ErrorMapping.php
@@ -44,9 +44,9 @@ class ErrorMapping
     public static function getDefaultMapping(): array
     {
         return [
-            // 连接超时异常
+            // Connection timeout exception
             ConnectException::class => [
-                // 连接超时异常
+                // Connection timeout exception
                 [
                     'regex' => '/timeout|timed\s+out/i',
                     'factory' => function (Throwable $e) {
@@ -54,31 +54,32 @@ public static function getDefaultMapping(): array
                         // 尝试从消息中提取超时时间
                         preg_match('/(\d+(?:\.\d+)?)\s*s/i', $message, $matches);
                         $timeout = isset($matches[1]) ? (float) $matches[1] : null;
-                        return new LLMConnectionTimeoutException('连接LLM服务超时', $e, $timeout);
+                        $statusCode = ($e instanceof RequestException && $e->getResponse()) ? $e->getResponse()->getStatusCode() : 408;
+                        return new LLMConnectionTimeoutException(ErrorMessage::CONNECTION_TIMEOUT, $e, $timeout, $statusCode);
                     },
                 ],
-                // 无法解析主机名异常
+                // Unable to resolve hostname exception
                 [
                     'regex' => '/Could not resolve host/i',
                     'factory' => function (Throwable $e) {
                         $message = $e->getMessage();
                         // 尝试从消息中提取主机名
                         preg_match('/Could not resolve host: ([^\s\(\)]+)/i', $message, $matches);
-                        $hostname = $matches[1] ?? '未知主机';
+                        $hostname = $matches[1] ?? 'unknown host';
                         return new LLMNetworkException(
-                            sprintf('无法解析LLM服务域名: %s', $hostname),
+                            sprintf('%s: %s', ErrorMessage::RESOLVE_HOST_ERROR, $hostname),
                             4,
                             $e,
                             ErrorCode::NETWORK_CONNECTION_ERROR
                         );
                     },
                 ],
-                // 默认网络连接异常处理
+                // Default network connection exception handling
                 [
                     'default' => true,
                     'factory' => function (Throwable $e) {
                         return new LLMNetworkException(
-                            sprintf('LLM网络连接错误: %s', $e->getMessage()),
+                            sprintf('%s: %s', ErrorMessage::NETWORK_CONNECTION_ERROR, $e->getMessage()),
                             4,
                             $e,
                             ErrorCode::NETWORK_CONNECTION_ERROR
@@ -87,34 +88,74 @@ public static function getDefaultMapping(): array
                 ],
             ],
 
-            // 请求异常
+            // Request exception
             RequestException::class => [
-                // API密钥无效
+                // Invalid API key (supports both English and Chinese)
                 [
-                    'regex' => '/invalid.+api.+key|api.+key.+invalid|authentication|unauthorized/i',
+                    'regex' => '/invalid.+api.+key|api.+key.+invalid|authentication|unauthorized|invalid.+missing.+api.+key|API密钥无效/i',
                     'status' => [401, 403],
                     'factory' => function (RequestException $e) {
                         $provider = '';
+                        $message = ErrorMessage::INVALID_API_KEY;
+
                         if ($e->getRequest()->getUri()->getHost()) {
                             $provider = $e->getRequest()->getUri()->getHost();
                         }
-                        return new LLMInvalidApiKeyException('API密钥无效或已过期', $e, $provider);
+
+                        // Extract message from response body
+                        if ($e->getResponse()) {
+                            $response = $e->getResponse();
+                            $body = $response->getBody();
+                            if ($body->isSeekable()) {
+                                $body->rewind();
+                            }
+                            $responseBody = (string) $body;
+                            $data = json_decode($responseBody, true);
+                            if (is_array($data)) {
+                                if (isset($data['error']['message'])) {
+                                    $message = $data['error']['message'];
+                                } elseif (isset($data['message'])) {
+                                    $message = $data['message'];
+                                }
+                            }
+                        }
+
+                        return new LLMInvalidApiKeyException($message, $e, $provider);
                     },
                 ],
-                // 速率限制
+                // Rate limit (supports both English and Chinese)
                 [
-                    'regex' => '/rate\s+limit|too\s+many\s+requests/i',
+                    'regex' => '/rate\s+limit|too\s+many\s+requests|API请求频率超出限制|rate.+limit.+exceeded/i',
                     'status' => [429],
                     'factory' => function (RequestException $e) {
                         $retryAfter = null;
+                        $message = ErrorMessage::RATE_LIMIT;
+
                         if ($e->getResponse()) {
                             $retryAfter = $e->getResponse()->getHeaderLine('Retry-After');
                             $retryAfter = $retryAfter ? (int) $retryAfter : null;
+
+                            // Extract message from response body
+                            $response = $e->getResponse();
+                            $body = $response->getBody();
+                            if ($body->isSeekable()) {
+                                $body->rewind();
+                            }
+                            $responseBody = (string) $body;
+                            $data = json_decode($responseBody, true);
+                            if (is_array($data)) {
+                                if (isset($data['error']['message'])) {
+                                    $message = $data['error']['message'];
+                                } elseif (isset($data['message'])) {
+                                    $message = $data['message'];
+                                }
+                            }
                         }
-                        return new LLMRateLimitException('API请求频率超出限制', $e, 429, $retryAfter);
+
+                        return new LLMRateLimitException($message, $e, 429, $retryAfter);
                     },
                 ],
-                // Azure OpenAI 模型内容过滤错误
+                // Azure OpenAI model content filter error
                 [
                     'regex' => '/model\s+produced\s+invalid\s+content|model_error/i',
                     'status' => [500],
@@ -132,20 +173,20 @@ public static function getDefaultMapping(): array
                             if (isset($data['error'])) {
                                 $errorType = $data['error']['type'] ?? 'model_error';
                                 if (isset($data['error']['message']) && str_contains($data['error']['message'], 'modifying your prompt')) {
-                                    $suggestion = '建议修改您的提示词内容';
+                                    $suggestion = 'Please modify your prompt content';
                                 }
                             }
                         }
 
-                        $message = '模型生成了无效内容';
+                        $message = ErrorMessage::MODEL_INVALID_CONTENT;
                         if ($suggestion) {
-                            $message .= '，' . $suggestion;
+                            $message .= ', ' . $suggestion;
                         }
 
                         return new LLMContentFilterException($message, $e, null, [$errorType], $statusCode);
                     },
                 ],
-                // 嵌入输入过大错误
+                // Embedding input too large error
                 [
                     'regex' => '/input\s+is\s+too\s+large|input\s+too\s+large|input\s+size\s+exceeds|batch\s+size\s+too\s+large|increase.+batch.+size/i',
                     'status' => [400, 413, 500],
@@ -190,9 +231,9 @@ public static function getDefaultMapping(): array
                             }
                         }
 
-                        $message = '嵌入请求输入内容过大，超出模型处理限制';
+                        $message = ErrorMessage::EMBEDDING_INPUT_TOO_LARGE;
                         if ($model) {
-                            $message .= "（模型：{$model}）";
+                            $message .= " (model: {$model})";
                         }
 
                         return new LLMEmbeddingInputTooLargeException(
@@ -205,14 +246,14 @@ public static function getDefaultMapping(): array
                         );
                     },
                 ],
-                // Azure OpenAI 服务端内部错误 (可重试的网络错误)
+                // Azure OpenAI server internal error (retryable network error)
                 [
                     'regex' => '/server\s+had\s+an\s+error|server_error/i',
                     'status' => [500, 502, 503, 504],
                     'factory' => function (RequestException $e) {
                         $statusCode = $e->getResponse() ? $e->getResponse()->getStatusCode() : 500;
                         return new LLMNetworkException(
-                            'Azure OpenAI 服务暂时不可用，建议稍后重试',
+                            ErrorMessage::AZURE_UNAVAILABLE,
                             4,
                             $e,
                             ErrorCode::NETWORK_CONNECTION_ERROR,
@@ -220,41 +261,93 @@ public static function getDefaultMapping(): array
                         );
                     },
                 ],
-                // 内容过滤
+                // Content filter (supports both English and Chinese)
                 [
-                    'regex' => '/content\s+filter|content\s+policy|inappropriate|unsafe content|violate|policy/i',
+                    'regex' => '/content\s+filter|content\s+policy|inappropriate|unsafe content|violate|policy|内容被系统安全过滤|filtered.+safety.+system/i',
                     'factory' => function (RequestException $e) {
                         $labels = null;
+                        $message = ErrorMessage::CONTENT_FILTER;
+
                         if ($e->getResponse()) {
                             $response = $e->getResponse();
                             $response->getBody()->rewind(); // 重置流位置
                             $body = $response->getBody()->getContents();
                             $data = json_decode($body, true);
-                            if (isset($data['error']['content_filter_results'])) {
-                                $labels = array_keys($data['error']['content_filter_results']);
+
+                            // Extract message from response
+                            if (is_array($data)) {
+                                if (isset($data['error']['message'])) {
+                                    $message = $data['error']['message'];
+                                } elseif (isset($data['message'])) {
+                                    $message = $data['message'];
+                                }
+
+                                // Extract content filter labels if available
+                                if (isset($data['error']['content_filter_results'])) {
+                                    $labels = array_keys($data['error']['content_filter_results']);
+                                }
                             }
                         }
+
                         $statusCode = $e->getResponse() ? $e->getResponse()->getStatusCode() : 400;
-                        return new LLMContentFilterException('内容被系统安全过滤', $e, null, $labels, $statusCode);
+                        return new LLMContentFilterException($message, $e, null, $labels, $statusCode);
                     },
                 ],
-                // 上下文长度超出限制
+                // Context length exceeded (supports both English and Chinese)
                 [
-                    'regex' => '/context\s+length|token\s+limit|maximum\s+context\s+length/i',
+                    'regex' => '/context\s+length|token\s+limit|maximum\s+context\s+length|input\s+is\s+too\s+long|input\s+too\s+long|上下文长度超出模型限制|context.+exceeds.+limit|exceeds.+model.+limit/i',
                     'factory' => function (RequestException $e) {
                         $currentLength = null;
                         $maxLength = null;
-                        // 尝试从消息中提取长度信息
-                        $message = $e->getMessage();
-                        preg_match('/(\d+)\s*\/\s*(\d+)/i', $message, $matches);
-                        if (isset($matches[1], $matches[2])) {
+                        $statusCode = $e->getResponse() ? $e->getResponse()->getStatusCode() : 400;
+                        $message = null;
+
+                        // Prefer the message from the response body (proxy scenarios); only string values are accepted
+                        if ($e->getResponse()) {
+                            $response = $e->getResponse();
+                            $body = $response->getBody();
+                            if ($body->isSeekable()) {
+                                $body->rewind();
+                            }
+                            $responseBody = (string) $body;
+                            $decodedBody = json_decode($responseBody, true);
+                            if (is_array($decodedBody)) {
+                                // Support both formats:
+                                // 1. {"error": {"message": "...", "code": 4002}}
+                                // 2. {"code": 4017, "message": "..."}
+                                if (is_string($decodedBody['error']['message'] ?? null)) {
+                                    $message = $decodedBody['error']['message'];
+                                } elseif (is_string($decodedBody['message'] ?? null)) {
+                                    $message = $decodedBody['message'];
+                                }
+                            }
+                        }
+
+                        // Fallback to exception message
+                        if (! $message) {
+                            $message = $e->getMessage();
+                        }
+
+                        // Try to extract length information from message
+                        // Support multiple formats (/u makes the full-width colon in [：:] match as one character):
+                        // 1. "8000 / 4096" or "8000/4096"
+                        // 2. "current length: 8000, max limit: 4096"
+                        // 3. "当前长度: 8000，最大限制: 4096" (Chinese, legacy support)
+                        if (preg_match('/(\d+)\s*\/\s*(\d+)/i', $message, $matches)) {
+                            $currentLength = (int) $matches[1];
+                            $maxLength = (int) $matches[2];
+                        } elseif (preg_match('/当前长度[：:]\s*(\d+).*最大限制[：:]\s*(\d+)/iu', $message, $matches)) {
+                            $currentLength = (int) $matches[1];
+                            $maxLength = (int) $matches[2];
+                        } elseif (preg_match('/current\s+length[：:]\s*(\d+).*max\s+limit[：:]\s*(\d+)/iu', $message, $matches)) {
                             $currentLength = (int) $matches[1];
                             $maxLength = (int) $matches[2];
                         }
-                        return new LLMContextLengthException('上下文长度超出模型限制', $e, null, $currentLength, $maxLength);
+
+                        return new LLMContextLengthException($message ?: ErrorMessage::CONTEXT_LENGTH, $e, null, $currentLength, $maxLength, $statusCode);
                     },
                 ],
-                // 多模态图片URL不可访问
+                // Multimodal image URL not accessible (supports both English and Chinese)
                 [
                     'regex' => '/image\s+url\s+is\s+not\s+accessible|invalid\s+image\s+url|image\s+could\s+not\s+be\s+accessed/i',
                     'factory' => function (RequestException $e) {
@@ -277,52 +370,90 @@ public static function getDefaultMapping(): array
                                 }
                             }
                         }
-                        return new LLMImageUrlAccessException('多模态图片URL不可访问', $e, null, $imageUrl);
+                        $statusCode = $e->getResponse() ? $e->getResponse()->getStatusCode() : 400;
+                        return new LLMImageUrlAccessException(ErrorMessage::IMAGE_URL_ACCESS, $e, null, $imageUrl, $statusCode);
                     },
                 ],
-                // 无效请求 (更精确的匹配，避免误匹配模型错误)
+                // Invalid request (more precise matching to avoid model error mismatch)
                 [
                     'regex' => '/invalid\s+(request|parameter|api|endpoint)|bad\s+request|malformed/i',
                     'status' => [400],
                     'factory' => function (RequestException $e) {
                         $invalidFields = null;
+                        $providerErrorDetails = null;
+
                         if ($e->getResponse()) {
                             $response = $e->getResponse();
                             $response->getBody()->rewind(); // 重置流位置
                             $body = $response->getBody()->getContents();
                             $data = json_decode($body, true);
+
+                            // 提取无效字段信息（保持原有逻辑）
                             if (isset($data['error']['param'])) {
                                 $invalidFields = [$data['error']['param'] => $data['error']['message'] ?? '无效参数'];
                             }
+
+                            // 提取完整的服务商错误详情
+                            if (isset($data['error']) && is_array($data['error'])) {
+                                $providerErrorDetails = [];
+
+                                // 提取错误码
+                                if (isset($data['error']['code'])) {
+                                    $providerErrorDetails['code'] = $data['error']['code'];
+                                }
+
+                                // 提取错误消息
+                                if (isset($data['error']['message'])) {
+                                    $providerErrorDetails['message'] = $data['error']['message'];
+                                }
+
+                                // 提取错误类型
+                                if (isset($data['error']['type'])) {
+                                    $providerErrorDetails['type'] = $data['error']['type'];
+                                }
+
+                                // 提取参数字段
+                                if (isset($data['error']['param'])) {
+                                    $providerErrorDetails['param'] = $data['error']['param'];
+                                }
+
+                                // 如果有其他字段，也一并保存
+                                foreach ($data['error'] as $key => $value) {
+                                    if (! in_array($key, ['code', 'message', 'type', 'param']) && is_scalar($value)) {
+                                        $providerErrorDetails[$key] = $value;
+                                    }
+                                }
+                            }
                         }
-                        return new LLMInvalidRequestException('无效的API请求', $e, 400, $invalidFields);
+
+                        return new LLMInvalidRequestException(ErrorMessage::INVALID_REQUEST, $e, 400, $invalidFields, $providerErrorDetails);
                     },
                 ],
-                // 默认异常处理
+                // Default exception handling
                 [
                     'default' => true,
                     'factory' => function (RequestException $e) {
                         if ($e->getResponse()) {
                             $statusCode = $e->getResponse()->getStatusCode();
-                            // 根据状态码分类
+                            // Classify by status code
                             if ($statusCode >= 500) {
-                                return new LLMApiException('LLM服务端错误: ' . $e->getMessage(), 3, $e, ErrorCode::API_SERVER_ERROR, $statusCode);
+                                return new LLMApiException(ErrorMessage::SERVER_ERROR . ': ' . $e->getMessage(), 3, $e, ErrorCode::API_SERVER_ERROR, $statusCode);
                             }
                             if ($statusCode >= 400) {
-                                return new LLMApiException('LLM客户端请求错误: ' . $e->getMessage(), 2, $e, ErrorCode::API_INVALID_REQUEST, $statusCode);
+                                return new LLMApiException(ErrorMessage::CLIENT_ERROR . ': ' . $e->getMessage(), 2, $e, ErrorCode::API_INVALID_REQUEST, $statusCode);
                             }
-                            // 其他状态码仍然当作网络异常，但记录状态码
-                            return new LLMNetworkException('LLM网络请求错误: ' . $e->getMessage(), 4, $e, ErrorCode::NETWORK_CONNECTION_ERROR, $statusCode);
+                            // Other status codes are still treated as network exceptions, but record the status code
+                            return new LLMNetworkException(ErrorMessage::NETWORK_REQUEST_ERROR . ': ' . $e->getMessage(), 4, $e, ErrorCode::NETWORK_CONNECTION_ERROR, $statusCode);
                         }
-                        return new LLMNetworkException('LLM网络请求错误: ' . $e->getMessage(), 4, $e, ErrorCode::NETWORK_CONNECTION_ERROR);
+                        return new LLMNetworkException(ErrorMessage::NETWORK_REQUEST_ERROR . ': ' . $e->getMessage(), 4, $e, ErrorCode::NETWORK_CONNECTION_ERROR, 500);
                     },
                 ],
             ],
 
-            // 默认异常处理
+            // Default exception handling
             'default' => [
                 'factory' => function (Throwable $e) {
-                    return new LLMException('LLM调用错误: ' . $e->getMessage(), 0, $e);
+                    return new LLMException(ErrorMessage::LLM_INVOCATION_ERROR . ': ' . $e->getMessage(), 0, $e);
                 },
             ],
         ];
diff --git a/src/Exception/LLMException/ErrorMappingManager.php b/src/Exception/LLMException/ErrorMappingManager.php
index fb2303f..526348f 100644
--- a/src/Exception/LLMException/ErrorMappingManager.php
+++ b/src/Exception/LLMException/ErrorMappingManager.php
@@ -178,9 +178,31 @@ protected function matchesPattern(Throwable $exception, array $handler): bool
             // 对于RequestException，也检查响应体内容
             if ($exception instanceof RequestException && $exception->getResponse()) {
                 $response = $exception->getResponse();
-                $response->getBody()->rewind(); // 重置流位置
-                $responseBody = (string) $response->getBody();
-                $message .= ' ' . $responseBody; // 将响应体内容加入匹配文本中
+                $body = $response->getBody();
+
+                // Check if the stream is seekable before attempting to rewind
+                if ($body->isSeekable()) {
+                    $body->rewind(); // 重置流位置
+                }
+
+                $responseBody = (string) $body;
+
+                // Try to parse JSON response and extract the message field for matching
+                // This is important for proxy scenarios where downstream Odin services return structured errors
+                $decodedBody = json_decode($responseBody, true);
+                if (is_array($decodedBody)) {
+                    // Extract message from common error response structures
+                    if (isset($decodedBody['message'])) {
+                        // Direct message field: {"code": 4017, "message": "上下文长度超出模型限制"}
+                        $message .= ' ' . $decodedBody['message'];
+                    } elseif (isset($decodedBody['error']['message'])) {
+                        // Nested message field: {"error": {"code": "...", "message": "..."}}
+                        $message .= ' ' . $decodedBody['error']['message'];
+                    }
+                }
+
+                // Also include the full response body for fallback matching
+                $message .= ' ' . $responseBody;
             }
 
             if (! preg_match($handler['regex'], $message)) {
diff --git a/src/Exception/LLMException/ErrorMessage.php b/src/Exception/LLMException/ErrorMessage.php
new file mode 100644
index 0000000..52f3e14
--- /dev/null
+++ b/src/Exception/LLMException/ErrorMessage.php
@@ -0,0 +1,108 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Exception\LLMException;
+
+/**
+ * Central catalogue of the default, English-language LLM error messages.
+ *
+ * Exception constructors and the default error mapping reference these constants instead of inline literals.
+ */
+class ErrorMessage
+{
+    /**
+     * Configuration errors: API key, endpoint URL, model name and configuration parameters.
+     */
+    public const INVALID_API_KEY = 'Invalid or missing API key';
+
+    public const INVALID_ENDPOINT = 'Invalid API endpoint URL';
+
+    public const INVALID_MODEL = 'Invalid model name or model unavailable';
+
+    public const INVALID_PARAMETER = 'Invalid configuration parameter';
+
+    /**
+     * Network errors: timeouts, connection failures, SSL/TLS and hostname resolution.
+     */
+    public const CONNECTION_TIMEOUT = 'Connection to LLM service timed out';
+
+    public const READ_TIMEOUT = 'Reading response from LLM service timed out';
+
+    public const WRITE_TIMEOUT = 'Sending request to LLM service timed out';
+
+    public const CONNECTION_ERROR = 'Failed to connect to LLM service';
+
+    public const SSL_ERROR = 'SSL/TLS connection error';
+
+    public const NETWORK_REQUEST_ERROR = 'LLM network request error';
+
+    public const NETWORK_CONNECTION_ERROR = 'LLM network connection error';
+
+    public const RESOLVE_HOST_ERROR = 'Unable to resolve LLM service hostname';
+
+    /**
+     * API errors: rate limiting, invalid requests, server/client errors, authentication, permission and quota.
+     */
+    public const RATE_LIMIT = 'API rate limit exceeded';
+
+    public const INVALID_REQUEST = 'Invalid API request';
+
+    public const SERVER_ERROR = 'LLM service error';
+
+    public const CLIENT_ERROR = 'LLM client request error';
+
+    public const AUTHENTICATION_ERROR = 'API authentication failed';
+
+    public const PERMISSION_DENIED = 'API permission denied';
+
+    public const QUOTA_EXCEEDED = 'API quota exceeded';
+
+    /**
+     * Model errors: content filtering, context length, unsupported capabilities, image and embedding input problems.
+     */
+    public const CONTENT_FILTER = 'Content filtered by safety system';
+
+    public const CONTEXT_LENGTH = 'Context length exceeds model limit';
+
+    public const FUNCTION_NOT_SUPPORTED = 'Model does not support function calling';
+
+    public const MULTIMODAL_NOT_SUPPORTED = 'Model does not support multimodal input';
+
+    public const EMBEDDING_NOT_SUPPORTED = 'Model does not support embedding generation';
+
+    public const IMAGE_URL_ACCESS = 'Multimodal image URL is not accessible';
+
+    public const EMBEDDING_INPUT_TOO_LARGE = 'Embedding input exceeds model processing limit';
+
+    public const UNSUPPORTED_IMAGE_FORMAT = 'Unsupported image format';
+
+    public const MODEL_INVALID_CONTENT = 'Model produced invalid content';
+
+    /**
+     * Streaming errors: overall stream timeout and first-chunk timeout.
+     */
+    public const STREAM_TIMEOUT = 'Stream response timed out';
+
+    public const FIRST_CHUNK_TIMEOUT = 'Waiting for first stream chunk timed out';
+
+    /**
+     * Azure OpenAI specific messages.
+     */
+    public const AZURE_UNAVAILABLE = 'Azure OpenAI service temporarily unavailable, please retry later';
+
+    /**
+     * Generic messages not tied to a specific error category.
+     */
+    public const UNKNOWN_ERROR = 'Unknown error';
+
+    public const LLM_INVOCATION_ERROR = 'LLM invocation error';
+}
diff --git a/src/Exception/LLMException/LLMErrorHandler.php b/src/Exception/LLMException/LLMErrorHandler.php
index 2cb8e9c..c2582c3 100644
--- a/src/Exception/LLMException/LLMErrorHandler.php
+++ b/src/Exception/LLMException/LLMErrorHandler.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException;
 
+use GuzzleHttp\Exception\RequestException;
 use Hyperf\Odin\Exception\LLMException;
 use Psr\Log\LoggerInterface;
 use Psr\Log\LogLevel;
@@ -62,12 +63,15 @@ public function __construct(?LoggerInterface $logger = null, array $customMappin
     public function handle(Throwable $exception, array $context = []): LLMException
     {
         try {
+            // 主动提取Guzzle RequestException的响应头信息
+            $enrichedContext = $this->enrichContextWithResponseInfo($exception, $context);
+
             // 将异常映射为标准的LLM异常
-            $llmException = $this->errorMappingManager->mapException($exception, $context);
+            $llmException = $this->errorMappingManager->mapException($exception, $enrichedContext);
 
             // 记录错误信息
             if ($this->logErrors) {
-                $this->logError($llmException, $context);
+                $this->logError($llmException, $enrichedContext);
             }
 
             return $llmException;
@@ -274,10 +278,17 @@ protected function filterSensitiveInfo(array $context): array
         $sensitiveKeys = ['api_key', 'api-key', 'apiKey', 'password', 'secret', 'token', 'authorization'];
 
         foreach ($context as $key => $value) {
+            // 对于数字索引，直接处理值
             if (! is_string($key)) {
+                if (is_array($value)) {
+                    $filtered[$key] = $this->filterSensitiveInfo($value);
+                } else {
+                    $filtered[$key] = $value;
+                }
                 continue;
             }
-            // 检查是否为敏感信息
+
+            // 检查是否为敏感信息（只针对字符串键）
             $isSensitive = false;
             foreach ($sensitiveKeys as $sensitiveKey) {
                 if (stripos($key, $sensitiveKey) !== false) {
@@ -299,4 +310,61 @@ protected function filterSensitiveInfo(array $context): array
 
         return $filtered;
     }
+
+    /**
+     * Enrich the context with details of the HTTP error response, when one is available.
+     *
+     * The response is taken from $exception itself or, failing that, from its previous exception.
+     *
+     * @param Throwable $exception the original exception
+     * @param array $context the original context
+     * @return array the context, with response_* keys added when a response exists
+     */
+    protected function enrichContextWithResponseInfo(Throwable $exception, array $context): array
+    {
+        $response = null;
+        foreach ([$exception, $exception->getPrevious()] as $candidate) {
+            if ($candidate instanceof RequestException && $candidate->getResponse()) {
+                $response = $candidate->getResponse();
+                break;
+            }
+        }
+        if ($response === null) {
+            return $context;
+        }
+
+        $context['response_headers'] = $response->getHeaders();
+        $context['response_status_code'] = $response->getStatusCode();
+        $context['response_reason_phrase'] = $response->getReasonPhrase();
+
+        try {
+            $body = $response->getBody();
+            // Read only seekable bodies, and rewind afterwards, so later readers of the body are unaffected.
+            if ($body->isSeekable()) {
+                $body->rewind();
+                $responseContent = $body->getContents();
+                $body->rewind();
+                // Skip empty bodies and cap the size (10 KiB) to keep log entries small.
+                if ($responseContent !== '' && strlen($responseContent) < 10240) {
+                    $context['response_body'] = $responseContent;
+                }
+            }
+        } catch (Throwable $e) {
+            // Best effort: failing to read the body must not break error handling.
+            $this->logger?->debug('无法读取响应体内容', [
+                'error' => $e->getMessage(),
+                'status_code' => $response->getStatusCode(),
+            ]);
+        }
+
+        $this->logger?->info('HTTPErrorResponseInfo', [
+            'status_code' => $response->getStatusCode(),
+            'reason_phrase' => $response->getReasonPhrase(),
+            'headers' => $response->getHeaders(),
+            'has_body' => isset($context['response_body']),
+            'content' => $context['response_body'] ?? null,
+        ]);
+
+        return $context;
+    }
 }
diff --git a/src/Exception/LLMException/Model/LLMContentFilterException.php b/src/Exception/LLMException/Model/LLMContentFilterException.php
index ee6233d..613fb9d 100644
--- a/src/Exception/LLMException/Model/LLMContentFilterException.php
+++ b/src/Exception/LLMException/Model/LLMContentFilterException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Model;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMModelException;
 use Throwable;
 
@@ -34,7 +35,7 @@ class LLMContentFilterException extends LLMModelException
      * 创建一个新的内容过滤异常实例.
      */
     public function __construct(
-        string $message = '内容被系统安全过滤',
+        string $message = ErrorMessage::CONTENT_FILTER,
         ?Throwable $previous = null,
         ?string $model = null,
         ?array $contentLabels = null,
@@ -44,7 +45,7 @@ public function __construct(
 
         if (! empty($contentLabels)) {
             $labelsStr = implode(', ', $contentLabels);
-            $message = sprintf('%s，过滤原因: %s', $message, $labelsStr);
+            $message = sprintf('%s, reasons: %s', $message, $labelsStr);
         }
 
         parent::__construct($message, self::ERROR_CODE, $previous, 0, $model, $statusCode);
diff --git a/src/Exception/LLMException/Model/LLMContextLengthException.php b/src/Exception/LLMException/Model/LLMContextLengthException.php
index 325f633..aafd418 100644
--- a/src/Exception/LLMException/Model/LLMContextLengthException.php
+++ b/src/Exception/LLMException/Model/LLMContextLengthException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Model;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMModelException;
 use Throwable;
 
@@ -36,23 +37,24 @@ class LLMContextLengthException extends LLMModelException
     protected ?int $maxLength = null;
 
     /**
-     * 创建一个新的上下文长度超出限制异常实例.
+     * Create a new context length exception instance.
      */
     public function __construct(
-        string $message = '上下文长度超出模型限制',
+        string $message = ErrorMessage::CONTEXT_LENGTH,
         ?Throwable $previous = null,
         ?string $model = null,
         ?int $currentLength = null,
-        ?int $maxLength = null
+        ?int $maxLength = null,
+        int $statusCode = 400
     ) {
         $this->currentLength = $currentLength;
         $this->maxLength = $maxLength;
 
         if ($currentLength !== null && $maxLength !== null) {
-            $message = sprintf('%s，当前长度: %d，最大限制: %d', $message, $currentLength, $maxLength);
+            $message = sprintf('%s, current length: %d, max limit: %d', $message, $currentLength, $maxLength);
         }
 
-        parent::__construct($message, self::ERROR_CODE, $previous, 0, $model);
+        parent::__construct($message, self::ERROR_CODE, $previous, 0, $model, $statusCode);
     }
 
     /**
diff --git a/src/Exception/LLMException/Model/LLMEmbeddingInputTooLargeException.php b/src/Exception/LLMException/Model/LLMEmbeddingInputTooLargeException.php
index e77638a..33b72ed 100644
--- a/src/Exception/LLMException/Model/LLMEmbeddingInputTooLargeException.php
+++ b/src/Exception/LLMException/Model/LLMEmbeddingInputTooLargeException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Model;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMModelException;
 use Throwable;
 
@@ -35,7 +36,7 @@ class LLMEmbeddingInputTooLargeException extends LLMModelException
      * @param int $statusCode HTTP状态码
      */
     public function __construct(
-        string $message = '嵌入请求输入内容过大',
+        string $message = ErrorMessage::EMBEDDING_INPUT_TOO_LARGE,
         ?Throwable $previous = null,
         ?string $model = null,
         ?int $inputLength = null,
@@ -70,19 +71,19 @@ public function getMaxInputLength(): ?int
     public function getSuggestion(): string
     {
         $suggestions = [
-            '建议将输入文本分割成较小的块进行处理',
-            '可以使用 TextSplitter 工具进行文本分割',
-            '考虑移除不必要的多媒体内容或格式标记',
+            'Consider splitting the input text into smaller chunks for processing',
+            'You can use a TextSplitter tool to split the text',
+            'Consider removing unnecessary multimedia content or formatting tags',
         ];
 
         if ($this->inputLength && $this->maxInputLength) {
             array_unshift($suggestions, sprintf(
-                '当前输入长度: %d，最大限制: %d',
+                'Current input length: %d, max limit: %d',
                 $this->inputLength,
                 $this->maxInputLength
             ));
         }
 
-        return implode('；', $suggestions);
+        return implode('; ', $suggestions);
     }
 }
diff --git a/src/Exception/LLMException/Model/LLMEmbeddingNotSupportedException.php b/src/Exception/LLMException/Model/LLMEmbeddingNotSupportedException.php
index 288f444..1d834ac 100644
--- a/src/Exception/LLMException/Model/LLMEmbeddingNotSupportedException.php
+++ b/src/Exception/LLMException/Model/LLMEmbeddingNotSupportedException.php
@@ -13,6 +13,7 @@
 namespace Hyperf\Odin\Exception\LLMException\Model;
 
 use Hyperf\Odin\Exception\LLMException;
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Throwable;
 
 /**
@@ -33,7 +34,7 @@ class LLMEmbeddingNotSupportedException extends LLMException
      * @param string $model 模型名称
      */
     public function __construct(
-        string $message = '模型不支持嵌入功能',
+        string $message = ErrorMessage::EMBEDDING_NOT_SUPPORTED,
         ?Throwable $previous = null,
         protected string $model = ''
     ) {
diff --git a/src/Exception/LLMException/Model/LLMFunctionCallNotSupportedException.php b/src/Exception/LLMException/Model/LLMFunctionCallNotSupportedException.php
index b181b24..eab5bb9 100644
--- a/src/Exception/LLMException/Model/LLMFunctionCallNotSupportedException.php
+++ b/src/Exception/LLMException/Model/LLMFunctionCallNotSupportedException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Model;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMModelException;
 use Throwable;
 
@@ -28,7 +29,7 @@ class LLMFunctionCallNotSupportedException extends LLMModelException
     /**
      * 创建一个新的函数调用不支持异常实例.
      */
-    public function __construct(string $message = '模型不支持函数调用功能', ?Throwable $previous = null, ?string $model = null)
+    public function __construct(string $message = ErrorMessage::FUNCTION_NOT_SUPPORTED, ?Throwable $previous = null, ?string $model = null)
     {
         parent::__construct($message, self::ERROR_CODE, $previous, 0, $model, 400);
     }
diff --git a/src/Exception/LLMException/Model/LLMImageUrlAccessException.php b/src/Exception/LLMException/Model/LLMImageUrlAccessException.php
index b4cb700..59fdb7d 100644
--- a/src/Exception/LLMException/Model/LLMImageUrlAccessException.php
+++ b/src/Exception/LLMException/Model/LLMImageUrlAccessException.php
@@ -13,6 +13,7 @@
 namespace Hyperf\Odin\Exception\LLMException\Model;
 
 use Hyperf\Odin\Exception\LLMException\ErrorCode;
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMModelException;
 use Throwable;
 
@@ -35,18 +36,19 @@ class LLMImageUrlAccessException extends LLMModelException
      * 创建一个新的图片URL不可访问异常实例.
      */
     public function __construct(
-        string $message = '多模态图片URL不可访问',
+        string $message = ErrorMessage::IMAGE_URL_ACCESS,
         ?Throwable $previous = null,
         ?string $model = null,
-        ?string $imageUrl = null
+        ?string $imageUrl = null,
+        int $statusCode = 400
     ) {
         $this->imageUrl = $imageUrl;
 
         if (! empty($imageUrl)) {
-            $message = sprintf('%s，图片URL: %s', $message, $imageUrl);
+            $message = sprintf('%s, image URL: %s', $message, $imageUrl);
         }
 
-        parent::__construct($message, self::ERROR_CODE, $previous, ErrorCode::MODEL_IMAGE_URL_ACCESS_ERROR, $model);
+        parent::__construct($message, self::ERROR_CODE, $previous, ErrorCode::MODEL_IMAGE_URL_ACCESS_ERROR, $model, $statusCode);
     }
 
     /**
diff --git a/src/Exception/LLMException/Model/LLMModalityNotSupportedException.php b/src/Exception/LLMException/Model/LLMModalityNotSupportedException.php
index 18432f1..eac967c 100644
--- a/src/Exception/LLMException/Model/LLMModalityNotSupportedException.php
+++ b/src/Exception/LLMException/Model/LLMModalityNotSupportedException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Model;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMModelException;
 use Throwable;
 
@@ -28,7 +29,7 @@ class LLMModalityNotSupportedException extends LLMModelException
     /**
      * 创建一个新的多模态不支持异常实例.
      */
-    public function __construct(string $message = '模型不支持多模态输入', ?Throwable $previous = null, ?string $model = null)
+    public function __construct(string $message = ErrorMessage::MULTIMODAL_NOT_SUPPORTED, ?Throwable $previous = null, ?string $model = null)
     {
         parent::__construct($message, self::ERROR_CODE, $previous, 0, $model, 400);
     }
diff --git a/src/Exception/LLMException/Model/LLMUnsupportedImageFormatException.php b/src/Exception/LLMException/Model/LLMUnsupportedImageFormatException.php
new file mode 100644
index 0000000..7b9b316
--- /dev/null
+++ b/src/Exception/LLMException/Model/LLMUnsupportedImageFormatException.php
@@ -0,0 +1,94 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Exception\LLMException\Model;
+
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
+use Hyperf\Odin\Exception\LLMException\LLMModelException;
+use Throwable;
+
+/**
+ * Exception thrown when an unsupported image format is used in vision requests.
+ */
+class LLMUnsupportedImageFormatException extends LLMModelException
+{
+    /**
+     * Error code, relative to the model error base.
+     */
+    private const ERROR_CODE = 12;
+
+    /**
+     * The unsupported file extension.
+     */
+    protected ?string $fileExtension = null;
+
+    /**
+     * The image URL that caused the error.
+     */
+    protected ?string $imageUrl = null;
+
+    /**
+     * The unsupported content type.
+     */
+    protected ?string $contentType = null;
+
+    /**
+     * Create a new unsupported image format exception.
+     *
+     * @param string $message Exception message
+     * @param null|Throwable $previous Previous exception
+     * @param null|string $fileExtension The unsupported file extension
+     * @param null|string $imageUrl The image URL that caused the error
+     * @param null|string $contentType The unsupported content type
+     * @param int $statusCode HTTP status code
+     * @param null|string $model Model name forwarded to the parent exception (last, so existing calls keep working)
+     */
+    public function __construct(
+        string $message = ErrorMessage::UNSUPPORTED_IMAGE_FORMAT,
+        ?Throwable $previous = null,
+        ?string $fileExtension = null,
+        ?string $imageUrl = null,
+        ?string $contentType = null,
+        int $statusCode = 400,
+        ?string $model = null
+    ) {
+        $this->fileExtension = $fileExtension;
+        $this->imageUrl = $imageUrl;
+        $this->contentType = $contentType;
+
+        parent::__construct($message, self::ERROR_CODE, $previous, 0, $model, $statusCode);
+    }
+
+    /**
+     * Get the unsupported file extension.
+     */
+    public function getFileExtension(): ?string
+    {
+        return $this->fileExtension;
+    }
+
+    /**
+     * Get the image URL that caused the error.
+     */
+    public function getImageUrl(): ?string
+    {
+        return $this->imageUrl;
+    }
+
+    /**
+     * Get the unsupported content type.
+     */
+    public function getContentType(): ?string
+    {
+        return $this->contentType;
+    }
+}
diff --git a/src/Exception/LLMException/Network/LLMConnectionTimeoutException.php b/src/Exception/LLMException/Network/LLMConnectionTimeoutException.php
index d7311de..304e6c4 100644
--- a/src/Exception/LLMException/Network/LLMConnectionTimeoutException.php
+++ b/src/Exception/LLMException/Network/LLMConnectionTimeoutException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Network;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMNetworkException;
 use Throwable;
 
@@ -33,15 +34,15 @@ class LLMConnectionTimeoutException extends LLMNetworkException
     /**
      * 创建一个新的连接超时异常实例.
      */
-    public function __construct(string $message = '连接LLM服务超时', ?Throwable $previous = null, ?float $timeoutSeconds = null)
+    public function __construct(string $message = ErrorMessage::CONNECTION_TIMEOUT, ?Throwable $previous = null, ?float $timeoutSeconds = null, int $statusCode = 408)
     {
         $this->timeoutSeconds = $timeoutSeconds;
 
         if ($timeoutSeconds !== null) {
-            $message = sprintf('%s，超时时间: %.2f秒', $message, $timeoutSeconds);
+            $message = sprintf('%s, timeout: %.2f seconds', $message, $timeoutSeconds);
         }
 
-        parent::__construct($message, self::ERROR_CODE, $previous);
+        parent::__construct($message, self::ERROR_CODE, $previous, 0, $statusCode);
     }
 
     /**
diff --git a/src/Exception/LLMException/Network/LLMReadTimeoutException.php b/src/Exception/LLMException/Network/LLMReadTimeoutException.php
index 18bbe28..ead950c 100644
--- a/src/Exception/LLMException/Network/LLMReadTimeoutException.php
+++ b/src/Exception/LLMException/Network/LLMReadTimeoutException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Network;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMNetworkException;
 use Throwable;
 
@@ -33,15 +34,15 @@ class LLMReadTimeoutException extends LLMNetworkException
     /**
      * 创建一个新的读取超时异常实例.
      */
-    public function __construct(string $message = '从LLM服务读取响应超时', ?Throwable $previous = null, ?float $timeoutSeconds = null)
+    public function __construct(string $message = ErrorMessage::READ_TIMEOUT, ?Throwable $previous = null, ?float $timeoutSeconds = null, int $statusCode = 408)
     {
         $this->timeoutSeconds = $timeoutSeconds;
 
         if ($timeoutSeconds !== null) {
-            $message = sprintf('%s，超时时间: %.2f秒', $message, $timeoutSeconds);
+            $message = sprintf('%s, timeout: %.2f seconds', $message, $timeoutSeconds);
         }
 
-        parent::__construct($message, self::ERROR_CODE, $previous);
+        parent::__construct($message, self::ERROR_CODE, $previous, 0, $statusCode);
     }
 
     /**
diff --git a/src/Exception/LLMException/Network/LLMStreamTimeoutException.php b/src/Exception/LLMException/Network/LLMStreamTimeoutException.php
index db39833..62158b9 100644
--- a/src/Exception/LLMException/Network/LLMStreamTimeoutException.php
+++ b/src/Exception/LLMException/Network/LLMStreamTimeoutException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Network;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Hyperf\Odin\Exception\LLMException\LLMNetworkException;
 use Throwable;
 
@@ -34,20 +35,21 @@ class LLMStreamTimeoutException extends LLMNetworkException
      * 创建一个新的流式响应超时异常实例.
      */
     public function __construct(
-        string $message = '流式响应超时',
+        string $message = ErrorMessage::STREAM_TIMEOUT,
         ?Throwable $previous = null,
         string $timeoutType = 'total',
-        ?float $timeoutSeconds = null
+        ?float $timeoutSeconds = null,
+        int $statusCode = 408
     ) {
         $this->timeoutType = $timeoutType;
 
         if ($timeoutSeconds !== null) {
-            $message = sprintf('%s，超时类型: %s，已等待: %.2f秒', $message, $timeoutType, $timeoutSeconds);
+            $message = sprintf('%s, timeout type: %s, waited: %.2f seconds', $message, $timeoutType, $timeoutSeconds);
         } else {
-            $message = sprintf('%s，超时类型: %s', $message, $timeoutType);
+            $message = sprintf('%s, timeout type: %s', $message, $timeoutType);
         }
 
-        parent::__construct($message, self::ERROR_CODE, $previous);
+        parent::__construct($message, self::ERROR_CODE, $previous, 0, $statusCode);
     }
 
     /**
diff --git a/src/Exception/LLMException/Network/LLMThinkingStreamTimeoutException.php b/src/Exception/LLMException/Network/LLMThinkingStreamTimeoutException.php
index 4926322..57b4c3f 100644
--- a/src/Exception/LLMException/Network/LLMThinkingStreamTimeoutException.php
+++ b/src/Exception/LLMException/Network/LLMThinkingStreamTimeoutException.php
@@ -12,6 +12,7 @@
 
 namespace Hyperf\Odin\Exception\LLMException\Network;
 
+use Hyperf\Odin\Exception\LLMException\ErrorMessage;
 use Throwable;
 
 /**
@@ -23,10 +24,11 @@ class LLMThinkingStreamTimeoutException extends LLMStreamTimeoutException
      * 创建一个新的思考阶段流式响应超时异常实例.
      */
     public function __construct(
-        string $message = '等待首个流式响应块超时',
+        string $message = ErrorMessage::FIRST_CHUNK_TIMEOUT,
         ?Throwable $previous = null,
-        ?float $timeoutSeconds = null
+        ?float $timeoutSeconds = null,
+        int $statusCode = 408
     ) {
-        parent::__construct($message, $previous, 'initial_response', $timeoutSeconds);
+        parent::__construct($message, $previous, 'initial_response', $timeoutSeconds, $statusCode);
     }
 }
diff --git a/src/Factory/ClientFactory.php b/src/Factory/ClientFactory.php
index a78ad7d..b200b9f 100644
--- a/src/Factory/ClientFactory.php
+++ b/src/Factory/ClientFactory.php
@@ -18,6 +18,12 @@
 use Hyperf\Odin\Api\Providers\AwsBedrock\Cache\AutoCacheConfig;
 use Hyperf\Odin\Api\Providers\AzureOpenAI\AzureOpenAI;
 use Hyperf\Odin\Api\Providers\AzureOpenAI\AzureOpenAIConfig;
+use Hyperf\Odin\Api\Providers\DashScope\Cache\DashScopeAutoCacheConfig;
+use Hyperf\Odin\Api\Providers\DashScope\DashScope;
+use Hyperf\Odin\Api\Providers\DashScope\DashScopeConfig;
+use Hyperf\Odin\Api\Providers\Gemini\Cache\GeminiCacheConfig;
+use Hyperf\Odin\Api\Providers\Gemini\Gemini;
+use Hyperf\Odin\Api\Providers\Gemini\GeminiConfig;
 use Hyperf\Odin\Api\Providers\OpenAI\OpenAI;
 use Hyperf\Odin\Api\Providers\OpenAI\OpenAIConfig;
 use Hyperf\Odin\Api\RequestOptions\ApiOptions;
@@ -101,7 +107,7 @@ public static function createAwsBedrockClient(array $config, ?ApiOptions $apiOpt
         $accessKey = $config['access_key'] ?? '';
         $secretKey = $config['secret_key'] ?? '';
         $region = $config['region'] ?? 'us-east-1';
-        $type = $config['type'] ?? AwsType::CONVERSE;
+        $type = $config['type'] ?? AwsType::CONVERSE_CUSTOM;
         $autoCache = (bool) ($config['auto_cache'] ?? false);
         $autoCacheConfig = null;
         if (isset($config['auto_cache_config'])) {
@@ -135,20 +141,122 @@ public static function createAwsBedrockClient(array $config, ?ApiOptions $apiOpt
         return $awsBedrock->getClient($clientConfig, $apiOptions, $logger);
     }
 
+    /**
+     * Build a DashScope client from a plain configuration array.
+     *
+     * Keys read from $config:
+     *  - api_key: defaults to an empty string
+     *  - base_url: defaults to https://dashscope.aliyuncs.com
+     *  - skip_api_key_validation: cast to bool, defaults to false
+     *  - auto_cache_config: optional; may carry min_cache_tokens, supported_models and auto_enabled
+     *
+     * @param array $config Provider settings, see the key list above
+     * @param null|ApiOptions $apiOptions Request options; a default instance is created when null
+     * @param null|LoggerInterface $logger Logger forwarded to the client
+     *
+     * @return ClientInterface Client obtained from DashScope::getClient()
+     */
+    public static function createDashScopeClient(array $config, ?ApiOptions $apiOptions = null, ?LoggerInterface $logger = null): ClientInterface
+    {
+        // The cache configuration stays null unless an auto_cache_config section is supplied.
+        $cacheSettings = $config['auto_cache_config'] ?? null;
+        $cacheConfig = null;
+        if ($cacheSettings !== null) {
+            $cacheConfig = new DashScopeAutoCacheConfig(
+                minCacheTokens: $cacheSettings['min_cache_tokens'] ?? 1024,
+                supportedModels: $cacheSettings['supported_models'] ?? ['qwen3-coder-plus', 'qwen-max', 'qwen-plus', 'qwen-turbo'],
+                autoEnabled: (bool) ($cacheSettings['auto_enabled'] ?? false)
+            );
+        }
+
+        // Connection settings fall back to an empty key and the default endpoint.
+        $settings = new DashScopeConfig(
+            apiKey: $config['api_key'] ?? '',
+            baseUrl: $config['base_url'] ?? 'https://dashscope.aliyuncs.com',
+            skipApiKeyValidation: (bool) ($config['skip_api_key_validation'] ?? false),
+            autoCacheConfig: $cacheConfig
+        );
+
+        // Use default request options when the caller passed none.
+        $apiOptions ??= new ApiOptions();
+
+        $provider = new DashScope();
+
+        return $provider->getClient($settings, $apiOptions, $logger);
+    }
+
+    /**
+     * Build a Gemini client from a plain configuration array.
+     *
+     * @param array $config Provider settings (api_key, base_url, skip_api_key_validation, auto_cache_config)
+     * @param null|ApiOptions $apiOptions Request options; never modified, timeout tweaks are applied to a clone
+     * @param null|LoggerInterface $logger Logger forwarded to the client
+     */
+    public static function createGeminiClient(array $config, ?ApiOptions $apiOptions = null, ?LoggerInterface $logger = null): ClientInterface
+    {
+        // Read the connection settings, falling back to defaults for missing keys.
+        $apiKey = $config['api_key'] ?? '';
+        $baseUrl = $config['base_url'] ?? 'https://generativelanguage.googleapis.com/v1beta';
+        $skipApiKeyValidation = (bool) ($config['skip_api_key_validation'] ?? false);
+
+        // Build the cache configuration only when an auto_cache_config section is supplied.
+        $cacheConfig = null;
+        if (isset($config['auto_cache_config'])) {
+            $autoCacheConfig = $config['auto_cache_config'];
+            $cacheConfig = new GeminiCacheConfig(
+                enableCache: (bool) ($autoCacheConfig['enable_cache'] ?? false),
+                minCacheTokens: $autoCacheConfig['min_cache_tokens'] ?? 4096,
+                refreshThreshold: $autoCacheConfig['refresh_threshold'] ?? 8000,
+                cacheTtl: $autoCacheConfig['cache_ttl'] ?? 600,
+                estimationRatio: (float) ($autoCacheConfig['estimation_ratio'] ?? 0.33)
+            );
+        }
+
+        // Assemble the client configuration.
+        $clientConfig = new GeminiConfig(
+            apiKey: $apiKey,
+            baseUrl: $baseUrl,
+            skipApiKeyValidation: $skipApiKeyValidation
+        );
+
+        // Attach the cache configuration when one was built.
+        if ($cacheConfig) {
+            $clientConfig->setCacheConfig($cacheConfig);
+        }
+
+        $gemini = new Gemini();
+
+        if ($apiOptions) {
+            // Gemini tends to deliver large chunks that take longer to arrive, so both chunk timeouts
+            // follow the total stream timeout. Clone first so the caller's ApiOptions is left untouched.
+            $apiOptions = clone $apiOptions;
+            $apiOptions->setStreamChunkTimeout($apiOptions->getStreamTotalTimeout());
+            $apiOptions->setStreamFirstChunkTimeout($apiOptions->getStreamTotalTimeout());
+        }
+
+        // Create the client.
+        return $gemini->getClient($clientConfig, $apiOptions, $logger);
+    }
+
     /**
      * 根据提供商类型创建客户端.
      *
-     * @param string $provider 提供商类型 (openai, azure_openai, aws_bedrock)
+     * @param string $provider 提供商类型 (openai, azure_openai, aws_bedrock, dashscope, gemini)
      * @param array $config 配置参数
      * @param null|ApiOptions $apiOptions API请求选项
      * @param null|LoggerInterface $logger 日志记录器
      */
     public static function createClient(string $provider, array $config, ?ApiOptions $apiOptions = null, ?LoggerInterface $logger = null): ClientInterface
     {
+        if (! $apiOptions) {
+            $apiOptions = new ApiOptions();
+        }
         return match ($provider) {
             'openai' => self::createOpenAIClient($config, $apiOptions, $logger),
             'azure_openai' => self::createAzureOpenAIClient($config, $apiOptions, $logger),
             'aws_bedrock' => self::createAwsBedrockClient($config, $apiOptions, $logger),
+            'dashscope' => self::createDashScopeClient($config, $apiOptions, $logger),
+            'gemini' => self::createGeminiClient($config, $apiOptions, $logger),
             default => throw new InvalidArgumentException(sprintf('Unsupported provider: %s', $provider)),
         };
     }
diff --git a/src/Message/AbstractMessage.php b/src/Message/AbstractMessage.php
index 6262a15..b2b57c9 100644
--- a/src/Message/AbstractMessage.php
+++ b/src/Message/AbstractMessage.php
@@ -185,4 +185,29 @@ public function getHash(): string
     {
         return md5(serialize($this->toArray()));
     }
+
+    /**
+     * Normalize a tool call ID so it is accepted across providers.
+     *
+     * IDs containing characters outside [a-zA-Z0-9_-] (for example a colon) are replaced by
+     * their MD5 hex digest; this addresses kimi-k2 style IDs being rejected by AWS Claude.
+     *
+     * @param string $toolCallId Original tool call ID
+     * @return string The ID unchanged when empty or already compatible, otherwise its MD5 digest
+     */
+    protected function normalizeToolCallId(string $toolCallId): string
+    {
+        // An empty ID is returned untouched; there is nothing to normalize.
+        if ($toolCallId === '') {
+            return $toolCallId;
+        }
+
+        // The D modifier makes "$" match only at the very end of the subject; without it an ID
+        // with a trailing newline would be accepted although it contains an invalid character.
+        if (preg_match('/^[a-zA-Z0-9_-]+$/D', $toolCallId) !== 1) {
+            return md5($toolCallId);
+        }
+
+        return $toolCallId;
+    }
 }
diff --git a/src/Message/AssistantMessage.php b/src/Message/AssistantMessage.php
index 7918b71..12a92b1 100644
--- a/src/Message/AssistantMessage.php
+++ b/src/Message/AssistantMessage.php
@@ -49,7 +49,7 @@ class AssistantMessage extends AbstractMessage
     public function __construct(string $content, array $toolsCall = [], ?string $reasoningContent = null)
     {
         parent::__construct($content);
-        $this->toolCalls = $toolsCall;
+        $this->toolCalls = $this->normalizeToolCallIds($toolsCall);
         $this->reasoningContent = $reasoningContent;
     }
 
@@ -65,6 +65,7 @@ public static function fromArray(array $message): self
         $toolCalls = ToolCall::fromArray($message['tool_calls'] ?? []);
         $reasoningContent = $message['reasoning_content'] ?? null;
 
+        // 注意：构造函数中已经包含了标准化逻辑，所以这里不需要额外处理
         return new self($content, $toolCalls, $reasoningContent);
     }
 
@@ -180,4 +181,24 @@ public function setReasoningContent(?string $reasoningContent): self
         $this->reasoningContent = $reasoningContent;
         return $this;
     }
+
+    /**
+     * Rewrite incompatible tool call IDs on the given tool calls and hand the list back.
+     *
+     * @param array<ToolCall> $toolCalls Tool calls whose IDs may need normalizing
+     * @return array<ToolCall> The same list; entries with an incompatible ID were updated via setId()
+     */
+    private function normalizeToolCallIds(array $toolCalls): array
+    {
+        foreach ($toolCalls as $call) {
+            $currentId = $call->getId();
+            $compatibleId = $this->normalizeToolCallId($currentId);
+            // IDs that come back unchanged skip the setter entirely.
+            if ($compatibleId === $currentId) {
+                continue;
+            }
+            $call->setId($compatibleId);
+        }
+        return $toolCalls;
+    }
 }
diff --git a/src/Message/ToolMessage.php b/src/Message/ToolMessage.php
index 2b42ced..baf7ad4 100644
--- a/src/Message/ToolMessage.php
+++ b/src/Message/ToolMessage.php
@@ -50,7 +50,7 @@ class ToolMessage extends AbstractMessage
     public function __construct(string $content, string $toolCallId, ?string $name = null, ?array $arguments = null)
     {
         parent::__construct($content);
-        $this->toolCallId = $toolCallId;
+        $this->toolCallId = $this->normalizeToolCallId($toolCallId);
         $this->name = $name;
         $this->arguments = $arguments;
     }
diff --git a/src/Message/UserMessage.php b/src/Message/UserMessage.php
index 702ffc0..7b4929c 100644
--- a/src/Message/UserMessage.php
+++ b/src/Message/UserMessage.php
@@ -37,7 +37,7 @@ class UserMessage extends AbstractMessage
      */
     public function __construct(string $content = '', array $context = [])
     {
-        parent::__construct($content, $context);
+        parent::__construct(trim($content), $context);
     }
 
     /**
diff --git a/src/Message/UserMessageContent.php b/src/Message/UserMessageContent.php
index 8ad1e2a..8b7c53f 100644
--- a/src/Message/UserMessageContent.php
+++ b/src/Message/UserMessageContent.php
@@ -54,7 +54,7 @@ public function getText(): string
 
     public function setText(string $text): self
     {
-        $this->text = $text;
+        $this->text = trim($text);
         return $this;
     }
 
@@ -65,7 +65,7 @@ public function getImageUrl(): string
 
     public function setImageUrl(string $imageUrl): self
     {
-        $this->imageUrl = $imageUrl;
+        $this->imageUrl = trim($imageUrl);
         return $this;
     }
 
diff --git a/src/Model/AbstractModel.php b/src/Model/AbstractModel.php
index cf48cf1..5e940d3 100644
--- a/src/Model/AbstractModel.php
+++ b/src/Model/AbstractModel.php
@@ -440,6 +440,9 @@ private function checkFixedTemperature(ChatCompletionRequest $request): void
         if ($this->getModelOptions()->getFixedTemperature()) {
             $request->setTemperature($this->getModelOptions()->getFixedTemperature());
         }
+        if (! $request->getTemperature() && $this->modelOptions->getDefaultTemperature()) {
+            $request->setTemperature($this->modelOptions->getDefaultTemperature());
+        }
     }
 
     /**
diff --git a/src/Model/DashScopeModel.php b/src/Model/DashScopeModel.php
new file mode 100644
index 0000000..6022483
--- /dev/null
+++ b/src/Model/DashScopeModel.php
@@ -0,0 +1,61 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Model;
+
+use Hyperf\Odin\Api\Providers\DashScope\Cache\DashScopeAutoCacheConfig;
+use Hyperf\Odin\Api\Providers\DashScope\DashScope;
+use Hyperf\Odin\Api\Providers\DashScope\DashScopeConfig;
+use Hyperf\Odin\Contract\Api\ClientInterface;
+
+/**
+ * DashScope model implementation.
+ * Supports deterministic caching on top of the existing CachePoint architecture.
+ */
+class DashScopeModel extends AbstractModel
+{
+    protected bool $streamIncludeUsage = true;
+
+    /**
+     * Create the DashScope client from the model config after processApiBaseUrl() has run on it.
+     */
+    protected function getClient(): ClientInterface
+    {
+        $config = $this->config;
+        $this->processApiBaseUrl($config);
+
+        // Cast the flag to bool as ClientFactory::createDashScopeClient() does; this file declares
+        // strict_types, so a loosely typed config value would otherwise be passed on uncast.
+        $configObj = new DashScopeConfig(
+            apiKey: $config['api_key'] ?? '',
+            baseUrl: $config['base_url'] ?? 'https://dashscope.aliyuncs.com',
+            skipApiKeyValidation: (bool) ($config['skip_api_key_validation'] ?? false),
+            autoCacheConfig: $this->createAutoCacheConfig($config)
+        );
+
+        return (new DashScope())->getClient($configObj, $this->getApiRequestOptions(), $this->logger);
+    }
+
+    /**
+     * Build the automatic cache configuration from the auto_cache_config section, with defaults.
+     */
+    private function createAutoCacheConfig(array $config): DashScopeAutoCacheConfig
+    {
+        $cacheConfig = $config['auto_cache_config'] ?? [];
+
+        return new DashScopeAutoCacheConfig(
+            minCacheTokens: $cacheConfig['min_cache_tokens'] ?? 1024,
+            supportedModels: $cacheConfig['supported_models'] ?? ['qwen3-coder-plus'],
+            autoEnabled: (bool) ($cacheConfig['auto_enabled'] ?? false)
+        );
+    }
+}
diff --git a/src/Model/GeminiModel.php b/src/Model/GeminiModel.php
new file mode 100644
index 0000000..50810b9
--- /dev/null
+++ b/src/Model/GeminiModel.php
@@ -0,0 +1,48 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Model;
+
+use Hyperf\Odin\Contract\Api\ClientInterface;
+use Hyperf\Odin\Factory\ClientFactory;
+
+/**
+ * Gemini model; its client is created by ClientFactory under the 'gemini' provider key.
+ */
+class GeminiModel extends AbstractModel
+{
+    protected bool $streamIncludeUsage = true;
+
+    /**
+     * Create the client for this model.
+     *
+     * The model config is run through processApiBaseUrl() before being handed to the factory.
+     */
+    protected function getClient(): ClientInterface
+    {
+        $clientConfig = $this->config;
+        $this->processApiBaseUrl($clientConfig);
+        $requestOptions = $this->getApiRequestOptions();
+
+        return ClientFactory::createClient('gemini', $clientConfig, $requestOptions, $this->logger);
+    }
+
+    /**
+     * Get the API version path.
+     *
+     * Returns an empty string for this model.
+     */
+    protected function getApiVersionPath(): string
+    {
+        return '';
+    }
+}
diff --git a/src/Model/ModelOptions.php b/src/Model/ModelOptions.php
index f169b3c..51dc30b 100644
--- a/src/Model/ModelOptions.php
+++ b/src/Model/ModelOptions.php
@@ -39,8 +39,20 @@ class ModelOptions
      */
     protected int $vectorSize = 0;
 
+    /**
+     * @var null|float 固定温度
+     */
     protected ?float $fixedTemperature = null;
 
+    /**
+     * @var null|float 默认温度。即推荐温度
+     */
+    protected ?float $defaultTemperature = null;
+
+    protected ?int $maxTokens = null;
+
+    protected ?int $maxOutputTokens = null;
+
     public function __construct(array $options = [])
     {
         if (isset($options['chat'])) {
@@ -66,6 +78,18 @@ public function __construct(array $options = [])
         if (isset($options['fixed_temperature'])) {
             $this->fixedTemperature = (float) $options['fixed_temperature'];
         }
+
+        if (isset($options['default_temperature'])) {
+            $this->defaultTemperature = (float) $options['default_temperature'];
+        }
+
+        if (isset($options['max_tokens'])) {
+            $this->maxTokens = (int) $options['max_tokens'];
+        }
+
+        if (isset($options['max_output_tokens'])) {
+            $this->maxOutputTokens = (int) $options['max_output_tokens'];
+        }
     }
 
     /**
@@ -88,6 +112,9 @@ public function toArray(): array
             'function_call' => $this->functionCall,
             'vector_size' => $this->vectorSize,
             'fixed_temperature' => $this->fixedTemperature,
+            'default_temperature' => $this->defaultTemperature,
+            'max_tokens' => $this->maxTokens,
+            'max_output_tokens' => $this->maxOutputTokens,
         ];
     }
 
@@ -165,4 +192,34 @@ public function setFixedTemperature(?float $fixedTemperature): void
     {
         $this->fixedTemperature = $fixedTemperature;
     }
+
+    public function getDefaultTemperature(): ?float
+    {
+        return $this->defaultTemperature;
+    }
+
+    public function setDefaultTemperature(?float $defaultTemperature): void
+    {
+        $this->defaultTemperature = $defaultTemperature;
+    }
+
+    public function getMaxTokens(): ?int
+    {
+        return $this->maxTokens;
+    }
+
+    public function setMaxTokens(?int $maxTokens): void
+    {
+        $this->maxTokens = $maxTokens;
+    }
+
+    public function getMaxOutputTokens(): ?int
+    {
+        return $this->maxOutputTokens;
+    }
+
+    public function setMaxOutputTokens(?int $maxOutputTokens): void
+    {
+        $this->maxOutputTokens = $maxOutputTokens;
+    }
 }
diff --git a/src/Model/OpenAIModel.php b/src/Model/OpenAIModel.php
index b1b41f2..4e55bc7 100644
--- a/src/Model/OpenAIModel.php
+++ b/src/Model/OpenAIModel.php
@@ -14,16 +14,21 @@
 
 use Hyperf\Odin\Contract\Api\ClientInterface;
 use Hyperf\Odin\Factory\ClientFactory;
+use Hyperf\Odin\Utils\ModelUtil;
 
 /**
  * OpenAI模型实现.
+ *
+ * 支持智能路由：当使用qwen系列模型时，自动切换到DashScope客户端；
+ * 其他模型继续使用OpenAI客户端。这确保了向后兼容性。
  */
 class OpenAIModel extends AbstractModel
 {
     protected bool $streamIncludeUsage = true;
 
     /**
-     * 获取OpenAI客户端实例.
+     * 获取客户端实例，根据模型类型智能路由.
+     * 如果是qwen系列模型，使用DashScope客户端；否则使用OpenAI客户端.
      */
     protected function getClient(): ClientInterface
     {
@@ -31,8 +36,20 @@ protected function getClient(): ClientInterface
         $config = $this->config;
         $this->processApiBaseUrl($config);
 
-        // 使用ClientFactory创建OpenAI客户端
-        return ClientFactory::createOpenAIClient(
+        // 检查是否为qwen系列模型
+        if (ModelUtil::isQwenModel($this->model)) {
+            // 使用ClientFactory统一创建DashScope客户端
+            return ClientFactory::createClient(
+                'dashscope',
+                $config,
+                $this->getApiRequestOptions(),
+                $this->logger
+            );
+        }
+
+        // 使用ClientFactory统一创建OpenAI客户端
+        return ClientFactory::createClient(
+            'openai',
             $config,
             $this->getApiRequestOptions(),
             $this->logger
diff --git a/src/Utils/ImageDownloader.php b/src/Utils/ImageDownloader.php
new file mode 100644
index 0000000..1713118
--- /dev/null
+++ b/src/Utils/ImageDownloader.php
@@ -0,0 +1,318 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Utils;
+
+use GuzzleHttp\Client;
+use GuzzleHttp\Exception\RequestException;
+use Hyperf\Odin\Exception\LLMException\Api\LLMInvalidRequestException;
+
+/**
+ * Image downloader utility for downloading remote images.
+ *
+ * 图片下载工具类，用于下载远程图片。
+ */
+class ImageDownloader
+{
+    /**
+     * Maximum image file size (10MB).
+     */
+    private const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
+
+    /**
+     * Connection timeout in seconds.
+     */
+    private const CONNECT_TIMEOUT = 10;
+
+    /**
+     * Read timeout in seconds.
+     */
+    private const READ_TIMEOUT = 30;
+
+    /**
+     * Download a remote image and return it as a base64 data URL.
+     *
+     * The 'standard', 'simple' and 'mobile' strategies are tried in that order;
+     * the first one that succeeds provides the result.
+     *
+     * @param string $imageUrl HTTP(S) image URL
+     * @param int $maxFileSize Maximum file size in bytes (default: 10MB)
+     * @return string Base64 data URL (data:image/xxx;base64,...)
+     * @throws LLMInvalidRequestException the last failure, once every strategy has failed
+     */
+    public static function downloadAndConvertToBase64(string $imageUrl, int $maxFileSize = self::MAX_FILE_SIZE): string
+    {
+        $failure = null;
+
+        foreach (['standard', 'simple', 'mobile'] as $strategy) {
+            try {
+                return self::downloadWithStrategy($imageUrl, $maxFileSize, $strategy);
+            } catch (LLMInvalidRequestException $caught) {
+                // Keep the failure and move on to the next strategy.
+                $failure = $caught;
+            }
+        }
+
+        // Nothing succeeded: report the most recent failure.
+        if ($failure !== null) {
+            throw $failure;
+        }
+
+        // Fallback for the case where no failure was recorded.
+        throw new LLMInvalidRequestException('所有下载策略都失败了');
+    }
+
+    /**
+     * Identify an image format from the magic bytes at the start of the data.
+     *
+     * Recognizes JPEG, PNG, GIF, WebP, BMP and TIFF signatures.
+     *
+     * @param string $imageData Binary image data
+     * @return null|string MIME type (e.g., 'image/jpeg', 'image/png') or null if unknown
+     */
+    public static function detectImageMimeType(string $imageData): ?string
+    {
+        $byteCount = strlen($imageData);
+        // Inputs shorter than 8 bytes are never classified.
+        if ($byteCount < 8) {
+            return null;
+        }
+
+        // Fixed leading signatures. None is a prefix of another, so lookup order does not matter.
+        $prefixToMime = [
+            "\xFF\xD8\xFF" => 'image/jpeg',
+            "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A" => 'image/png',
+            'GIF87a' => 'image/gif',
+            'GIF89a' => 'image/gif',
+            'BM' => 'image/bmp',
+            // TIFF, little-endian ('II') and big-endian ('MM') byte order
+            "II\x2A\x00" => 'image/tiff',
+            "MM\x00\x2A" => 'image/tiff',
+        ];
+
+        foreach ($prefixToMime as $prefix => $mime) {
+            if (str_starts_with($imageData, $prefix)) {
+                return $mime;
+            }
+        }
+
+        // WebP: a RIFF container with "WEBP" at byte offset 8.
+        $isWebp = $byteCount >= 12
+            && str_starts_with($imageData, 'RIFF')
+            && substr($imageData, 8, 4) === 'WEBP';
+
+        return $isWebp ? 'image/webp' : null;
+    }
+
+    /**
+     * Tell whether a URL begins with the http:// or https:// scheme.
+     *
+     * @param string $url URL to check
+     * @return bool True if it's a remote image URL
+     */
+    public static function isRemoteImageUrl(string $url): bool
+    {
+        return preg_match('#^https?://#', $url) === 1;
+    }
+
+    /**
+     * Tell whether a URL is an image data URL carrying base64 content.
+     *
+     * @return bool True when $url starts with "data:image/" and contains ";base64,"
+     */
+    public static function isBase64DataUrl(string $url): bool
+    {
+        $hasImageScheme = str_starts_with($url, 'data:image/');
+        return $hasImageScheme && str_contains($url, ';base64,');
+    }
+
+    /**
+     * Get the default maximum file size limit (the MAX_FILE_SIZE constant).
+     *
+     * @return int Maximum file size in bytes
+     */
+    public static function getMaxFileSize(): int
+    {
+        return self::MAX_FILE_SIZE;
+    }
+
+    /**
+     * Get the default maximum file size limit as text produced by formatFileSize().
+     *
+     * @return string Maximum file size (e.g., "10MB")
+     */
+    public static function getMaxFileSizeFormatted(): string
+    {
+        return self::formatFileSize(self::MAX_FILE_SIZE);
+    }
+
+    /**
+     * Format a byte count as a short human-readable string using 1024-based units.
+     *
+     * @param int $bytes File size in bytes; zero or negative values yield "0B"
+     * @return string Formatted file size (e.g., "10MB", "512KB", "1.5GB"); TB is the largest unit used
+     */
+    public static function formatFileSize(int $bytes): string
+    {
+        if ($bytes <= 0) {
+            return '0B';
+        }
+
+        $units = ['B', 'KB', 'MB', 'GB', 'TB'];
+        // Integer unit index, capped at the last unit so sizes of 1024 TB and above cannot overrun $units.
+        $factor = min((int) floor(log($bytes, 1024)), count($units) - 1);
+        return round($bytes / (1024 ** $factor), 1) . $units[$factor];
+    }
+
+    /**
+     * Download an image with one strategy and encode it as a base64 data URL.
+     *
+     * Validates the URL, fetches it with the strategy's client configuration,
+     * enforces the size limit and derives the MIME type from the downloaded bytes.
+     *
+     * @param string $imageUrl HTTP(S) image URL
+     * @param int $maxFileSize Maximum file size in bytes
+     * @param string $strategy Download strategy name, passed to getClientConfig()
+     * @return string Base64 data URL
+     * @throws LLMInvalidRequestException on an invalid URL, a failed transfer, or oversized, empty or unrecognized data
+     */
+    private static function downloadWithStrategy(string $imageUrl, int $maxFileSize, string $strategy): string
+    {
+        // Only syntactically valid http:// or https:// URLs are fetched.
+        if (! filter_var($imageUrl, FILTER_VALIDATE_URL)) {
+            throw new LLMInvalidRequestException('无效的图片URL格式');
+        }
+
+        if (! str_starts_with($imageUrl, 'http://') && ! str_starts_with($imageUrl, 'https://')) {
+            throw new LLMInvalidRequestException('只支持HTTP/HTTPS协议的图片URL');
+        }
+
+        // NOTE(review): the target host is not restricted here; confirm whether callers need SSRF protection.
+        $clientConfig = self::getClientConfig($strategy);
+        $limitFormatted = self::formatFileSize($maxFileSize);
+        $tooLargeMessage = "图片文件过大，超过{$limitFormatted}限制 (策略: {$strategy})";
+
+        try {
+            $client = new Client($clientConfig);
+
+            // Buffer the whole response in memory, aborting once more than the limit has been transferred.
+            $response = $client->get($imageUrl, [
+                'stream' => false,
+                'progress' => static function ($downloadTotal, $downloadedBytes) use ($maxFileSize, $tooLargeMessage) {
+                    if ($downloadedBytes > $maxFileSize) {
+                        throw new LLMInvalidRequestException($tooLargeMessage);
+                    }
+                },
+            ]);
+
+            // Response metadata is only used to enrich the empty-body error below.
+            $statusCode = $response->getStatusCode();
+            $contentType = $response->getHeaderLine('Content-Type');
+            $contentLength = $response->getHeaderLine('Content-Length');
+
+            $imageData = $response->getBody()->getContents();
+        } catch (RequestException|\GuzzleHttp\Exception\ConnectException $e) {
+            // Guzzle 7's ConnectException (network-level failures) does not extend
+            // RequestException, so it is named explicitly; otherwise it would escape unconverted
+            // and stop the caller from trying the remaining strategies.
+            throw new LLMInvalidRequestException("下载图片失败 (策略: {$strategy}): " . $e->getMessage());
+        }
+
+        // Final guard on the length of the buffered body.
+        $actualSize = strlen($imageData);
+        if ($actualSize > $maxFileSize) {
+            throw new LLMInvalidRequestException($tooLargeMessage);
+        }
+
+        // An empty body is reported together with the response details.
+        if ($actualSize === 0) {
+            $errorDetails = [
+                "策略: {$strategy}",
+                "HTTP状态: {$statusCode}",
+                'Content-Type: ' . ($contentType ?: 'unknown'),
+                'Content-Length: ' . ($contentLength ?: 'unknown'),
+                "实际大小: {$actualSize}",
+                "URL: {$imageUrl}",
+            ];
+            $errorMessage = '下载的图片文件为空 (' . implode(', ', $errorDetails) . ')';
+            throw new LLMInvalidRequestException($errorMessage);
+        }
+
+        // Only data with a recognized image signature is accepted.
+        $mimeType = self::detectImageMimeType($imageData);
+        if (! $mimeType) {
+            throw new LLMInvalidRequestException("不支持的图片格式或文件已损坏 (策略: {$strategy})");
+        }
+
+        return "data:{$mimeType};base64," . base64_encode($imageData);
+    }
+
+    /**
+     * Build the HTTP client options for a download strategy.
+     *
+     * @param string $strategy Download strategy ('standard', 'simple', 'mobile')
+     * @return array Client configuration; unknown strategies get the timeouts only
+     */
+    private static function getClientConfig(string $strategy): array
+    {
+        $strategyOptions = match ($strategy) {
+            'standard' => [
+                'headers' => [
+                    'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                    'Accept' => 'image/*,*/*;q=0.8',
+                    'Accept-Encoding' => 'gzip, deflate, br',
+                    'Accept-Language' => 'zh-CN,zh;q=0.9,en;q=0.8',
+                    'Cache-Control' => 'no-cache',
+                    'Pragma' => 'no-cache',
+                    'Referer' => 'https://www.google.com/',
+                ],
+                // NOTE(review): TLS certificate verification is switched off for this strategy.
+                'verify' => false,
+                'allow_redirects' => [
+                    'max' => 10,
+                    'strict' => false,
+                    'referer' => true,
+                    'track_redirects' => true,
+                ],
+            ],
+            'simple' => [
+                'headers' => [
+                    'User-Agent' => 'Odin-ImageDownloader/1.0',
+                    'Accept' => 'image/*',
+                ],
+                'verify' => true,
+                'allow_redirects' => true,
+            ],
+            'mobile' => [
+                'headers' => [
+                    'User-Agent' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1',
+                    'Accept' => 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8',
+                    'Accept-Encoding' => 'gzip, deflate',
+                    'Accept-Language' => 'zh-CN,zh;q=0.9',
+                ],
+                // NOTE(review): TLS certificate verification is switched off for this strategy.
+                'verify' => false,
+                'allow_redirects' => [
+                    'max' => 5,
+                    'strict' => true,
+                ],
+            ],
+            default => [],
+        };
+
+        // Timeouts shared by every strategy come first; the strategy options are merged on top.
+        return array_merge([
+            'timeout' => self::READ_TIMEOUT,
+            'connect_timeout' => self::CONNECT_TIMEOUT,
+        ], $strategyOptions);
+    }
+}
diff --git a/src/Utils/ImageFormatValidator.php b/src/Utils/ImageFormatValidator.php
new file mode 100644
index 0000000..3511b1c
--- /dev/null
+++ b/src/Utils/ImageFormatValidator.php
@@ -0,0 +1,87 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Utils;
+
+use Hyperf\Odin\Exception\LLMException\Model\LLMUnsupportedImageFormatException;
+
+/**
+ * Simple image format validator for vision understanding requests.
+ *
+ * 视觉理解请求的简单图片格式验证器。
+ */
+class ImageFormatValidator
+{
+    /**
+     * Supported image file extensions (lowercase, without the leading dot).
+     *
+     * @var string[]
+     */
+    private const SUPPORTED_EXTENSIONS = [
+        'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'tif',
+        'ico', 'dib', 'icns', 'sgi', 'j2c', 'j2k', 'jp2', 'jpc', 'jpf', 'jpx',
+    ];
+
+    /**
+     * Validate the file extension found in an image URL.
+     *
+     * Data URLs, URLs whose path cannot be determined, and URLs without a file
+     * extension pass without further checks. The extension is lowercased before
+     * it is compared with the supported list.
+     *
+     * @param string $imageUrl The image URL to validate
+     * @throws LLMUnsupportedImageFormatException When extension exists but is not supported
+     */
+    public static function validateImageUrl(string $imageUrl): void
+    {
+        // Skip validation if it's a data URL (Base64)
+        if (str_starts_with($imageUrl, 'data:')) {
+            return;
+        }
+
+        // Extract the path component; parse_url() yields null or false when there is none.
+        $urlPath = parse_url($imageUrl, PHP_URL_PATH);
+        if (! is_string($urlPath) || $urlPath === '') {
+            // No usable path: nothing to validate, and not treated as an error.
+            return;
+        }
+
+        $extension = strtolower(pathinfo($urlPath, PATHINFO_EXTENSION));
+
+        // Compare with '' explicitly: empty() would also treat an extension of "0" as missing.
+        if ($extension === '') {
+            return;
+        }
+
+        // If extension exists but not supported, throw error
+        if (! in_array($extension, self::SUPPORTED_EXTENSIONS, true)) {
+            throw new LLMUnsupportedImageFormatException(
+                sprintf('不支持的图片格式: .%s', $extension),
+                null,
+                $extension,
+                $imageUrl
+            );
+        }
+    }
+
+    /**
+     * Get all supported file extensions.
+     *
+     * Each entry is lowercase and has no leading dot.
+     *
+     * @return string[] Array of supported file extensions
+     */
+    public static function getSupportedExtensions(): array
+    {
+        return self::SUPPORTED_EXTENSIONS;
+    }
+}
diff --git a/src/Utils/LogUtil.php b/src/Utils/LogUtil.php
index 31f9d8d..565d316 100644
--- a/src/Utils/LogUtil.php
+++ b/src/Utils/LogUtil.php
@@ -12,6 +12,9 @@
 
 namespace Hyperf\Odin\Utils;
 
+use Hyperf\Context\ApplicationContext;
+use Psr\Log\LoggerInterface;
+
 class LogUtil
 {
     /**
@@ -34,12 +37,21 @@ class LogUtil
 
     private const PERF_TIMEOUT_RISK = 'TIMEOUT_RISK';
 
+    public static function getHyperfLogger(): ?LoggerInterface
+    {
+        // Null (rather than an error) when no DI container has been registered, as the nullable return type allows.
+        return ApplicationContext::hasContainer() ? ApplicationContext::getContainer()->get(LoggerInterface::class) : null;
+    }
     /**
      * 递归处理数组，格式化超长文本和二进制数据.
+     *
+     * @param array $args 要格式化的数组
+     * @param int $maxTextLength 最大文本长度限制，默认2000
+     * @return array 格式化后的数组
      */
-    public static function formatLongText(array $args): array
+    public static function formatLongText(array $args, int $maxTextLength = 2000): array
     {
-        return self::recursiveFormat($args);
+        return self::recursiveFormat($args, $maxTextLength);
     }
 
     /**
@@ -48,13 +60,14 @@ public static function formatLongText(array $args): array
      * @param array $logData 原始日志数据
      * @param array $whitelistFields 白名单字段列表，为空则返回所有字段，支持嵌套字段如 'args.messages'
      * @param bool $enableWhitelist 是否启用白名单过滤，默认false
+     * @param int $maxTextLength 最大文本长度限制，默认2000
      * @return array 过滤并格式化后的日志数据
      */
-    public static function filterAndFormatLogData(array $logData, array $whitelistFields = [], bool $enableWhitelist = false): array
+    public static function filterAndFormatLogData(array $logData, array $whitelistFields = [], bool $enableWhitelist = false, int $maxTextLength = 2000): array
     {
         // 如果未启用白名单或白名单为空，处理所有字段
         if (! $enableWhitelist || empty($whitelistFields)) {
-            return self::formatLongText($logData);
+            return self::formatLongText($logData, $maxTextLength);
         }
 
         // 根据白名单过滤字段，支持嵌套字段
@@ -75,7 +88,7 @@ public static function filterAndFormatLogData(array $logData, array $whitelistFi
         }
 
         // 格式化过滤后的数据
-        return self::formatLongText($filteredData);
+        return self::formatLongText($filteredData, $maxTextLength);
     }
 
     /**
@@ -168,12 +181,16 @@ private static function setNestedValue(array &$data, string $path, mixed $value)
 
     /**
      * 递归处理数组中的每个元素.
+     *
+     * @param mixed $data 要处理的数据
+     * @param int $maxTextLength 最大文本长度限制
+     * @return mixed 处理后的数据
      */
-    private static function recursiveFormat(mixed $data)
+    private static function recursiveFormat(mixed $data, int $maxTextLength = 2000)
     {
         if (is_array($data)) {
             foreach ($data as $key => $value) {
-                $data[$key] = self::recursiveFormat($value);
+                $data[$key] = self::recursiveFormat($value, $maxTextLength);
             }
             return $data;
         }
@@ -181,7 +198,7 @@ private static function recursiveFormat(mixed $data)
             // 对象转换为数组再处理，最后转回对象
             if (method_exists($data, 'toArray')) {
                 $array = $data->toArray();
-                $array = self::recursiveFormat($array);
+                $array = self::recursiveFormat($array, $maxTextLength);
                 // 如果对象有 fromArray 方法，可以使用它恢复对象
                 if (method_exists($data, 'fromArray')) {
                     return $data->fromArray($array);
@@ -201,8 +218,8 @@ private static function recursiveFormat(mixed $data)
                 return '[Base64 Image]';
             }
 
-            // 处理超长字符串
-            if (strlen($data) > 2000) {
+            // 处理超长字符串（0 表示不限制长度）
+            if ($maxTextLength > 0 && strlen($data) > $maxTextLength) {
                 return '[Long Text]';
             }
         }
diff --git a/src/Utils/LoggingConfigHelper.php b/src/Utils/LoggingConfigHelper.php
index a7e3ddd..3e467c9 100644
--- a/src/Utils/LoggingConfigHelper.php
+++ b/src/Utils/LoggingConfigHelper.php
@@ -60,6 +60,25 @@ public static function isWhitelistEnabled(?ApiOptions $apiOptions = null): bool
         }
     }
 
+    /**
+     * Resolve the maximum text length limit applied when formatting log data.
+     */
+    public static function getMaxTextLength(?ApiOptions $apiOptions = null): int
+    {
+        // Explicit API options win over the global configuration.
+        if ($apiOptions !== null) {
+            return $apiOptions->getLoggingMaxTextLength();
+        }
+
+        $defaultLength = 2000;
+        try {
+            return (int) self::getConfig()->get('odin.llm.general_api_options.logging.max_text_length', $defaultLength);
+        } catch (Throwable $ignored) {
+            // The configuration could not be read: use the default.
+            return $defaultLength;
+        }
+    }
+
     /**
      * 应用白名单过滤并格式化日志数据.
      *
@@ -71,8 +90,9 @@ public static function filterAndFormatLogData(array $logData, ?ApiOptions $apiOp
     {
         $whitelistFields = self::getWhitelistFields($apiOptions);
         $enableWhitelist = self::isWhitelistEnabled($apiOptions);
+        $maxTextLength = self::getMaxTextLength($apiOptions);
 
-        return LogUtil::filterAndFormatLogData($logData, $whitelistFields, $enableWhitelist);
+        return LogUtil::filterAndFormatLogData($logData, $whitelistFields, $enableWhitelist, $maxTextLength);
     }
 
     /**
diff --git a/src/Utils/ModelUtil.php b/src/Utils/ModelUtil.php
new file mode 100644
index 0000000..b57be80
--- /dev/null
+++ b/src/Utils/ModelUtil.php
@@ -0,0 +1,46 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Utils;
+
+/**
+ * 模型相关的工具类.
+ */
+class ModelUtil
+{
+    /**
+     * Tell whether the model name contains "qwen" (case-insensitive).
+     */
+    public static function isQwenModel(string $model): bool
+    {
+        return stripos($model, 'qwen') !== false;
+    }
+
+    /**
+     * Map a model name to its provider identifier.
+     *
+     * @return string 'dashscope' for qwen models, 'openai' for everything else
+     */
+    public static function getProviderType(string $model): string
+    {
+        // Qwen models map to the 'dashscope' identifier; every other model
+        // defaults to 'openai'.
+        //
+        // Extension point: checks for further model families go in this match,
+        // ahead of the default arm, e.g.
+        //     self::isClaudeModel($model) => 'anthropic',
+        return match (true) {
+            self::isQwenModel($model) => 'dashscope',
+            default => 'openai',
+        };
+    }
+}
diff --git a/src/Utils/TimeUtil.php b/src/Utils/TimeUtil.php
new file mode 100644
index 0000000..14516c8
--- /dev/null
+++ b/src/Utils/TimeUtil.php
@@ -0,0 +1,54 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Utils;
+
+/**
+ * 时间工具类，用于统一处理时间计算.
+ */
+class TimeUtil
+{
+    /**
+     * Milliseconds elapsed between a start time and the moment of the call.
+     *
+     * @param float $startTime Start time as returned by microtime(true)
+     * @param int $precision Number of decimal places to keep (default 0)
+     * @return float Elapsed time in milliseconds
+     */
+    public static function calculateDurationMs(float $startTime, int $precision = 0): float
+    {
+        return self::calculateIntervalMs($startTime, self::now(), $precision);
+    }
+
+    /**
+     * Milliseconds between two points in time.
+     *
+     * @param float $startTime Start time as returned by microtime(true)
+     * @param float $endTime End time as returned by microtime(true)
+     * @param int $precision Number of decimal places to keep (default 0)
+     * @return float Interval in milliseconds
+     */
+    public static function calculateIntervalMs(float $startTime, float $endTime, int $precision = 0): float
+    {
+        return round(1000 * ($endTime - $startTime), $precision);
+    }
+
+    /**
+     * Current time as a float, as returned by microtime(true).
+     *
+     * @return float Current timestamp
+     */
+    public static function now(): float
+    {
+        return microtime(true);
+    }
+}
diff --git a/src/Utils/VisionMessageValidator.php b/src/Utils/VisionMessageValidator.php
new file mode 100644
index 0000000..abdc5be
--- /dev/null
+++ b/src/Utils/VisionMessageValidator.php
@@ -0,0 +1,51 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace Hyperf\Odin\Utils;
+
+use Hyperf\Odin\Exception\LLMException\Model\LLMUnsupportedImageFormatException;
+use Hyperf\Odin\Message\UserMessage;
+
+/**
+ * Simple validator for vision understanding messages.
+ *
+ * 视觉理解消息的简单验证器。
+ */
+class VisionMessageValidator
+{
+    /**
+     * Validate images in a single user message.
+     *
+     * Every image_url content part with a non-empty URL is passed to ImageFormatValidator.
+     *
+     * @param UserMessage $message User message to validate
+     * @throws LLMUnsupportedImageFormatException
+     */
+    public static function validateUserMessage(UserMessage $message): void
+    {
+        // A message without contents yields an empty list, so the loop is skipped.
+        $parts = $message->getContents() ?: [];
+
+        foreach ($parts as $part) {
+            // Only image_url parts carry a URL to check.
+            if ($part->getType() !== 'image_url') {
+                continue;
+            }
+            $candidateUrl = $part->getImageUrl();
+            if (empty($candidateUrl)) {
+                continue;
+            }
+
+            ImageFormatValidator::validateImageUrl($candidateUrl);
+        }
+    }
+}
diff --git a/tests/Cases/Api/Providers/AwsBedrock/AwsBedrockTest.php b/tests/Cases/Api/Providers/AwsBedrock/AwsBedrockTest.php
index 11c1b49..67849ee 100644
--- a/tests/Cases/Api/Providers/AwsBedrock/AwsBedrockTest.php
+++ b/tests/Cases/Api/Providers/AwsBedrock/AwsBedrockTest.php
@@ -14,6 +14,7 @@
 
 use Hyperf\Odin\Api\Providers\AwsBedrock\AwsBedrock;
 use Hyperf\Odin\Api\Providers\AwsBedrock\AwsBedrockConfig;
+use Hyperf\Odin\Api\Providers\AwsBedrock\AwsType;
 use Hyperf\Odin\Api\Providers\AwsBedrock\Client;
 use Hyperf\Odin\Api\RequestOptions\ApiOptions;
 use Hyperf\Odin\Exception\LLMException\Configuration\LLMInvalidApiKeyException;
@@ -41,11 +42,12 @@ public function testGetClient()
         // 创建AwsBedrock实例
         $awsBedrock = new AwsBedrock();
 
-        // 创建有效的配置
+        // 创建有效的配置，使用 invoke 类型以返回 Client 实例
         $config = new AwsBedrockConfig(
             accessKey: 'test-access-key',
             secretKey: 'test-secret-key',
-            region: 'us-east-1'
+            region: 'us-east-1',
+            type: AwsType::INVOKE
         );
 
         // 获取客户端
@@ -152,11 +154,12 @@ public function testGetClientWithAllParams()
     {
         $awsBedrock = new AwsBedrock();
 
-        // 创建配置
+        // 创建配置，使用 invoke 类型以返回 Client 实例
         $config = new AwsBedrockConfig(
             accessKey: 'test-access-key',
             secretKey: 'test-secret-key',
-            region: 'us-east-1'
+            region: 'us-east-1',
+            type: AwsType::INVOKE
         );
 
         // 创建请求选项
diff --git a/tests/Cases/Api/RequestOptions/ApiOptionsTest.php b/tests/Cases/Api/RequestOptions/ApiOptionsTest.php
index 835a786..780065b 100644
--- a/tests/Cases/Api/RequestOptions/ApiOptionsTest.php
+++ b/tests/Cases/Api/RequestOptions/ApiOptionsTest.php
@@ -34,7 +34,7 @@ public function testDefaultConstructor()
         $this->assertEquals(300.0, $options->getReadTimeout());
         $this->assertEquals(350.0, $options->getTotalTimeout());
         $this->assertEquals(120.0, $options->getThinkingTimeout());
-        $this->assertEquals(30.0, $options->getStreamChunkTimeout());
+        $this->assertEquals(60.0, $options->getStreamChunkTimeout());
         $this->assertEquals(60.0, $options->getStreamFirstChunkTimeout());
 
         // 验证自定义错误映射规则默认为空数组
diff --git a/tests/Cases/Api/Response/ChatCompletionStreamResponseTest.php b/tests/Cases/Api/Response/ChatCompletionStreamResponseTest.php
index 4fa2780..4c58729 100644
--- a/tests/Cases/Api/Response/ChatCompletionStreamResponseTest.php
+++ b/tests/Cases/Api/Response/ChatCompletionStreamResponseTest.php
@@ -189,6 +189,9 @@ public function testStreamIteratorWithSSEClient()
                 yield $eventDone;
             })());
 
+        // Mock the closeEarly() method that will be called when processing is done
+        $sseClient->shouldReceive('closeEarly')->once();
+
         // 创建StreamResponse
         $streamResponse = new ChatCompletionStreamResponse($response, null, $sseClient);
 
diff --git a/tests/Cases/Api/Transport/SSEClientTest.php b/tests/Cases/Api/Transport/SSEClientTest.php
index 2b3e3de..36a84ca 100644
--- a/tests/Cases/Api/Transport/SSEClientTest.php
+++ b/tests/Cases/Api/Transport/SSEClientTest.php
@@ -17,8 +17,6 @@
 use Hyperf\Odin\Exception\InvalidArgumentException;
 use HyperfTest\Odin\Cases\AbstractTestCase;
 use Mockery;
-use Mockery\MockInterface;
-use Psr\Log\LoggerInterface;
 
 /**
  * @internal
@@ -125,18 +123,7 @@ public function testInvalidJsonHandling()
         fwrite($stream, "data: {invalid json}\n\n");
         rewind($stream);
 
-        // 添加日志记录器以捕获日志
-        /** @var LoggerInterface|MockInterface $logger */
-        $logger = Mockery::mock(LoggerInterface::class);
-        // @phpstan-ignore-next-line
-        $logger->shouldReceive('debug')->once()->with(
-            'Failed to parse JSON data in SSE event',
-            Mockery::on(function ($context) {
-                return isset($context['error']) && isset($context['data']) && $context['data'] === '{invalid json}';
-            })
-        );
-
-        $sseClient = new SSEClient($stream, true, null, null, $logger);
+        $sseClient = new SSEClient($stream);
         $events = iterator_to_array($sseClient->getIterator());
 
         $this->assertCount(1, $events);
@@ -145,7 +132,8 @@ public function testInvalidJsonHandling()
     }
 
     /**
-     * 测试超时检测方法.
+     * 测试超时检测功能.
+     * SSEClient 通过 StreamExceptionDetector 来处理超时检测，而不是直接提供 isTimedOut 方法.
      */
     public function testIsTimedOut()
     {
@@ -153,19 +141,16 @@ public function testIsTimedOut()
         fwrite($stream, "data: test\n\n");
         rewind($stream);
 
-        // 创建SSEClient实例
-        $sseClient = new SSEClient($stream, true, 1); // 1秒超时
-
-        // 初始状态下不应超时
-        $isTimedOut = $this->callNonpublicMethod($sseClient, 'isTimedOut');
-        $this->assertFalse($isTimedOut);
+        // 创建SSEClient实例，通过timeoutConfig传递1秒超时
+        $sseClient = new SSEClient($stream, true, ['stream_total' => 1]);
 
-        // 设置connectionStartTime为过去时间，模拟超时
-        $this->setNonpublicPropertyValue($sseClient, 'connectionStartTime', microtime(true) - 2);
+        // 验证 StreamExceptionDetector 已创建
+        $exceptionDetector = $this->getNonpublicProperty($sseClient, 'exceptionDetector');
+        $this->assertNotNull($exceptionDetector);
 
-        // 现在应该检测到超时
-        $isTimedOut = $this->callNonpublicMethod($sseClient, 'isTimedOut');
-        $this->assertTrue($isTimedOut);
+        // 验证超时配置已正确设置
+        $timeoutConfig = $this->getNonpublicProperty($exceptionDetector, 'timeoutConfig');
+        $this->assertEquals(1.0, $timeoutConfig['total']);
     }
 
     /**
diff --git a/tests/Cases/Api/Transport/StreamExceptionDetectorTest.php b/tests/Cases/Api/Transport/StreamExceptionDetectorTest.php
index 4328809..7e0943a 100644
--- a/tests/Cases/Api/Transport/StreamExceptionDetectorTest.php
+++ b/tests/Cases/Api/Transport/StreamExceptionDetectorTest.php
@@ -17,8 +17,6 @@
 use Hyperf\Odin\Exception\LLMException\Network\LLMThinkingStreamTimeoutException;
 use HyperfTest\Odin\Cases\AbstractTestCase;
 use Mockery;
-use Mockery\MockInterface;
-use Psr\Log\LoggerInterface;
 
 /**
  * @internal
@@ -45,7 +43,7 @@ public function testDefaultConfig()
         // 使用反射检查内部配置
         $config = $this->getNonpublicProperty($detector, 'timeoutConfig');
 
-        $this->assertEquals(300.0, $config['total']);
+        $this->assertEquals(600.0, $config['total']); // 流式处理默认超时更长
         $this->assertEquals(60.0, $config['stream_first']);
         $this->assertEquals(30.0, $config['stream_chunk']);
     }
@@ -142,17 +140,7 @@ public function testChunkIntervalTimeout()
      */
     public function testOnChunkReceived()
     {
-        /** @var LoggerInterface|MockInterface $logger */
-        $logger = Mockery::mock(LoggerInterface::class);
-        // @phpstan-ignore-next-line
-        $logger->shouldReceive('debug')->once()->with(
-            'First chunk received',
-            Mockery::on(function ($context) {
-                return isset($context['initial_response_time']);
-            })
-        );
-
-        $detector = new StreamExceptionDetector([], $logger);
+        $detector = new StreamExceptionDetector([]);
 
         // 设置开始时间
         $startTime = microtime(true) - 1;
diff --git a/tests/Cases/Exception/LLMException/AzureModelErrorTest.php b/tests/Cases/Exception/LLMException/AzureModelErrorTest.php
index 92ffe78..0134488 100644
--- a/tests/Cases/Exception/LLMException/AzureModelErrorTest.php
+++ b/tests/Cases/Exception/LLMException/AzureModelErrorTest.php
@@ -64,8 +64,7 @@ public function testAzureOpenAIModelErrorMapping(): void
         $this->assertEquals(500, $mappedException->getStatusCode());
 
         // 断言异常消息包含有用信息
-        $this->assertStringContainsString('模型生成了无效内容', $mappedException->getMessage());
-        $this->assertStringContainsString('建议修改您的提示词内容', $mappedException->getMessage());
+        $this->assertStringContainsString('Model produced invalid content', $mappedException->getMessage());
     }
 
     /**
@@ -102,8 +101,8 @@ public function testAzureServerErrorHandling(): void
         $this->assertEquals(500, $mappedException->getStatusCode());
 
         // 错误消息应该表明这是可重试的服务错误
-        $this->assertStringContainsString('Azure OpenAI 服务暂时不可用', $mappedException->getMessage());
-        $this->assertStringContainsString('建议稍后重试', $mappedException->getMessage());
+        $this->assertStringContainsString('Azure OpenAI service temporarily unavailable', $mappedException->getMessage());
+        $this->assertStringContainsString('please retry later', $mappedException->getMessage());
     }
 
     /**
diff --git a/tests/Cases/Exception/LLMException/ErrorCodeTest.php b/tests/Cases/Exception/LLMException/ErrorCodeTest.php
index 167fe9d..36644eb 100644
--- a/tests/Cases/Exception/LLMException/ErrorCodeTest.php
+++ b/tests/Cases/Exception/LLMException/ErrorCodeTest.php
@@ -56,7 +56,7 @@ public function testGetMessage()
 
         // 测试未知错误码
         $unknownMessage = ErrorCode::getMessage(999999);
-        $this->assertEquals('未知错误', $unknownMessage);
+        $this->assertEquals('Unknown error', $unknownMessage);
     }
 
     /**
diff --git a/tests/Cases/Exception/LLMException/ErrorMappingManagerTest.php b/tests/Cases/Exception/LLMException/ErrorMappingManagerTest.php
index bcfb94e..8af1e69 100644
--- a/tests/Cases/Exception/LLMException/ErrorMappingManagerTest.php
+++ b/tests/Cases/Exception/LLMException/ErrorMappingManagerTest.php
@@ -95,7 +95,7 @@ public function testMapExceptionGeneric()
         $result = $manager->mapException($exception);
 
         $this->assertInstanceOf(LLMException::class, $result);
-        $this->assertEquals('LLM调用错误: 测试异常', $result->getMessage());
+        $this->assertEquals('LLM invocation error: 测试异常', $result->getMessage());
     }
 
     /**
diff --git a/tests/Cases/Exception/LLMException/ErrorMappingTest.php b/tests/Cases/Exception/LLMException/ErrorMappingTest.php
index 17bcf39..4452a1d 100644
--- a/tests/Cases/Exception/LLMException/ErrorMappingTest.php
+++ b/tests/Cases/Exception/LLMException/ErrorMappingTest.php
@@ -73,7 +73,7 @@ public function testMapException()
         $result = $this->mapper->mapException($exception);
 
         $this->assertInstanceOf(LLMException::class, $result);
-        $this->assertEquals('LLM调用错误: 测试异常', $result->getMessage());
+        $this->assertEquals('LLM invocation error: 测试异常', $result->getMessage());
     }
 
     /**
diff --git a/tests/Cases/Exception/ProxyErrorHandlingTest.php b/tests/Cases/Exception/ProxyErrorHandlingTest.php
new file mode 100644
index 0000000..482dcb7
--- /dev/null
+++ b/tests/Cases/Exception/ProxyErrorHandlingTest.php
@@ -0,0 +1,270 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace HyperfTest\Odin\Cases\Exception;
+
+use GuzzleHttp\Exception\RequestException;
+use GuzzleHttp\Psr7\Request;
+use GuzzleHttp\Psr7\Response;
+use Hyperf\Odin\Exception\LLMException\Api\LLMRateLimitException;
+use Hyperf\Odin\Exception\LLMException\Configuration\LLMInvalidApiKeyException;
+use Hyperf\Odin\Exception\LLMException\ErrorMappingManager;
+use Hyperf\Odin\Exception\LLMException\LLMErrorHandler;
+use Hyperf\Odin\Exception\LLMException\Model\LLMContentFilterException;
+use Hyperf\Odin\Exception\LLMException\Model\LLMContextLengthException;
+use HyperfTest\Odin\Cases\AbstractTestCase;
+
+/**
+ * Checks that JSON error bodies carried by a Guzzle RequestException map to the specific LLM exception classes.
+ *
+ * @internal
+ * @covers \Hyperf\Odin\Exception\LLMException\ErrorMappingManager
+ * @covers \Hyperf\Odin\Exception\LLMException\LLMErrorHandler
+ */
+class ProxyErrorHandlingTest extends AbstractTestCase
+{
+    /**
+     * Nested {"error": {...}} body (code 4002, HTTP 400) maps to LLMContextLengthException, keeping message and code.
+     */
+    public function testProxyErrorWithNestedStructure(): void
+    {
+        $errorResponse = json_encode([
+            'error' => [
+                'message' => 'Context length exceeds model limit',
+                'code' => 4002,
+                'request_id' => '838816451070042112',
+            ],
+        ]);
+
+        $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+        $response = new Response(400, [], $errorResponse);
+        $exception = new RequestException('Client error', $request, $response);
+
+        $errorHandler = new LLMErrorHandler();
+        $mappedException = $errorHandler->handle($exception);
+
+        $this->assertInstanceOf(LLMContextLengthException::class, $mappedException);
+        $this->assertStringContainsString('Context length exceeds model limit', $mappedException->getMessage());
+        $this->assertEquals(4002, $mappedException->getErrorCode());
+    }
+
+    /**
+     * Flat {"code", "message"} body without an "error" wrapper maps to LLMContextLengthException as well.
+     */
+    public function testProxyErrorWithFlatStructure(): void
+    {
+        $errorResponse = json_encode([
+            'code' => 4002,
+            'message' => 'Context length exceeds model limit',
+        ]);
+
+        $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+        $response = new Response(400, [], $errorResponse);
+        $exception = new RequestException('Client error', $request, $response);
+
+        $errorHandler = new LLMErrorHandler();
+        $mappedException = $errorHandler->handle($exception);
+
+        $this->assertInstanceOf(LLMContextLengthException::class, $mappedException);
+        $this->assertStringContainsString('Context length exceeds model limit', $mappedException->getMessage());
+    }
+
+    /**
+     * HTTP 429 with a Retry-After header maps to LLMRateLimitException and exposes the value via getRetryAfter().
+     */
+    public function testProxyRateLimitError(): void
+    {
+        $errorResponse = json_encode([
+            'error' => [
+                'message' => 'API rate limit exceeded',
+                'code' => 3001,
+                'request_id' => '838816451070042113',
+            ],
+        ]);
+
+        $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+        $response = new Response(429, ['Retry-After' => '60'], $errorResponse);
+        $exception = new RequestException('Too many requests', $request, $response);
+
+        $errorHandler = new LLMErrorHandler();
+        $mappedException = $errorHandler->handle($exception);
+
+        $this->assertInstanceOf(LLMRateLimitException::class, $mappedException);
+        $this->assertStringContainsString('API rate limit exceeded', $mappedException->getMessage());
+
+        /** @var LLMRateLimitException $mappedException */
+        $this->assertEquals(60, $mappedException->getRetryAfter());
+    }
+
+    /**
+     * A "Content filtered by safety system" body on HTTP 400 maps to LLMContentFilterException.
+     */
+    public function testProxyContentFilterError(): void
+    {
+        $errorResponse = json_encode([
+            'error' => [
+                'message' => 'Content filtered by safety system',
+                'code' => 4001,
+                'request_id' => '838816451070042114',
+            ],
+        ]);
+
+        $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+        $response = new Response(400, [], $errorResponse);
+        $exception = new RequestException('Bad request', $request, $response);
+
+        $errorHandler = new LLMErrorHandler();
+        $mappedException = $errorHandler->handle($exception);
+
+        $this->assertInstanceOf(LLMContentFilterException::class, $mappedException);
+        $this->assertStringContainsString('Content filtered by safety system', $mappedException->getMessage());
+    }
+
+    /**
+     * HTTP 401 with an "Invalid or missing API key" body maps to LLMInvalidApiKeyException.
+     */
+    public function testProxyAuthenticationError(): void
+    {
+        $errorResponse = json_encode([
+            'error' => [
+                'message' => 'Invalid or missing API key',
+                'code' => 1001,
+                'request_id' => '838816451070042115',
+            ],
+        ]);
+
+        $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+        $response = new Response(401, [], $errorResponse);
+        $exception = new RequestException('Unauthorized', $request, $response);
+
+        $errorHandler = new LLMErrorHandler();
+        $mappedException = $errorHandler->handle($exception);
+
+        $this->assertInstanceOf(LLMInvalidApiKeyException::class, $mappedException);
+        $this->assertStringContainsString('Invalid or missing API key', $mappedException->getMessage());
+    }
+
+    /**
+     * ErrorMappingManager on its own maps a generically worded RequestException to LLMContextLengthException.
+     */
+    public function testErrorPatternMatchingWithResponseBody(): void
+    {
+        $errorResponse = json_encode([
+            'error' => [
+                'message' => 'Context length exceeds model limit',
+                'code' => 4002,
+            ],
+        ]);
+
+        $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+        $response = new Response(400, [], $errorResponse);
+        $exception = new RequestException('Some generic error', $request, $response);
+
+        $manager = new ErrorMappingManager();
+        $mappedException = $manager->mapException($exception);
+
+        // The exception message itself carries no hint, so the match has to come from the response body
+        $this->assertInstanceOf(LLMContextLengthException::class, $mappedException);
+    }
+
+    /**
+     * An error body already formatted by an upstream proxy still maps, and the lengths in its message are extracted.
+     */
+    public function testMultipleProxyLayers(): void
+    {
+        // Simulate an error from a downstream service that's already been formatted by an Odin proxy
+        $errorResponse = json_encode([
+            'error' => [
+                'message' => 'Context length exceeds model limit, current length: 8000, max limit: 4096',
+                'code' => 4002,
+                'type' => 'context_length_exceeded',
+                'request_id' => '838816451070042116',
+            ],
+        ]);
+
+        $request = new Request('POST', 'https://proxy.example.com/v1/chat/completions');
+        $response = new Response(400, [], $errorResponse);
+        $exception = new RequestException('Downstream error', $request, $response);
+
+        $errorHandler = new LLMErrorHandler();
+        $mappedException = $errorHandler->handle($exception);
+
+        $this->assertInstanceOf(LLMContextLengthException::class, $mappedException);
+        $this->assertStringContainsString('Context length exceeds model limit', $mappedException->getMessage());
+
+        // The numbers embedded in the message (8000 / 4096) must surface through the typed getters
+        /** @var LLMContextLengthException $mappedException */
+        $this->assertEquals(8000, $mappedException->getCurrentLength());
+        $this->assertEquals(4096, $mappedException->getMaxLength());
+    }
+
+    /**
+     * The English and Chinese wording of each message map to the same exception class (backward compatibility).
+     */
+    public function testChineseAndEnglishErrorMessageRecognition(): void
+    {
+        $testCases = [
+            [
+                'message' => 'Context length exceeds model limit',
+                'expectedClass' => LLMContextLengthException::class,
+                'statusCode' => 400,
+            ],
+            [
+                'message' => '上下文长度超出模型限制',
+                'expectedClass' => LLMContextLengthException::class,
+                'statusCode' => 400,
+            ],
+            [
+                'message' => 'API rate limit exceeded',
+                'expectedClass' => LLMRateLimitException::class,
+                'statusCode' => 429,
+            ],
+            [
+                'message' => 'API请求频率超出限制',
+                'expectedClass' => LLMRateLimitException::class,
+                'statusCode' => 429,
+            ],
+            [
+                'message' => 'Content filtered by safety system',
+                'expectedClass' => LLMContentFilterException::class,
+                'statusCode' => 400,
+            ],
+            [
+                'message' => '内容被系统安全过滤',
+                'expectedClass' => LLMContentFilterException::class,
+                'statusCode' => 400,
+            ],
+        ];
+
+        foreach ($testCases as $testCase) {
+            $errorResponse = json_encode([
+                'error' => [
+                    'message' => $testCase['message'],
+                    'code' => 4000,
+                ],
+            ]);
+
+            $request = new Request('POST', 'https://api.example.com/v1/chat/completions');
+            $response = new Response($testCase['statusCode'], [], $errorResponse);
+            $exception = new RequestException('Error', $request, $response);
+
+            $errorHandler = new LLMErrorHandler();
+            $mappedException = $errorHandler->handle($exception);
+
+            $this->assertInstanceOf(
+                $testCase['expectedClass'],
+                $mappedException,
+                "Failed to recognize message: {$testCase['message']}"
+            );
+        }
+    }
+}
diff --git a/tests/Cases/Model/AbstractModelTest.php b/tests/Cases/Model/AbstractModelTest.php
index 09faa1b..f288a23 100644
--- a/tests/Cases/Model/AbstractModelTest.php
+++ b/tests/Cases/Model/AbstractModelTest.php
@@ -13,9 +13,13 @@
 namespace HyperfTest\Odin\Cases\Model;
 
 use Hyperf\Odin\Api\Request\ChatCompletionRequest;
+use Hyperf\Odin\Api\Request\CompletionRequest;
+use Hyperf\Odin\Api\Request\EmbeddingRequest;
 use Hyperf\Odin\Api\RequestOptions\ApiOptions;
 use Hyperf\Odin\Api\Response\ChatCompletionResponse;
 use Hyperf\Odin\Api\Response\ChatCompletionStreamResponse;
+use Hyperf\Odin\Api\Response\EmbeddingResponse;
+use Hyperf\Odin\Api\Response\TextCompletionResponse;
 use Hyperf\Odin\Contract\Api\ClientInterface;
 use Hyperf\Odin\Exception\LLMException\Model\LLMFunctionCallNotSupportedException;
 use Hyperf\Odin\Model\AbstractModel;
@@ -60,6 +64,16 @@ public function chatCompletionsStream(ChatCompletionRequest $chatRequest): ChatC
 
                 return new ChatCompletionStreamResponse($response);
             }
+
+            public function embeddings(EmbeddingRequest $embeddingRequest): EmbeddingResponse
+            {
+                // TODO: implement. Empty stub: nothing is returned, so PHP raises a TypeError if this is ever called.
+            }
+
+            public function completions(CompletionRequest $completionRequest): TextCompletionResponse
+            {
+                // TODO: implement. Empty stub: nothing is returned, so PHP raises a TypeError if this is ever called.
+            }
         };
     }
 }
diff --git a/tests/Cases/Model/ModelOptionsTest.php b/tests/Cases/Model/ModelOptionsTest.php
index 628576c..3f67d50 100644
--- a/tests/Cases/Model/ModelOptionsTest.php
+++ b/tests/Cases/Model/ModelOptionsTest.php
@@ -97,6 +97,9 @@ public function testToArray()
             'function_call' => true,
             'vector_size' => 1536,
             'fixed_temperature' => null, // 未设置时为 null
+            'default_temperature' => null,
+            'max_tokens' => null,
+            'max_output_tokens' => null,
         ];
 
         $this->assertIsArray($array);
diff --git a/tests/Cases/Model/OpenAIModelTest.php b/tests/Cases/Model/OpenAIModelTest.php
index b12b663..a5e9ca7 100644
--- a/tests/Cases/Model/OpenAIModelTest.php
+++ b/tests/Cases/Model/OpenAIModelTest.php
@@ -50,15 +50,17 @@ public function testGetApiVersionPath()
      */
     public function testGetClient()
     {
-        // 使用 Mockery 替换 ClientFactory::createOpenAIClient 方法
+        // Replace ClientFactory::createClient with a Mockery alias mock
         $clientMock = Mockery::mock(ClientInterface::class);
 
         $clientFactoryMock = Mockery::mock('alias:' . ClientFactory::class);
-        $clientFactoryMock->shouldReceive('createOpenAIClient')
+        $clientFactoryMock->shouldReceive('createClient')
             ->once()
-            ->withArgs(function ($config, $apiOptions, $logger) {
-                // 验证 base_url 是否包含 API 版本路径
-                return isset($config['base_url']) && str_contains($config['base_url'], '/v1');
+            ->withArgs(function ($provider, $config, $apiOptions, $logger) {
+                // Verify that the provider is 'openai' and that base_url contains the API version path
+                return $provider === 'openai'
+                    && isset($config['base_url'])
+                    && str_contains($config['base_url'], '/v1');
             })
             ->andReturn($clientMock);
 
diff --git a/tests/Cases/Utils/LogUtilTest.php b/tests/Cases/Utils/LogUtilTest.php
index 46ec164..02505b0 100644
--- a/tests/Cases/Utils/LogUtilTest.php
+++ b/tests/Cases/Utils/LogUtilTest.php
@@ -82,6 +82,76 @@ public function testFormatLongTextWithBase64Image()
         $this->assertEquals('[Base64 Image]', $result['image']);
     }
 
+    public function testFormatLongTextWithCustomMaxLength()
+    {
+        // Fixture values sit on either side of the 1000-character limit passed below.
+        $belowLimit = str_repeat('a', 500);
+        $aboveLimit = str_repeat('b', 1500);
+        $entry = ['short_text' => $belowLimit, 'long_text' => $aboveLimit];
+
+        $formatted = LogUtil::formatLongText($entry, 1000);
+
+        $this->assertIsArray($formatted);
+
+        // Expected: the 500-character value is unchanged ...
+        $this->assertEquals($belowLimit, $formatted['short_text']);
+        // ... and the 1500-character value is replaced by the placeholder.
+        $this->assertEquals('[Long Text]', $formatted['long_text']);
+    }
+
+    public function testFormatLongTextWithZeroMaxLength()
+    {
+        $payload = str_repeat('x', 10000);
+        $entry = ['model_id' => 'gpt-4o', 'content' => $payload];
+
+        // A max length of 0 is expected to disable the length check (see the assertions).
+        $formatted = LogUtil::formatLongText($entry, 0);
+
+        $this->assertIsArray($formatted);
+
+        // Expected: both values come back exactly as supplied,
+        // including the full 10000-character content.
+        $this->assertEquals('gpt-4o', $formatted['model_id']);
+        $this->assertEquals($payload, $formatted['content']);
+    }
+
+    public function testFilterAndFormatLogDataWithCustomMaxLength()
+    {
+        // One value below and one above the 1000-character limit passed as the fourth argument.
+        $belowLimit = str_repeat('a', 500);
+        $aboveLimit = str_repeat('b', 1500);
+        $entry = [
+            'model_id' => 'gpt-4o',
+            'short_content' => $belowLimit,
+            'long_content' => $aboveLimit,
+        ];
+
+        // The whitelist handed over is exactly the entry's own key list.
+        $filtered = LogUtil::filterAndFormatLogData($entry, array_keys($entry), true, 1000);
+
+        $this->assertIsArray($filtered);
+        $this->assertEquals('gpt-4o', $filtered['model_id']);
+        $this->assertEquals($belowLimit, $filtered['short_content']);
+        $this->assertEquals('[Long Text]', $filtered['long_content']);
+    }
+
+    public function testFilterAndFormatLogDataWithZeroMaxLength()
+    {
+        $payload = str_repeat('x', 10000);
+        $entry = [
+            'model_id' => 'gpt-4o',
+            'content' => $payload,
+        ];
+
+        // The whitelist equals the entry's own keys; the max length argument is 0.
+        $filtered = LogUtil::filterAndFormatLogData($entry, array_keys($entry), true, 0);
+
+        $this->assertIsArray($filtered);
+        // The 10000-character content is expected back unchanged.
+        $this->assertEquals('gpt-4o', $filtered['model_id']);
+        $this->assertEquals($payload, $filtered['content']);
+    }
+
     public function testFilterAndFormatLogDataWithoutWhitelist()
     {
         $logData = [
diff --git a/tests/Cases/Utils/LoggingConfigHelperTest.php b/tests/Cases/Utils/LoggingConfigHelperTest.php
index e853a5f..8c20f52 100644
--- a/tests/Cases/Utils/LoggingConfigHelperTest.php
+++ b/tests/Cases/Utils/LoggingConfigHelperTest.php
@@ -163,6 +163,54 @@ public function testIsWhitelistEnabledWithConfigException()
         $this->assertFalse($enabled);
     }
 
+    public function testGetMaxTextLengthWithCustomValue()
+    {
+        // Config fixture carrying an explicit max_text_length of 5000.
+        $settings = ['odin.llm.general_api_options.logging.max_text_length' => 5000];
+        $this->setMockContainer($this->createMockConfig($settings));
+
+        $this->assertEquals(
+            5000,
+            LoggingConfigHelper::getMaxTextLength()
+        );
+    }
+
+    public function testGetMaxTextLengthWithZeroValue()
+    {
+        // Config fixture with max_text_length set to 0; the helper is expected to report 0 back.
+        $settings = ['odin.llm.general_api_options.logging.max_text_length' => 0];
+        $this->setMockContainer($this->createMockConfig($settings));
+
+        $this->assertEquals(
+            0,
+            LoggingConfigHelper::getMaxTextLength()
+        );
+    }
+
+    public function testGetMaxTextLengthWithDefaultValue()
+    {
+        // Empty config fixture: no max_text_length key is supplied, and 2000 is expected back.
+        $this->setMockContainer($this->createMockConfig([]));
+
+        $resolved = LoggingConfigHelper::getMaxTextLength();
+
+        $this->assertEquals(2000, $resolved);
+    }
+
+    public function testGetMaxTextLengthWithConfigException()
+    {
+        // Container double whose get(ConfigInterface::class) throws instead of returning a config.
+        $failure = new RuntimeException('Config not available');
+        $container = $this->createMock(ContainerInterface::class);
+        $container->method('get')->with(ConfigInterface::class)->willThrowException($failure);
+
+        ApplicationContext::setContainer($container);
+
+        $resolved = LoggingConfigHelper::getMaxTextLength();
+
+        $this->assertEquals(2000, $resolved);
+    }
+
     public function testFilterAndFormatLogDataWithEnabledWhitelist()
     {
         $mockConfig = $this->createMockConfig([
@@ -260,6 +308,55 @@ public function testFilterAndFormatLogDataWithComplexDataAndFormatting()
         $this->assertArrayNotHasKey('duration_ms', $result);
     }
 
+    public function testFilterAndFormatLogDataWithCustomMaxTextLength()
+    {
+        // Whitelist enabled for all three fields, with max_text_length configured as 1000.
+        $this->setMockContainer($this->createMockConfig([
+            'odin.llm.general_api_options.logging.whitelist_fields' => ['model_id', 'short_content', 'long_content'],
+            'odin.llm.general_api_options.logging.enable_whitelist' => true,
+            'odin.llm.general_api_options.logging.max_text_length' => 1000,
+        ]));
+
+        $belowLimit = str_repeat('a', 500);
+        $aboveLimit = str_repeat('b', 1500);
+        $entry = [
+            'model_id' => 'gpt-4o',
+            'short_content' => $belowLimit,
+            'long_content' => $aboveLimit,
+        ];
+
+        $filtered = LoggingConfigHelper::filterAndFormatLogData($entry);
+
+        $this->assertIsArray($filtered);
+        $this->assertCount(3, $filtered);
+        $this->assertEquals('gpt-4o', $filtered['model_id']);
+        $this->assertEquals($belowLimit, $filtered['short_content']);
+        $this->assertEquals('[Long Text]', $filtered['long_content']);
+    }
+
+    public function testFilterAndFormatLogDataWithZeroMaxTextLength()
+    {
+        // Whitelist enabled for both fields, with max_text_length configured as 0.
+        $this->setMockContainer($this->createMockConfig([
+            'odin.llm.general_api_options.logging.whitelist_fields' => ['model_id', 'content'],
+            'odin.llm.general_api_options.logging.enable_whitelist' => true,
+            'odin.llm.general_api_options.logging.max_text_length' => 0,
+        ]));
+
+        $payload = str_repeat('x', 10000);
+        $entry = [
+            'model_id' => 'gpt-4o',
+            'content' => $payload,
+        ];
+
+        $filtered = LoggingConfigHelper::filterAndFormatLogData($entry);
+
+        $this->assertIsArray($filtered);
+        $this->assertCount(2, $filtered);
+        $this->assertEquals('gpt-4o', $filtered['model_id']);
+        $this->assertEquals($payload, $filtered['content']);
+    }
+
     public function testFilterAndFormatLogDataWithConfigException()
     {
         $mockContainer = $this->createMock(ContainerInterface::class);
diff --git a/tests/Mock/Cache.php b/tests/Mock/Cache.php
index 5a47ada..d70a4a2 100644
--- a/tests/Mock/Cache.php
+++ b/tests/Mock/Cache.php
@@ -37,7 +37,7 @@ public function get(string $key, mixed $default = null): mixed
         return $default;
     }
 
-    public function set(string $key, mixed $value, null|DateInterval|int $ttl = null): bool
+    public function set(string $key, mixed $value, DateInterval|int|null $ttl = null): bool
     {
         $this->storage[$key] = $value;
 
@@ -85,7 +85,7 @@ public function getMultiple(iterable $keys, mixed $default = null): iterable
         return $result;
     }
 
-    public function setMultiple(iterable $values, null|DateInterval|int $ttl = null): bool
+    public function setMultiple(iterable $values, DateInterval|int|null $ttl = null): bool
     {
         $success = true;
         foreach ($values as $key => $value) {
diff --git a/tests/Mock/StdoutLogger.php b/tests/Mock/StdoutLogger.php
new file mode 100644
index 0000000..7c5e928
--- /dev/null
+++ b/tests/Mock/StdoutLogger.php
@@ -0,0 +1,18 @@
+<?php
+
+declare(strict_types=1);
+/**
+ * This file is part of Hyperf.
+ *
+ * @link     https://www.hyperf.io
+ * @document https://hyperf.wiki
+ * @contact  group@hyperf.io
+ * @license  https://github.com/hyperf/hyperf/blob/master/LICENSE
+ */
+
+namespace HyperfTest\Odin\Mock;
+
+use Hyperf\Contract\StdoutLoggerInterface;
+use Hyperf\Odin\Logger;
+
+class StdoutLogger extends Logger implements StdoutLoggerInterface {} // Adds no members: Logger typed as StdoutLoggerInterface.