diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 2dd90d8a74..126f53ce4d 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -912,7 +912,7 @@ export class Agent< } // If cache matched but yamlWorkflow is empty, fall through to normal execution - const imagesIncludeCount: number | undefined = deepThink ? undefined : 2; + const imagesIncludeCount: number = deepThink ? 2 : 1; const { output: actionOutput } = await this.taskExecutor.action( taskPrompt, modelConfigForPlanning, diff --git a/packages/core/src/ai-model/conversation-history.ts b/packages/core/src/ai-model/conversation-history.ts index 5bc7130c41..b8d21fc052 100644 --- a/packages/core/src/ai-model/conversation-history.ts +++ b/packages/core/src/ai-model/conversation-history.ts @@ -9,6 +9,7 @@ export class ConversationHistory { private readonly messages: ChatCompletionMessageParam[] = []; private subGoals: SubGoal[] = []; private memories: string[] = []; + private historicalLogs: string[] = []; public pendingFeedbackMessage: string; @@ -38,6 +39,9 @@ export class ConversationHistory { reset() { this.messages.length = 0; + this.memories.length = 0; + this.subGoals.length = 0; + this.historicalLogs.length = 0; } /** @@ -106,7 +110,8 @@ export class ConversationHistory { } /** - * Update a single sub-goal by index + * Update a single sub-goal by index. + * Clears logs if status or description actually changes. * @returns true if the sub-goal was found and updated, false otherwise */ updateSubGoal( @@ -118,23 +123,36 @@ export class ConversationHistory { return false; } - if (updates.status !== undefined) { + let changed = false; + + if (updates.status !== undefined && updates.status !== goal.status) { goal.status = updates.status; + changed = true; } - if (updates.description !== undefined) { + if ( + updates.description !== undefined && + updates.description !== goal.description + ) { goal.description = updates.description; + changed = true; + } + + if (changed) { + goal.logs = []; } return true; } /** - * Mark the first pending sub-goal as running + * Mark the first pending sub-goal as running. + * Clears logs since status changes. */ markFirstPendingAsRunning(): void { const firstPending = this.subGoals.find((g) => g.status === 'pending'); if (firstPending) { firstPending.status = 'running'; + firstPending.logs = []; } } @@ -152,16 +170,38 @@ export class ConversationHistory { } /** - * Mark all sub-goals as finished + * Mark all sub-goals as finished. + * Clears logs for any goal whose status actually changes. */ markAllSubGoalsFinished(): void { for (const goal of this.subGoals) { + if (goal.status !== 'finished') { + goal.logs = []; + } goal.status = 'finished'; } } /** - * Convert sub-goals to text representation + * Append a log entry to the currently running sub-goal. + * The log describes an action performed while working on the sub-goal. + */ + appendSubGoalLog(log: string): void { + if (!log) { + return; + } + const runningGoal = this.subGoals.find((g) => g.status === 'running'); + if (runningGoal) { + if (!runningGoal.logs) { + runningGoal.logs = []; + } + runningGoal.logs.push(log); + } + } + + /** + * Convert sub-goals to text representation. + * Includes actions performed (logs) for the current sub-goal. */ subGoalsToText(): string { if (this.subGoals.length === 0) { @@ -176,13 +216,44 @@ export class ConversationHistory { const currentGoal = this.subGoals.find((goal) => goal.status === 'running') || this.subGoals.find((goal) => goal.status === 'pending'); - const currentGoalText = currentGoal - ? `\nCurrent sub-goal is: ${currentGoal.description}` - : ''; + + let currentGoalText = ''; + if (currentGoal) { + currentGoalText = `\nCurrent sub-goal is: ${currentGoal.description}`; + if (currentGoal.logs && currentGoal.logs.length > 0) { + const logLines = currentGoal.logs.map((log) => `- ${log}`).join('\n'); + currentGoalText += `\nActions performed for current sub-goal:\n${logLines}`; + } + } return `Sub-goals:\n${lines.join('\n')}${currentGoalText}`; } + // Historical log management methods (used in non-deepThink mode) + + /** + * Append a log entry to the historical logs list. + * Used in non-deepThink mode to track executed steps across planning rounds. + */ + appendHistoricalLog(log: string): void { + if (log) { + this.historicalLogs.push(log); + } + } + + /** + * Convert historical logs to text representation. + * Provides context about previously executed steps to the model. + */ + historicalLogsToText(): string { + if (this.historicalLogs.length === 0) { + return ''; + } + + const logLines = this.historicalLogs.map((log) => `- ${log}`).join('\n'); + return `Here are the steps that have been executed:\n${logLines}`; + } + // Memory management methods /** diff --git a/packages/core/src/ai-model/llm-planning.ts b/packages/core/src/ai-model/llm-planning.ts index f74ad31cf7..1a6262020b 100644 --- a/packages/core/src/ai-model/llm-planning.ts +++ b/packages/core/src/ai-model/llm-planning.ts @@ -165,10 +165,12 @@ export async function plan( let latestFeedbackMessage: ChatCompletionMessageParam; - // Build sub-goal status text to include in the message (only when deepThink is enabled) + // Build sub-goal status text to include in the message + // In deepThink mode: show full sub-goals with logs + // In non-deepThink mode: show historical execution logs const subGoalsText = includeSubGoals ? conversationHistory.subGoalsToText() - : ''; + : conversationHistory.historicalLogsToText(); const subGoalsSection = subGoalsText ? `\n\n${subGoalsText}` : ''; // Build memories text to include in the message @@ -318,6 +320,15 @@ export async function plan( conversationHistory.markSubGoalFinished(index); } } + // Append the planning log to the currently running sub-goal + if (planFromAI.log) { + conversationHistory.appendSubGoalLog(planFromAI.log); + } + } else { + // In non-deepThink mode, accumulate logs as historical execution steps + if (planFromAI.log) { + conversationHistory.appendHistoricalLog(planFromAI.log); + } } // Append memory to conversation history if present diff --git a/packages/core/src/ai-model/prompt/llm-planning.ts b/packages/core/src/ai-model/prompt/llm-planning.ts index 90bf4c59e5..78f1a1e8f5 100644 --- a/packages/core/src/ai-model/prompt/llm-planning.ts +++ b/packages/core/src/ai-model/prompt/llm-planning.ts @@ -482,7 +482,10 @@ The previous action has been executed, here is the latest screenshot. Please con Sub-goals: 1. Fill in the Name field with 'John' (running) 2. Fill in the Email field with 'john@example.com' (pending) +3. Return the filled email address (pending) Current sub-goal is: Fill in the Name field with 'John' +Actions performed for current sub-goal: +- Click on the Name field to start filling the form **Screenshot:** [Shows the form with Name field now focused/active] @@ -504,7 +507,11 @@ The previous action has been executed, here is the latest screenshot. Please con Sub-goals: 1. Fill in the Name field with 'John' (running) 2. Fill in the Email field with 'john@example.com' (pending) +3. Return the filled email address (pending) Current sub-goal is: Fill in the Name field with 'John' +Actions performed for current sub-goal: +- Click on the Name field to start filling the form +- Typing 'John' into the Name field **Screenshot:** [Shows the form with Name field containing 'John'] @@ -530,7 +537,10 @@ The previous action has been executed, here is the latest screenshot. Please con Sub-goals: 1. Fill in the Name field with 'John' (finished) 2. Fill in the Email field with 'john@example.com' (running) +3. Return the filled email address (pending) Current sub-goal is: Fill in the Email field with 'john@example.com' +Actions performed for current sub-goal: +- Moving to the Email field **Screenshot:** [Shows the form with Name='John' and Email field focused] @@ -554,6 +564,9 @@ Sub-goals: 2. Fill in the Email field with 'john@example.com' (running) 3. Return the filled email address (pending) Current sub-goal is: Fill in the Email field with 'john@example.com' +Actions performed for current sub-goal: +- Moving to the Email field +- Typing email address into the Email field **Screenshot:** [Shows the form with Name='John' and Email='john@example.com'] diff --git a/packages/core/src/ai-model/service-caller/index.ts b/packages/core/src/ai-model/service-caller/index.ts index 92761b8440..2c9da80ebd 100644 --- a/packages/core/src/ai-model/service-caller/index.ts +++ b/packages/core/src/ai-model/service-caller/index.ts @@ -420,14 +420,26 @@ export async function callAI( } content = result.choices[0].message.content!; - if (!content) { - throw new Error('empty content from AI model'); - } - accumulatedReasoning = (result.choices[0].message as any)?.reasoning_content || ''; usage = result.usage; requestId = result._request_id; + + if ( + !content && + accumulatedReasoning && + modelFamily === 'doubao-vision' + ) { + console.warn( + 'empty content from AI model, using reasoning content', + ); + content = accumulatedReasoning; + } + + if (!content) { + throw new Error('empty content from AI model'); + } + break; // Success, exit retry loop } catch (error) { lastError = error as Error; diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 1f74f6fd4b..966c911879 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -252,6 +252,7 @@ export interface SubGoal { index: number; status: SubGoalStatus; description: string; + logs?: string[]; } export interface RawResponsePlanningAIResponse { diff --git a/packages/core/tests/unit-test/conversation-history.test.ts b/packages/core/tests/unit-test/conversation-history.test.ts index 8e3372b6ca..9a9d59dfe3 100644 --- a/packages/core/tests/unit-test/conversation-history.test.ts +++ b/packages/core/tests/unit-test/conversation-history.test.ts @@ -40,6 +40,30 @@ describe('ConversationHistory', () => { expect(history.length).toBe(2); }); + it('reset clears messages, memories, and subGoals', () => { + const history = new ConversationHistory(); + history.append(userMessage('msg1')); + history.append(assistantMessage('msg2')); + history.appendMemory('Memory from task 1'); + history.appendMemory('Another memory'); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Sub-goal 1' }, + { index: 2, status: 'pending', description: 'Sub-goal 2' }, + ]); + + history.appendHistoricalLog('Step 1'); + history.appendHistoricalLog('Step 2'); + + history.reset(); + + expect(history.length).toBe(0); + expect(history.snapshot()).toEqual([]); + expect(history.getMemories()).toEqual([]); + expect(history.memoriesToText()).toBe(''); + expect(history.subGoalsToText()).toBe(''); + expect(history.historicalLogsToText()).toBe(''); + }); + it('clears pending feedback message only when set', () => { const history = new ConversationHistory(); @@ -335,6 +359,313 @@ describe('ConversationHistory', () => { `); }); + // Sub-goal log tracking tests + + it('appends log to the currently running sub-goal', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Task 1' }, + { index: 2, status: 'pending', description: 'Task 2' }, + ]); + + // Task 1 is automatically running + history.appendSubGoalLog('Clicked login button'); + history.appendSubGoalLog('Typed username'); + + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. Task 1 (running) + 2. Task 2 (pending) + Current sub-goal is: Task 1 + Actions performed for current sub-goal: + - Clicked login button + - Typed username" + `); + }); + + it('ignores empty log strings', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Task 1' }, + ]); + + history.appendSubGoalLog(''); + history.appendSubGoalLog('Valid log'); + + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. Task 1 (running) + Current sub-goal is: Task 1 + Actions performed for current sub-goal: + - Valid log" + `); + }); + + it('does nothing when appending log with no running sub-goal', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'finished', description: 'Task 1' }, + { index: 2, status: 'finished', description: 'Task 2' }, + ]); + + history.appendSubGoalLog('Some log'); + + // No running goal, so no logs appear + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. Task 1 (finished) + 2. Task 2 (finished)" + `); + }); + + it('clears logs when sub-goal status changes via markSubGoalFinished', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Task 1' }, + { index: 2, status: 'pending', description: 'Task 2' }, + ]); + + // Append logs to Task 1 (running) + history.appendSubGoalLog('Step A'); + history.appendSubGoalLog('Step B'); + + // Mark Task 1 finished -> Task 2 becomes running (logs cleared for both) + history.markSubGoalFinished(1); + + // Task 2 is now running with no logs + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. Task 1 (finished) + 2. Task 2 (running) + Current sub-goal is: Task 2" + `); + }); + + it('clears logs when sub-goal description changes via updateSubGoal', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Task 1' }, + ]); + + history.appendSubGoalLog('Did something'); + + // Update description -> logs should be cleared + history.updateSubGoal(1, { description: 'Updated Task 1' }); + + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. Updated Task 1 (running) + Current sub-goal is: Updated Task 1" + `); + }); + + it('preserves logs when updateSubGoal sets same values', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Task 1' }, + ]); + + history.appendSubGoalLog('Did something'); + + // Update with same status and description -> no change, logs preserved + history.updateSubGoal(1, { status: 'running', description: 'Task 1' }); + + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. Task 1 (running) + Current sub-goal is: Task 1 + Actions performed for current sub-goal: + - Did something" + `); + }); + + it('clears logs when setSubGoals replaces all sub-goals', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Old task' }, + ]); + + history.appendSubGoalLog('Old log'); + + // Replace all sub-goals + history.setSubGoals([ + { index: 1, status: 'pending', description: 'New task' }, + ]); + + // New sub-goals start with no logs + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. New task (running) + Current sub-goal is: New task" + `); + }); + + it('clears logs for non-finished goals when markAllSubGoalsFinished is called', () => { + const history = new ConversationHistory(); + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Task 1' }, + { index: 2, status: 'pending', description: 'Task 2' }, + ]); + + history.appendSubGoalLog('Some work'); + history.markAllSubGoalsFinished(); + + // All finished, no current goal, no logs shown + expect(history.subGoalsToText()).toMatchInlineSnapshot(` + "Sub-goals: + 1. Task 1 (finished) + 2. Task 2 (finished)" + `); + }); + + // Historical log management tests (non-deepThink mode) + + it('initializes with empty historical logs', () => { + const history = new ConversationHistory(); + expect(history.historicalLogsToText()).toBe(''); + }); + + it('appends historical logs', () => { + const history = new ConversationHistory(); + history.appendHistoricalLog('Clicked the login button'); + history.appendHistoricalLog('Typed username into the input'); + + expect(history.historicalLogsToText()).toMatchInlineSnapshot(` + "Here are the steps that have been executed: + - Clicked the login button + - Typed username into the input" + `); + }); + + it('ignores empty historical log strings', () => { + const history = new ConversationHistory(); + history.appendHistoricalLog(''); + history.appendHistoricalLog('Valid step'); + history.appendHistoricalLog(''); + + expect(history.historicalLogsToText()).toMatchInlineSnapshot(` + "Here are the steps that have been executed: + - Valid step" + `); + }); + + it('accumulates historical logs across multiple rounds', () => { + const history = new ConversationHistory(); + history.appendHistoricalLog('Step 1: Navigated to page'); + history.appendHistoricalLog('Step 2: Clicked search button'); + history.appendHistoricalLog('Step 3: Entered search query'); + + expect(history.historicalLogsToText()).toMatchInlineSnapshot(` + "Here are the steps that have been executed: + - Step 1: Navigated to page + - Step 2: Clicked search button + - Step 3: Entered search query" + `); + }); + + it('historical logs are independent from sub-goal logs', () => { + const history = new ConversationHistory(); + + // Set up sub-goals (deepThink mode scenario) + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Task 1' }, + ]); + history.appendSubGoalLog('Sub-goal log entry'); + + // Also add historical logs (non-deepThink mode scenario) + history.appendHistoricalLog('Historical log entry'); + + // Both should be independently tracked + expect(history.subGoalsToText()).toContain('Sub-goal log entry'); + expect(history.historicalLogsToText()).toContain('Historical log entry'); + expect(history.historicalLogsToText()).not.toContain('Sub-goal log entry'); + expect(history.subGoalsToText()).not.toContain('Historical log entry'); + }); + + it('seed() clears historical logs', () => { + const history = new ConversationHistory(); + history.appendHistoricalLog('Step before seed'); + history.appendHistoricalLog('Another step'); + + history.seed([userMessage('fresh start')]); + + expect(history.historicalLogsToText()).toBe(''); + expect(history.length).toBe(1); + }); + + it('compressHistory does not affect historical logs', () => { + const history = new ConversationHistory(); + for (let i = 1; i <= 10; i++) { + history.append(userMessage(`msg${i}`)); + } + history.appendHistoricalLog('Step 1: Clicked button'); + history.appendHistoricalLog('Step 2: Typed text'); + + history.compressHistory(5, 3); + + // Messages compressed, but historical logs remain intact + expect(history.length).toBe(4); // 1 placeholder + 3 kept + expect(history.historicalLogsToText()).toMatchInlineSnapshot(` + "Here are the steps that have been executed: + - Step 1: Clicked button + - Step 2: Typed text" + `); + }); + + it('clearMemories does not affect historical logs', () => { + const history = new ConversationHistory(); + history.appendMemory('Some memory'); + history.appendHistoricalLog('Some log'); + + history.clearMemories(); + + expect(history.getMemories()).toEqual([]); + expect(history.historicalLogsToText()).toMatchInlineSnapshot(` + "Here are the steps that have been executed: + - Some log" + `); + }); + + it('historical logs persist through sub-goal lifecycle operations', () => { + const history = new ConversationHistory(); + history.appendHistoricalLog('Early step'); + + // setSubGoals should not affect historical logs + history.setSubGoals([ + { index: 1, status: 'pending', description: 'Goal 1' }, + { index: 2, status: 'pending', description: 'Goal 2' }, + ]); + expect(history.historicalLogsToText()).toContain('Early step'); + + history.appendHistoricalLog('Mid step'); + + // markSubGoalFinished should not affect historical logs + history.markSubGoalFinished(1); + expect(history.historicalLogsToText()).toContain('Early step'); + expect(history.historicalLogsToText()).toContain('Mid step'); + + history.appendHistoricalLog('Late step'); + + // markAllSubGoalsFinished should not affect historical logs + history.markAllSubGoalsFinished(); + expect(history.historicalLogsToText()).toMatchInlineSnapshot(` + "Here are the steps that have been executed: + - Early step + - Mid step + - Late step" + `); + }); + + it('historicalLogsToText formats single log correctly', () => { + const history = new ConversationHistory(); + history.appendHistoricalLog('Only one step'); + + expect(history.historicalLogsToText()).toMatchInlineSnapshot(` + "Here are the steps that have been executed: + - Only one step" + `); + }); + // Memory management tests it('initializes with empty memories', () => { diff --git a/packages/web-integration/tests/ai/web/puppeteer/skipped.test.ts b/packages/web-integration/tests/ai/web/puppeteer/skipped.test.ts index 65b8221037..d9487c8961 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/skipped.test.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/skipped.test.ts @@ -33,6 +33,9 @@ describe( await ctx.agent.aiAct( '在当前页面里完成这个任务:完成 github 账号注册的表单填写。地区必须选择「加拿大」。确保表单上没有遗漏的字段,确保所有的表单项能够通过校验。 只需要填写表单项即可,不需要发起真实的账号注册。 最终请返回表单上实际填写的字段内容。', + { + deepThink: true, + }, ); }, LONG_TEST_TIMEOUT,