Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 84 additions & 1 deletion apps/report/src/components/detail-side/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,57 @@ const renderElementDetailBox = (_value: LocateResultElement) => {
);
};

/**
 * Renders the placeholder box shown when a locate result has no element.
 *
 * @param label - Optional text shown before the "Not Found" tag.
 * @returns A detail box with a "Not Found" tag and dash placeholders for
 *   the center and rect coordinates.
 */
const renderEmptyElementBox = (label?: string) => {
  // A trailing space separates the label from the tag; nothing is rendered
  // when the label is missing or empty.
  const labelPrefix = label ? `${label} ` : null;

  return (
    <div className="element-detail-box">
      <div className="element-detail-line">
        {labelPrefix}
        <Tag bordered={false} color="default">
          Not Found
        </Tag>
      </div>
      <div className="element-detail-line element-detail-coords">
        center=-, rect=-
      </div>
    </div>
  );
};

/**
 * Renders a list of locate results, one wrapper `div` per entry.
 *
 * Entries accepted by `isElementField` are drawn with
 * `renderElementDetailBox`; every other entry is drawn with
 * `renderEmptyElementBox`, labelled with the entry at the same index in
 * `labels` when one is provided.
 *
 * @param items - Locate results; `null`/`undefined` mark missing elements.
 * @param labels - Optional labels, matched to `items` by index.
 */
const renderElementList = (
  items: Array<LocateResultElement | null | undefined>,
  labels?: string[],
) => {
  const rows = items.map((entry, position) => {
    const box = isElementField(entry)
      ? renderElementDetailBox(entry)
      : renderEmptyElementBox(labels?.[position]);

    return <div key={position}>{box}</div>;
  });

  return <div>{rows}</div>;
};

/**
 * Extracts a display label from a locate parameter of unknown shape.
 *
 * Three shapes are recognised:
 * - a plain string, returned as-is;
 * - an object whose `prompt` is a string, returning that string;
 * - an object whose `prompt` is itself an object with a non-empty string
 *   `prompt`, returning the nested string.
 *
 * @param param - The locate parameter; any value is accepted.
 * @returns The prompt text, or `undefined` when no string label can be
 *   derived (falsy input, unrecognised shape, or a non-string nested prompt).
 */
const locateParamLabel = (param: unknown): string | undefined => {
  if (!param) {
    return undefined;
  }

  if (typeof param === 'string') {
    return param;
  }

  if (typeof param !== 'object') {
    return undefined;
  }

  const prompt = (param as { prompt?: unknown }).prompt;
  if (typeof prompt === 'string') {
    return prompt;
  }

  if (typeof prompt === 'object' && prompt !== null) {
    const nestedPrompt = (prompt as { prompt?: unknown }).prompt;
    // Only a non-empty string qualifies; a truthy non-string value would
    // violate the declared return type.
    if (typeof nestedPrompt === 'string' && nestedPrompt) {
      return nestedPrompt;
    }
  }

  return undefined;
};

// Helper function to render content with element detection
const renderMetaContent = (
content: string | JSX.Element,
Expand Down Expand Up @@ -968,7 +1019,39 @@ const DetailSide = (): JSX.Element => {
}

// Handle output data
if (typeof data === 'object' && data !== null && !Array.isArray(data)) {
const isLocateArrayOutput =
(task?.subType === 'LocateAll' || task?.subType === 'LocateMultiple') &&
Array.isArray(data) &&
data.some((item) => isElementField(item));

if (isLocateArrayOutput) {
let locateLabels: string[] | undefined;
if (task?.subType === 'LocateMultiple' && Array.isArray(task.param)) {
locateLabels = task.param.map(
(param) => locateParamLabel(param) || '',
);
} else if (task?.subType === 'LocateAll') {
const label = locateParamLabel(task.param);
if (label) {
locateLabels = data.map(() => label);
}
}

outputItems.push(
<Card
key="output"
liteMode={true}
onMouseEnter={noop}
onMouseLeave={noop}
title="output"
content={renderElementList(data, locateLabels)}
/>,
);
} else if (
typeof data === 'object' &&
data !== null &&
!Array.isArray(data)
) {
// For object output, create a Card for each field
Object.entries(data).forEach(([key, value]) => {
let content: JSX.Element;
Expand Down
110 changes: 108 additions & 2 deletions packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1134,8 +1134,68 @@ export class Agent<
return verifyResult;
}

async aiLocate(prompt: TUserPrompt, opt?: LocateOption) {
const locateParam = buildDetailedLocateParam(prompt, opt);
async aiLocate(
prompt: TUserPrompt,
opt?: LocateOption,
): Promise<
Pick<LocateResultElement, 'rect' | 'center'> & {
dpr?: number;
}
>;
async aiLocate(
prompts: TUserPrompt[],
opt?: LocateOption,
): Promise<
Array<{
rect?: Rect;
center?: [number, number];
dpr?: number;
}>
>;
async aiLocate(
promptOrPrompts: TUserPrompt | TUserPrompt[],
opt?: LocateOption,
) {
if (Array.isArray(promptOrPrompts)) {
const detailedParams = promptOrPrompts.map((prompt) =>
buildDetailedLocateParam(prompt, opt),
);

const plan = {
type: 'LocateMultiple',
param: detailedParams,
thought: '',
};

const defaultIntentModelConfig =
this.modelConfigManager.getModelConfig('default');
const modelConfigForPlanning =
this.modelConfigManager.getModelConfig('planning');

const { output } = await this.taskExecutor.runPlans(
`Locate - ${promptOrPrompts.length} ${promptOrPrompts.length === 1 ? 'element' : 'elements'}`,
[plan],
modelConfigForPlanning,
defaultIntentModelConfig,
);

const dprValue = await (this.interface.size() as any).dpr;
return (output || []).map((result: any) => {
if (!result) {
return {
rect: undefined,
center: undefined,
} as any;
}
return {
rect: result.rect,
center: result.center,
dpr: dprValue,
};
});
}

const locateParam = buildDetailedLocateParam(promptOrPrompts, opt);
assert(locateParam, 'cannot get locate param for aiLocate');
const locatePlan = locatePlanForLocate(locateParam);
const plans = [locatePlan];
Expand Down Expand Up @@ -1168,6 +1228,52 @@ export class Agent<
};
}

/**
 * Runs a single 'LocateAll' plan for the given prompt and returns one entry
 * per item in the plan output.
 *
 * @param prompt - The prompt to locate; converted with
 *   `buildDetailedLocateParam`.
 * @param opt - Optional locate options forwarded to
 *   `buildDetailedLocateParam`.
 * @returns An array with one object per output item. Falsy items become
 *   `{ rect: undefined, center: undefined }`; other items carry their `rect`
 *   and `center` plus the `dpr` read from `this.interface.size()`. An empty
 *   array is returned when the plan produced no output.
 * @throws When no locate param can be built (via `assert`).
 */
async aiLocateAll(
  prompt: TUserPrompt,
  opt?: LocateOption,
): Promise<
  Array<{
    rect?: Rect;
    center?: [number, number];
    dpr?: number;
  }>
> {
  const detailedParam = buildDetailedLocateParam(prompt, opt);
  assert(detailedParam, 'cannot get locate param for aiLocateAll');
  const plan = {
    type: 'LocateAll',
    param: detailedParam,
    thought: '',
  };

  const defaultIntentModelConfig =
    this.modelConfigManager.getModelConfig('default');
  const modelConfigForPlanning =
    this.modelConfigManager.getModelConfig('planning');

  const { output } = await this.taskExecutor.runPlans(
    `Locate - ${detailedParam.prompt}`,
    [plan],
    modelConfigForPlanning,
    defaultIntentModelConfig,
  );

  // Await the size() call itself and only then read `dpr`. Awaiting the
  // property access instead would read `dpr` off the un-awaited return value
  // (always undefined for a promise) and leave that promise floating.
  // NOTE(review): `dpr` is read through an `any` cast, so it is presumably
  // not declared on the size type; confirm.
  const sizeInfo = (await this.interface.size()) as any;
  const dprValue = sizeInfo?.dpr;

  return (output || []).map((r: any) => {
    if (!r) {
      // Keep a placeholder for falsy output entries.
      return {
        rect: undefined,
        center: undefined,
      };
    }
    return {
      rect: r.rect,
      center: r.center,
      dpr: dprValue,
    };
  });
}

async aiAssert(
assertion: TUserPrompt,
msg?: string,
Expand Down
148 changes: 148 additions & 0 deletions packages/core/src/agent/task-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,22 @@ export class TaskBuilder {
context,
),
],
[
'LocateMultiple',
(plan) =>
this.handleLocateMultiplePlan(
plan as PlanningAction<PlanningLocateParam[]>,
context,
),
],
[
'LocateAll',
(plan) =>
this.handleLocateAllPlan(
plan as PlanningAction<PlanningLocateParam>,
context,
),
],
['Finished', (plan) => this.handleFinishedPlan(plan, context)],
]);

Expand Down Expand Up @@ -563,4 +579,136 @@ export class TaskBuilder {

return taskLocator;
}

/**
 * Builds a 'LocateMultiple' task from the plan and appends it to
 * `context.tasks`.
 *
 * @param plan - Planning action whose `param` is the list of locate params.
 * @param context - Build context that collects the generated tasks.
 */
private async handleLocateMultiplePlan(
  plan: PlanningAction<PlanningLocateParam[]>,
  context: PlanBuildContext,
): Promise<void> {
  const { param: locateParams } = plan;
  context.tasks.push(
    this.createLocateMultipleTask(plan, locateParams, context),
  );
}

/**
 * Builds a 'LocateAll' task from the plan and appends it to `context.tasks`.
 *
 * @param plan - Planning action whose `param` is the single locate param.
 * @param context - Build context that collects the generated tasks.
 */
private async handleLocateAllPlan(
  plan: PlanningAction<PlanningLocateParam>,
  context: PlanBuildContext,
): Promise<void> {
  const { param: locateParam } = plan;
  context.tasks.push(this.createLocateAllTask(plan, locateParam, context));
}

/**
 * Creates the execution task that locates several elements in one
 * `service.locate` call using `mode: 'multi'`.
 *
 * @param plan - Source planning action; only its `thought` is copied.
 * @param detailedLocateParams - Locate params stored as the task's `param`.
 * @param context - Build context supplying `subTask` and the default-intent
 *   model config.
 * @returns A task of type 'Planning' / subType 'LocateMultiple' whose
 *   executor resolves to `{ output: results }` from the locate call.
 */
private createLocateMultipleTask(
  plan: PlanningAction<PlanningLocateParam[]>,
  detailedLocateParams: DetailedLocateParam[],
  context: PlanBuildContext,
): ExecutionTaskApply {
  const defaultIntentModelConfig = context.modelConfigForDefaultIntent;

  // Main type is 'Planning'; 'LocateMultiple' is carried as the subType.
  const locateMultipleTask: ExecutionTaskApply = {
    type: 'Planning',
    subType: 'LocateMultiple',
    subTask: context.subTask || undefined,
    param: detailedLocateParams,
    thought: plan.thought,
    executor: async (param: DetailedLocateParam[], taskContext: any) => {
      const { task } = taskContext;

      // Use the UI context from the task context, fetching one otherwise.
      const uiContext =
        taskContext.uiContext || (await this.service.contextRetrieverFn());
      assert(uiContext, 'uiContext is required for Service task');

      // Copies the service dump and its usage info onto the task.
      const recordDump = (dump?: ServiceDump): void => {
        if (dump) {
          task.log = {
            dump,
          };
          task.usage = dump.taskInfo?.usage;
        }
      };

      // Cache logic is intentionally not applied to LocateMultiple for now.
      let located;
      try {
        located = await this.service.locate(
          param,
          { context: uiContext, mode: 'multi' },
          defaultIntentModelConfig,
        );
        recordDump(located.dump);
      } catch (error) {
        // Keep the dump attached to a ServiceError before rethrowing.
        if (error instanceof ServiceError) {
          recordDump(error.dump);
        }
        throw error;
      }

      return {
        output: located.results,
      };
    },
  };

  return locateMultipleTask;
}

/**
 * Creates the execution task that locates all matches for a single locate
 * param through `service.locate` using `mode: 'all'`.
 *
 * @param plan - Source planning action; only its `thought` is copied.
 * @param detailedLocateParam - Locate param stored as the task's `param`.
 * @param context - Build context supplying `subTask` and the default-intent
 *   model config.
 * @returns A task of type 'Planning' / subType 'LocateAll' whose executor
 *   resolves to `{ output: results }` from the locate call.
 */
private createLocateAllTask(
  plan: PlanningAction<PlanningLocateParam>,
  detailedLocateParam: DetailedLocateParam,
  context: PlanBuildContext,
): ExecutionTaskApply {
  const defaultIntentModelConfig = context.modelConfigForDefaultIntent;

  const locateAllTask: ExecutionTaskApply = {
    type: 'Planning',
    subType: 'LocateAll',
    subTask: context.subTask || undefined,
    param: detailedLocateParam,
    thought: plan.thought,
    executor: async (param: DetailedLocateParam, taskContext: any) => {
      const { task } = taskContext;

      // Use the UI context from the task context, fetching one otherwise.
      const uiContext =
        taskContext.uiContext || (await this.service.contextRetrieverFn());
      assert(uiContext, 'uiContext is required for Service task');

      // Copies the service dump and its usage info onto the task.
      const recordDump = (dump?: ServiceDump): void => {
        if (dump) {
          task.log = {
            dump,
          };
          task.usage = dump.taskInfo?.usage;
        }
      };

      let located;
      try {
        located = await this.service.locate(
          param,
          { context: uiContext, mode: 'all' },
          defaultIntentModelConfig,
        );
        recordDump(located.dump);
      } catch (error) {
        // Keep the dump attached to a ServiceError before rethrowing.
        if (error instanceof ServiceError) {
          recordDump(error.dump);
        }
        throw error;
      }

      return {
        output: located.results,
      };
    },
  };

  return locateAllTask;
}
}
Loading
Loading