Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 24 additions & 18 deletions app_gradio/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,32 +247,38 @@ def generate_llm_config_dataframe(llm_list):

def suggest_fields_dataframe(rule_list, llm_list):
    """Suggest required field mappings based on selected evaluators.

    Looks up each selected rule/LLM evaluator class in the Model registries
    and collects the ``RequiredField`` values it declares in
    ``_required_fields``. Returns a gr.update() carrying dataframe rows of
    ``[required_field, dataset_column]`` pairs (both initialized to the same
    value so the user only has to adjust the dataset-column side).

    :param rule_list: names of selected rule evaluators
    :param llm_list: names of selected LLM evaluators
    :return: gradio update for the field-mapping Dataframe
    """
    # Local import kept to avoid a potential circular import at module load;
    # move to the top of the file if no cycle exists (PEP 8).
    from dingo.io.input import RequiredField

    def _collect_required_fields(names, name_map):
        """Return the set of RequiredField string values declared by the
        evaluator classes named in *names* (unknown names are ignored)."""
        fields = set()
        for name in names:
            evaluator_cls = name_map.get(name)
            if evaluator_cls is not None and hasattr(evaluator_cls, '_required_fields'):
                for field in evaluator_cls._required_fields:
                    # Only honor proper enum members; skip stray values.
                    if isinstance(field, RequiredField):
                        fields.add(field.value)
        return fields

    # Fields required by rule evaluators and LLM evaluators (same logic,
    # different registries) — deduplicated into one set.
    suggested_fields = _collect_required_fields(rule_list, Model.get_rule_name_map())
    suggested_fields |= _collect_required_fields(llm_list, Model.get_llm_name_map())

    # One row per field: Required Field and Dataset Column start identical.
    rows = [[field, field] for field in sorted(suggested_fields)]
    return gr.update(value=rows)


def get_rule_type_mapping():
Expand Down Expand Up @@ -405,11 +411,11 @@ def get_data_column_mapping():
# Field mapping configuration
gr.Markdown("**EvalPipline.fields** - Field Mapping")
fields_dataframe = gr.Dataframe(
value=[["content", "content"]],
headers=["Field Key", "Dataset Column"],
value=[],
headers=["Required Field", "Dataset Column"],
datatype=["str", "str"],
column_count=(2, "fixed"),
row_count=(1, "dynamic"),
row_count=(0, "dynamic"),
label="Field Mappings (add/remove rows as needed)",
interactive=True
)
Expand Down
1 change: 1 addition & 0 deletions dingo/io/input/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from dingo.io.input.data import Data # noqa E402.
from dingo.io.input.required_field import RequiredField # noqa E402.
8 changes: 8 additions & 0 deletions dingo/io/input/required_field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from enum import Enum


class RequiredField(Enum):
    """Canonical names of input fields an evaluator can declare as required.

    Evaluator classes list members of this enum in a ``_required_fields``
    attribute; UI code reads that attribute (checking membership with
    ``isinstance(field, RequiredField)``) and uses ``field.value`` to suggest
    dataset-column mappings for the selected evaluators.
    """
    CONTENT = "content"
    PROMPT = "prompt"
    CONTEXT = "context"
    IMAGE = "image"
4 changes: 3 additions & 1 deletion dingo/model/llm/agent/agent_hallucination.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import json
from typing import Any, Dict, List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
from dingo.model import Model
from dingo.model.llm.agent.base_agent import BaseAgent
Expand Down Expand Up @@ -96,6 +96,8 @@ class AgentHallucination(BaseAgent):
{{"claims": ["claim 1", "claim 2", ...]}}
"""

_required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT]

@classmethod
def eval(cls, input_data: Data) -> EvalDetail:
"""
Expand Down
4 changes: 3 additions & 1 deletion dingo/model/llm/agent/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from abc import abstractmethod
from typing import Any, Dict, List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
from dingo.model.llm.agent.tools import ToolRegistry
from dingo.model.llm.base_openai import BaseOpenAI
Expand Down Expand Up @@ -48,6 +48,8 @@ class BaseAgent(BaseOpenAI):
max_iterations: int = 5
use_agent_executor: bool = False # Opt-in to LangChain agent path

_required_fields = [RequiredField.CONTENT]

@classmethod
@abstractmethod
def plan_execution(cls, input_data: Data) -> List[Dict[str, Any]]:
Expand Down
4 changes: 4 additions & 0 deletions dingo/model/llm/agent/tools/base_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

from pydantic import BaseModel

from dingo.io.input import RequiredField


class ToolConfig(BaseModel):
"""Base configuration for tools"""
Expand Down Expand Up @@ -39,6 +41,8 @@ class BaseTool(ABC):
description: str = None
config: ToolConfig = ToolConfig()

_required_fields = [RequiredField.CONTENT]

@classmethod
@abstractmethod
def execute(cls, **kwargs) -> Dict[str, Any]:
Expand Down
3 changes: 3 additions & 0 deletions dingo/model/llm/agent/tools/tavily_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from pydantic import Field

from dingo.io.input import RequiredField
from dingo.model.llm.agent.tools.base_tool import BaseTool, ToolConfig
from dingo.model.llm.agent.tools.tool_registry import tool_register
from dingo.utils import log
Expand Down Expand Up @@ -76,6 +77,8 @@ class TavilySearch(BaseTool):
description = "Search the web for factual information using Tavily AI"
config: TavilyConfig = TavilyConfig()

_required_fields = [RequiredField.IMAGE]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The _required_fields is set to [RequiredField.IMAGE], which is incorrect. The TavilySearch tool performs a web search based on a text query, not an image. This is misleading and could cause issues if this attribute is used for tools in the future. It should likely be [RequiredField.CONTENT] to reflect that the query often originates from text content.

Suggested change
_required_fields = [RequiredField.IMAGE]
_required_fields = [RequiredField.CONTENT]


@classmethod
def execute(cls, query: str, **kwargs) -> Dict[str, Any]:
"""
Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/base_lmdeploy_apiclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pydantic import ValidationError

from dingo.config.input_args import EvaluatorLLMArgs
from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
from dingo.model.llm.base import BaseLLM
from dingo.model.response.response_class import ResponseScoreReason
Expand All @@ -15,6 +15,7 @@

class BaseLmdeployApiClient(BaseLLM):
dynamic_config = EvaluatorLLMArgs()
_required_fields = [RequiredField.CONTENT] # Default, override in subclasses

# @classmethod
# def set_prompt(cls, prompt):
Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/base_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pydantic import ValidationError

from dingo.config.input_args import EvaluatorLLMArgs
from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
from dingo.model.llm.base import BaseLLM
from dingo.model.response.response_class import ResponseScoreReason
Expand All @@ -15,6 +15,7 @@

class BaseOpenAI(BaseLLM):
dynamic_config = EvaluatorLLMArgs()
_required_fields = [RequiredField.CONTENT] # Default, override in subclasses

# Embedding 模型配置(用于 RAG 相关评估器)
embedding_model = None
Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/compare/llm_code_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from typing import List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -25,6 +25,7 @@ class LLMCodeCompare(BaseOpenAI):
'evaluation_results': ''
}

_required_fields = [RequiredField.CONTENT]
prompt = """
你是一位专业的代码块识别评估专家,擅长分析 HTML 代码和 Markdown 文本中的代码块。现在我会提供三段内容:

Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/compare/llm_html_extract_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from typing import List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -13,6 +13,7 @@

@Model.llm_register("LLMHtmlExtractCompare")
class LLMHtmlExtractCompare(BaseOpenAI):
_required_fields = [RequiredField.CONTENT]
prompt = r"""
你是一位专业的 HTML 内容提取评估专家,擅长分析 HTML 代码和 Markdown 文本的转换质量。现在我会提供三段内容:

Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/compare/llm_html_extract_compare_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from typing import List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -13,6 +13,7 @@

@Model.llm_register("LLMHtmlExtractCompareEn")
class LLMHtmlExtractCompareEn(BaseOpenAI):
_required_fields = [RequiredField.CONTENT]
prompt = r"""
You are a professional HTML content extraction evaluator, skilled in analyzing the conversion quality between HTML code and Markdown text. I will provide three pieces of content:

Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/compare/llm_html_extract_compare_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import diff_match_patch as dmp_module

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -28,6 +28,7 @@ class LLMHtmlExtractCompareV2(BaseOpenAI):
- input_data.raw_data.get("language", "en"): 语言类型 ("zh" 或 "en")
"""

_required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
prompt = {
"content_en": r"""Please compare the following two texts, each extracted from the same webpage using different HTML parsing methods. Your task is to determine whether there is a difference in the core informational content between them.

Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/compare/llm_math_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from typing import List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -25,6 +25,7 @@ class LLMMathCompare(BaseOpenAI):
'evaluation_results': ''
}

_required_fields = [RequiredField.CONTENT]
prompt = """
你是一位专业的数学公式识别评估专家,擅长分析 HTML 代码和 Markdown 文本中的数学公式。现在我会提供三段内容:

Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/compare/llm_table_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from typing import List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -25,6 +25,7 @@ class LLMTableCompare(BaseOpenAI):
'evaluation_results': ''
}

_required_fields = [RequiredField.CONTENT]
prompt = """
你是一位专业的表格识别评估专家,擅长分析 HTML 代码和 Markdown 文本中的表格。现在我会提供三段内容:

Expand Down
3 changes: 3 additions & 0 deletions dingo/model/llm/hhh/llm_text_3h.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json

from dingo.io.input import RequiredField
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
from dingo.model.llm.base_openai import BaseOpenAI
from dingo.model.response.response_class import ResponseScoreReason
Expand All @@ -9,6 +10,8 @@

# @Model.llm_register("LLMText3H")
class LLMText3H(BaseOpenAI):
_required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]

@classmethod
def build_messages(cls, input_data):
question = input_data.prompt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
4. Completeness: 指令是否完整,包含所有必要信息
"""

from dingo.io.input import RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand Down Expand Up @@ -42,6 +43,7 @@ class LLMInstructionClarity(BaseOpenAI):
"examples": "examples/sft/evaluate_instruction_quality.py"
}

_required_fields = [RequiredField.CONTENT]
prompt = """
# Role
You are an expert in evaluating instruction quality for Large Language Model training data.
Expand Down
2 changes: 2 additions & 0 deletions dingo/model/llm/instruction_quality/llm_task_difficulty.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
4. Constraint Density: 约束条件密度
"""

from dingo.io.input import RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand Down Expand Up @@ -42,6 +43,7 @@ class LLMTaskDifficulty(BaseOpenAI):
"examples": "examples/sft/evaluate_instruction_quality.py"
}

_required_fields = [RequiredField.CONTENT]
prompt = """
# Role
You are an expert in assessing task complexity and difficulty for LLM training data evaluation.
Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/llm_classify_qr.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from typing import List

from dingo.io.input import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -20,6 +20,7 @@ class LLMClassifyQR(BaseOpenAI):
"evaluation_results": ""
}

_required_fields = [RequiredField.CONTENT]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This model classifies images, but _required_fields is set to [RequiredField.CONTENT]. This is confusing because content usually implies text. For consistency with other vision models and to make the UI mapping more intuitive, this should be [RequiredField.IMAGE]. The build_messages method would also need to be updated to use input_data.image instead of input_data.content to align with this change.

Suggested change
_required_fields = [RequiredField.CONTENT]
_required_fields = [RequiredField.IMAGE]

prompt = """
'Classify the image into one of the following categories: "CAPTCHA", "QR code", or "Normal image". '
'Return the type as the image category (CAPTCHA or QR code or Normal image) and the reason as the specific type of CAPTCHA or QR code. '
Expand Down
2 changes: 2 additions & 0 deletions dingo/model/llm/llm_classify_topic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json

from dingo.io.input import RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -22,6 +23,7 @@ class LLMClassifyTopic(BaseOpenAI):
"validation_dataset": "AlignBench (https://github.com/THUDM/AlignBench)"
}

_required_fields = [RequiredField.CONTENT]
prompt = """
Assume you are a topic classifier, and your task is to categorize user-provided instructions.
There are six options in the list provided. You are required to select one category from the following list: ["Language Understanding and Processing", "Writing Ability", "Code", "Mathematics & Reasoning", "Task-oriented Role Play", "Knowledge-based Question and Answering"].
Expand Down
2 changes: 2 additions & 0 deletions dingo/model/llm/llm_dataman_assessment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json

from dingo.io.input import RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -25,6 +26,7 @@ class LLMDatamanAssessment(BaseOpenAI):
"evaluation_results": ""
}

_required_fields = [RequiredField.CONTENT]
prompt = """
### Role
You are an expert in data quality assessment for large language models.
Expand Down
3 changes: 2 additions & 1 deletion dingo/model/llm/llm_document_parsing_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from typing import List

from dingo.io import Data
from dingo.io.input import Data, RequiredField
from dingo.io.output.eval_detail import EvalDetail
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI
Expand All @@ -20,6 +20,7 @@ class LLMMinerURecognizeQuality(BaseOpenAI):
"description": "Evaluate the quality of mineru recognize",
"evaluation_results": "error_category and error_label",
}
_required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
prompt = r"""
你是一位熟悉文档解析领域的质量专家,你的核心任务是根据正确的markdown"工具标准结果Markdown",以及对应OCR工具预测结果"Pred的内容",获取工具预测结果的错误类型。
*错误类别和标签*
Expand Down
Loading