diff --git a/.github/env/custom_config_rule.json b/.github/env/custom_config_rule.json index 3df37069..ab21f27e 100644 --- a/.github/env/custom_config_rule.json +++ b/.github/env/custom_config_rule.json @@ -1,24 +1,16 @@ { "input_path": "test/data/test_local_json.json", - "log_level": "DEBUG", "dataset": { "source": "local", - "format": "json", - "field": { - "content": "prediction" - } - }, - "executor": { - "rule_list": ["RuleSpecialCharacter", "RuleWatermark"] + "format": "json" }, - "evaluator": { - "rule_config": { - "RuleSpecialCharacter": { - "pattern": "[�^□]|\\{\\/U\\}" - }, - "RuleWatermark": { - "key_list": ["谢邀", "Architecture of dingo"] - } + "evaluator": [ + { + "fields": {"content": "prediction"}, + "evals": [ + {"name": "RuleSpecialCharacter", "config": {"pattern": "[�^□]|\\{\\/U\\}"}}, + {"name": "RuleWatermark", "config": {"key_list": ["谢邀", "Architecture of dingo"]}} + ] } - } + ] } diff --git a/.github/env/hf_json.json b/.github/env/hf_json.json index f3c130a8..1e231eae 100644 --- a/.github/env/hf_json.json +++ b/.github/env/hf_json.json @@ -2,13 +2,14 @@ "input_path": "chupei/format-json", "dataset": { "source": "hugging_face", - "format": "json", - "field": { - "prompt": "origin_prompt", - "content": "prediction" - } + "format": "json" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "fields": {"prompt": "origin_prompt", "content": "prediction"}, + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/env/hf_jsonl.json b/.github/env/hf_jsonl.json index 99100ecf..cac0a0e2 100644 --- a/.github/env/hf_jsonl.json +++ b/.github/env/hf_jsonl.json @@ -2,12 +2,14 @@ "input_path": "chupei/format-jsonl", "dataset": { "source": "hugging_face", - "format": "jsonl", - "field": { - "content": "content" - } + "format": "jsonl" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "fields": {"content": "content"}, + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/env/hf_listjson.json b/.github/env/hf_listjson.json index 2bfa69c5..8f8e436c 100644 --- a/.github/env/hf_listjson.json +++ b/.github/env/hf_listjson.json @@ -2,13 +2,14 @@ "input_path": "chupei/format-listjson", "dataset": { "source": "hugging_face", - "format": "listjson", - "field": { - "prompt": "instruction", - "content": "output" - } + "format": "listjson" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "fields": {"prompt": "instruction", "content": "output"}, + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/env/hf_plaintext.json b/.github/env/hf_plaintext.json index 7eef1682..ff67f07e 100644 --- a/.github/env/hf_plaintext.json +++ b/.github/env/hf_plaintext.json @@ -2,12 +2,13 @@ "input_path": "chupei/format-text", "dataset": { "source": "hugging_face", - "format": "plaintext", - "field": { - "content": "text" - } + "format": "plaintext" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/env/local_json.json b/.github/env/local_json.json index 8a464d4c..814d3b50 100644 --- a/.github/env/local_json.json +++ b/.github/env/local_json.json @@ -2,12 +2,14 @@ "input_path": "test/data/test_local_json.json", "dataset": { "source": "local", - "format": "json", - "field": { - "content": "prediction" - } + "format": "json" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "fields": {"content": "prediction"}, + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git 
a/.github/env/local_jsonl.json b/.github/env/local_jsonl.json index 3424b7fa..a07c9242 100644 --- a/.github/env/local_jsonl.json +++ b/.github/env/local_jsonl.json @@ -2,12 +2,14 @@ "input_path": "test/data/test_local_jsonl.jsonl", "dataset": { "source": "local", - "format": "jsonl", - "field": { - "content": "content" - } + "format": "jsonl" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "fields": {"content": "content"}, + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/env/local_listjson.json b/.github/env/local_listjson.json index b7ca07fd..4ad854b5 100644 --- a/.github/env/local_listjson.json +++ b/.github/env/local_listjson.json @@ -2,12 +2,14 @@ "input_path": "test/data/test_local_listjson.json", "dataset": { "source": "local", - "format": "listjson", - "field": { - "content": "output" - } + "format": "listjson" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "fields": {"content": "output"}, + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/env/local_plaintext.json b/.github/env/local_plaintext.json index 426d4121..e61c57e4 100644 --- a/.github/env/local_plaintext.json +++ b/.github/env/local_plaintext.json @@ -4,7 +4,11 @@ "source": "local", "format": "plaintext" }, - "executor": { - "eval_group": "default" - } + "evaluator": [ + { + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/env/local_plaintext_save.json b/.github/env/local_plaintext_save.json index f811804b..73e3e0ba 100644 --- a/.github/env/local_plaintext_save.json +++ b/.github/env/local_plaintext_save.json @@ -5,9 +5,15 @@ "format": "plaintext" }, "executor": { - "eval_group": "default", "result_save": { "bad": true } - } + }, + "evaluator": [ + { + "evals": [ + {"name": "RuleColonEnd"} + ] + } + ] } diff --git a/.github/scripts/check_imports.py b/.github/scripts/check_imports.py new file mode 100644 index 00000000..588eabbe --- /dev/null +++ b/.github/scripts/check_imports.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +"""检查所有Python文件是否可以成功编译和导入""" + +import os +import py_compile +import sys +from pathlib import Path + + +def check_syntax(file_path): + """检查Python文件语法""" + try: + py_compile.compile(file_path, doraise=True) + return True, None + except py_compile.PyCompileError as e: + return False, str(e) + + +def main(): + """主函数""" + project_root = Path(__file__).parent.parent.parent + dingo_path = project_root / "dingo" + + if not dingo_path.exists(): + print(f"❌ 找不到dingo目录: {dingo_path}") + sys.exit(1) + + errors = [] + checked = 0 + + print("🔍 检查所有Python文件的语法和导入...") + print("-" * 60) + + for py_file in dingo_path.rglob("*.py"): + if "__pycache__" in str(py_file): + continue + + checked += 1 + success, error = check_syntax(str(py_file)) + + if success: + print(f"✓ {py_file.relative_to(project_root)}") + else: + error_msg = f"✗ {py_file.relative_to(project_root)}: {error}" + print(error_msg) + errors.append(error_msg) + + print("-" * 60) + print(f"📊 检查了 {checked} 个文件") + + if errors: + print(f"\n❌ 发现 {len(errors)} 个错误:") + for error in errors: + print(f" {error}") + sys.exit(1) + else: + print(f"✅ 所有文件检查通过!") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/IntegrationTest.yml b/.github/workflows/IntegrationTest.yml index 36d50144..6d06ab6a 100644 --- a/.github/workflows/IntegrationTest.yml +++ b/.github/workflows/IntegrationTest.yml @@ -25,8 +25,14 @@ jobs: python -m pip install --upgrade pip pip install pytest if [ -f requirements/runtime.txt ]; then pip 
install -r requirements/runtime.txt; fi + pip install pyspark + pip install tavily-python pip install -e . + - name: Check Python syntax and imports + run: | + python .github/scripts/check_imports.py + - name: Integration Test(local plaintext) run: | python -m dingo.run.cli --input .github/env/local_plaintext.json @@ -45,7 +51,7 @@ jobs: python -m dingo.run.cli --input .github/env/hf_plaintext.json - name: Integration Test(huggingface json) run: | - python -m dingo.run.cli --input .github/env/hf_plaintext.json + python -m dingo.run.cli --input .github/env/hf_json.json - name: Integration Test(huggingface jsonl) run: | python -m dingo.run.cli --input .github/env/hf_jsonl.json diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5569e069..2fa4491e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -15,7 +15,47 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.10" - - name: Run pre-commit + + - name: Install pre-commit + run: pip install pre-commit==3.8.0 + + - name: Install package + run: | + python -m pip install --upgrade pip + if [ -f requirements/runtime.txt ]; then pip install -r requirements/runtime.txt; fi + pip install -e . + + - name: Check Python syntax and imports + run: | + python .github/scripts/check_imports.py + + - name: Run pre-commit (auto-fix) + id: pre_commit_auto_fix + run: | + # 运行 pre-commit,允许自动修复,不因修复而失败 + pre-commit run --all-files || true + + - name: Check for changes + id: check_changes + run: | + if [[ -n $(git status --porcelain) ]]; then + echo "changed=true" >> $GITHUB_OUTPUT + echo "📝 Files were modified by pre-commit auto-fix" + else + echo "changed=false" >> $GITHUB_OUTPUT + echo "✅ No auto-fix changes" + fi + + - name: Commit auto-fix changes + if: steps.check_changes.outputs.changed == 'true' && github.event_name == 'push' + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add -A + git commit -m "🎨 Auto-format code with pre-commit" + git push + + - name: Run pre-commit (final check) run: | - pip install pre-commit==3.8.0 + # 再次运行 pre-commit,这次如果有错误就真的失败 pre-commit run --all-files diff --git a/.github/workflows/metrics-validation.yml b/.github/workflows/metrics-validation.yml index 32c630c2..e0656c6e 100644 --- a/.github/workflows/metrics-validation.yml +++ b/.github/workflows/metrics-validation.yml @@ -4,12 +4,12 @@ on: push: branches: [ main, dev ] paths: - - 'dingo/model/prompt/**' + - 'dingo/model/**' - 'scripts/generate_metrics.py' pull_request: branches: [ main ] paths: - - 'dingo/model/prompt/**' + - 'dingo/model/**' - 'scripts/generate_metrics.py' workflow_dispatch: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b114fe50..c1fd47c1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,6 +5,7 @@ repos: rev: v5.0.0 hooks: - id: trailing-whitespace + exclude: '^README.*\.md$' - id: end-of-file-fixer exclude: 'docs/metrics\.md' - id: check-yaml diff --git a/README.md b/README.md index 21324476..72a399cf 100644 --- a/README.md +++ b/README.md @@ -55,9 +55,25 @@

-# Introduction of Dingo +# Introduction -Dingo is a data quality evaluation tool that helps you automatically detect data quality issues in your datasets. Dingo provides a variety of built-in rules and model evaluation methods, and also supports custom evaluation methods. Dingo supports commonly used text datasets and multimodal datasets, including pre-training datasets, fine-tuning datasets, and evaluation datasets. In addition, Dingo supports multiple usage methods, including local CLI and SDK, making it easy to integrate into various evaluation platforms, such as [OpenCompass](https://github.com/open-compass/opencompass). +**Dingo is A Comprehensive AI Data, Model and Application Quality Evaluation Tool**, designed for ML practitioners, data engineers, and AI researchers. It helps you systematically assess and improve the quality of training data, fine-tuning datasets, and production AI systems. + +## Why Dingo? + +🎯 **Production-Grade Quality Checks** - From pre-training datasets to RAG systems, ensure your AI gets high-quality data + +🗄️ **Multi-Source Data Integration** - Seamlessly connect to Local files, SQL databases (PostgreSQL/MySQL/SQLite), HuggingFace datasets, and S3 storage + +🔍 **Multi-Field Evaluation** - Apply different quality rules to different fields in parallel (e.g., ISBN validation for `isbn`, text quality for `title`) + +🤖 **RAG System Assessment** - Comprehensive evaluation of retrieval and generation quality with 5 academic-backed metrics + +🧠 **LLM & Rule & Agent Hybrid** - Combine fast heuristic rules (30+ built-in) with LLM-based deep assessment + +🚀 **Flexible Execution** - Run locally for rapid iteration or scale with Spark for billion-scale datasets + +📊 **Rich Reporting** - Detailed quality reports with GUI visualization and field-level insights ## Architecture Diagram @@ -78,7 +94,7 @@ pip install dingo-python ```python from dingo.config.input_args import EvaluatorLLMArgs from dingo.io.input import Data -from dingo.model.llm.llm_text_quality_model_base import LLMTextQualityModelBase +from dingo.model.llm.text_quality.llm_text_quality_v4 import LLMTextQualityV4 from dingo.model.rule.rule_common import RuleEnterAndSpace data = Data( @@ -89,12 +105,12 @@ data = Data( def llm(): - LLMTextQualityModelBase.dynamic_config = EvaluatorLLMArgs( + LLMTextQualityV4.dynamic_config = EvaluatorLLMArgs( key='YOUR_API_KEY', api_url='https://api.openai.com/v1/chat/completions', model='gpt-4o', ) - res = LLMTextQualityModelBase.eval(data) + res = LLMTextQualityV4.eval(data) print(res) @@ -117,11 +133,18 @@ input_data = { "format": "plaintext" # Format: plaintext }, "executor": { - "eval_group": "sft", # Rule set for SFT data "result_save": { "bad": True # Save evaluation results } - } + }, + "evaluator": [ + { + "evals": [ + {"name": "RuleColonEnd"}, + {"name": "RuleSpecialCharacter"} + ] + } + ] } input_args = InputArgs(**input_data) @@ -190,158 +213,353 @@ https://github.com/user-attachments/assets/aca26f4c-3f2e-445e-9ef9-9331c4d7a37b This video demonstrates step-by-step how to use Dingo MCP server with Cursor. -# Data Quality Metrics +# 🎓 Key Concepts for Practitioners + +## What Makes Dingo Production-Ready? -Dingo provides comprehensive data quality assessment through both rule-based and prompt-based evaluation metrics. These metrics cover multiple quality dimensions including effectiveness, completeness, similarity, security, and more. +### 1. 
**Multi-Field Evaluation Pipeline** +Apply different quality checks to different fields in a single pass: +```python +"evaluator": [ + {"fields": {"content": "isbn"}, "evals": [{"name": "RuleIsbn"}]}, + {"fields": {"content": "title"}, "evals": [{"name": "RuleAbnormalChar"}]}, + {"fields": {"content": "description"}, "evals": [{"name": "LLMTextQualityV5"}]} +] +``` +**Why It Matters**: Evaluate structured data (like database tables) without writing separate scripts for each field. -📊 **[View Complete Metrics Documentation →](docs/metrics.md)** +### 2. **Stream Processing for Large Datasets** +SQL datasources use SQLAlchemy's server-side cursors: +```python +# Handles billions of rows without OOM +for data in dataset.get_data(): # Yields one row at a time + result = evaluator.eval(data) +``` +**Why It Matters**: Process production databases without exporting to intermediate files. -Our evaluation system includes: -- **Pretrain Text Quality Assessment Metrics**: Pre-training data quality evaluation using DataMan methodology and enhanced multi-dimensional assessment -- **SFT Data Assessment Metrics**: Honest, Helpful, Harmless evaluation for supervised fine-tuning data -- **Classification Metrics**: Topic categorization and content classification -- **Multimodality Assessment Metrics**: Image classification and relevance evaluation -- **Rule-Based Quality Metrics**: Automated quality checks using heuristic rules for effectiveness and similarity detection -- **Factuality Assessment Metrics**: Two-stage factuality evaluation based on GPT-5 System Card -- etc +### 3. **Field Isolation in Memory** +RAG evaluations prevent context bleeding across different field combinations: +``` +outputs/ +├── user_input,response,retrieved_contexts/ # Faithfulness group +└── user_input,response/ # Answer Relevancy group +``` +**Why It Matters**: Accurate metric calculations when evaluating multiple field combinations. -Most metrics are backed by academic sources to ensure objectivity and scientific rigor. +### 4. **Hybrid Rule-LLM Strategy** +Combine fast rules (100% coverage) with sampled LLM checks (10% coverage): +```python +"evals": [ + {"name": "RuleAbnormalChar"}, # Fast, runs on all data + {"name": "LLMTextQualityV5"} # Expensive, sample if needed +] +``` +**Why It Matters**: Balance cost and coverage for production-scale evaluation. -### Using LLM Assessment in Evaluation +### 5. **Extensibility Through Registration** +Clean plugin architecture for custom rules, prompts, and models: +```python +@Model.rule_register('QUALITY_BAD_CUSTOM', ['default']) +class MyCustomRule(BaseRule): + @classmethod + def eval(cls, input_data: Data) -> EvalDetail: + # Example: check if content is empty + if not input_data.content: + return EvalDetail( + metric=cls.__name__, + status=True, # Found an issue + label=[f'{cls.metric_type}.{cls.__name__}'], + reason=["Content is empty"] + ) + return EvalDetail( + metric=cls.__name__, + status=False, # No issue found + label=['QUALITY_GOOD'] + ) +``` +**Why It Matters**: Adapt to domain-specific requirements without forking the codebase. -To use these assessment prompts in your evaluations, specify them in your configuration: +--- + +# 📚 Data Quality Metrics + +Dingo provides **70+ evaluation metrics** across multiple dimensions, combining rule-based speed with LLM-based depth. 
+ +## Metric Categories + +| Category | Examples | Use Case | +|----------|----------|----------| +| **Pretrain Text Quality** | Completeness, Effectiveness, Similarity, Security | LLM pre-training data filtering | +| **SFT Data Quality** | Honest, Helpful, Harmless (3H) | Instruction fine-tuning data | +| **RAG Evaluation** | Faithfulness, Context Precision, Answer Relevancy | RAG system assessment | +| **Hallucination Detection** | HHEM-2.1-Open, Factuality Check | Production AI reliability | +| **Classification** | Topic categorization, Content labeling | Data organization | +| **Multimodal** | Image-text relevance, VLM quality | Vision-language data | +| **Security** | PII detection, Perspective API toxicity | Privacy and safety | + +📊 **[View Complete Metrics Documentation →](docs/metrics.md)** +📖 **[RAG Evaluation Guide →](docs/rag_evaluation_metrics.md)** | **[中文版](docs/rag_evaluation_metrics_zh.md)** +🔍 **[Hallucination Detection Guide →](docs/hallucination_detection_guide.md)** | **[中文版](docs/hallucination_guide.md)** +✅ **[Factuality Assessment Guide →](docs/factuality_assessment_guide.md)** | **[中文版](docs/factcheck_guide.md)** + +Most metrics are backed by academic research to ensure scientific rigor. + +## Quick Metric Usage ```python +llm_config = { + "model": "gpt-4o", + "key": "YOUR_API_KEY", + "api_url": "https://api.openai.com/v1/chat/completions" +} + input_data = { - # Other parameters... - "executor": { - "prompt_list": ["QUALITY_BAD_SIMILARITY"], # Specific prompt to use - }, - "evaluator": { - "llm_config": { - "LLMTextQualityPromptBase": { # LLM model to use - "model": "gpt-4o", - "key": "YOUR_API_KEY", - "api_url": "https://api.openai.com/v1/chat/completions" - } + "evaluator": [ + { + "fields": {"content": "content"}, + "evals": [ + {"name": "RuleAbnormalChar"}, # Rule-based (fast) + {"name": "LLMTextQualityV5", "config": llm_config} # LLM-based (deep) + ] } - } + ] } ``` -You can customize these prompts to focus on specific quality dimensions or to adapt to particular domain requirements. When combined with appropriate LLM models, these prompts enable comprehensive evaluation of data quality across multiple dimensions. +**Customization**: All prompts are defined in `dingo/model/llm/` directory (organized by category: `text_quality/`, `rag/`, `hhh/`, etc.). Extend or modify them for domain-specific requirements. 
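For example, a minimal sketch of a domain-specific evaluator built this way (it reuses the `@Model.llm_register` decorator, `_metric_info` block, and `prompt` attribute shown in the Custom LLM/Prompt Registration section below; the class name, category, and prompt wording are illustrative, not built-in metrics):

```python
from dingo.model import Model
from dingo.model.llm.base_openai import BaseOpenAI


@Model.llm_register('MedicalTextQuality')
class MedicalTextQuality(BaseOpenAI):
    """Sketch of an LLM evaluator with a domain-tuned prompt."""

    _metric_info = {
        "metric_name": "MedicalTextQuality",
        "metric_type": "LLM-Based Quality",
        "category": "Domain Specific"
    }

    # Replace this wording with your own quality criteria.
    prompt = """Assess whether the following medical text is complete,
internally consistent about dosages, and understandable to a lay reader.
Return a quality judgement with a short reason."""
```

Once registered, the evaluator should be selectable by name in an `evaluator` config entry (e.g. `{"name": "MedicalTextQuality", "config": llm_config}`), the same way the built-in metrics are referenced above.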
-### Hallucination Detection & RAG System Evaluation -For detailed guidance on using Dingo's hallucination detection capabilities, including HHEM-2.1-Open local inference and LLM-based evaluation: +# 🌟 Feature Highlights -📖 **[View Hallucination Detection Guide →](docs/hallucination_guide.md)** +## 📊 Multi-Source Data Integration -### Factuality Assessment +**Diverse Data Sources** - Connect to where your data lives +✅ **Local Files**: JSONL, CSV, TXT, Parquet +✅ **SQL Databases**: PostgreSQL, MySQL, SQLite, Oracle, SQL Server (with stream processing) +✅ **Cloud Storage**: S3 and S3-compatible storage +✅ **ML Platforms**: Direct HuggingFace datasets integration -For comprehensive guidance on using Dingo's two-stage factuality evaluation system: +**Enterprise-Ready SQL Support** - Production database integration +✅ Memory-efficient streaming for billion-scale datasets +✅ Connection pooling and automatic resource cleanup +✅ Complex SQL queries (JOIN, WHERE, aggregations) +✅ Multiple dialect support with SQLAlchemy -📖 **[View Factuality Assessment Guide →](docs/factcheck_guide.md)** +**Multi-Field Quality Checks** - Different rules for different fields +✅ Parallel evaluation pipelines (e.g., ISBN validation + text quality simultaneously) +✅ Field aliasing and nested field extraction (`user.profile.name`) +✅ Independent result reports per field +✅ ETL pipeline architecture for flexible data transformation -# Rule Groups +--- -Dingo provides pre-configured rule groups for different types of datasets: +## 🤖 RAG System Evaluation -| Group | Use Case | Example Rules | -|-------|----------|---------------| -| `default` | General text quality | `RuleColonEnd`, `RuleContentNull`, `RuleDocRepeat`, etc. | -| `sft` | Fine-tuning datasets | Rules from `default` plus `RuleHallucinationHHEM` for hallucination detection | -| `rag` | RAG system evaluation | `RuleHallucinationHHEM`, `PromptHallucination` for response consistency | -| `hallucination` | Hallucination detection | `PromptHallucination` with LLM-based evaluation | -| `pretrain` | Pre-training datasets | Comprehensive set of 20+ rules including `RuleAlphaWords`, `RuleCapitalWords`, etc. | +**5 Academic-Backed Metrics** - Based on RAGAS, DeepEval, TruLens research +✅ **Faithfulness**: Answer-context consistency (hallucination detection) +✅ **Answer Relevancy**: Answer-query alignment +✅ **Context Precision**: Retrieval precision +✅ **Context Recall**: Retrieval recall +✅ **Context Relevancy**: Context-query relevance -To use a specific rule group: +**Comprehensive Reporting** - Auto-aggregated statistics +✅ Average, min, max, standard deviation for each metric +✅ Field-grouped results +✅ Batch and single evaluation modes -```python -input_data = { - "executor": { - "eval_group": "sft", # Use "default", "sft", "rag", "hallucination", or "pretrain" - } - # other parameters... 
-} -``` +📖 **[View RAG Evaluation Guide →](docs/rag_evaluation_metrics_zh.md)** + +--- + +## 🧠 Hybrid Evaluation System + +**Rule-Based** - Fast, deterministic, cost-effective +✅ 30+ built-in rules (text quality, format, PII detection) +✅ Regex, heuristics, statistical checks +✅ Custom rule registration + +**LLM-Based** - Deep semantic understanding +✅ OpenAI (GPT-4o, GPT-3.5), DeepSeek, Kimi +✅ Local models (Llama3, Qwen) +✅ Vision-Language Models (InternVL, Gemini) +✅ Custom prompt registration -# Feature Highlights +**Agent-Based** - Multi-step reasoning with tools +✅ Web search integration (Tavily) +✅ Adaptive context gathering +✅ Multi-source fact verification +✅ Custom agent & tool registration -## Multi-source & Multi-modal Support +**Extensible Architecture** +✅ Plugin-based rule/prompt/model registration +✅ Clean separation of concerns (agents, tools, orchestration) +✅ Domain-specific customization -- **Data Sources**: Local files, Hugging Face datasets, S3 storage -- **Data Types**: Pre-training, fine-tuning, and evaluation datasets -- **Data Modalities**: Text and image +--- -## Rule-based & Model-based Evaluation +## 🚀 Flexible Execution & Integration -- **Built-in Rules**: 20+ general heuristic evaluation rules -- **LLM Integration**: OpenAI, Kimi, and local models (e.g., Llama3) -- **Hallucination Detection**: HHEM-2.1-Open local model and GPT-based evaluation -- **RAG System Evaluation**: Response consistency and context alignment assessment -- **Custom Rules**: Easily extend with your own rules and models -- **Security Evaluation**: Perspective API integration +**Multiple Interfaces** +✅ CLI for quick checks +✅ Python SDK for integration +✅ MCP (Model Context Protocol) server for IDEs (Cursor, etc.) -## Flexible Usage +**Scalable Execution** +✅ Local executor for rapid iteration +✅ Spark executor for distributed processing +✅ Configurable concurrency and batching -- **Interfaces**: CLI and SDK options -- **Integration**: Easy integration with other platforms -- **Execution Engines**: Local and Spark +**Data Sources** +✅ **Local Files**: JSONL, CSV, TXT, Parquet formats +✅ **Hugging Face**: Direct integration with HF datasets hub +✅ **S3 Storage**: AWS S3 and S3-compatible storage +✅ **SQL Databases**: PostgreSQL, MySQL, SQLite, Oracle, SQL Server (stream processing for large-scale data) -## Comprehensive Reporting +**Modalities** +✅ Text (chat, documents, code) +✅ Images (with VLM support) +✅ Multimodal (text + image consistency) -- **Quality Metrics**: 7-dimensional quality assessment -- **Traceability**: Detailed reports for anomaly tracking +--- -# User Guide +## 📈 Rich Reporting & Visualization -## Custom Rules, Prompts, and Models +**Multi-Level Reports** +✅ Summary JSON with overall scores +✅ Field-level breakdown +✅ Per-rule violation details +✅ Type and name distribution -If the built-in rules don't meet your requirements, you can create custom ones: +**GUI Visualization** +✅ Built-in web interface +✅ Interactive data exploration +✅ Anomaly tracking -### Custom Rule Example +**Metric Aggregation** +✅ Automatic statistics (avg, min, max, std_dev) +✅ Field-grouped metrics +✅ Overall quality score + +--- + +# 📖 User Guide + +## 🔧 Extensibility + +Dingo uses a clean plugin architecture for domain-specific customization: + +### Custom Rule Registration ```python from dingo.model import Model from dingo.model.rule.base import BaseRule -from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes - 
-@Model.rule_register('QUALITY_BAD_RELEVANCE', ['default']) -class MyCustomRule(BaseRule): - """Check for custom pattern in text""" +from dingo.io.output.eval_detail import EvalDetail - dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here') +@Model.rule_register('QUALITY_BAD_CUSTOM', ['default']) +class DomainSpecificRule(BaseRule): + """Check domain-specific patterns""" @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() - # Your rule implementation here - return res + def eval(cls, input_data: Data) -> EvalDetail: + text = input_data.content + + # Your custom logic + is_valid = your_validation_logic(text) + + return EvalDetail( + metric=cls.__name__, + status=not is_valid, # False = good, True = bad + label=['QUALITY_GOOD' if is_valid else 'QUALITY_BAD_CUSTOM'], + reason=["Validation details..."] + ) ``` -### Custom LLM Integration +### Custom LLM/Prompt Registration ```python from dingo.model import Model from dingo.model.llm.base_openai import BaseOpenAI -@Model.llm_register('my_custom_model') -class MyCustomModel(BaseOpenAI): - # Custom implementation here - pass +@Model.llm_register('custom_evaluator') +class CustomEvaluator(BaseOpenAI): + """Custom LLM evaluator with specialized prompts""" + + _metric_info = { + "metric_name": "CustomEvaluator", + "metric_type": "LLM-Based Quality", + "category": "Custom Category" + } + + prompt = """Your custom prompt here...""" +``` + +**Examples:** +- [Custom Rules](examples/register/sdk_register_rule.py) +- [Custom Models](examples/register/sdk_register_llm.py) + +### Agent-Based Evaluation with Tools + +Dingo supports agent-based evaluators that can use external tools for multi-step reasoning and adaptive context gathering: + +```python +from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail +from dingo.model import Model +from dingo.model.llm.agent.base_agent import BaseAgent + +@Model.llm_register('MyAgent') +class MyAgent(BaseAgent): + """Custom agent with tool support""" + + available_tools = ["tavily_search", "my_custom_tool"] + max_iterations = 5 + + @classmethod + def eval(cls, input_data: Data) -> EvalDetail: + # Use tools for fact-checking + search_result = cls.execute_tool('tavily_search', query=input_data.content) + + # Multi-step reasoning with LLM + result = cls.send_messages([...]) + + return EvalDetail(...) 
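# Sketch notes: `execute_tool` is expected to dispatch to one of the tools in
# `available_tools`, and `send_messages` to drive the LLM reasoning loop; the
# elided arguments depend on your agent's logic. See
# docs/agent_development_guide.md for the full BaseAgent interface.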
+``` + +**Built-in Agent:** +- `AgentHallucination`: Enhanced hallucination detection with web search fallback + +**Configuration Example:** +```json +{ + "evaluator": [{ + "evals": [{ + "name": "AgentHallucination", + "config": { + "key": "openai-api-key", + "model": "gpt-4", + "parameters": { + "agent_config": { + "max_iterations": 5, + "tools": { + "tavily_search": {"api_key": "tavily-key"} + } + } + } + } + }] + }] +} ``` -See more examples in: -- [Register Rules](examples/register/sdk_register_rule.py) -- [Register Prompts](examples/register/sdk_register_prompt.py) -- [Register Models](examples/register/sdk_register_llm.py) +**Learn More:** +- [Agent Development Guide](docs/agent_development_guide.md) - Comprehensive guide for creating custom agents and tools +- [AgentHallucination Example](examples/agent/agent_hallucination_example.py) - Production agent example +- [AgentFactCheck Example](examples/agent/agent_executor_example.py) - LangChain agent example -## Execution Engines +## ⚙️ Execution Modes -### Local Execution +### Local Executor (Development & Small-Scale) ```python from dingo.config import InputArgs @@ -351,34 +569,33 @@ input_args = InputArgs(**input_data) executor = Executor.exec_map["local"](input_args) result = executor.execute() -# Get results -summary = executor.get_summary() # Overall evaluation summary -bad_data = executor.get_bad_info_list() # List of problematic data -good_data = executor.get_good_info_list() # List of high-quality data +# Access results +summary = executor.get_summary() # Overall metrics +bad_data = executor.get_bad_info_list() # Quality issues +good_data = executor.get_good_info_list() # High-quality data ``` -### Spark Execution +**Best For**: Rapid iteration, debugging, datasets < 100K rows + +### Spark Executor (Production & Large-Scale) ```python -from dingo.config import InputArgs -from dingo.exec import Executor from pyspark.sql import SparkSession +from dingo.exec import Executor -# Initialize Spark spark = SparkSession.builder.appName("Dingo").getOrCreate() -spark_rdd = spark.sparkContext.parallelize([...]) # Your data as Data objects +spark_rdd = spark.sparkContext.parallelize(your_data) -input_data = { - "executor": { - "eval_group": "default", - "result_save": {"bad": True} - } -} -input_args = InputArgs(**input_data) -executor = Executor.exec_map["spark"](input_args, spark_session=spark, spark_rdd=spark_rdd) +executor = Executor.exec_map["spark"]( + input_args, + spark_session=spark, + spark_rdd=spark_rdd +) result = executor.execute() ``` +**Best For**: Production pipelines, distributed processing, datasets > 1M rows + ## Evaluation Reports After evaluation, Dingo generates: @@ -389,7 +606,6 @@ After evaluation, Dingo generates: Report Description: 1. **score**: `num_good` / `total` 2. **type_ratio**: The count of type / total, such as: `QUALITY_BAD_COMPLETENESS` / `total` -3. 
**name_ratio**: The count of name / total, such as: `QUALITY_BAD_COMPLETENESS-RuleColonEnd` / `total` Example summary: ```json @@ -405,32 +621,34 @@ Example summary: "num_bad": 1, "total": 2, "type_ratio": { - "QUALITY_BAD_COMPLETENESS": 0.5, - "QUALITY_BAD_RELEVANCE": 0.5 - }, - "name_ratio": { - "QUALITY_BAD_COMPLETENESS-RuleColonEnd": 0.5, - "QUALITY_BAD_RELEVANCE-RuleSpecialCharacter": 0.5 + "content": { + "QUALITY_BAD_COMPLETENESS.RuleColonEnd": 0.5, + "QUALITY_BAD_RELEVANCE.RuleSpecialCharacter": 0.5 + } } } ``` -# Future Plans +# 🚀 Roadmap & Contributions + +## Future Plans -- [ ] Richer graphic and text evaluation indicators -- [ ] Audio and video data modality evaluation -- [ ] Small model evaluation (fasttext, Qurating) -- [ ] Data diversity evaluation +- [ ] **Agent-as-a-Judge** - Multi-agent debate patterns for bias reduction and complex reasoning +- [ ] **SaaS Platform** - Hosted evaluation service with API access and dashboard +- [ ] **Audio & Video Modalities** - Extend beyond text/image +- [ ] **Diversity Metrics** - Statistical diversity assessment +- [ ] **Real-time Monitoring** - Continuous quality checks in production pipelines -# Limitations +## Limitations -The current built-in detection rules and model methods focus on common data quality problems. For specialized evaluation needs, we recommend customizing detection rules. +The current built-in detection rules and model methods primarily focus on common data quality issues. For special evaluation needs, we recommend customizing detection rules. # Acknowledgments - [RedPajama-Data](https://github.com/togethercomputer/RedPajama-Data) - [mlflow](https://github.com/mlflow/mlflow) - [deepeval](https://github.com/confident-ai/deepeval) +- [ragas](https://github.com/explodinggradients/ragas) # Contribution diff --git a/README_ja.md b/README_ja.md index 1a7170b9..5ddfc792 100644 --- a/README_ja.md +++ b/README_ja.md @@ -26,6 +26,7 @@ GitHub issues MseeP.ai Security Assessment Badge Ask DeepWiki + Trust Score

@@ -55,7 +56,23 @@ # はじめに -Dingoは、データセット内のデータ品質問題を自動的に検出するデータ品質評価ツールです。Dingoは様々な組み込みルールとモデル評価手法を提供し、カスタム評価手法もサポートしています。Dingoは一般的に使用されるテキストデータセットとマルチモーダルデータセット(事前学習データセット、ファインチューニングデータセット、評価データセットを含む)をサポートしています。さらに、DingoはローカルCLIやSDKなど複数の使用方法をサポートし、[OpenCompass](https://github.com/open-compass/opencompass)などの様々な評価プラットフォームに簡単に統合できます。 +**Dingo は包括的な AI データ、モデル、アプリケーション品質評価ツール**であり、機械学習エンジニア、データエンジニア、AI 研究者向けに設計されています。トレーニングデータ、ファインチューニングデータセット、本番 AI システムの品質を体系的に評価・改善するのを支援します。 + +## なぜ Dingo を選ぶのか? + +🎯 **本番グレードの品質チェック** - 事前学習データセットから RAG システムまで、AI に高品質なデータを提供 + +🗄️ **マルチソースデータ統合** - ローカルファイル、SQL データベース(PostgreSQL/MySQL/SQLite)、HuggingFace データセット、S3 ストレージへのシームレスな接続 + +🔍 **マルチフィールド評価** - 異なるフィールドに並行して異なる品質ルールを適用(例:`isbn` フィールドには ISBN 検証、`title` フィールドにはテキスト品質チェック) + +🤖 **RAG システム評価** - 5つの学術的裏付けのある指標で検索と生成品質を包括的に評価 + +🧠 **LLM とルールのハイブリッド** - 高速ヒューリスティックルール(30以上の組み込みルール)と LLM ベースの深度評価を組み合わせ + +🚀 **柔軟な実行** - ローカルで実行して迅速に反復、または Spark で数十億規模のデータセットにスケール + +📊 **豊富なレポート** - GUI 可視化とフィールドレベルの洞察を備えた詳細な品質レポート ## アーキテクチャ図 @@ -76,7 +93,7 @@ pip install dingo-python ```python from dingo.config.input_args import EvaluatorLLMArgs from dingo.io.input import Data -from dingo.model.llm.llm_text_quality_model_base import LLMTextQualityModelBase +from dingo.model.llm.text_quality.llm_text_quality_v4 import LLMTextQualityV4 from dingo.model.rule.rule_common import RuleEnterAndSpace data = Data( @@ -85,13 +102,14 @@ data = Data( content="Hello! The world is a vast and diverse place, full of wonders, cultures, and incredible natural beauty." ) + def llm(): - LLMTextQualityModelBase.dynamic_config = EvaluatorLLMArgs( + LLMTextQualityV4.dynamic_config = EvaluatorLLMArgs( key='YOUR_API_KEY', api_url='https://api.openai.com/v1/chat/completions', model='gpt-4o', ) - res = LLMTextQualityModelBase.eval(data) + res = LLMTextQualityV4.eval(data) print(res) @@ -114,11 +132,18 @@ input_data = { "format": "plaintext" # フォーマット: plaintext }, "executor": { - "eval_group": "sft", # SFTデータ用のルールセット "result_save": { "bad": True # 評価結果を保存 } - } + }, + "evaluator": [ + { + "evals": [ + {"name": "RuleColonEnd"}, + {"name": "RuleSpecialCharacter"} + ] + } + ] } input_args = InputArgs(**input_data) @@ -185,138 +210,262 @@ https://github.com/user-attachments/assets/aca26f4c-3f2e-445e-9ef9-9331c4d7a37b このビデオでは、Dingo MCPサーバーをCursorと一緒に使用する方法をステップバイステップで説明しています。 -# データ品質メトリクス +# 🎓 実務者のための重要概念 + +## Dingo を本番環境で使用できる理由 + +### 1. **マルチフィールド評価パイプライン** +1回の実行で異なるフィールドに異なる品質チェックを適用: +```python +"evaluator": [ + {"fields": {"content": "isbn"}, "evals": [{"name": "RuleIsbn"}]}, + {"fields": {"content": "title"}, "evals": [{"name": "RuleAbnormalChar"}]}, + {"fields": {"content": "description"}, "evals": [{"name": "LLMTextQualityV5"}]} +] +``` +**重要性**:各フィールドごとに別々のスクリプトを書かずに構造化データ(データベーステーブルなど)を評価できます。 + +### 2. **大規模データセットのストリーミング処理** +SQL データソースは SQLAlchemy のサーバーサイドカーソルを使用: +```python +# メモリオーバーフローなしで数十億行を処理 +for data in dataset.get_data(): # 1行ずつyield + result = evaluator.eval(data) +``` +**重要性**:中間ファイルにエクスポートすることなく本番データベースを処理できます。 + +### 3. **メモリ内フィールド分離** +RAG 評価は異なるフィールド組み合わせ間のコンテキストリークを防止: +``` +outputs/ +├── user_input,response,retrieved_contexts/ # Faithfulness グループ +└── user_input,response/ # Answer Relevancy グループ +``` +**重要性**:複数のフィールド組み合わせを評価する際のメトリクス計算の正確性を保証。 + +### 4. **ルール-LLM ハイブリッド戦略** +高速ルール(100% カバレッジ)とサンプリング LLM チェック(10% カバレッジ)を組み合わせ: +```python +"evals": [ + {"name": "RuleAbnormalChar"}, # 高速、全データで実行 + {"name": "LLMTextQualityV5"} # コスト高、必要に応じてサンプリング +] +``` +**重要性**:本番規模の評価でコストとカバレッジのバランスを取る。 + +### 5. 
**登録による拡張性** +カスタムルール、プロンプト、モデルのための明確なプラグインアーキテクチャ: +```python +@Model.rule_register('QUALITY_BAD_CUSTOM', ['default']) +class MyCustomRule(BaseRule): + @classmethod + def eval(cls, input_data: Data) -> EvalDetail: + # 例:コンテンツが空かチェック + if not input_data.content: + return EvalDetail( + metric=cls.__name__, + status=True, # 問題を発見 + label=[f'{cls.metric_type}.{cls.__name__}'], + reason=["コンテンツが空です"] + ) + return EvalDetail( + metric=cls.__name__, + status=False, # 問題なし + label=['QUALITY_GOOD'] + ) +``` +**重要性**:コードベースをフォークせずにドメイン固有のニーズに適応。 + +--- + +# 📚 データ品質メトリクス -Dingoはルールベースおよびプロンプトベースの評価メトリクスを通じて包括的なデータ品質評価を提供します。これらのメトリクスは、効果性、完全性、類似性、セキュリティなどの複数の品質次元をカバーしています。 +Dingo は **70以上の評価メトリクス**を提供し、複数の次元にわたってルールベースの速度と LLM ベースの深度を組み合わせます。 -📊 **[完全なメトリクス文書を表示 →](docs/metrics.md)** +## メトリクスカテゴリ -評価システムには以下が含まれます: -- **テキスト品質評価メトリクス**: DataMan手法と拡張された多次元評価を使用した事前学習データの品質評価 -- **SFTデータ評価メトリクス**: 教師ありファインチューニングデータの正直、有用、無害評価 -- **分類メトリクス**: トピック分類とコンテンツ分類 -- **マルチモーダル評価メトリクス**: 画像分類と関連性評価 -- **ルールベース品質メトリクス**: ヒューリスティックルールによる効果性と類似性検出を用いた自動品質チェック -- **事実性評価メトリクス**: GPT-5 System Cardに基づく二段階事実性評価 -- など +| カテゴリ | 例 | 使用例 | +|----------|----------|----------| +| **事前学習テキスト品質** | 完全性、有効性、類似性、セキュリティ | LLM 事前学習データフィルタリング | +| **SFT データ品質** | 正直、有用、無害 (3H) | 指示ファインチューニングデータ | +| **RAG 評価** | 忠実度、コンテキスト精度、答え関連性 | RAG システム評価 | +| **幻覚検出** | HHEM-2.1-Open、事実性チェック | 本番 AI 信頼性 | +| **分類** | トピック分類、コンテンツラベリング | データ整理 | +| **マルチモーダル** | 画像テキスト関連性、VLM 品質 | ビジュアル言語データ | +| **セキュリティ** | PII 検出、Perspective API 毒性 | プライバシーと安全性 | -大部分のメトリクスは学術的なソースによって支持されており、客観性と科学的厳密性を保証しています。 +📊 **[完全なメトリクス文書を表示 →](docs/metrics.md)** +📖 **[RAG 評価ガイド →](docs/rag_evaluation_metrics_zh.md)** +🔍 **[幻覚検出ガイド →](docs/hallucination_guide.md)** +✅ **[事実性評価ガイド →](docs/factcheck_guide.md)** -### 評価でのLLM評価の使用 +大部分のメトリクスは学術研究に裏付けられており、科学的厳密性を確保しています。 -これらの評価プロンプトを評価で使用するには、設定で指定します: +## メトリクスの迅速な使用 ```python +llm_config = { + "model": "gpt-4o", + "key": "YOUR_API_KEY", + "api_url": "https://api.openai.com/v1/chat/completions" +} + input_data = { - # Other parameters... 
- "executor": { - "prompt_list": ["QUALITY_BAD_SIMILARITY"], # Specific prompt to use - }, - "evaluator": { - "llm_config": { - "LLMTextQualityPromptBase": { # LLM model to use - "model": "gpt-4o", - "key": "YOUR_API_KEY", - "api_url": "https://api.openai.com/v1/chat/completions" - } + "evaluator": [ + { + "fields": {"content": "content"}, + "evals": [ + {"name": "RuleAbnormalChar"}, # ルールベース(高速) + {"name": "LLMTextQualityV5", "config": llm_config} # LLMベース(深度) + ] } - } + ] } ``` -これらのプロンプトは、特定の品質次元に焦点を当てたり、特定のドメイン要件に適応させるためにカスタマイズできます。適切なLLMモデルと組み合わせることで、これらのプロンプトは複数の次元にわたる包括的なデータ品質評価を可能にします。 +**カスタマイズ**:すべてのプロンプトは `dingo/model/llm/` ディレクトリに定義されています(カテゴリ別に整理:`text_quality/`、`rag/`、`hhh/` など)。ドメイン固有のニーズに合わせて拡張または変更できます。 -### 幻覚検出とRAGシステム評価 -HHEM-2.1-Openローカル推論とLLMベース評価を含む、Dingoの幻覚検出機能の使用に関する詳細なガイダンス: +# 🌟 機能ハイライト -📖 **[幻覚検出ガイドを見る →](docs/hallucination_guide.md)** +## 📊 マルチソースデータ統合 -### 事実性評価 +**多様なデータソース** - データがある場所に接続 +✅ **ローカルファイル**:JSONL、CSV、TXT、Parquet +✅ **SQL データベース**:PostgreSQL、MySQL、SQLite、Oracle、SQL Server(ストリーミング処理対応) +✅ **クラウドストレージ**:S3 および S3 互換ストレージ +✅ **ML プラットフォーム**:HuggingFace データセットの直接統合 -Dingoの二段階事実性評価システムの使用に関する詳細なガイダンス: +**エンタープライズ対応 SQL サポート** - 本番データベース統合 +✅ 数十億規模のデータセットのメモリ効率的なストリーミング +✅ 接続プールと自動リソースクリーンアップ +✅ 複雑な SQL クエリ(JOIN、WHERE、集計) +✅ SQLAlchemy による複数の方言サポート -📖 **[事実性評価ガイドを見る →](docs/factcheck_guide.md)** +**マルチフィールド品質チェック** - 異なるフィールドに異なるルール +✅ 並列評価パイプライン(例:ISBN 検証 + テキスト品質を同時実行) +✅ フィールドエイリアスとネストされたフィールド抽出(`user.profile.name`) +✅ フィールドごとに独立した結果レポート +✅ 柔軟なデータ変換のための ETL パイプラインアーキテクチャ -# ルールグループ +--- -Dingoは異なるタイプのデータセット用に事前設定されたルールグループを提供します: +## 🤖 RAG システム評価 -| グループ | 使用例 | ルール例 | -|----------|--------|----------| -| `default` | 一般的なテキスト品質 | `RuleColonEnd`, `RuleContentNull`, `RuleDocRepeat`など | -| `sft` | ファインチューニングデータセット | `default`のルールに加えて幻覚検出用の`RuleHallucinationHHEM` | -| `rag` | RAGシステム評価 | 応答一貫性検出用の`RuleHallucinationHHEM`, `PromptHallucination` | -| `hallucination` | 幻覚検出 | LLMベース評価の`PromptHallucination` | -| `pretrain` | 事前学習データセット | `RuleAlphaWords`, `RuleCapitalWords`などを含む20以上のルールの包括的セット | +**5つの学術的裏付けのある指標** - RAGAS、DeepEval、TruLens 研究に基づく +✅ **忠実度(Faithfulness)**:答え-コンテキストの一貫性(幻覚検出) +✅ **答え関連性(Answer Relevancy)**:答え-クエリの整合性 +✅ **コンテキスト精度(Context Precision)**:検索精度 +✅ **コンテキスト再現率(Context Recall)**:検索再現率 +✅ **コンテキスト関連性(Context Relevancy)**:コンテキスト-クエリ関連性 -特定のルールグループを使用するには: +**包括的なレポート** - 自動集計統計 +✅ 各メトリクスの平均、最小、最大、標準偏差 +✅ フィールド別にグループ化された結果 +✅ バッチおよび単一評価モード -```python -input_data = { - "executor": { - "eval_group": "sft", # Use "default", "sft", "rag", "hallucination", or "pretrain" - } - # other parameters... 
-} -``` +📖 **[RAG 評価ガイドを見る →](docs/rag_evaluation_metrics_zh.md)** + +--- -# 機能ハイライト +## 🧠 ハイブリッド評価システム -## マルチソース・マルチモーダルサポート +**ルールベース** - 高速、決定論的、コスト効率 +✅ 30以上の組み込みルール(テキスト品質、フォーマット、PII 検出) +✅ 正規表現、ヒューリスティック、統計チェック +✅ カスタムルール登録 -- **データソース**: ローカルファイル、Hugging Faceデータセット、S3ストレージ -- **データタイプ**: 事前学習、ファインチューニング、評価データセット -- **データモダリティ**: テキストと画像 +**LLM ベース** - 深い意味理解 +✅ OpenAI(GPT-4o、GPT-3.5)、DeepSeek、Kimi +✅ ローカルモデル(Llama3、Qwen) +✅ ビジョン言語モデル(InternVL、Gemini) +✅ カスタムプロンプト登録 -## ルールベース・モデルベース評価 +**拡張可能なアーキテクチャ** +✅ プラグインベースのルール/プロンプト/モデル登録 +✅ 明確な関心の分離(エージェント、ツール、オーケストレーション) +✅ ドメイン固有のカスタマイズ -評価システムには以下が含まれます: -- **テキスト品質評価メトリクス**: DataMan手法と拡張された多次元評価を使用した事前学習データの品質評価 -- **SFTデータ評価メトリクス**: 教師ありファインチューニングデータの正直、有用、無害評価 -- **幻覚検出**: HHEM-2.1-OpenローカルモデルとGPTベースの評価 -- **RAGシステム評価**: 応答一貫性とコンテキスト整合性評価 -- **分類メトリクス**: トピック分類とコンテンツ分類 -- **マルチモーダル評価メトリクス**: 画像分類と関連性評価 -- **ルールベース品質メトリクス**: ヒューリスティックルールによる効果性と類似性検出を用いた自動品質チェック +--- -## 柔軟な使用方法 +## 🚀 柔軟な実行と統合 -- **インターフェース**: CLIとSDKオプション -- **統合**: 他のプラットフォームとの簡単な統合 -- **実行エンジン**: ローカルとSpark +**複数のインターフェース** +✅ 迅速なチェックのための CLI +✅ 統合のための Python SDK +✅ IDE 用 MCP(モデルコンテキストプロトコル)サーバー(Cursor など) -## 包括的なレポート +**スケーラブルな実行** +✅ 迅速な反復のためのローカル実行 +✅ 分散処理のための Spark 実行 +✅ 設定可能な並行性とバッチ処理 -- **品質メトリクス**: 7次元品質評価 -- **トレーサビリティ**: 異常追跡のための詳細レポート +**データソース** +✅ **ローカルファイル**:JSONL、CSV、TXT、Parquet フォーマット +✅ **Hugging Face**:HF データセットハブとの直接統合 +✅ **S3 ストレージ**:AWS S3 および S3 互換ストレージ +✅ **SQL データベース**:PostgreSQL、MySQL、SQLite、Oracle、SQL Server(大規模データのストリーミング処理) -# ユーザーガイド +**モダリティ** +✅ テキスト(チャット、ドキュメント、コード) +✅ 画像(VLM サポート) +✅ マルチモーダル(テキスト+画像の一貫性) + +--- + +## 📈 豊富なレポートと可視化 + +**多層レポート** +✅ 全体スコア付き Summary JSON +✅ フィールドレベルの内訳 +✅ ルール違反ごとの詳細情報 +✅ タイプと名前の分布 + +**GUI 可視化** +✅ 組み込み Web インターフェース +✅ インタラクティブなデータ探索 +✅ 異常追跡 + +**メトリクス集計** +✅ 自動統計(avg、min、max、std_dev) +✅ フィールド別にグループ化されたメトリクス +✅ 全体品質スコア + +# 📖 ユーザーガイド ## カスタムルール、プロンプト、モデル -組み込みルールが要件を満たさない場合、カスタムルールを作成できます: +Dingo はドメイン固有のニーズに対応する柔軟な拡張メカニズムを提供します。 + +**例:** +- [カスタムルール](examples/register/sdk_register_rule.py) +- [カスタムモデル](examples/register/sdk_register_llm.py) ### カスタムルール例 ```python from dingo.model import Model from dingo.model.rule.base import BaseRule -from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes +from dingo.io.output.eval_detail import EvalDetail -@Model.rule_register('QUALITY_BAD_RELEVANCE', ['default']) -class MyCustomRule(BaseRule): - """テキスト内のカスタムパターンをチェック""" - - dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here') +@Model.rule_register('QUALITY_BAD_CUSTOM', ['default']) +class DomainSpecificRule(BaseRule): + """ドメイン固有のパターンをチェック""" @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() - # ここにルール実装 - return res + def eval(cls, input_data: Data) -> EvalDetail: + text = input_data.content + + # あなたのカスタムロジック + is_valid = your_validation_logic(text) + + return EvalDetail( + metric=cls.__name__, + status=not is_valid, # False = 良好, True = 問題あり + label=['QUALITY_GOOD' if is_valid else 'QUALITY_BAD_CUSTOM'], + reason=["検証の詳細..."] + ) ``` ### カスタムLLM統合 @@ -333,7 +482,6 @@ class MyCustomModel(BaseOpenAI): 詳細な例については以下をご覧ください: - [ルール登録](examples/register/sdk_register_rule.py) -- [プロンプト登録](examples/register/sdk_register_prompt.py) - [モデル登録](examples/register/sdk_register_llm.py) ## 実行エンジン @@ -363,13 +511,21 @@ from pyspark.sql import SparkSession # Sparkを初期化 spark = SparkSession.builder.appName("Dingo").getOrCreate() -spark_rdd = spark.sparkContext.parallelize([...]) # MetaDataオブジェクトとしてのデータ +spark_rdd = 
spark.sparkContext.parallelize([...]) # Dataオブジェクトとしてのデータ input_data = { "executor": { - "eval_group": "default", "result_save": {"bad": True} - } + }, + "evaluator": [ + { + "fields": {"content": "content"}, + "evals": [ + {"name": "RuleColonEnd"}, + {"name": "RuleSpecialCharacter"} + ] + } + ] } input_args = InputArgs(**input_data) executor = Executor.exec_map["spark"](input_args, spark_session=spark, spark_rdd=spark_rdd) @@ -386,7 +542,6 @@ result = executor.execute() レポートの説明: 1. **score**: `num_good` / `total` 2. **type_ratio**: タイプの数 / 総数, 例: `QUALITY_BAD_COMPLETENESS` / `total` -3. **name_ratio**: 名前の数 / 総数, 例: `QUALITY_BAD_COMPLETENESS-RuleColonEnd` / `total` サマリー例: ```json @@ -402,24 +557,24 @@ result = executor.execute() "num_bad": 1, "total": 2, "type_ratio": { - "QUALITY_BAD_COMPLETENESS": 0.5, - "QUALITY_BAD_RELEVANCE": 0.5 - }, - "name_ratio": { - "QUALITY_BAD_COMPLETENESS-RuleColonEnd": 0.5, - "QUALITY_BAD_RELEVANCE-RuleSpecialCharacter": 0.5 + "content": { + "QUALITY_BAD_COMPLETENESS.RuleColonEnd": 0.5, + "QUALITY_BAD_RELEVANCE.RuleSpecialCharacter": 0.5 + } } } ``` -# 今後の計画 +# 🔮 今後の計画 -- [ ] より豊富なグラフィックとテキスト評価指標 -- [ ] 音声・動画データモダリティ評価 -- [ ] 小規模モデル評価(fasttext、Qurating) -- [ ] データ多様性評価 +**近日公開予定の機能**: +- [ ] **Agent-as-a-Judge** - 多ラウンド反復評価 +- [ ] **SaaS プラットフォーム** - API アクセスとダッシュボードを備えたホスト型評価サービス +- [ ] **音声・動画モダリティ** - テキスト/画像を超えた拡張 +- [ ] **多様性メトリクス** - 統計的多様性評価 +- [ ] **リアルタイム監視** - 本番パイプラインでの継続的品質チェック -# 制限事項 +## 制限事項 現在の組み込み検出ルールとモデル手法は、一般的なデータ品質問題に焦点を当てています。専門的な評価ニーズについては、検出ルールのカスタマイズを推奨します。 @@ -428,6 +583,7 @@ result = executor.execute() - [RedPajama-Data](https://github.com/togethercomputer/RedPajama-Data) - [mlflow](https://github.com/mlflow/mlflow) - [deepeval](https://github.com/confident-ai/deepeval) +- [ragas](https://github.com/explodinggradients/ragas) # 貢献 diff --git a/README_zh-CN.md b/README_zh-CN.md index e364332c..b92615f3 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -26,8 +26,11 @@ GitHub 问题 MseeP.ai 安全评估徽章 Ask DeepWiki + Trust Score

+ +
@@ -53,29 +56,45 @@ # Dingo 介绍 -Dingo是一款数据质量评估工具,帮助你自动化检测数据集中的数据质量问题。Dingo提供了多种内置的规则和模型评估方法,同时也支持自定义评估方法。Dingo支持常用的文本数据集和多模态数据集,包括预训练数据集、微调数据集和评测数据集。此外,Dingo支持多种使用方式,包括本地CLI和SDK,便于集成到各种评测平台,如[OpenCompass](https://github.com/open-compass/opencompass)等。 +**Dingo 是一款全面的 AI 数据、模型和应用质量评估工具**,专为机器学习工程师、数据工程师和 AI 研究人员设计。它帮助你系统化地评估和改进训练数据、微调数据集和生产AI系统的质量。 + +## 为什么选择 Dingo? + +🎯 **生产级质量检查** - 从预训练数据集到 RAG 系统,确保你的 AI 获得高质量数据 + +🗄️ **多数据源集成** - 无缝连接本地文件、SQL 数据库(PostgreSQL/MySQL/SQLite)、HuggingFace 数据集和 S3 存储 + +🔍 **多字段评估** - 对不同字段并行应用不同的质量规则(例如:对 `isbn` 字段进行 ISBN 验证,对 `title` 字段进行文本质量检查) -## 1. 架构图 +🤖 **RAG 系统评估** - 使用 5 个学术支持的指标全面评估检索和生成质量 + +🧠 **LLM、规则和智能体混合** - 结合快速启发式规则(30+ 内置规则)和基于 LLM 的深度评估 + +🚀 **灵活执行** - 本地运行快速迭代,或使用 Spark 扩展到数十亿级数据集 + +📊 **丰富报告** - 详细的质量报告,带有 GUI 可视化和字段级洞察 + +## 架构图 ![Architecture of dingo](./docs/assets/architeture.png) # 快速启动 -## 1. 安装 +## 安装 ```shell pip install dingo-python ``` -## 2. Dingo 使用示例 +## Dingo 使用示例 -### 2.1 评估LLM对话数据 +### 1. 评估LLM对话数据 ```python from dingo.config.input_args import EvaluatorLLMArgs from dingo.io.input import Data -from dingo.model.llm.llm_text_quality_model_base import LLMTextQualityModelBase +from dingo.model.llm.text_quality.llm_text_quality_v4 import LLMTextQualityV4 from dingo.model.rule.rule_common import RuleEnterAndSpace data = Data( @@ -86,12 +105,12 @@ data = Data( def llm(): - LLMTextQualityModelBase.dynamic_config = EvaluatorLLMArgs( + LLMTextQualityV4.dynamic_config = EvaluatorLLMArgs( key='YOUR_API_KEY', api_url='https://api.openai.com/v1/chat/completions', model='gpt-4o', ) - res = LLMTextQualityModelBase.eval(data) + res = LLMTextQualityV4.eval(data) print(res) @@ -100,7 +119,7 @@ def rule(): print(res) ``` -### 2.2 评估数据集 +### 2. 评估数据集 ```python from dingo.config import InputArgs @@ -114,11 +133,18 @@ input_data = { "format": "plaintext" # 格式: plaintext }, "executor": { - "eval_group": "sft", # SFT数据的规则集 "result_save": { "bad": True # 保存评估结果 } - } + }, + "evaluator": [ + { + "evals": [ + {"name": "RuleColonEnd"}, + {"name": "RuleSpecialCharacter"} + ] + } + ] } input_args = InputArgs(**input_data) @@ -127,21 +153,21 @@ result = executor.execute() print(result) ``` -## 3. 命令行界面 +## 命令行界面 -### 3.1 使用规则集评估 +### 使用规则集评估 ```shell python -m dingo.run.cli --input test/env/local_plaintext.json ``` -### 3.2 使用LLM评估(例如GPT-4o) +### 使用LLM评估(例如GPT-4o) ```shell python -m dingo.run.cli --input test/env/local_json.json ``` -## 4. 图形界面可视化 +## 图形界面可视化 进行评估后(设置`result_save.bad=True`),系统会自动生成前端页面。若要手动启动前端页面,请运行: @@ -153,10 +179,10 @@ python -m dingo.run.vsl --input 输出目录 ![GUI output](docs/assets/dingo_gui.png) -## 5. 在线演示 +## 在线演示 尝试我们的在线演示: [(Hugging Face)🤗](https://huggingface.co/spaces/DataEval/dingo) -## 6. 本地演示 +## 本地演示 尝试我们的本地演示: ```shell @@ -166,7 +192,7 @@ python app.py ![Gradio demo](docs/assets/gradio_demo.png) -## 7. Google Colab 演示 +## Google Colab 演示 通过Google Colab笔记本交互式体验Dingo:[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DataEval/dingo/blob/dev/examples/colab/dingo_colab_demo.ipynb) @@ -186,139 +212,271 @@ https://github.com/user-attachments/assets/aca26f4c-3f2e-445e-9ef9-9331c4d7a37b 此视频展示了关于 Dingo MCP 服务端与 Cursor 一起使用的分步演示。 -# 数据质量指标 +# 🎓 实践者关键概念 + +## 让 Dingo 适用于生产环境的原因? + +### 1. 
**多字段评估流水线** +在单次运行中对不同字段应用不同的质量检查: +```python +"evaluator": [ + {"fields": {"content": "isbn"}, "evals": [{"name": "RuleIsbn"}]}, + {"fields": {"content": "title"}, "evals": [{"name": "RuleAbnormalChar"}]}, + {"fields": {"content": "description"}, "evals": [{"name": "LLMTextQualityV5"}]} +] +``` +**为什么重要**:无需为每个字段编写单独的脚本即可评估结构化数据(如数据库表)。 + +### 2. **大数据集流式处理** +SQL 数据源使用 SQLAlchemy 的服务器端游标: +```python +# 处理数十亿行数据而不会内存溢出 +for data in dataset.get_data(): # 每次yield一行 + result = evaluator.eval(data) +``` +**为什么重要**:无需导出到中间文件即可处理生产数据库。 + +### 3. **内存中的字段隔离** +RAG 评估防止不同字段组合之间的上下文泄漏: +``` +outputs/ +├── user_input,response,retrieved_contexts/ # Faithfulness 组 +└── user_input,response/ # Answer Relevancy 组 +``` +**为什么重要**:评估多个字段组合时保证指标计算准确。 + +### 4. **混合规则-LLM 策略** +结合快速规则(100% 覆盖)和采样 LLM 检查(10% 覆盖): +```python +"evals": [ + {"name": "RuleAbnormalChar"}, # 快速,在所有数据上运行 + {"name": "LLMTextQualityV5"} # 昂贵,按需采样 +] +``` +**为什么重要**:平衡生产规模评估的成本和覆盖率。 + +### 5. **通过注册实现可扩展性** +清晰的插件架构用于自定义规则、prompt 和模型: +```python +@Model.rule_register('QUALITY_BAD_CUSTOM', ['default']) +class MyCustomRule(BaseRule): + @classmethod + def eval(cls, input_data: Data) -> EvalDetail: + # 示例:检查内容是否为空 + if not input_data.content: + return EvalDetail( + metric=cls.__name__, + status=True, # 发现问题 + label=[f'{cls.metric_type}.{cls.__name__}'], + reason=["内容为空"] + ) + return EvalDetail( + metric=cls.__name__, + status=False, # 未发现问题 + label=['QUALITY_GOOD'] + ) +``` +**为什么重要**:适应特定领域需求而无需分叉代码库。 + +--- -Dingo通过基于规则和基于提示的评估指标提供全面的数据质量评估。这些指标涵盖多个质量维度,包括有效性、完整性、相似性、安全性等。 +# 📚 数据质量指标 -📊 **[查看完整指标文档 →](docs/metrics.md)** +Dingo 提供 **70+ 评估指标**,跨多个维度,结合基于规则的速度和基于 LLM 的深度。 -我们的评估系统包括: -- **文本质量评估指标**:使用DataMan方法论和增强的多维评估进行预训练数据质量评估 -- **SFT数据评估指标**:针对监督微调数据的诚实、有帮助、无害评估 -- **分类指标**:主题分类和内容分类 -- **多模态评估指标**:图像分类和相关性评估 -- **基于规则的质量指标**:使用启发式规则进行效果性和相似性检测的自动化质量检查 -- **事实性评估指标**:基于 GPT-5 System Card 的两阶段事实性评估 -- 等等 +## 指标类别 -大部分指标都由学术来源支持,以确保客观性和科学严谨性。 +| 类别 | 示例 | 使用场景 | +|----------|----------|----------| +| **预训练文本质量** | 完整性、有效性、相似性、安全性 | LLM 预训练数据过滤 | +| **SFT 数据质量** | 诚实、有帮助、无害 (3H) | 指令微调数据 | +| **RAG 评估** | 忠实度、上下文精度、答案相关性 | RAG 系统评估 | +| **幻觉检测** | HHEM-2.1-Open、事实性检查 | 生产 AI 可靠性 | +| **分类** | 主题分类、内容标注 | 数据组织 | +| **多模态** | 图文相关性、VLM 质量 | 视觉语言数据 | +| **安全性** | PII 检测、Perspective API 毒性 | 隐私和安全 | -### 在评估中使用LLM评估 +📊 **[查看完整指标文档 →](docs/metrics.md)** +📖 **[RAG 评估指南 →](docs/rag_evaluation_metrics_zh.md)** +🔍 **[幻觉检测指南 →](docs/hallucination_guide.md)** +✅ **[事实性评估指南 →](docs/factcheck_guide.md)** -要在评估中使用这些评估prompt,请在配置中指定它们: +大部分指标都有学术研究支持,以确保科学严谨性。 + +## 快速使用指标 ```python +llm_config = { + "model": "gpt-4o", + "key": "YOUR_API_KEY", + "api_url": "https://api.openai.com/v1/chat/completions" +} + input_data = { - # Other parameters... 
- "executor": { - "prompt_list": ["QUALITY_BAD_SIMILARITY"], # Specific prompt to use - }, - "evaluator": { - "llm_config": { - "LLMTextQualityPromptBase": { # LLM model to use - "model": "gpt-4o", - "key": "YOUR_API_KEY", - "api_url": "https://api.openai.com/v1/chat/completions" - } + "evaluator": [ + { + "fields": {"content": "content"}, + "evals": [ + {"name": "RuleAbnormalChar"}, # 基于规则(快速) + {"name": "LLMTextQualityV5", "config": llm_config} # 基于LLM(深度) + ] } - } + ] } ``` -您可以自定义这些prompt,以关注特定的质量维度或适应特定的领域需求。当与适当的LLM模型结合时,这些prompt能够在多个维度上对数据质量进行全面评估。 +**自定义**:所有 prompts 都定义在 `dingo/model/llm/` 目录中(按类别组织:`text_quality/`、`rag/`、`hhh/` 等)。可针对特定领域需求进行扩展或修改。 -### 幻觉检测和RAG系统评估 -有关使用Dingo幻觉检测功能的详细指导,包括HHEM-2.1-Open本地推理和基于LLM的评估: +# 🌟 功能亮点 -📖 **[查看幻觉检测指南 →](docs/hallucination_guide.md)** +## 📊 多源数据集成 -### 事实性评估 +**多样化数据源** - 连接到你的数据所在之处 +✅ **本地文件**:JSONL、CSV、TXT、Parquet +✅ **SQL 数据库**:PostgreSQL、MySQL、SQLite、Oracle、SQL Server(支持流式处理) +✅ **云存储**:S3 和 S3 兼容存储 +✅ **ML 平台**:直接集成 HuggingFace 数据集 -有关使用Dingo两阶段事实性评估系统的详细指导: +**企业级 SQL 支持** - 生产数据库集成 +✅ 数十亿级数据集的内存高效流式处理 +✅ 连接池和自动资源清理 +✅ 复杂 SQL 查询(JOIN、WHERE、聚合) +✅ 通过 SQLAlchemy 支持多种方言 -📖 **[查看事实性评估指南 →](docs/factcheck_guide.md)** +**多字段质量检查** - 不同字段使用不同规则 +✅ 并行评估流水线(例如:ISBN 验证 + 文本质量同时进行) +✅ 字段别名和嵌套字段提取(`user.profile.name`) +✅ 每个字段独立结果报告 +✅ 灵活数据转换的 ETL 流水线架构 -# 规则组 +--- -Dingo为不同类型的数据集提供预配置的规则组: +## 🤖 RAG 系统评估 -| 组名 | 用例 | 示例规则 | -|-------|----------|---------------| -| `default` | 通用文本质量 | `RuleColonEnd`, `RuleContentNull`, `RuleDocRepeat`等 | -| `sft` | 微调数据集 | `default`中的规则加上用于幻觉检测的`RuleHallucinationHHEM` | -| `rag` | RAG系统评估 | 用于响应一致性检测的`RuleHallucinationHHEM`, `PromptHallucination` | -| `hallucination` | 幻觉检测 | 基于LLM评估的`PromptHallucination` | -| `pretrain` | 预训练数据集 | 包括`RuleAlphaWords`, `RuleCapitalWords`等20多条规则的全面集合 | +**5 个学术支持的指标** - 基于 RAGAS、DeepEval、TruLens 研究 +✅ **忠实度(Faithfulness)**:答案-上下文一致性(幻觉检测) +✅ **答案相关性(Answer Relevancy)**:答案-查询对齐 +✅ **上下文精度(Context Precision)**:检索精度 +✅ **上下文召回(Context Recall)**:检索召回 +✅ **上下文相关性(Context Relevancy)**:上下文-查询相关性 -使用特定规则组: +**全面报告** - 自动聚合统计 +✅ 每个指标的平均值、最小值、最大值、标准差 +✅ 按字段分组的结果 +✅ 批量和单次评估模式 -```python -input_data = { - "executor": { - "eval_group": "sft", # Use "default", "sft", "rag", "hallucination", or "pretrain" - } - # other parameters... -} -``` +📖 **[查看 RAG 评估指南 →](docs/rag_evaluation_metrics_zh.md)** + +--- + +## 🧠 混合评估系统 + +**基于规则** - 快速、确定性、成本效益高 +✅ 30+ 内置规则(文本质量、格式、PII 检测) +✅ 正则表达式、启发式、统计检查 +✅ 自定义规则注册 + +**基于 LLM** - 深度语义理解 +✅ OpenAI(GPT-4o、GPT-3.5)、DeepSeek、Kimi +✅ 本地模型(Llama3、Qwen) +✅ 视觉语言模型(InternVL、Gemini) +✅ 自定义 prompt 注册 + +**基于智能体** - 多步推理与工具 +✅ 网络搜索集成(Tavily) +✅ 自适应上下文收集 +✅ 多源事实验证 +✅ 自定义智能体与工具注册 + +**可扩展架构** +✅ 基于插件的规则/prompt/模型注册 +✅ 清晰的关注点分离(agents、tools、orchestration) +✅ 特定领域定制 + +--- + +## 🚀 灵活执行与集成 -# 功能亮点 +**多种接口** +✅ CLI 用于快速检查 +✅ Python SDK 用于集成 +✅ MCP(模型上下文协议)服务器用于 IDE(Cursor 等) -## 1. 多源和多模态支持 +**可扩展执行** +✅ 本地执行器用于快速迭代 +✅ Spark 执行器用于分布式处理 +✅ 可配置并发和批处理 -- **数据源**:本地文件、Hugging Face数据集、S3存储 -- **数据类型**:预训练、微调和评估数据集 -- **数据模态**:文本和图像 +**数据源** +✅ **本地文件**:JSONL、CSV、TXT、Parquet 格式 +✅ **Hugging Face**:直接与 HF 数据集中心集成 +✅ **S3 存储**:AWS S3 和 S3 兼容存储 +✅ **SQL 数据库**:PostgreSQL、MySQL、SQLite、Oracle、SQL Server(大规模数据流式处理) -## 2. 基于规则和模型的评估 +**模态** +✅ 文本(聊天、文档、代码) +✅ 图像(支持 VLM) +✅ 多模态(文本+图像一致性) -- **内置规则**:20多种通用启发式评估规则 -- **LLM集成**:OpenAI、Kimi和本地模型(如Llama3) -- **幻觉检测**:HHEM-2.1-Open本地模型和基于GPT的评估 -- **RAG系统评估**:响应一致性和上下文对齐评估 -- **自定义规则**:轻松扩展自己的规则和模型 -- **安全评估**:Perspective API集成 +--- -## 3. 
灵活的使用方式 +## 📈 丰富的报告和可视化 -- **接口**:CLI和SDK选项 -- **集成**:易于与其他平台集成 -- **执行引擎**:本地和Spark +**多层级报告** +✅ 带有总体评分的 Summary JSON +✅ 字段级分解 +✅ 每条规则违规的详细信息 +✅ 类型和名称分布 -## 4. 全面报告 +**GUI 可视化** +✅ 内置 Web 界面 +✅ 交互式数据探索 +✅ 异常追踪 -- **质量指标**:7维质量评估 -- **可追溯性**:异常追踪的详细报告 +**指标聚合** +✅ 自动统计(avg、min、max、std_dev) +✅ 按字段分组的指标 +✅ 总体质量评分 -# 使用指南 +# 📖 用户指南 -## 1. 自定义规则、Prompt和模型 +## 自定义规则、Prompt 和模型 -如果内置规则不满足您的需求,您可以创建自定义规则: +Dingo 提供灵活的扩展机制来满足特定领域需求。 -### 1.1 自定义规则示例 +**示例:** +- [自定义规则](examples/register/sdk_register_rule.py) +- [自定义模型](examples/register/sdk_register_llm.py) + +### 自定义规则示例 ```python from dingo.model import Model from dingo.model.rule.base import BaseRule -from dingo.config.input_args import EvaluatorRuleArgs from dingo.io import Data -from dingo.model.modelres import ModelRes - -@Model.rule_register('QUALITY_BAD_RELEVANCE', ['default']) -class MyCustomRule(BaseRule): - """检查文本中的自定义模式""" +from dingo.io.output.eval_detail import EvalDetail - dynamic_config = EvaluatorRuleArgs(pattern=r'your_pattern_here') +@Model.rule_register('QUALITY_BAD_CUSTOM', ['default']) +class DomainSpecificRule(BaseRule): + """检查特定领域的模式""" @classmethod - def eval(cls, input_data: Data) -> ModelRes: - res = ModelRes() - # 您的规则实现 - return res + def eval(cls, input_data: Data) -> EvalDetail: + text = input_data.content + + # 你的自定义逻辑 + is_valid = your_validation_logic(text) + + return EvalDetail( + metric=cls.__name__, + status=not is_valid, # False = 良好, True = 有问题 + label=['QUALITY_GOOD' if is_valid else 'QUALITY_BAD_CUSTOM'], + reason=["验证详情..."] + ) ``` -### 1.2 自定义LLM集成 +### 自定义LLM集成 ```python from dingo.model import Model @@ -332,12 +490,70 @@ class MyCustomModel(BaseOpenAI): 查看更多示例: - [注册规则](examples/register/sdk_register_rule.py) -- [注册Prompts](examples/register/sdk_register_prompt.py) - [注册模型](examples/register/sdk_register_llm.py) -## 2. 执行引擎 +### 智能体评估与工具 -### 2.1 本地执行 +Dingo 支持基于智能体的评估器,可以使用外部工具进行多步推理和自适应上下文收集: + +```python +from dingo.io import Data +from dingo.io.output.eval_detail import EvalDetail +from dingo.model import Model +from dingo.model.llm.agent.base_agent import BaseAgent + +@Model.llm_register('MyAgent') +class MyAgent(BaseAgent): + """支持工具的自定义智能体""" + + available_tools = ["tavily_search", "my_custom_tool"] + max_iterations = 5 + + @classmethod + def eval(cls, input_data: Data) -> EvalDetail: + # 使用工具进行事实核查 + search_result = cls.execute_tool('tavily_search', query=input_data.content) + + # 使用LLM进行多步推理 + result = cls.send_messages([...]) + + return EvalDetail(...) 
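# 示意说明:execute_tool 预期会调用 available_tools 中声明的某个工具,
# send_messages 用于驱动 LLM 的多步推理;此处省略的参数取决于你的智能体逻辑。
# 完整的 BaseAgent 接口请参阅 docs/agent_development_guide.md。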
+``` + +**内置智能体:** +- `AgentHallucination`: 增强的幻觉检测,支持网络搜索回退 + +**配置示例:** +```json +{ + "evaluator": [{ + "evals": [{ + "name": "AgentHallucination", + "config": { + "key": "openai-api-key", + "model": "gpt-4", + "parameters": { + "agent_config": { + "max_iterations": 5, + "tools": { + "tavily_search": {"api_key": "tavily-key"} + } + } + } + } + }] + }] +} +``` + +**了解更多:** +- [智能体开发指南](docs/agent_development_guide.md) +- [AgentHallucination 示例](examples/agent/agent_hallucination_example.py) +- [AgentFactCheck LangChain示例](examples/agent/agent_executor_example.py) + +## 执行引擎 + +### 本地执行 ```python from dingo.config import InputArgs @@ -353,7 +569,7 @@ bad_data = executor.get_bad_info_list() # 有问题数据列表 good_data = executor.get_good_info_list() # 高质量数据列表 ``` -### 2.2 Spark执行 +### Spark执行 ```python from dingo.config import InputArgs @@ -366,16 +582,24 @@ spark_rdd = spark.sparkContext.parallelize([...]) # 以Data对象形式的数 input_data = { "executor": { - "eval_group": "default", "result_save": {"bad": True} - } + }, + "evaluator": [ + { + "fields": {"content": "content"}, + "evals": [ + {"name": "RuleColonEnd"}, + {"name": "RuleSpecialCharacter"} + ] + } + ] } input_args = InputArgs(**input_data) executor = Executor.exec_map["spark"](input_args, spark_session=spark, spark_rdd=spark_rdd) result = executor.execute() ``` -## 3. 评估报告 +## 评估报告 评估后,Dingo生成: @@ -385,7 +609,6 @@ result = executor.execute() 报告说明: 1. **score**: `num_good` / `total` 2. **type_ratio**: 类型的数量 / 总数, 例如: `QUALITY_BAD_COMPLETENESS` / `total` -3. **name_ratio**: 名称的数量 / 总数, 例如: `QUALITY_BAD_COMPLETENESS-RuleColonEnd` / `total` 概要示例: ```json @@ -401,24 +624,24 @@ result = executor.execute() "num_bad": 1, "total": 2, "type_ratio": { - "QUALITY_BAD_COMPLETENESS": 0.5, - "QUALITY_BAD_RELEVANCE": 0.5 - }, - "name_ratio": { - "QUALITY_BAD_COMPLETENESS-RuleColonEnd": 0.5, - "QUALITY_BAD_RELEVANCE-RuleSpecialCharacter": 0.5 + "content": { + "QUALITY_BAD_COMPLETENESS.RuleColonEnd": 0.5, + "QUALITY_BAD_RELEVANCE.RuleSpecialCharacter": 0.5 + } } } ``` -# 未来计划 +# 🔮 未来计划 -- [ ] 更丰富的图文评测指标 -- [ ] 音频和视频数据模态评测 -- [ ] 小模型评测(如fasttext、Qurating) -- [ ] 数据多样性评测 +**即将推出的功能**: +- [ ] **Agent-as-a-Judge** - 多轮迭代评估 +- [ ] **SaaS 平台** - 托管评估服务,提供 API 访问和仪表板 +- [ ] **音频和视频模态** - 扩展到文本/图像之外 +- [ ] **多样性指标** - 统计多样性评估 +- [ ] **实时监控** - 生产流水线中的持续质量检查 -# 局限性 +## 局限性 当前内置的检测规则和模型方法主要关注常见的数据质量问题。对于特殊评估需求,我们建议定制化检测规则。 @@ -427,6 +650,7 @@ result = executor.execute() - [RedPajama-Data](https://github.com/togethercomputer/RedPajama-Data) - [mlflow](https://github.com/mlflow/mlflow) - [deepeval](https://github.com/confident-ai/deepeval) +- [ragas](https://github.com/explodinggradients/ragas) # 贡献 diff --git a/app/package.json b/app/package.json index e7eaa83c..dbdb9763 100644 --- a/app/package.json +++ b/app/package.json @@ -45,6 +45,7 @@ "antd": "^5.21.1", "classnames": "^2.5.1", "copy-to-clipboard": "^3.3.3", + "echarts": "^6.0.0", "echarts-for-react": "^3.0.2", "electron-updater": "^6.1.7", "fs-extra": "^11.2.0", @@ -53,6 +54,7 @@ "minimist": "^1.2.8", "react-intl": "^6.7.0", "react-router-dom": "^6.26.2", + "tinycolor2": "^1.6.0", "zustand": "^5.0.0-rc.2" }, "devDependencies": { @@ -79,5 +81,6 @@ "tailwindcss": "^3.4.13", "typescript": "^5.5.2", "vite": "^5.3.1" - } + }, + "packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e" } diff --git a/app/src/main/index.ts b/app/src/main/index.ts index 94f752a3..a2e09ec0 100644 --- a/app/src/main/index.ts +++ 
b/app/src/main/index.ts @@ -181,6 +181,140 @@ app.whenReady().then(() => { } ); + // 通用的递归遍历 jsonl 文件的辅助函数 + async function traverseJsonlFiles( + dirPath: string, + processFile: ( + fullPath: string, + relativePath: string + ) => Promise + ): Promise { + const results: T[] = []; + + async function traverseDirectory( + currentPath: string, + relativePath: string = '' + ): Promise { + try { + const items = await fs.readdir(currentPath, { + withFileTypes: true, + }); + + for (const item of items) { + const fullPath = path.join(currentPath, item.name); + const newRelativePath = relativePath + ? `${relativePath}/${item.name}` + : item.name; + + if (item.isDirectory()) { + // 递归遍历子目录 + await traverseDirectory(fullPath, newRelativePath); + } else if ( + item.isFile() && + item.name.endsWith('.jsonl') && + item.name !== 'summary.json' + ) { + // 处理 jsonl 文件 + try { + const result = await processFile( + fullPath, + newRelativePath + ); + if (result !== null) { + results.push(result); + } + } catch (error) { + console.error( + `Error processing file ${fullPath}:`, + error + ); + } + } + } + } catch (error) { + console.error(`Error reading directory ${currentPath}:`, error); + } + } + + await traverseDirectory(dirPath); + return results; + } + + // 递归获取所有 jsonl 文件的路径列表(相对路径) + async function getAllJsonlFilePathsRecursive( + dirPath: string + ): Promise { + const filePaths = await traverseJsonlFiles( + dirPath, + async (_, relativePath) => relativePath + ); + return filePaths.sort(); + } + + ipcMain.handle( + 'get-all-jsonl-file-paths', + async (event, dirPath: string) => { + try { + return await getAllJsonlFilePathsRecursive(dirPath); + } catch (error) { + console.error('Error getting all JSONL file paths:', error); + throw error; + } + } + ); + + // 修改 readAllJsonlFilesRecursive,为每个数据项添加文件路径信息 + async function readAllJsonlFilesRecursiveWithPath( + dirPath: string + ): Promise { + const allDataArrays = await traverseJsonlFiles( + dirPath, + async (fullPath, relativePath) => { + try { + const fileContent = await fs.readFile(fullPath, 'utf-8'); + const lines = fileContent + .trim() + .split('\n') + .filter(line => line.trim()); + const parsedData = lines + .map(line => { + try { + const data = JSON.parse(line); + // 为每个数据项添加文件路径信息 + return { + ...data, + _filePath: relativePath, + }; + } catch (e) { + console.error( + `Error parsing line in ${fullPath}:`, + e + ); + return null; + } + }) + .filter(item => item !== null); + return parsedData; + } catch (error) { + console.error(`Error reading file ${fullPath}:`, error); + return null; + } + } + ); + + // 展平所有数组 + return allDataArrays.flat(); + } + + ipcMain.handle('read-all-jsonl-files', async (event, dirPath: string) => { + try { + return await readAllJsonlFilesRecursiveWithPath(dirPath); + } catch (error) { + console.error('Error reading all JSONL files:', error); + throw error; + } + }); + ipcMain.handle('get-input-path', () => { const argv = minimist(process?.argv?.slice(2)); const inputPath = argv.input; diff --git a/app/src/preload/index.d.ts b/app/src/preload/index.d.ts index 10bda1c2..aff81daf 100644 --- a/app/src/preload/index.d.ts +++ b/app/src/preload/index.d.ts @@ -13,6 +13,8 @@ declare global { primaryName: string, secondaryNameList: string[] ) => Promise; + readAllJsonlFiles: (dirPath: string) => Promise; + getAllJsonlFilePaths: (dirPath: string) => Promise; getInputPath: () => Promise; }; } diff --git a/app/src/preload/index.ts b/app/src/preload/index.ts index fa8b1041..22714693 100644 --- a/app/src/preload/index.ts +++ 
b/app/src/preload/index.ts @@ -23,6 +23,10 @@ const api = { primaryName, secondaryNameList ), + readAllJsonlFiles: (dirPath: string): Promise => + ipcRenderer.invoke('read-all-jsonl-files', dirPath), + getAllJsonlFilePaths: (dirPath: string): Promise => + ipcRenderer.invoke('get-all-jsonl-file-paths', dirPath), getInputPath: (): Promise => ipcRenderer.invoke('get-input-path'), openExternal: (url: string) => ipcRenderer.invoke('open-external', url), diff --git a/app/src/renderer/src/components/detail-card/index.tsx b/app/src/renderer/src/components/detail-card/index.tsx index fa5f5779..78eb81d2 100644 --- a/app/src/renderer/src/components/detail-card/index.tsx +++ b/app/src/renderer/src/components/detail-card/index.tsx @@ -20,7 +20,7 @@ interface DetailCardProps { data: DataItem; showHighlight?: boolean; } - +//该组件此次迭代该组件暂时不用了 const DetailCard: React.FC = ({ data, showHighlight }) => { const [isExpanded, setIsExpanded] = useState(false); const textRef = useRef(null); diff --git a/app/src/renderer/src/components/detail-table.tsx b/app/src/renderer/src/components/detail-table.tsx index 49923e6d..204cec27 100644 --- a/app/src/renderer/src/components/detail-table.tsx +++ b/app/src/renderer/src/components/detail-table.tsx @@ -1,16 +1,10 @@ import React, { useState, useEffect, useMemo } from 'react'; -import { Table, Tooltip, Pagination, Switch } from 'antd'; +import { Table } from 'antd'; import { ColumnsType } from 'antd/es/table'; import { useDALStore } from '@/store/dal'; import { FormattedMessage } from 'react-intl'; import { SummaryData } from '@/pages/main-home/components/summary-data-table'; -import { EyeOutlined, EyeInvisibleOutlined } from '@ant-design/icons'; -import { uniqBy } from 'lodash'; import FilterCascader from './filter-cascader'; -import DetailCard from './detail-card'; -import Empty from '@/components/empty'; -import IconFont from './icon-font'; -import cls from 'classnames'; import HighlightText from './HightLightText'; interface DetailTableProps { @@ -29,61 +23,41 @@ interface DetailTableProps { } interface DataItem { - data_id: string; - prompt: string; - content: string; - type_list: string[]; - name_list: string[]; - reason_list: (string | string[])[]; + [key: string]: any; // eslint-disable-line @typescript-eslint/no-explicit-any } -const DetailTable: React.FC = ({ - summary, - currentPath, - detailPathList, - allDataPath, - defaultErrorTypes, - defaultErrorNames, -}) => { +const DetailTable: React.FC = ({ currentPath }) => { const [data, setData] = useState([]); const [loading, setLoading] = useState(true); - const [errorTypes, setErrorTypes] = useState([]); - const [selectedErrorTypes, setSelectedErrorTypes] = useState([]); - const [selectedErrorNames, setSelectedErrorNames] = useState([]); - const [errorNames, setErrorNames] = useState([]); - const [showHighlight, setShowHighlight] = useState(true); + const [jsonlFilePaths, setJsonlFilePaths] = useState([]); const dal = useDALStore(state => state.dal); - const [viewMode, setViewMode] = useState<'table' | 'grid'>('table'); const [current, setCurrent] = useState({ currentPage: 1, - pageSize: 10, + pageSize: 20, }); const [filter, setFilter] = useState<{ - primaryName?: string; - secondaryName?: string; + filePath?: string; }>({}); - // console.log('test-data', summary, detailPathList, allDataPath); useEffect(() => { const loadData = async () => { try { setLoading(true); - setErrorNames(Object.keys(summary.name_ratio)); - setErrorTypes(Object.keys(summary.type_ratio)); - let allData: DataItem[] = []; - for (const { 
primaryName, secondaryNameList } of allDataPath) { - const result = await dal?.getEvaluationDetail?.({ + // 获取所有 jsonl 文件路径列表 + const filePaths = + (await dal?.getAllJsonlFilePaths?.({ currentPath, - primaryName, - secondaryNameList, - }); - if (result) { - allData = allData.concat(result); - } - } + })) || []; + setJsonlFilePaths(filePaths); - setData(uniqBy(allData, 'data_id')); + // 直接读取所有 jsonl 文件(排除 summary.json) + const allData: DataItem[] = + ((await dal?.getAllJsonlFiles?.({ + currentPath, + })) as DataItem[]) || []; + + setData(allData); setCurrent({ ...current, currentPage: 1, @@ -96,195 +70,145 @@ const DetailTable: React.FC = ({ }; loadData(); - }, [currentPath, detailPathList]); - - const columns: ColumnsType = [ - { - title: '数据 ID', - dataIndex: 'data_id', - key: 'data_id', - minWidth: 100, - }, - { - title: 一级维度, - dataIndex: 'type_list', - key: 'type_list', - render: types => JSON.stringify(types), - // filters: errorTypes.map(type => ({ text: type, value: type })), - // onFilter: (value, record) => - // record.type_list.includes(value.toString()), - // filterIcon: filtered => ( - // - // ), - // filteredValue: selectedErrorTypes, - }, - { - title: () => 二级维度, - dataIndex: 'name_list', - key: 'name_list', - render: names => JSON.stringify(names), - // filters: errorNames.map(name => ({ text: name, value: name })), - // onFilter: (value, record) => - // record.name_list.includes(value.toString()), - // filteredValue: selectedErrorNames, - // filterIcon: filtered => ( - // - // ), - }, - { - title: ( - 内容 - ), - dataIndex: 'content', - key: 'content', - render: (text, record) => { - return ( - - ); - }, - }, + }, [currentPath]); - { - title: '原因', - dataIndex: 'reason_list', - key: 'reason_list', - minWidth: 300, - render: reasons => ( - {JSON.stringify(reasons)} - ), - }, - ]; - - const handleFilter = (primaryName: string, secondaryName: string) => { - setFilter({ primaryName, secondaryName }); + const handleFilter = (filePath: string) => { + setFilter({ filePath }); setCurrent({ ...current, currentPage: 1, }); }; - const hiddenClass = 'w-0 h-0 z-[-1] overflow-hidden'; - - useEffect(() => { - setSelectedErrorTypes(defaultErrorTypes || []); - }, [defaultErrorTypes]); - useEffect(() => { - setSelectedErrorNames(defaultErrorNames || []); - }, [defaultErrorNames]); - const filterData = useMemo(() => { - const _primaryName = filter?.primaryName; - if (_primaryName) { - const _secondaryName = filter?.secondaryName; - const _res = data?.filter(i => - i?.type_list?.includes(_primaryName) - ); - return _secondaryName - ? 
_res?.filter(i => i?.name_list?.includes(_secondaryName)) - : _res; + const selectedFilePath = filter?.filePath; + if (selectedFilePath && selectedFilePath !== 'all') { + // 根据文件路径筛选数据 + return data?.filter(i => { + const itemFilePath = i?._filePath; + return itemFilePath === selectedFilePath; + }); } else { + // 显示全部数据 return data; } }, [data, filter]); - const filterCardListData = useMemo(() => { - const startIndex = (current.currentPage - 1) * current.pageSize; - const endIndex = startIndex + current.pageSize; - return filterData.slice(startIndex, endIndex); - }, [filterData, current.currentPage, current.pageSize]); + // 动态生成列配置 + const columns: ColumnsType = useMemo(() => { + if (!filterData || filterData.length === 0) { + return []; + } + + // 收集所有唯一的键,排除 _filePath 字段 + const allKeys = new Set(); + filterData.forEach(item => { + Object.keys(item).forEach(key => { + // 过滤掉 _filePath 字段 + if (key !== '_filePath') { + allKeys.add(key); + } + }); + }); + + // 生成列配置 + const generatedColumns: ColumnsType = Array.from(allKeys).map( + key => { + return { + title: key, + dataIndex: key, + key: key, + render: (value: unknown, record) => { + if (key === 'content') { + return ( + + ); + } + // 如果是对象,显示为格式化的 JSON + if ( + typeof value === 'object' && + value !== null && + !Array.isArray(value) + ) { + return ( + + ); + } + // 如果是数组,显示为 JSON + if (Array.isArray(value)) { + return ( + + {JSON.stringify(value)} + + ); + } + // 如果是字符串,直接显示 + if (typeof value === 'string') { + return ( + + {value || '-'} + + ); + } + // 其他类型直接显示 + return ( + + {String(value ?? '-')} + + ); + }, + }; + } + ); + + return generatedColumns; + }, [filterData]); return ( <>
- - {`${filterData?.length || 0} 条数据`} - - 命中内容高亮 - - -
e.stopPropagation()} - > - {[ - { value: 'table', icon: 'icon-listViewOutlined' }, - { - value: 'grid', - icon: 'icon-SwitchViewOutlined', - }, - ]?.map(i => ( - - setViewMode(i.value)} - /> - - ))} -
-
-
- {filterCardListData?.length ? ( - filterCardListData?.map(i => { - return ( - - ); - }) - ) : ( - - )} - ( - - )} - onChange={(_page, _pageSize) => { - setCurrent({ - currentPage: _page, - pageSize: _pageSize, - }); - }} + + {`${filterData?.length || 0} 条数据`}
columns={columns} dataSource={filterData} loading={loading} - className={cls('mt-4', viewMode !== 'table' && hiddenClass)} - rowKey={record => `${record?.data_id}_${record?.content}`} + className="mt-4" + rowKey={(record, index) => { + return `${record?._filePath}_${index}`; + }} pagination={{ pageSize: current?.pageSize, showQuickJumper: true, @@ -293,7 +217,7 @@ const DetailTable: React.FC = ({ ), }} - onChange={(pagination, filters) => { + onChange={pagination => { if (current?.pageSize !== pagination.pageSize) { setCurrent({ currentPage: 1, @@ -306,7 +230,25 @@ const DetailTable: React.FC = ({ }); } }} - scroll={{ x: '100%' }} + scroll={{ x: 'max-content' }} + components={{ + body: { + cell: ( + props: React.TdHTMLAttributes + ) => ( + + ), + }, + }} /> ); diff --git a/app/src/renderer/src/components/filter-cascader/index.tsx b/app/src/renderer/src/components/filter-cascader/index.tsx index 8b58306f..cc560efa 100644 --- a/app/src/renderer/src/components/filter-cascader/index.tsx +++ b/app/src/renderer/src/components/filter-cascader/index.tsx @@ -1,103 +1,60 @@ import { Cascader } from 'antd'; -import React, { useState, useEffect, useMemo } from 'react'; +import React, { useState, useMemo } from 'react'; import IconFont from '@/components/icon-font'; import styles from './index.module.scss'; -import { SummaryData } from '@/pages/main-home/components/summary-data-table'; import cls from 'classnames'; interface FilterCascaderProps { - summary: SummaryData; - onFilter: (primaryName: string, secondaryName: string) => void; + jsonlFilePaths: string[]; + onFilter: (filePath: string) => void; } const FilterCascader: React.FC = ({ - summary, + jsonlFilePaths, onFilter, }) => { - const [firstText, setFirstText] = useState(''); - const [secondText, setSecondText] = useState(''); + const [selectedText, setSelectedText] = useState(''); const [selectedValue, setSelectedValue] = useState(['all']); const [isDropdownOpen, setIsDropdownOpen] = useState(false); - const cascaderOptions = [ - { - value: 'all', - label: '全部', - }, - ...Object.entries(summary?.type_ratio).map(([key, value]) => { - const primaryOption = { - value: key, - label: ( - - {`${key}`} - - {(value * 100).toFixed(1)}% - - - ), - children: [] as { value: string; label: any }[], - }; - - // 处理二级选项 - Object.entries(summary?.name_ratio).forEach( - ([nameKey, nameValue]) => { - if (nameKey.startsWith(`${key}-`)) { - // const secondaryName = nameKey.split('-')[1]; - primaryOption.children.push({ - value: nameKey, - label: ( - - {`${nameKey}`} - - {(nameValue * 100).toFixed(1)}% - - - ), - }); - } - } - ); + // 将所有 jsonl 文件路径作为一级列表 + const cascaderOptions = useMemo(() => { + const options: Array<{ + value: string; + label: string; + }> = [ + { + value: 'all', + label: '全部', + }, + ...jsonlFilePaths.sort().map(filePath => ({ + value: filePath, + label: filePath, + })), + ]; - return primaryOption; - }), - ]; - const onChange = (value: any) => { - if (!value || value.length === 0) { - setFirstText(''); - setSecondText(''); + return options; + }, [jsonlFilePaths]); + const onChange = (value: string | string[] | null) => { + if (!value || (Array.isArray(value) && value.length === 0)) { + setSelectedText(''); setSelectedValue(['all']); - onFilter('', ''); + onFilter(''); return; } - const [primaryName, secondaryName] = value; - setSelectedValue(value); + // 由于是单级列表,value 直接就是文件路径 + const selectedPath = Array.isArray(value) ? 
value[0] : value; + setSelectedValue([selectedPath]); - if (primaryName === 'all') { - setFirstText(''); - setSecondText(''); - onFilter('', ''); + if (selectedPath === 'all') { + setSelectedText(''); + onFilter(''); return; } - if (primaryName) { - setFirstText(primaryName); - } - if (secondaryName) { - setSecondText(secondaryName); - } else { - setSecondText(''); - } - - onFilter(primaryName as string, secondaryName as string); - }; - - const handlePrimaryClick = (e: React.MouseEvent) => { - if (firstText && firstText !== 'all') { - setSecondText(''); - setSelectedValue([firstText]); - onFilter(firstText, ''); - } + setSelectedText(selectedPath); + onFilter(selectedPath); }; return ( @@ -117,24 +74,9 @@ const FilterCascader: React.FC = ({ onDropdownVisibleChange={setIsDropdownOpen} > - - {firstText} + + {selectedText || '全部测评数据'} - {secondText && ( - - )} - {secondText && ( - - {secondText} - - )} - {!firstText && !secondText && '全部测评数据'} { const colors = [ @@ -88,7 +89,7 @@ const CustomLegend = ({ // 检查是否有二级数据 const hasSecondLevel = firstLevelType => { - return Object.keys(data.name_ratio).some(key => + return Object.keys(data.type_ratio?.content||{}).some(key => key.startsWith(firstLevelType + '-') ); }; @@ -184,24 +185,25 @@ const CustomLegend = ({ const PieChart = ({ data }: { data: SummaryData }) => { // 存储当前选中的一级标签 const [activeFirstLevel, setActiveFirstLevel] = useState(''); + // 我要取得data.type_ratio的第一个key + const [selected, setSelected] = useState(Object.keys(data.type_ratio || {})[0] || ''); - // 一级数据处理 - const firstLevelData = Object.entries(data?.type_ratio).map( - ([key, value], index) => ({ - name: key, - value, - itemStyle: { - color: tinycolor(getColorByRatio(index, false)) - ?.setAlpha(0.8) - .toRgbString(), - hoverColor: getColorByRatioHover(index), - }, - }) - ); + + // 安全获取 type_ratio,支持 content 属性或直接使用 type_ratio + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const typeRatioData = (data.type_ratio as any)?.content || data.type_ratio || {}; + const typeRatio = data.type_ratio || {}; + const selectList = Object.keys(data.type_ratio || {}).map((key) => ({ + value: key, + label: key, + })); // 获取二级数据的函数 - const getSecondLevelData = firstLevelType => { - return Object.entries(data.name_ratio) + const getSecondLevelData = (firstLevelType: string) => { + if (!typeRatioData || typeof typeRatioData !== 'object') { + return []; + } + return Object.entries(typeRatioData) .filter(([key]) => key.startsWith(firstLevelType + '-')) .map(([key, value], idx) => ({ name: key.split('-')[1], @@ -215,17 +217,33 @@ const PieChart = ({ data }: { data: SummaryData }) => { })); }; + //根据筛选获得扇形图的右侧展示的一级目录 + const firstLevelData = useMemo(()=>{ + return Object.entries(typeRatio[selected] || {}).map( + ([key, value], index) => ({ + name: key, + value, + itemStyle: { + color: tinycolor(getColorByRatio(index, false)) + ?.setAlpha(0.8) + .toRgbString(), + hoverColor: getColorByRatioHover(index), + }, + }) + ); + },[selected]); + // 图例点击事件处理 const onEvents = { legendselectchanged: params => { // 获取被点击的图例名称 const clickedName = Object.entries(params.selected).find( - ([_, selected]) => selected + ([, selected]) => selected )?.[0]; // 如果点击的是当前活动的一级标签,则关闭二级展示 if (activeFirstLevel === clickedName) { - setActiveFirstLevel(null); + setActiveFirstLevel(''); } else { setActiveFirstLevel(clickedName!); } @@ -350,6 +368,17 @@ const PieChart = ({ data }: { data: SummaryData }) => { style={{ height: '100%', width: '100%', minWidth: 800 }} className="flex justify-center" > +
+