MigoXLab
diff --git a/dingo/model/llm/hhh/llm_text_3h.py b/dingo/model/llm/hhh/llm_text_3h.py
Lines changed: 11 additions & 5 deletions
diff --git a/docs/artimuse.md b/docs/artimuse.md
Lines changed: 8 additions & 9 deletions
diff --git a/docs/ats_resume_guide.md b/docs/ats_resume_guide.md
Lines changed: 47 additions & 10 deletions
diff --git a/docs/document_ocr.md b/docs/document_ocr.md
Lines changed: 6 additions & 8 deletions
diff --git a/docs/document_parsing_quality_guide.md b/docs/document_parsing_quality_guide.md
Lines changed: 11 additions & 12 deletions
diff --git a/docs/factcheck_guide.md b/docs/factcheck_guide.md
Lines changed: 20 additions & 40 deletions
@@ -13,7 +13,7 @@ class LLMText3H(BaseOpenAI):
     def build_messages(cls, input_data):
         question = input_data.prompt
         response = input_data.content
-        prompt_content = cls.prompt.content % (question, response)
+        prompt_content = cls.prompt % (question, response)
 
         messages = [{"role": "user", "content": prompt_content}]
 
@@ -38,15 +38,21 @@ def process_response(cls, response: str) -> EvalDetail:
 
         result = EvalDetail(metric=cls.__name__)
 
+        # Get the quality dimension name from class name
+        # e.g., LLMText3HHelpful -> HELPFUL
+        class_prefix = "LLMText3H"
+        if cls.__name__.startswith(class_prefix):
+            quality_name = cls.__name__[len(class_prefix):].upper()
+        else:
+            quality_name = cls.__name__.upper()
+
         # eval_status
         if response_model.score == 1:
-            tmp_name = cls.prompt.__name__[8:].upper()
-            result.label = [f"{QualityLabel.QUALITY_GOOD}.{tmp_name}"]
+            result.label = [f"{QualityLabel.QUALITY_GOOD}.{quality_name}"]
             result.reason = [response_model.reason] if response_model.reason else ["Response meets quality criteria"]
         else:
             result.status = True
-            tmp_name = "NOT_" + cls.prompt.__name__[8:].upper()
-            result.label = [f"QUALITY_BAD.{tmp_name}"]
+            result.label = [f"QUALITY_BAD.NOT_{quality_name}"]
             result.reason = [response_model.reason] if response_model.reason else ["Response fails quality criteria"]
 
         return result
@@ -24,7 +24,7 @@ RuleImageArtimuse 基于 ArtiMuse 在线服务对输入图片进行美学质量
 
 ## 核心方法
 
-### `eval(cls, input_data: Data) -> ModelRes`
+### `eval(cls, input_data: Data) -> EvalDetail`
 
 这是规则的主要评估方法，接收包含图像 URL 的 `Data` 对象，返回评估结果。
 
@@ -50,20 +50,19 @@ RuleImageArtimuse 基于 ArtiMuse 在线服务对输入图片进行美学质量
 
 #### 返回值
 
-返回 `ModelRes` 对象，包含以下属性：
+返回 `EvalDetail` 对象，包含以下属性：
 
-- `eval_status`: 布尔值，表示图像质量是否不合格（低于阈值）
-- `type`: 评估结果类型（"Artimuse_Succeeded" 或 "Artimuse_Fail"）
-- `name`: 评估结果名称（"BadImage" 或 "GoodImage" 或 "Exception"）
+- `metric`: 指标名称（"RuleImageArtimuse"）
+- `status`: 布尔值，表示图像质量是否不合格（即低于阈值）：True=不合格，False=合格
+- `label`: 质量标签列表（如 ["Artimuse_Succeeded.BadImage"] 或 ["QUALITY_GOOD"]）
 - `reason`: 包含详细评估信息或异常信息的数组（字符串化 JSON）
 
 ## 异常处理
 
-当评估过程中发生异常时，返回的 `ModelRes` 对象将包含：
+当评估过程中发生异常时，返回的 `EvalDetail` 对象将包含：
 
-- `eval_status`: `False`
-- `type`: `"Artimuse_Fail"`
-- `name`: `"Exception"`
+- `status`: `False`
+- `label`: `["Artimuse_Fail.Exception"]`
 - `reason`: 包含异常信息的数组
 
 ## 使用示例
 
@@ -16,6 +16,12 @@ ATS 工具套件用于：
 
 分析简历与 JD 的匹配度，输出加权匹配分数和详细分析报告。
 
+**核心功能：**
+- 语义匹配（不仅是字符串匹配）
+- 同义词自动识别（如 k8s → Kubernetes）
+- 负向约束识别（Excluded 技能警告）
+- 基于证据的匹配（引用简历原文）
+
 **输入字段：**
 | 字段 | 类型 | 必需 | 说明 |
 |------|------|------|------|
@@ -26,8 +32,17 @@ ATS 工具套件用于：
 | 字段 | 类型 | 说明 |
 |------|------|------|
 | `score` | float | 匹配分数 (0.0-1.0) |
-| `error_status` | bool | 是否低于阈值 (默认 0.6) |
-| `reason` | List[str] | 详细分析报告 |
+| `status` | bool | 是否低于阈值 (True=低于阈值/未通过，False=通过) |
+| `reason` | List[str] | 详细分析报告（文本格式） |
+
+**内置同义词映射 (SYNONYM_MAP)：**
+```
+k8s → Kubernetes, js → JavaScript, ts → TypeScript
+py → Python, tf → TensorFlow, pt → PyTorch
+nodejs → Node.js, postgres → PostgreSQL
+aws → Amazon Web Services, gcp → Google Cloud Platform
+ml → Machine Learning, dl → Deep Learning, nlp → NLP
+```
 
 ### 2. LLMResumeOptimizer（简历优化器）
 
@@ -80,6 +95,8 @@ jd = """
 match_data = Data(data_id='test_1', content=resume, prompt=jd)
 match_result = LLMKeywordMatcher.eval(match_data)
 print(f"匹配分数: {match_result.score}")
+print(f"是否通过: {'通过' if not match_result.status else '未通过'}")
+print(f"分析报告: {match_result.reason[0]}")
 
 # Step 2: 简历优化
 optimize_data = Data(
@@ -89,18 +106,23 @@ optimize_data = Data(
     context='{"match_details": {"missing": [{"skill": "Docker", "importance": "Required"}]}}'
 )
 opt_result = LLMResumeOptimizer.eval(optimize_data)
-print(f"优化结果: {opt_result.reason[0]}")
+print(f"优化摘要: {opt_result.reason[0]}")
+print(f"完整结果: {opt_result.optimized_content}")
 ```
 
 ## 📊 匹配分数计算
 
-### 权重分配
+### 权重公式
+
+```
+score = (Required_Matched × 2 + Nice_Matched × 1) / (Required_Total × 2 + Nice_Total × 1)
+```
 
 | 类别 | 权重 | 说明 |
 |------|------|------|
-| Required (必需) | 0.7 | 缺失会显著降低分数 |
-| Nice-to-have (加分) | 0.3 | 缺失影响较小 |
-| Excluded (排除) | -0.1 | 存在会扣分 |
+| Required (必需) | ×2 | 缺失会显著降低分数 |
+| Nice-to-have (加分) | ×1 | 缺失影响较小 |
+| Excluded (排除) | 不计分 | 仅生成警告，不影响分数 |
 
 ### 阈值配置
 
@@ -156,16 +178,31 @@ Nice-to-have (Missing): Kubernetes
 
 ### ResumeOptimizer 输出
 
-结果同样存放在 `result.reason[0]` 中，JSON 格式：
+- **`reason[0]`**: 人类可读的摘要文本
+- **`optimized_content`**: 完整的 JSON 优化结果
 
 ```python
 # 访问方式
 result = LLMResumeOptimizer.eval(data)
-import json
-output = json.loads(result.reason[0])
+
+# 摘要文本
+print(result.reason[0])
+
+# 完整 JSON 结果
+opt = result.optimized_content
+print(opt.get('optimization_summary'))
+print(opt.get('section_changes'))
 ```
 
 **`reason[0]` 内容示例：**
+```
+Overall: 优化了专业技能板块
+Keywords Added: Docker
+Associative: Kubernetes (了解概念)
+Sections Modified: 专业技能
+```
+
+**`optimized_content` 结构：**
 ```json
 {
   "optimization_summary": {
 
@@ -22,9 +22,7 @@ Dingo 提供了一种基于LLM的文档OCR解析质量评估工具，可帮助
 dingo/
   ├── model/
   │   ├── llm/
-  │   │   └── vlm_document_parsing.py         # 评估器实现
-  │   └── prompt/
-  │       └── prompt_mineru_recognize.py      # 评估提示词
+  │   │   └── llm_document_parsing_ocr.py     # 评估器实现（含内嵌Prompt）
   │── examples/
   │   └── document_parser/
   │       └── document_parsing_quality_ocr.py  # 单条评估示例
@@ -75,11 +73,11 @@ input_data = {
 #### 输出结果格式
 
 ```python
-# result 是 ModelRes 对象，包含以下字段：
-result.type          # 错误问题一级标签: prompt中定义的一级错误大类
-result.name          # 错误问题二级标签: 一级错误大类对应的详细错误标签 List[str]
-result.eval_status  # 错误状态: False 或 True
-result.reason        # 评估原因: List[str]
+# result 是 EvalDetail 对象，包含以下字段：
+result.metric        # 指标名称: "LLMMinerURecognizeQuality"
+result.label         # 错误标签列表: ["error_category1.error_category2.error_label1.error_label2"]
+result.status        # 错误状态: False (默认值)
+result.reason        # 评估原因: List[str]，包含完整的JSON分析结果
 ```
 
 
 
@@ -1,12 +1,12 @@
-# VLMDocumentParsingQuality 文档解析评估工具 使用文档
+# VLMDocumentParsing 文档解析评估工具 使用文档
 
 Dingo 提供了一种基于VLM的文档解析质量评估与可视化工具，可帮助您：
 - 评估文档解析模型输出质量
 - 生成模型质量报告
 
 ## 工具介绍
 
-### VLMDocumentParsingQuality：文档解析评估工具
+### VLMDocumentParsing：文档解析评估工具
 
 #### 功能说明
 该工具用于评估文档解析模型效果，具体功能包括：
@@ -22,9 +22,8 @@ Dingo 提供了一种基于VLM的文档解析质量评估与可视化工具，
 dingo/
   ├── model/
   │   ├── llm/
-  │   │   └── vlm_document_parsing.py         # 评估器实现
-  │   └── prompt/
-  │       └── prompt_document_parsing.py      # 评估提示词
+  │   │   └── mineru/
+  │   │       └── vlm_document_parsing.py     # 评估器实现（含内嵌Prompt）
   │── examples/
   │   └── document_parser/
   │       └── vlm_document_parser_quality.py  # 单条评估示例
@@ -64,7 +63,7 @@ input_data = {
         },
         "evaluator": {
             "llm_config": {
-                "VLMDocumentParsingQuality": {
+                "VLMDocumentParsing": {
                     "key": "",
                     "api_url": "",
                 }
@@ -76,11 +75,11 @@ input_data = {
 #### 输出结果格式
 
 ```python
-# result 是 ModelRes 对象，包含以下字段：
-result.type          # 错误问题一级标签: prompt中定义的一级错误大类
-result.name          # 错误问题二级标签: 一级错误大类对应的详细错误标签 List[str]
-result.eval_status  # 错误状态: False 或 True
-result.reason        # 评估原因: List[str]
+# result 是 EvalDetail 对象，包含以下字段：
+result.metric        # 指标名称: "VLMDocumentParsing"
+result.label         # 错误标签列表: ["公式相关问题.行内公式漏检", "表格相关问题.单元格内容错误"]
+result.status        # 错误状态: False (默认值，该类不设置)
+result.reason        # 评估原因: List[str]，包含完整的JSON分析结果
 ```
 
 
@@ -114,7 +113,7 @@ if __name__ == '__main__':
         },
         "evaluator": {
             "llm_config": {
-                "VLMDocumentParsingQuality": {
+                "VLMDocumentParsing": {
                     "key": "",
                     "api_url": "",
                 }
 
@@ -64,14 +64,10 @@ data = Data(
 # 执行评估
 result = LLMFactCheckPublic.eval(data)
 
-# 查看结果
-print(f"Factual ratio: {result.score:.2%}")
-print(f"Reason: {result.reason}")
-print("\nDetailed results:")
-for claim in result.raw_resp["results"]:
-    print(f"\nClaim: {claim.claim}")
-    print(f"Answer: {claim.answer}")
-    print(f"Reasoning: {claim.reasoning}")
+# 查看结果 (返回 EvalDetail 对象)
+print(f"是否通过: {'通过' if not result.status else '未通过'}")
+print(f"标签: {result.label}")
+print(f"详细原因: {result.reason[0]}")
 ```
 
 ### 场景二：评估数据集
@@ -143,13 +139,10 @@ rag_data = {
 data = Data(**rag_data)
 result = LLMFactCheckPublic.eval(data)
 
-# 分析结果
-print(f"Factual consistency: {result.score:.2%}")
-for claim in result.raw_resp["results"]:
-    if claim.answer != "true":
-        print(f"\nPotential hallucination:")
-        print(f"Claim: {claim.claim}")
-        print(f"Evidence: {claim.reasoning}")
+# 分析结果 (返回 EvalDetail 对象)
+print(f"是否通过: {'通过' if not result.status else '未通过'}")
+print(f"标签: {result.label}")
+print(f"详细原因: {result.reason[0]}")
 ```
 
 ### 场景四：多轮对话监控
@@ -173,9 +166,10 @@ for turn in conversation:
     data = Data(**turn)
     result = LLMFactCheckPublic.eval(data)
     print(f"\nTurn {turn['data_id']}:")
-    print(f"Factual ratio: {result.score:.2%}")
-    if result.score < LLMFactCheckPublic.threshold:
+    print(f"是否通过: {'通过' if not result.status else '未通过'}")
+    if result.status:
         print("Warning: Potential misinformation detected!")
+        print(f"详情: {result.reason[0]}")
 ```
 
 ## 最佳实践
@@ -241,30 +235,16 @@ dingo/
 ### 评估结果格式
 
 ```python
-ModelRes(
-    score=0.85,  # 事实性得分
-    threshold=0.8,  # 判断阈值
-    reason=["Found 10 claims: 8 true, 1 false, 1 unsure..."],
-    raw_resp={
-        "claims": ["claim1", "claim2", ...],
-        "results": [
-            FactCheckResult(
-                claim="...",
-                answer="true",
-                reasoning="...",
-                supporting_evidence=[...]
-            ),
-            ...
-        ],
-        "metrics": {
-            "factual_ratio": 0.85,
-            "true_count": 8,
-            "false_count": 1,
-            "unsure_count": 1,
-            "total_claims": 10
-        }
-    }
+# LLMFactCheckPublic 返回 EvalDetail 对象
+EvalDetail(
+    metric="LLMFactCheckPublic",           # 指标名称
+    status=False,                           # 是否未通过 (False=通过, True=未通过)
+    label=["QUALITY_GOOD.FACTUALITY_CHECK_PASSED"],  # 质量标签
+    reason=["Found 10 claims: 8 true, 1 false, 1 unsure. Factual ratio: 80.00%"]
 )
+
+# reason[0] 包含完整的评估摘要，格式示例：
+# "Found 10 claims: 8 true, 1 false, 1 unsure. Factual ratio: 80.00%"
 ```
 
 ## 参考资料