
Commit 36265ca

Revert "modify code and add flash_rag_serving"
This reverts commit 734fe06.
1 parent 5301df5 commit 36265ca

4 files changed: +95 additions, -266 deletions

dataflow/operators/agentic_rag/eval/agenticrag_multihop_sample_evaluator.py

Lines changed: 13 additions & 61 deletions
@@ -47,28 +47,6 @@ def __init__(
     def get_desc(lang: str = "zh"):
         return "MultiHopRAG 验证算子：对 multi_hop_data 中每个候选进行多步验证并返回合格的数据。" if lang == "zh" else "Verifier for MultiHop RAG."

-    def _safe_json_load(self, text: str, stage: str):
-        """
-        Safely load JSON from LLM output.
-        Return None if parsing fails.
-        """
-        if not text or not text.strip():
-            self.logger.warning(f"[{stage}] Empty LLM output")
-            return None
-
-        cleaned = _clean_json_block(text)
-        if not cleaned or not cleaned.strip():
-            self.logger.warning(f"[{stage}] Empty cleaned JSON")
-            return None
-
-        try:
-            return json.loads(cleaned)
-        except json.JSONDecodeError as e:
-            self.logger.warning(
-                f"[{stage}] JSON decode failed: {e} | content: {cleaned[:200]}"
-            )
-            return None
-
     def run(self, storage: DataFlowStorage):
         df = storage.read("dataframe")

@@ -132,15 +110,9 @@ def run(self, storage: DataFlowStorage):

         check_outputs = self.llm_serving.generate_from_input(check_prompts) if check_prompts else []
         parsed_checks = []
-        valid_check_meta = []
-
-        for out, meta in zip(check_outputs, check_meta):
-            check_obj = self._safe_json_load(out, stage="phase1_check")
-            if check_obj is None:
-                continue
-            parsed_checks.append(check_obj)
-            valid_check_meta.append(meta)
-        check_meta = valid_check_meta
+        for out in check_outputs:
+            cleaned = _clean_json_block(out)
+            parsed_checks.append(json.loads(cleaned))

         passed_after_check = []
         for idx, check_result in enumerate(parsed_checks):
@@ -168,6 +140,7 @@ def run(self, storage: DataFlowStorage):
         # ---- Phase 2: reasoning prompts (one per passed row) ----
         reasoning_prompts = []
         reasoning_meta = []
+        print("passed_after_check: ", len(passed_after_check))
         for item in passed_after_check:
             qa_type = item["qa_type"]
             final_question = item["final_question"]
@@ -195,16 +168,9 @@ def run(self, storage: DataFlowStorage):

         judge_outputs = self.llm_serving.generate_from_input(judge_prompts) if judge_prompts else []
         parsed_judges = []
-        valid_judge_meta = []
-
-        for out, meta in zip(judge_outputs, judge_meta):
-            judge_obj = self._safe_json_load(out, stage="phase3_reasoning_judge")
-            if judge_obj is None:
-                continue
-            parsed_judges.append(judge_obj)
-            valid_judge_meta.append(meta)
-
-        judge_meta = valid_judge_meta
+        for out in judge_outputs:
+            cleaned = _clean_json_block(out)
+            parsed_judges.append(json.loads(cleaned))

         passed_after_reasoning = []
         for idx, judge_res in enumerate(parsed_judges):
@@ -261,16 +227,9 @@ def run(self, storage: DataFlowStorage):

         single_judge_outputs = self.llm_serving.generate_from_input(single_judge_prompts) if single_judge_prompts else []
         parsed_single_judges = []
-        valid_single_judge_meta = []
-
-        for out, meta in zip(single_judge_outputs, single_judge_meta):
-            judge_obj = self._safe_json_load(out, stage="phase5_singlehop_judge")
-            if judge_obj is None:
-                continue
-            parsed_single_judges.append(judge_obj)
-            valid_single_judge_meta.append(meta)
-
-        single_judge_meta = valid_single_judge_meta
+        for out in single_judge_outputs:
+            cleaned = _clean_json_block(out)
+            parsed_single_judges.append(json.loads(cleaned))

         row_fail_map = {}
         for idx, judge_res in enumerate(parsed_single_judges):
@@ -345,16 +304,9 @@ def run(self, storage: DataFlowStorage):

         final_judge_outputs = self.llm_serving.generate_from_input(final_judge_prompts) if final_judge_prompts else []
         parsed_final_judges = []
-        valid_final_judge_meta = []
-
-        for out, meta in zip(final_judge_outputs, final_judge_meta):
-            judge_obj = self._safe_json_load(out, stage="phase7_final_judge")
-            if judge_obj is None:
-                continue
-            parsed_final_judges.append(judge_obj)
-            valid_final_judge_meta.append(meta)
-
-        final_judge_meta = valid_final_judge_meta
+        for out in final_judge_outputs:
+            cleaned = _clean_json_block(out)
+            parsed_final_judges.append(json.loads(cleaned))

         verified_rows = []
         for idx, judge_res in enumerate(parsed_final_judges):
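
The net effect of this file's revert: the tolerant _safe_json_load helper is gone, and every phase goes back to stripping Markdown fences with _clean_json_block and calling json.loads directly, so a malformed LLM reply now raises json.JSONDecodeError instead of being logged and skipped (the per-item meta bookkeeping is dropped with it). A minimal standalone sketch of the two behaviors; the logger setup and the sample outputs are illustrative, only the helper bodies come from the diff:

import json
import logging

logger = logging.getLogger("agenticrag_sketch")

def _clean_json_block(item: str) -> str:
    # Same one-liner as in the generator module: strip ```json fences.
    return item.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()

def safe_json_load(text: str, stage: str):
    # Reconstruction of the removed helper: warn and return None instead of raising.
    if not text or not text.strip():
        logger.warning(f"[{stage}] Empty LLM output")
        return None
    cleaned = _clean_json_block(text)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as e:
        logger.warning(f"[{stage}] JSON decode failed: {e} | content: {cleaned[:200]}")
        return None

outputs = ['```json\n{"pass": true}\n```', "not json at all"]

# Tolerant path (removed by this commit): bad outputs are skipped with a warning.
parsed = [obj for out in outputs if (obj := safe_json_load(out, stage="phase1_check")) is not None]
print(parsed)  # [{'pass': True}]

# Reinstated strict path: the second output would raise json.JSONDecodeError.
# parsed = [json.loads(_clean_json_block(out)) for out in outputs]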

dataflow/operators/agentic_rag/generate/agenticrag_multihop_qa_generator.py

Lines changed: 14 additions & 53 deletions
@@ -20,7 +20,6 @@
 from typing import List
 import requests
 import time
-from tqdm import tqdm

 def _clean_json_block(item: str) -> str:
     return item.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()
@@ -84,7 +83,7 @@ def retrieve_docs(self, query: str, original_docs: List[str], now_hop: int, topk
         response = requests.post(
             self.retriever_url,
             json={"query": query, "topk": topk + now_hop},
-            timeout=60
+            timeout=1200
         )
         data = response.json()
         all_docs = [doc.get("contents", "") for doc in data.get("results", [])]
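
The only change in this hunk is the retriever timeout, put back to 1200 seconds after the reverted commit had lowered it to 60. A rough standalone sketch of the call this method makes; the endpoint URL and the dedup against earlier-hop documents are assumptions for illustration, while the request/response handling follows the lines shown above:

from typing import List
import requests

def retrieve_docs_sketch(retriever_url: str, query: str, original_docs: List[str],
                         now_hop: int, topk: int) -> List[str]:
    # POST the query to the retrieval service, over-fetching by the hop count
    # so documents already used in earlier hops can be dropped.
    response = requests.post(
        retriever_url,
        json={"query": query, "topk": topk + now_hop},
        timeout=1200,  # value restored by this revert
    )
    data = response.json()
    all_docs = [doc.get("contents", "") for doc in data.get("results", [])]
    # Assumed dedup criterion; the real unique_docs construction is outside this hunk.
    unique_docs = [d for d in all_docs if d not in original_docs]
    # Skip list-style pages, as the original filter does.
    filter_docs = [d for d in unique_docs if "(number)" not in d and "(decade)" not in d]
    return filter_docs[:topk]

# Hypothetical usage against a locally running retriever:
# docs = retrieve_docs_sketch("http://localhost:8000/search", "Who founded DataFlow?", [], now_hop=1, topk=5)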
@@ -97,37 +96,15 @@ def retrieve_docs(self, query: str, original_docs: List[str], now_hop: int, topk
         filter_docs = [d for d in unique_docs if "(number)" not in d and "(decade)" not in d]
         return filter_docs[:topk]

-    def _safe_json_load(self, text: str, stage: str):
-        """
-        Safely load JSON from LLM output.
-        Return None if parsing fails.
-        """
-        if not text or not text.strip():
-            self.logger.warning(f"[{stage}] Empty LLM output")
-            return None
-
-        cleaned = _clean_json_block(text)
-        if not cleaned or not cleaned.strip():
-            self.logger.warning(f"[{stage}] Empty cleaned JSON")
-            return None
-
-        try:
-            return json.loads(cleaned)
-        except json.JSONDecodeError as e:
-            self.logger.warning(
-                f"[{stage}] JSON decode failed: {e} | content: {cleaned[:200]}"
-            )
-            return None
-
     def run(
         self,
         storage: DataFlowStorage,
         input_hop: int,
         input_question_key: str = "question",
         input_answer_key: str = "answer",
         input_doc_key: str = "doc",
-        input_topk: int = 3,
-        input_per_doc_qa: int = 1,
+        input_topk: int = 5,
+        input_per_doc_qa: int = 5,
     ):
         self.input_hop = input_hop
         self.input_question_key = input_question_key
@@ -153,7 +130,7 @@ def run(
         # ---- Phase 1: build atomic prompts for ALL rows/docs and call model in batch ----
         atomic_prompts = []
         atomic_meta = []
-        for i, current_data in tqdm(enumerate(rows), total=len(rows), desc="Generating atomic QA prompts"):
+        for i, current_data in enumerate(rows):
             hop_num = input_hop
             hop_key = f"hop_{hop_num}"
             now_question = current_data[hop_key][input_question_key]
@@ -182,10 +159,8 @@ def run(

         atomic_outputs = self.llm_serving.generate_from_input(atomic_prompts)
         parsed_atomic = []
         for out in atomic_outputs:
-            obj = self._safe_json_load(out, stage="atomic_qa")
-            if obj is None:
-                continue
-            parsed_atomic.append(obj)
+            cleaned = _clean_json_block(out)
+            parsed_atomic.append(json.loads(cleaned))

         # ---- Phase 2: build merge prompts for ALL atomic qas and call model in batch ----
         merge_prompts = []
@@ -226,12 +201,8 @@ def run(
         merge_outputs = self.llm_serving.generate_from_input(merge_prompts)
         parsed_merges = []
         for out in merge_outputs:
-            obj = self._safe_json_load(out, stage="merge_qa")
-            if obj is None:
-                continue
-            parsed_merges.append(obj)
-
-        print("parsed_merges: ", len(parsed_merges))
+            cleaned = _clean_json_block(out)
+            parsed_merges.append(json.loads(cleaned))

         # ---- Phase 3: filter merges and build refine prompts ----
         refine_prompts = []
@@ -266,14 +237,9 @@ def run(

         refine_outputs = self.llm_serving.generate_from_input(refine_prompts)
         parsed_refines = []
-        valid_refine_meta = []
-        for out, meta in zip(refine_outputs, refine_meta):
-            obj = self._safe_json_load(out, stage="refine_answer")
-            if obj is None:
-                continue
-            parsed_refines.append(obj)
-            valid_refine_meta.append(meta)
-        refine_meta = valid_refine_meta
+        for out in refine_outputs:
+            cleaned = _clean_json_block(out)
+            parsed_refines.append(json.loads(cleaned))

         # ---- Phase 4: build optional prompts for ALL refines and batch call ----
         opt_prompts = []
@@ -302,14 +268,9 @@ def run(

         opt_outputs = self.llm_serving.generate_from_input(opt_prompts)
         parsed_opts = []
-        valid_opt_meta = []
-        for out, meta in zip(opt_outputs, opt_meta):
-            obj = self._safe_json_load(out, stage="optional_answer")
-            if obj is None:
-                continue
-            parsed_opts.append(obj)
-            valid_opt_meta.append(meta)
-        opt_meta = valid_opt_meta
+        for out in opt_outputs:
+            cleaned = _clean_json_block(out)
+            parsed_opts.append(json.loads(cleaned))

         # ---- Phase 5: assemble new_rows from opt results and corresponding meta ----
         new_rows = []
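
Taken together, every phase of run() follows the same shape: build all prompts for the phase, send the batch through llm_serving.generate_from_input, parse each JSON reply, and feed the results into the next phase's prompts (this revert also restores the defaults input_topk=5 and input_per_doc_qa=5). A toy sketch of that pattern with a stubbed serving object; the stub and the prompt text are assumptions, only the call pattern and the parsing come from the diff:

import json

def _clean_json_block(item: str) -> str:
    return item.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()

class StubLLMServing:
    # Stand-in for the real llm_serving object: one fixed JSON reply per prompt.
    def generate_from_input(self, prompts):
        return ['```json\n{"question": "q?", "answer": "a"}\n```' for _ in prompts]

llm_serving = StubLLMServing()
rows = [{"doc": "passage one"}, {"doc": "passage two"}]

# One phase: build prompts for all rows, one batched call, one parse pass
# (direct json.loads, as reinstated by this revert).
atomic_prompts = [f"Write one QA pair grounded in:\n{row['doc']}" for row in rows]
atomic_outputs = llm_serving.generate_from_input(atomic_prompts)
parsed_atomic = [json.loads(_clean_json_block(out)) for out in atomic_outputs]
print(parsed_atomic)  # two {'question': ..., 'answer': ...} dicts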

dataflow/serving/flash_rag_serving.py

Lines changed: 0 additions & 80 deletions
This file was deleted.
