Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions rdagent/scenarios/data_science/dev/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,27 +91,26 @@ exp_feedback:
user: |-
We are currently in the process of validating hypotheses to iteratively improve our models for Kaggle competitions. Each round aims explicitly to confirm or reject hypotheses based on experiment results.

## SOTA Solution
# SOTA Solution
{{ sota_desc }}

## Current Solution
### Task of Current Solution
# Current Solution
## Task of Current Solution
{{ cur_exp.pending_tasks_list[0][0].get_task_information() }}

{% if cur_exp.hypothesis %}
The experiment was designed based on the following hypothesis:
## The experiment was designed based on the following hypothesis:
{{ cur_exp.hypothesis }}

Modified code according to hypothesis:
{% else %}
Modified code:
{% endif %}

{% if diff_edition and diff_edition|length > 0 %}
## Code Changes:
{% for de in diff_edition %}
{{ de }}
{% endfor %}
{% endif %}

### Final Results of the Current Solution
## Final Results of the Current Solution
1. Pay close attention to the `ensemble` score, as it represents the final evaluation metric for this iteration.
2. If any individual model significantly outperforms the ensemble, this may indicate an issue in the ensemble method. But if the final `ensemble` score surpasses the current SOTA, you should update the SOTA record. However, if there are noticeable issues in the ensemble component, be sure to highlight them explicitly.

Expand All @@ -125,11 +124,11 @@ exp_feedback:
{% endif %}

{% if cur_exp.format_check_result is not none %}
### Submission format check to current solution:
## Submission format check to current solution:
{{ cur_exp.format_check_result }}
{% endif %}

### Complete Code of Current Solution
## Complete Code of Current Solution
{{ cur_exp.experiment_workspace.all_codes }}

## Feedback of past experiments
Expand Down
104 changes: 100 additions & 4 deletions rdagent/scenarios/data_science/proposal/exp_gen/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,113 @@ def __str__(self) -> str:
return f"No hypothesis available. Trying to construct the first runnable {self.component} component."

lines = []

# Problem section - split name and description for clarity
if self.problem_name is not None:
lines.append(f"Target Problem Name: {self.problem_name}")
if self.problem_desc is not None:
lines.append(f"Target Problem: {self.problem_desc}")
if self.problem_desc is not None:
lines.append(f"Target Problem Description: {self.problem_desc}")
lines.append("") # Empty line for separation

# Component
lines.append(f"Chosen Component: {self.component}")
lines.append(f"Hypothesis: {self.hypothesis}")
lines.append("")

# Hypothesis
lines.append(f"Experiment Hypothesis: {self.hypothesis}")
lines.append("")

# Reasoning
if self.reason is not None:
lines.append(f"Reason: {self.reason}")
lines.append("")

# Additional info
if hasattr(self, "appendix") and self.appendix is not None: # FIXME: compatibility with old traces
lines.append(f"Appendix: {self.appendix}")
return "\n".join(lines)

return "\n".join(lines).rstrip()


class DSCombinedHypothesis(DSHypothesis):
    """
    A combined hypothesis that merges multiple individual hypotheses.

    This preserves information when multiple hypotheses are generated but only
    one can be attached to an experiment: the individual hypothesis, reason,
    problem-name, and problem-description strings are folded into single
    combined strings, while the originals stay accessible via
    ``self.original_hypotheses``.
    """

    def __init__(self, hypothesis_list: list[DSHypothesis]) -> None:
        """Build a combined hypothesis from a non-empty list of hypotheses.

        Raises:
            ValueError: if ``hypothesis_list`` is empty.
        """
        if not hypothesis_list:
            raise ValueError("hypothesis_list cannot be empty")

        # Merge multiple hypotheses into combined strings; empty/None fields
        # are skipped, but numbering follows the original list positions.
        combined_hypothesis = "\n\n".join(
            [f"Approach {i+1}: {h.hypothesis}" for i, h in enumerate(hypothesis_list) if h.hypothesis]
        )

        combined_reason = "\n\n".join(
            [f"Approach {i+1} Reason: {h.reason}" for i, h in enumerate(hypothesis_list) if h.reason]
        )

        combined_problem_name = " | ".join([h.problem_name for h in hypothesis_list if h.problem_name])

        combined_problem_desc = "\n\n".join(
            [f"{h.problem_name}: {h.problem_desc}" for h in hypothesis_list if h.problem_desc]
        )

        # For no-sota case, force to use Workflow component
        main_component = "Workflow"

        super().__init__(
            component=main_component,
            hypothesis=combined_hypothesis,
            reason=combined_reason,
            problem_name=combined_problem_name,
            problem_desc=combined_problem_desc,
            # The label of the first hypothesis stands in for the group.
            problem_label=hypothesis_list[0].problem_label,
            appendix="\n\n".join([h.appendix for h in hypothesis_list if h.appendix]) or None,
        )

        # Store original hypotheses for potential future use
        self.original_hypotheses = hypothesis_list

    def __str__(self) -> str:
        # TODO: we can use T().r() to render the text.
        lines = [f"Combined Hypothesis ({len(self.original_hypotheses)} approaches for {self.component} component)"]
        lines.append("")

        # Problems section - using new PROBLEMS label style
        if self.original_hypotheses and any(h.problem_name and h.problem_desc for h in self.original_hypotheses):
            lines.append("Target Problems and Descriptions:")
            for h in self.original_hypotheses:
                if h.problem_name and h.problem_desc:
                    lines.append(f"• {h.problem_name}: {h.problem_desc}")
            lines.append("")

        # Component - matching DSHypothesis style
        lines.append(f"Chosen Component: {self.component}")
        lines.append("")

        # Proposed solutions - renamed from "Hypothesis" for clarity
        if any(h.hypothesis for h in self.original_hypotheses):
            lines.append("Experiment Hypotheses:")
            for i, h in enumerate(self.original_hypotheses, 1):
                if h.hypothesis:
                    lines.append(f"{i}. {h.hypothesis}")
            lines.append("")

        # Reasoning - matching DSHypothesis style
        if any(h.reason for h in self.original_hypotheses):
            lines.append("Reason:")
            for i, h in enumerate(self.original_hypotheses, 1):
                if h.reason:
                    lines.append(f"{i}. {h.reason}")
            lines.append("")

        # Additional info - matching DSHypothesis style
        appendix_items = [h.appendix for h in self.original_hypotheses if h.appendix]
        if appendix_items:
            lines.append(f"Appendix: {'; '.join(appendix_items)}")

        return "\n".join(lines).rstrip()


class DSTrace(Trace[DataScienceScen, KnowledgeBase]):
Expand Down
19 changes: 14 additions & 5 deletions rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from rdagent.oai.llm_utils import APIBackend, md5_hash
from rdagent.scenarios.data_science.dev.feedback import ExperimentFeedback
from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
from rdagent.scenarios.data_science.proposal.exp_gen.base import DSHypothesis, DSTrace
from rdagent.scenarios.data_science.proposal.exp_gen.base import (
DSCombinedHypothesis,
DSHypothesis,
DSTrace,
)
from rdagent.scenarios.data_science.proposal.exp_gen.draft.draft import (
DSDraftExpGen,  # TODO: DSDraftExpGen should be moved to router in the future
)
Expand Down Expand Up @@ -966,7 +970,12 @@ def task_gen(
# Persist for later stages
task.package_info = get_packages(pkgs)

exp = DSExperiment(pending_tasks_list=[[task]], hypothesis=hypotheses[0])
# When no sota_exp, combine all hypotheses to preserve information
if len(hypotheses) > 1:
combined_hypothesis = DSCombinedHypothesis(hypotheses)
exp = DSExperiment(pending_tasks_list=[[task]], hypothesis=combined_hypothesis)
else:
exp = DSExperiment(pending_tasks_list=[[task]], hypothesis=hypotheses[0])
if sota_exp is not None:
exp.experiment_workspace.inject_code_from_file_dict(sota_exp.experiment_workspace)

Expand Down Expand Up @@ -1155,9 +1164,9 @@ def gen(
sota_exp_desc=sota_exp_desc,
sota_exp=sota_exp,
hypotheses=(
[new_hypothesis]
if len(trace.hist) > 0
else self.get_all_hypotheses(all_problems, improved_hypotheses_dict)
self.get_all_hypotheses(all_problems, improved_hypotheses_dict)
if sota_exp is None
else [new_hypothesis]
),
pipeline=pipeline,
failed_exp_feedback_list_desc=failed_exp_feedback_list_desc,
Expand Down