Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions rdagent/scenarios/data_science/dev/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,27 +91,26 @@ exp_feedback:
user: |-
We are currently in the process of validating hypotheses to iteratively improve our models for Kaggle competitions. Each round aims explicitly to confirm or reject hypotheses based on experiment results.

## SOTA Solution
# SOTA Solution
{{ sota_desc }}

## Current Solution
### Task of Current Solution
# Current Solution
## Task of Current Solution
{{ cur_exp.pending_tasks_list[0][0].get_task_information() }}

{% if cur_exp.hypothesis %}
The experiment was designed based on the following hypothesis:
## The experiment was designed based on the following hypothesis:
{{ cur_exp.hypothesis }}

Modified code according to hypothesis:
{% else %}
Modified code:
{% endif %}

{% if diff_edition and diff_edition|length > 0 %}
## Code Changes:
{% for de in diff_edition %}
{{ de }}
{% endfor %}
{% endif %}

### Final Results of the Current Solution
## Final Results of the Current Solution
1. Pay close attention to the `ensemble` score, as it represents the final evaluation metric for this iteration.
2. If any individual model significantly outperforms the ensemble, this may indicate an issue in the ensemble method. But if the final `ensemble` score surpasses the current SOTA, you should update the SOTA record. However, if there are noticeable issues in the ensemble component, be sure to highlight them explicitly.

Expand All @@ -125,11 +124,11 @@ exp_feedback:
{% endif %}

{% if cur_exp.format_check_result is not none %}
### Submission format check to current solution:
## Submission format check to current solution:
{{ cur_exp.format_check_result }}
{% endif %}

### Complete Code of Current Solution
## Complete Code of Current Solution
{{ cur_exp.experiment_workspace.all_codes }}

## Feedback of past experiments
Expand Down
104 changes: 100 additions & 4 deletions rdagent/scenarios/data_science/proposal/exp_gen/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,113 @@ def __str__(self) -> str:
return f"No hypothesis available. Trying to construct the first runnable {self.component} component."

lines = []

# Problem section - split name and description for clarity
if self.problem_name is not None:
lines.append(f"Target Problem Name: {self.problem_name}")
if self.problem_desc is not None:
lines.append(f"Target Problem: {self.problem_desc}")
if self.problem_desc is not None:
lines.append(f"Target Problem Description: {self.problem_desc}")
lines.append("") # Empty line for separation

# Component
lines.append(f"Chosen Component: {self.component}")
lines.append(f"Hypothesis: {self.hypothesis}")
lines.append("")

# Hypothesis
lines.append(f"Experiment Hypothesis: {self.hypothesis}")
lines.append("")

# Reasoning
if self.reason is not None:
lines.append(f"Reason: {self.reason}")
lines.append("")

# Additional info
if hasattr(self, "appendix") and self.appendix is not None: # FIXME: compatibility with old traces
lines.append(f"Appendix: {self.appendix}")
return "\n".join(lines)

return "\n".join(lines).rstrip()


class DSCombinedHypothesis(DSHypothesis):
    """
    A combined hypothesis that merges multiple individual hypotheses.

    This preserves information when multiple hypotheses are generated but only
    one can be attached to an experiment: the individual hypothesis, reason,
    problem-name, and problem-description strings are folded into single
    combined strings, while the originals stay accessible via
    ``self.original_hypotheses``.
    """

    def __init__(self, hypothesis_list: list[DSHypothesis]) -> None:
        """Build a combined hypothesis from a non-empty list of hypotheses.

        Raises:
            ValueError: if ``hypothesis_list`` is empty.
        """
        if not hypothesis_list:
            raise ValueError("hypothesis_list cannot be empty")

        # Merge multiple hypotheses into combined strings; empty/None fields
        # are skipped, but numbering follows the original list positions.
        combined_hypothesis = "\n\n".join(
            [f"Approach {i+1}: {h.hypothesis}" for i, h in enumerate(hypothesis_list) if h.hypothesis]
        )

        combined_reason = "\n\n".join(
            [f"Approach {i+1} Reason: {h.reason}" for i, h in enumerate(hypothesis_list) if h.reason]
        )

        combined_problem_name = " | ".join([h.problem_name for h in hypothesis_list if h.problem_name])

        combined_problem_desc = "\n\n".join(
            [f"{h.problem_name}: {h.problem_desc}" for h in hypothesis_list if h.problem_desc]
        )

        # For no-sota case, force to use Workflow component
        main_component = "Workflow"

        super().__init__(
            component=main_component,
            hypothesis=combined_hypothesis,
            reason=combined_reason,
            problem_name=combined_problem_name,
            problem_desc=combined_problem_desc,
            # The label of the first hypothesis stands in for the group.
            problem_label=hypothesis_list[0].problem_label,
            appendix="\n\n".join([h.appendix for h in hypothesis_list if h.appendix]) or None,
        )

        # Store original hypotheses for potential future use
        self.original_hypotheses = hypothesis_list

    def __str__(self) -> str:
        # TODO: we can use T().r() to render the text.
        lines = [f"Combined Hypothesis ({len(self.original_hypotheses)} approaches for {self.component} component)"]
        lines.append("")

        # Problems section - using new PROBLEMS label style
        if self.original_hypotheses and any(h.problem_name and h.problem_desc for h in self.original_hypotheses):
            lines.append("Target Problems and Descriptions:")
            for h in self.original_hypotheses:
                if h.problem_name and h.problem_desc:
                    lines.append(f"• {h.problem_name}: {h.problem_desc}")
            lines.append("")

        # Component - matching DSHypothesis style
        lines.append(f"Chosen Component: {self.component}")
        lines.append("")

        # Proposed solutions - renamed from "Hypothesis" for clarity
        if any(h.hypothesis for h in self.original_hypotheses):
            lines.append("Experiment Hypotheses:")
            for i, h in enumerate(self.original_hypotheses, 1):
                if h.hypothesis:
                    lines.append(f"{i}. {h.hypothesis}")
            lines.append("")

        # Reasoning - matching DSHypothesis style
        if any(h.reason for h in self.original_hypotheses):
            lines.append("Reason:")
            for i, h in enumerate(self.original_hypotheses, 1):
                if h.reason:
                    lines.append(f"{i}. {h.reason}")
            lines.append("")

        # Additional info - matching DSHypothesis style
        appendix_items = [h.appendix for h in self.original_hypotheses if h.appendix]
        if appendix_items:
            lines.append(f"Appendix: {'; '.join(appendix_items)}")

        return "\n".join(lines).rstrip()


class DSTrace(Trace[DataScienceScen, KnowledgeBase]):
Expand Down
19 changes: 14 additions & 5 deletions rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from rdagent.oai.llm_utils import APIBackend, md5_hash
from rdagent.scenarios.data_science.dev.feedback import ExperimentFeedback
from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
from rdagent.scenarios.data_science.proposal.exp_gen.base import DSHypothesis, DSTrace
from rdagent.scenarios.data_science.proposal.exp_gen.base import (
DSCombinedHypothesis,
DSHypothesis,
DSTrace,
)
from rdagent.scenarios.data_science.proposal.exp_gen.draft.draft import (
DSDraftExpGen,  # TODO: DSDraftExpGen should be moved to router in the future
)
Expand Down Expand Up @@ -966,7 +970,12 @@ def task_gen(
# Persist for later stages
task.package_info = get_packages(pkgs)

exp = DSExperiment(pending_tasks_list=[[task]], hypothesis=hypotheses[0])
# When no sota_exp, combine all hypotheses to preserve information
if len(hypotheses) > 1:
combined_hypothesis = DSCombinedHypothesis(hypotheses)
exp = DSExperiment(pending_tasks_list=[[task]], hypothesis=combined_hypothesis)
else:
exp = DSExperiment(pending_tasks_list=[[task]], hypothesis=hypotheses[0])
if sota_exp is not None:
exp.experiment_workspace.inject_code_from_file_dict(sota_exp.experiment_workspace)

Expand Down Expand Up @@ -1155,9 +1164,9 @@ def gen(
sota_exp_desc=sota_exp_desc,
sota_exp=sota_exp,
hypotheses=(
[new_hypothesis]
if len(trace.hist) > 0
else self.get_all_hypotheses(all_problems, improved_hypotheses_dict)
self.get_all_hypotheses(all_problems, improved_hypotheses_dict)
if sota_exp is None
else [new_hypothesis]
),
pipeline=pipeline,
failed_exp_feedback_list_desc=failed_exp_feedback_list_desc,
Expand Down