
Commit

feat: align mlebench data and evaluation & several fixes on kaggle workflow (#477)

* several improvements to the kaggle loop

* small refinements to the prompt

* fix bugs

* add the score of each model in every experiment

* fix CI error

* fix error in ventilator tpl

* fix CI

---------

Co-authored-by: Xu Yang <[email protected]>
Co-authored-by: Bowen Xian <[email protected]>
Co-authored-by: WinstonLiye <[email protected]>
Co-authored-by: TPLin22 <[email protected]>
5 people authored Nov 15, 2024
1 parent 2e38000 commit f6c522b
Showing 25 changed files with 365 additions and 210 deletions.
5 changes: 1 addition & 4 deletions rdagent/app/kaggle/loop.py
@@ -1,6 +1,4 @@
import subprocess
from collections import defaultdict
from concurrent.futures import TimeoutError
from typing import Any

import fire
@@ -14,7 +12,6 @@
Hypothesis2Experiment,
HypothesisExperiment2Feedback,
HypothesisGen,
Trace,
)
from rdagent.core.scenario import Scenario
from rdagent.core.utils import import_class
@@ -116,7 +113,7 @@ def running(self, prev_out: dict[str, Any]):

return exp

skip_loop_error = (ModelEmptyError, FactorEmptyError, TimeoutError)
skip_loop_error = (ModelEmptyError, FactorEmptyError)


def main(path=None, step_n=None, competition=None):
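For context, the `skip_loop_error` tuple is consumed by the workflow loop's error handling; with `TimeoutError` dropped from it, timeouts now propagate instead of silently skipping the iteration. A minimal sketch of how such a tuple is typically used (the wrapper function below is an illustration, not the actual loop implementation):

```python
from rdagent.core.exception import FactorEmptyError, ModelEmptyError

# After this commit, TimeoutError is no longer swallowed by the loop.
skip_loop_error = (ModelEmptyError, FactorEmptyError)

def run_step(step_fn, *args):
    """Illustrative wrapper: skip the current iteration only for known-benign errors."""
    try:
        return step_fn(*args)
    except skip_loop_error as e:
        print(f"Skipping this iteration: {e!r}")
        return None
    # Any other exception, including TimeoutError, propagates to the caller.
```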
5 changes: 4 additions & 1 deletion rdagent/components/coder/model_coder/CoSTEER/evaluators.py
@@ -283,7 +283,10 @@ def evaluate(
else:
gt_np_array = None

shape_feedback, shape_decision = shape_evaluator(gen_np_array, (batch_size, 1))
shape_feedback, shape_decision = shape_evaluator(
gen_np_array,
(batch_size, self.scen.model_output_channel if hasattr(self.scen, "model_output_channel") else 1),
)
value_feedback, value_decision = value_evaluator(gen_np_array, gt_np_array)
code_feedback, _ = ModelCodeEvaluator(scen=self.scen).evaluate(
target_task=target_task,
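The change above makes the expected output shape configurable per scenario: when the scenario defines `model_output_channel`, predictions are checked against `(batch_size, model_output_channel)`, otherwise the previous default of `(batch_size, 1)` is kept. A rough, self-contained sketch of that check, mirroring the `hasattr` fallback with `getattr` (the `shape_evaluator` body and the `_Scenario` stub below are illustrative assumptions, not the repository's implementation):

```python
import numpy as np

def shape_evaluator(prediction: np.ndarray, target_shape: tuple) -> tuple[str, bool]:
    """Compare a prediction array against the expected shape and report the result."""
    if prediction is None:
        return "No output was generated.", False
    if tuple(prediction.shape) == tuple(target_shape):
        return f"Output shape {prediction.shape} matches {target_shape}.", True
    return f"Output shape {prediction.shape} does not match expected {target_shape}.", False

class _Scenario:
    model_output_channel = 3  # e.g. a competition with multiple output columns

batch_size, scen = 32, _Scenario()
gen_np_array = np.zeros((batch_size, 3))
# Fall back to a single output column when the scenario does not define the attribute.
expected_shape = (batch_size, getattr(scen, "model_output_channel", 1))
feedback, decision = shape_evaluator(gen_np_array, expected_shape)
print(feedback, decision)
```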
1 change: 1 addition & 0 deletions rdagent/components/knowledge_management/graph.py
@@ -22,6 +22,7 @@ class UndirectedNode(Node):
def __init__(self, content: str = "", label: str = "", embedding: Any = None) -> None:
super().__init__(content, label, embedding)
self.neighbors: set[UndirectedNode] = set()
assert isinstance(content, str), "content must be a string"

def add_neighbor(self, node: UndirectedNode) -> None:
self.neighbors.add(node)
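The new assertion guards against non-string `content` (for example a dict) entering the knowledge graph, which would break embedding later. A minimal usage sketch of `UndirectedNode`, keeping only what the diff shows and a simplified stand-in for the base class; the node contents are example values:

```python
from typing import Any

class Node:  # simplified stand-in for the real base class
    def __init__(self, content: str = "", label: str = "", embedding: Any = None) -> None:
        self.content, self.label, self.embedding = content, label, embedding

class UndirectedNode(Node):
    def __init__(self, content: str = "", label: str = "", embedding: Any = None) -> None:
        super().__init__(content, label, embedding)
        self.neighbors: set["UndirectedNode"] = set()
        assert isinstance(content, str), "content must be a string"

    def add_neighbor(self, node: "UndirectedNode") -> None:
        self.neighbors.add(node)

competition = UndirectedNode(content="ventilator-pressure-prediction", label="competition")
hypothesis = UndirectedNode(content="Adding lag features improves the score.", label="hypothesis")
competition.add_neighbor(hypothesis)
```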
2 changes: 1 addition & 1 deletion rdagent/components/proposal/__init__.py
@@ -72,7 +72,7 @@ def __init__(self, scen: Scenario):
self.targets = "model tuning"


class FactorAndModelHypothesisGen(FactorHypothesisGen):
class FactorAndModelHypothesisGen(LLMHypothesisGen):
def __init__(self, scen: Scenario):
super().__init__(scen)
self.targets = "feature engineering and model building"
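This change only swaps the parent class: `FactorAndModelHypothesisGen` now inherits directly from `LLMHypothesisGen` rather than from `FactorHypothesisGen`, so it no longer picks up the factor-specific `targets` before overriding it. A schematic of the resulting hierarchy; class bodies are reduced to the `targets` assignment, and the `LLMHypothesisGen` internals and the `"factors"` / `"hypothesis"` strings are assumptions for illustration:

```python
class Scenario: ...  # placeholder for rdagent.core.scenario.Scenario

class LLMHypothesisGen:  # assumed shared base holding the LLM-driven generation logic
    def __init__(self, scen: Scenario):
        self.scen = scen
        self.targets = "hypothesis"  # illustrative default

class FactorHypothesisGen(LLMHypothesisGen):
    def __init__(self, scen: Scenario):
        super().__init__(scen)
        self.targets = "factors"  # illustrative factor-specific target

# Before this commit: class FactorAndModelHypothesisGen(FactorHypothesisGen)
class FactorAndModelHypothesisGen(LLMHypothesisGen):
    def __init__(self, scen: Scenario):
        super().__init__(scen)
        self.targets = "feature engineering and model building"
```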
7 changes: 0 additions & 7 deletions rdagent/components/runner/__init__.py
@@ -21,12 +21,5 @@ def get_cache_key(self, exp: Experiment) -> str:
def assign_cached_result(self, exp: Experiment, cached_res: Experiment) -> Experiment:
if exp.based_experiments and exp.based_experiments[-1].result is None:
exp.based_experiments[-1].result = cached_res.based_experiments[-1].result
if cached_res.experiment_workspace.workspace_path.exists():
for csv_file in cached_res.experiment_workspace.workspace_path.glob("*.csv"):
shutil.copy(csv_file, exp.experiment_workspace.workspace_path)
for py_file in (cached_res.experiment_workspace.workspace_path / "feature").glob("*.py"):
shutil.copy(py_file, exp.experiment_workspace.workspace_path / "feature")
for py_file in (cached_res.experiment_workspace.workspace_path / "model").glob("*.py"):
shutil.copy(py_file, exp.experiment_workspace.workspace_path / "model")
exp.result = cached_res.result
return exp
1 change: 1 addition & 0 deletions rdagent/core/experiment.py
@@ -210,6 +210,7 @@ def __init__(
self.sub_workspace_list: list[ASpecificWSForSubTasks | None] = [None] * len(self.sub_tasks)
self.based_experiments: Sequence[ASpecificWSForExperiment] = based_experiments
self.result: object = None # The result of the experiment, can be different types in different scenarios.
self.sub_results: dict[str, float] = {}
self.experiment_workspace: ASpecificWSForExperiment | None = None


2 changes: 1 addition & 1 deletion rdagent/core/utils.py
@@ -142,7 +142,7 @@ def multiprocessing_wrapper(func_calls: list[tuple[Callable, tuple]], n: int) ->
list
"""
if n == 1:
if n == 1 or max(1, min(n, len(func_calls))) == 1:
return [f(*args) for f, args in func_calls]

with mp.Pool(processes=max(1, min(n, len(func_calls)))) as pool:
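For reference, a self-contained sketch of what `multiprocessing_wrapper` does after this change: it falls back to sequential execution whenever only one effective worker is available, and otherwise dispatches the calls through a process pool. The signature is taken from the diff; the pool-dispatch body is a simplified reconstruction, not the exact repository code:

```python
import multiprocessing as mp
from typing import Callable

def multiprocessing_wrapper(func_calls: list[tuple[Callable, tuple]], n: int) -> list:
    """Run (func, args) pairs, sequentially if only one effective worker is available."""
    if n == 1 or max(1, min(n, len(func_calls))) == 1:
        return [f(*args) for f, args in func_calls]
    with mp.Pool(processes=max(1, min(n, len(func_calls)))) as pool:
        results = [pool.apply_async(f, args) for f, args in func_calls]
        return [r.get() for r in results]

if __name__ == "__main__":
    print(multiprocessing_wrapper([(pow, (2, 10)), (pow, (3, 3))], n=1))  # [1024, 27]
```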
4 changes: 3 additions & 1 deletion rdagent/log/ui/app.py
@@ -409,7 +409,7 @@ def summary_window():
if state.alpha158_metrics is not None:
selected = ["alpha158"] + [i for i in df.index if state.h_decisions[int(i[6:])]]
else:
selected = [i for i in df.index if state.h_decisions[int(i[6:])]]
selected = [i for i in df.index if i == "Baseline" or state.h_decisions[int(i[6:])]]
df = df.loc[selected]
if df.shape[0] == 1:
st.table(df.iloc[0])
@@ -637,6 +637,7 @@ def evolving_window():
for j, w in enumerate(ws):
with wtabs[j]:
# Evolving Code
st.markdown(f"**Workspace Path**: {w.workspace_path}")
for k, v in w.code_dict.items():
with st.expander(f":green[`{k}`]", expanded=True):
st.code(v, language="python")
@@ -681,6 +682,7 @@ def evolving_window():
st.text_input("log path", key="log_path", on_change=refresh, label_visibility="collapsed")
else:
folders = [folder.relative_to(main_log_path) for folder in main_log_path.iterdir() if folder.is_dir()]
folders = sorted(folders, key=lambda x: x.name)
st.selectbox(f"**Select from `{main_log_path}`**", folders, key="log_path", on_change=refresh)
else:
st.text_input(":blue[**log path**]", key="log_path", on_change=refresh)
107 changes: 61 additions & 46 deletions rdagent/scenarios/kaggle/developer/feedback.py
@@ -4,6 +4,7 @@
import pandas as pd
from jinja2 import Environment, StrictUndefined

from rdagent.components.knowledge_management.graph import UndirectedNode
from rdagent.core.experiment import Experiment
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import (
@@ -14,6 +15,7 @@
)
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import APIBackend
from rdagent.scenarios.kaggle.experiment.kaggle_experiment import KG_SELECT_MAPPING
from rdagent.utils import convert2bool

prompt_dict = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
@@ -59,17 +61,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
Any: The feedback generated for the given experiment and hypothesis.
"""
logger.info("Generating feedback...")
hypothesis_text = hypothesis.hypothesis
current_result = exp.result
tasks_factors = []
if exp.sub_tasks:
tasks_factors = []
for task in exp.sub_tasks:
try:
task_info = task.get_task_information_and_implementation_result()
tasks_factors.append(task_info)
except AttributeError:
print(f"Warning: Task {task} does not have get_task_information_and_implementation_result method")

evaluation_description = None
# Check if there are any based experiments
@@ -84,11 +76,6 @@
) # Compare with itself
print("Warning: No previous experiments to compare against. Using current result as baseline.")

available_features = {
task_info: feature_shape for task_info, feature_shape in exp.experiment_workspace.data_description
}
model_code = exp.experiment_workspace.model_description

# Generate the user prompt based on the action type
if hypothesis.action == "Model tuning":
prompt_key = "model_tuning_feedback_generation"
@@ -104,35 +91,56 @@
.render(scenario=self.scen.get_scenario_all_desc(filtered_tag="feedback"))
)

last_task_and_code = None
if trace.hist:
last_task_and_code = (
trace.hist[-1][1].experiment_workspace.data_description
if trace.hist[-1][0].action == "Feature engineering" or trace.hist[-1][0].action == "Feature processing"
else trace.hist[-1][1].experiment_workspace.model_description
)
sota_exp = exp.based_experiments[-1] if exp.based_experiments else None
assert sota_exp is not None
sota_features = str(exp.based_experiments[-1].experiment_workspace.data_description)
sota_models = json.dumps(exp.based_experiments[-1].experiment_workspace.model_description, indent=2)
sota_result = exp.based_experiments[-1].result
sota_sub_results = exp.based_experiments[-1].sub_results

current_hypothesis = hypothesis.hypothesis
current_hypothesis_reason = hypothesis.reason
current_target_action = hypothesis.action
current_sub_exps_to_code = {}
if hypothesis.action == "Model tuning":
current_sub_exps_to_code[exp.sub_tasks[0].get_task_information()] = exp.sub_workspace_list[0].code
elif hypothesis.action == "Model feature selection":
current_sub_exps_to_code[exp.sub_tasks[0].get_task_information()] = exp.experiment_workspace.code_dict[
KG_SELECT_MAPPING[exp.sub_tasks[0].model_type]
]
else:
current_sub_exps_to_code = {
sub_ws.target_task.get_task_information(): sub_ws.code for sub_ws in exp.sub_workspace_list
}
current_sub_exps_to_code_str = json.dumps(current_sub_exps_to_code, indent=2)
current_result = exp.result
current_sub_results = exp.sub_results

last_hypothesis_and_feedback = None
if trace.hist and len(trace.hist) > 0:
last_hypothesis_and_feedback = (trace.hist[-1][0], trace.hist[-1][2])

# Prepare render dictionary
render_dict = {
"last_hypothesis": trace.hist[-1][0] if trace.hist else None,
"last_task_and_code": last_task_and_code,
"last_result": trace.hist[-1][1].result if trace.hist else None,
"sota_task_and_code": (
exp.based_experiments[-1].experiment_workspace.data_description if exp.based_experiments else None
),
"sota_result": exp.based_experiments[-1].result if exp.based_experiments else None,
"hypothesis": hypothesis,
"exp": exp,
"model_code": model_code, # This turn
"available_features": available_features, # This turn
"combined_result": combined_result, # This turn and sota
"hypothesis_text": hypothesis_text, # This turn
"task_details": tasks_factors, # This turn
"sota_features": sota_features,
"sota_models": sota_models,
"sota_result": sota_result,
"sota_sub_results": sota_sub_results,
"current_hypothesis": current_hypothesis,
"current_hypothesis_reason": current_hypothesis_reason,
"current_target_action": current_target_action,
"current_sub_exps_to_code": current_sub_exps_to_code_str,
"current_result": current_result,
"current_sub_results": current_sub_results,
"combined_result": combined_result,
"evaluation_description": evaluation_description,
"last_hypothesis_and_feedback": last_hypothesis_and_feedback,
}

usr_prompt = (
Environment(undefined=StrictUndefined).from_string(prompt_dict[prompt_key]["user"]).render(**render_dict)
Environment(undefined=StrictUndefined)
.from_string(prompt_dict["kg_feedback_generation_user"])
.render(**render_dict)
)

response = APIBackend().build_messages_and_create_chat_completion(
@@ -160,22 +168,29 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
percentile_ranking = (insert_position) / (len(sorted_scores)) * 100

experiment_feedback = {
"current_competition": self.scen.get_competition_full_desc(),
"hypothesis_text": hypothesis_text,
"hypothesis_text": current_hypothesis,
"tasks_factors": current_sub_exps_to_code,
"current_result": current_result,
"model_code": model_code,
"available_features": available_features,
"observations": observations,
"hypothesis_evaluation": hypothesis_evaluation,
"reason": reason,
"percentile_ranking": percentile_ranking,
}

if self.scen.if_using_vector_rag:
raise NotImplementedError("Vector RAG is not implemented yet since there are plenty bugs!")
self.scen.vector_base.add_experience_to_vector_base(experiment_feedback)
self.scen.vector_base.dump()
elif self.scen.if_using_graph_rag:
trace.knowledge_base.add_document(experiment_feedback, self.scen)
competition_node = UndirectedNode(content=self.scen.get_competition_full_desc(), label="competition")
hypothesis_node = UndirectedNode(content=hypothesis.hypothesis, label=hypothesis.action)
exp_code_nodes = []
for exp, code in current_sub_exps_to_code.items():
exp_code_nodes.append(UndirectedNode(content=exp, label="experiments"))
if code != "":
exp_code_nodes.append(UndirectedNode(content=code, label="code"))
conclusion_node = UndirectedNode(content=response, label="conclusion")
all_nodes = [competition_node, hypothesis_node, *exp_code_nodes, conclusion_node]
all_nodes = trace.knowledge_base.batch_embedding(all_nodes)
for node in all_nodes:
if node is not competition_node:
trace.knowledge_base.add_node(node, competition_node)

if self.scen.if_action_choosing_based_on_UCB:
self.scen.action_counts[hypothesis.action] += 1
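The graph-RAG branch above replaces the old `add_document` call with explicit node construction: one node for the competition, one for the hypothesis, one per sub-experiment (plus its code when present), and one for the LLM conclusion, all embedded in a batch and then connected to the competition node. A condensed sketch of that wiring, assuming `knowledge_base` exposes `batch_embedding(nodes)` and `add_node(node, neighbor)` as used in the diff; the node/graph classes and contents below are minimal stand-ins, not the repository's implementation:

```python
from typing import Any

class UndirectedNode:  # minimal stand-in for rdagent's knowledge-graph node
    def __init__(self, content: str, label: str, embedding: Any = None) -> None:
        self.content, self.label, self.embedding = content, label, embedding
        self.neighbors: set["UndirectedNode"] = set()

class UndirectedGraph:  # minimal stand-in for the trace.knowledge_base object
    def batch_embedding(self, nodes: list[UndirectedNode]) -> list[UndirectedNode]:
        for n in nodes:
            n.embedding = [float(len(n.content))]  # placeholder embedding
        return nodes

    def add_node(self, node: UndirectedNode, neighbor: UndirectedNode) -> None:
        node.neighbors.add(neighbor)
        neighbor.neighbors.add(node)

knowledge_base = UndirectedGraph()
competition_node = UndirectedNode("ventilator-pressure-prediction: predict ...", "competition")
hypothesis_node = UndirectedNode("Tune the LSTM hidden size.", "Model tuning")
exp_code_nodes = [
    UndirectedNode("Task: tune model hyperparameters", "experiments"),
    UndirectedNode("class LSTMModel(nn.Module): ...", "code"),
]
conclusion_node = UndirectedNode("The hypothesis improved the validation score.", "conclusion")

all_nodes = [competition_node, hypothesis_node, *exp_code_nodes, conclusion_node]
all_nodes = knowledge_base.batch_embedding(all_nodes)
for node in all_nodes:
    if node is not competition_node:
        knowledge_base.add_node(node, competition_node)  # star topology around the competition
```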
34 changes: 32 additions & 2 deletions rdagent/scenarios/kaggle/developer/runner.py
@@ -3,9 +3,11 @@
import shutil
from pathlib import Path

import pandas as pd

from rdagent.components.runner import CachedRunner
from rdagent.core.exception import CoderError, FactorEmptyError, ModelEmptyError
from rdagent.core.experiment import ASpecificExp
from rdagent.core.experiment import ASpecificExp, Experiment
from rdagent.core.prompts import Prompts
from rdagent.core.utils import cache_with_pickle
from rdagent.oai.llm_utils import md5_hash
@@ -28,6 +30,18 @@ def get_cache_key(self, exp: ASpecificExp) -> str:
cached_key_from_exp = CachedRunner.get_cache_key(self, exp)
return md5_hash(codes + cached_key_from_exp)

def assign_cached_result(self, exp: Experiment, cached_res: Experiment) -> Experiment:
exp = CachedRunner.assign_cached_result(self, exp, cached_res)
if cached_res.experiment_workspace.workspace_path.exists():
for csv_file in cached_res.experiment_workspace.workspace_path.glob("*.csv"):
shutil.copy(csv_file, exp.experiment_workspace.workspace_path)
for py_file in (cached_res.experiment_workspace.workspace_path / "feature").glob("*.py"):
shutil.copy(py_file, exp.experiment_workspace.workspace_path / "feature")
for py_file in (cached_res.experiment_workspace.workspace_path / "model").glob("*.py"):
shutil.copy(py_file, exp.experiment_workspace.workspace_path / "model")
exp.experiment_workspace.data_description = cached_res.experiment_workspace.data_description
return exp

@cache_with_pickle(get_cache_key, CachedRunner.assign_cached_result)
def init_develop(self, exp: KGFactorExperiment | KGModelExperiment) -> KGFactorExperiment | KGModelExperiment:
"""
@@ -40,6 +54,11 @@ def init_develop(self, exp: KGFactorExperiment | KGModelExperiment) -> KGFactorE

exp.result = result

sub_result_score_path = Path(exp.experiment_workspace.workspace_path) / "sub_submission_score.csv"
if sub_result_score_path.exists():
sub_submission_df = pd.read_csv(sub_result_score_path)
exp.sub_results = sub_submission_df.set_index("Model")["score"].to_dict()

return exp


@@ -67,6 +86,10 @@ def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
raise CoderError("No result is returned from the experiment workspace")

exp.result = result
sub_result_score_path = Path(exp.experiment_workspace.workspace_path) / "sub_submission_score.csv"
if sub_result_score_path.exists():
sub_submission_df = pd.read_csv(sub_result_score_path)
exp.sub_results = sub_submission_df.set_index("Model")["score"].to_dict()

return exp

@@ -79,10 +102,13 @@ def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
for sub_ws in exp.sub_workspace_list:
if sub_ws.code_dict == {}:
continue
execued_df = sub_ws.execute()[1]
if execued_df is None:
continue
implemented_factor_count += 1
target_feature_file_name = f"feature/feature_{current_feature_file_count:05d}.py"
exp.experiment_workspace.inject_code(**{target_feature_file_name: sub_ws.code_dict["factor.py"]})
feature_shape = sub_ws.execute()[1].shape[-1]
feature_shape = execued_df.shape[-1]
exp.experiment_workspace.data_description.append((sub_ws.target_task.get_task_information(), feature_shape))
current_feature_file_count += 1
if implemented_factor_count == 0:
@@ -100,5 +126,9 @@ def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
raise CoderError("No result is returned from the experiment workspace")

exp.result = result
sub_result_score_path = Path(exp.experiment_workspace.workspace_path) / "sub_submission_score.csv"
if sub_result_score_path.exists():
sub_submission_df = pd.read_csv(sub_result_score_path)
exp.sub_results = sub_submission_df.set_index("Model")["score"].to_dict()

return exp
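The same post-processing block appears after each `develop`/`init_develop` call above: if the workspace wrote a `sub_submission_score.csv`, its per-model scores are loaded into `exp.sub_results`. A small standalone illustration of that step, factored into a helper for clarity (the helper name is hypothetical; the CSV columns `Model` and `score` are taken from the diff):

```python
from pathlib import Path

import pandas as pd

def load_sub_results(workspace_path: Path) -> dict[str, float]:
    """Read per-model scores written by the Kaggle workspace, if any."""
    sub_result_score_path = workspace_path / "sub_submission_score.csv"
    if not sub_result_score_path.exists():
        return {}
    sub_submission_df = pd.read_csv(sub_result_score_path)
    # One row per trained model, with columns: Model, score
    return sub_submission_df.set_index("Model")["score"].to_dict()

# Usage inside the runner (sketch):
# exp.sub_results = load_sub_results(Path(exp.experiment_workspace.workspace_path))
```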
@@ -17,13 +17,14 @@ RUN python -m pip install numpy
RUN python -m pip install pandas
# RUN pip install pyg_lib torch_scatter torch_sparse torch_cluster -f https://data.pyg.org/whl/torch-2.3.0%2Bcu121.html
RUN pip install torch_geometric
RUN pip install pytorch_lightning
RUN pip install ogb
RUN pip install networkx
RUN pip install scikit-learn
RUN pip install catboost
RUN pip install xgboost
RUN pip install sparse
RUN pip install lightgbm
RUN pip install lightgbm==3.3.5
RUN pip install pyarrow
RUN pip install fastparquet
RUN pip install optuna
17 changes: 17 additions & 0 deletions rdagent/scenarios/kaggle/docker/mle_bench_docker/Dockerfile
@@ -0,0 +1,17 @@
FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-runtime
# For GPU support, please choose the proper tag from https://hub.docker.com/r/pytorch/pytorch/tags

RUN apt-get clean && apt-get update && apt-get install -y \
curl \
vim \
git \
build-essential \
git-lfs \
&& rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/openai/mle-bench.git
RUN cd mle-bench && git lfs fetch --all
RUN cd mle-bench && git lfs pull
RUN cd mle-bench && python -m pip install -e .

WORKDIR /workspace