feat(kaggle): several updates in kaggle scenarios (#476)
* update plot

* log and reduce token usage

* trace tag

* add simple_background parameter to get_scenario_all_desc

* update trace

* update first version code

* chat model map

* add annotation for stack index

* add annotation

* reformatted by black

* several updates on kaggle scenarios

* update some new changes

* fix CI

* fix CI

* fix a bug

* fix bugs in graph RAG

---------

Co-authored-by: Tim <[email protected]>
peteryang1 and qew21 authored Nov 6, 2024
1 parent 2fc0d77 commit 245d211
Showing 49 changed files with 515 additions and 380 deletions.
39 changes: 22 additions & 17 deletions rdagent/app/benchmark/factor/analysis.py
@@ -82,12 +82,12 @@ def result_all_key_order(self, x):
         for i in x:
             order_v.append(
                 {
-                    "avg. Run successful rate": 0,
-                    "avg. Format successful rate": 1,
-                    "avg. Correlation (value only)": 2,
-                    "max. Correlation": 3,
-                    "max. accuracy": 4,
-                    "avg. accuracy": 5,
+                    "Avg Run SR": 0,
+                    "Avg Format SR": 1,
+                    "Avg Correlation": 2,
+                    "Max Correlation": 3,
+                    "Max Accuracy": 4,
+                    "Avg Accuracy": 5,
                 }.get(i, i),
             )
         return order_v
@@ -140,12 +140,12 @@ def analyze_data(self, sum_df):

         result_all = pd.concat(
             {
-                "avg. Correlation (value only)": corr_res.iloc[:, 0],
-                "avg. Format successful rate": format_succ_rate_f.iloc[:, 0],
-                "avg. Run successful rate": succ_rate_f.iloc[:, 0],
-                "max. Correlation": corr_max_res.iloc[:, 0],
-                "max. accuracy": value_max_res.iloc[:, 0],
-                "avg. accuracy": value_avg_res.iloc[:, 0],
+                "Avg Correlation": corr_res.iloc[:, 0],
+                "Avg Format SR": format_succ_rate_f.iloc[:, 0],
+                "Avg Run SR": succ_rate_f.iloc[:, 0],
+                "Max Correlation": corr_max_res.iloc[:, 0],
+                "Max Accuracy": value_max_res.iloc[:, 0],
+                "Avg Accuracy": value_avg_res.iloc[:, 0],
             },
             axis=1,
         )
@@ -179,11 +179,16 @@ def change_fs(font_size):

     @staticmethod
     def plot_data(data, file_name, title):
-        plt.figure(figsize=(10, 6))
-        sns.barplot(x="index", y="b", hue="a", data=data)
-        plt.xlabel("Method")
+        plt.figure(figsize=(10, 10))
         plt.ylabel("Value")
-        plt.title(title)
+        colors = ["#3274A1", "#E1812C", "#3A923A", "#C03D3E"]
+        plt.bar(data["a"], data["b"], color=colors, capsize=5)
+        for idx, row in data.iterrows():
+            plt.text(idx, row["b"] + 0.01, f"{row['b']:.2f}", ha="center", va="bottom")
+        plt.suptitle(title, y=0.98)
+        plt.xticks(rotation=45)
+        plt.ylim(0, 1)
+        plt.tight_layout()
         plt.savefig(file_name)


@@ -201,7 +206,7 @@ def main(
     final_results_df = pd.DataFrame(final_results)

     Plotter.change_fs(20)
-    plot_data = final_results_df.drop(["max. accuracy", "avg. accuracy"], axis=0).T
+    plot_data = final_results_df.drop(["Max Accuracy", "Avg Accuracy"], axis=0).T
     plot_data = plot_data.reset_index().melt("index", var_name="a", value_name="b")
     Plotter.plot_data(plot_data, "./comparison_plot.png", title)

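The rewritten `plot_data` drops the grouped seaborn barplot in favor of plain matplotlib bars with a per-bar value label and a fixed [0, 1] y-range (every remaining metric is a rate or a correlation). A self-contained sketch of the same pattern on made-up data; the method names and scores below are illustrative, not benchmark results:

```python
import matplotlib.pyplot as plt
import pandas as pd

# Toy stand-in for the melted benchmark table ("a" = method, "b" = score).
data = pd.DataFrame({"a": ["baseline", "RD-Agent"], "b": [0.42, 0.67]})

plt.figure(figsize=(10, 10))
plt.ylabel("Value")
colors = ["#3274A1", "#E1812C", "#3A923A", "#C03D3E"]
plt.bar(data["a"], data["b"], color=colors[: len(data)])
for idx, row in data.iterrows():
    # Label each bar with its value, just above the bar top.
    plt.text(idx, row["b"] + 0.01, f"{row['b']:.2f}", ha="center", va="bottom")
plt.suptitle("Benchmark comparison", y=0.98)
plt.xticks(rotation=45)
plt.ylim(0, 1)  # all plotted metrics live in [0, 1]
plt.tight_layout()
plt.savefig("./comparison_plot.png")
```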
3 changes: 2 additions & 1 deletion rdagent/app/kaggle/loop.py
@@ -1,5 +1,6 @@
 import subprocess
 from collections import defaultdict
+from concurrent.futures import TimeoutError
 from typing import Any

 import fire
@@ -115,7 +116,7 @@ def running(self, prev_out: dict[str, Any]):

         return exp

-    skip_loop_error = (ModelEmptyError, FactorEmptyError)
+    skip_loop_error = (ModelEmptyError, FactorEmptyError, TimeoutError)


 def main(path=None, step_n=None, competition=None):
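`skip_loop_error` looks to be the tuple of exception types the R&D loop treats as skippable, so adding `concurrent.futures.TimeoutError` lets a timed-out Kaggle step skip the current iteration instead of killing the whole run. A hedged sketch of that pattern; the runner below is illustrative, only the `skip_loop_error` tuple comes from this diff:

```python
from concurrent.futures import TimeoutError


class ModelEmptyError(Exception): ...  # stand-ins for rdagent's real exceptions
class FactorEmptyError(Exception): ...


skip_loop_error = (ModelEmptyError, FactorEmptyError, TimeoutError)


def run_loop(steps):
    for i, step in enumerate(steps):
        try:
            step()
        except skip_loop_error as e:
            # A timeout (or empty model/factor) skips this iteration only.
            print(f"step {i} skipped: {e!r}")
```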
13 changes: 9 additions & 4 deletions rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
@@ -8,6 +8,7 @@
 import pandas as pd
 from jinja2 import Environment, StrictUndefined

+from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS
 from rdagent.components.coder.factor_coder.CoSTEER.evolvable_subjects import (
     FactorEvolvingItem,
 )
@@ -92,7 +93,11 @@ def evaluate(
             .from_string(evaluate_prompts["evaluator_code_feedback_v1_system"])
             .render(
                 scenario=(
-                    self.scen.get_scenario_all_desc(target_task)
+                    self.scen.get_scenario_all_desc(
+                        target_task,
+                        filtered_tag="feature",
+                        simple_background=FACTOR_IMPLEMENT_SETTINGS.simple_background,
+                    )
                     if self.scen is not None
                     else "No scenario description."
                 )
@@ -190,15 +195,15 @@ def evaluate(
             )
         buffer = io.StringIO()
         gen_df.info(buf=buffer)
-        gen_df_info_str = f"The use is currently working on a feature related task.\nThe output dataframe info is:\n{buffer.getvalue()}"
+        gen_df_info_str = f"The user is currently working on a feature related task.\nThe output dataframe info is:\n{buffer.getvalue()}"
         system_prompt = (
             Environment(undefined=StrictUndefined)
             .from_string(
                 evaluate_prompts["evaluator_output_format_system"],
             )
             .render(
                 scenario=(
-                    self.scen.get_scenario_all_desc(implementation.target_task)
+                    self.scen.get_scenario_all_desc(implementation.target_task, filtered_tag="feature")
                     if self.scen is not None
                     else "No scenario description."
                 )
@@ -512,7 +517,7 @@ def evaluate(
             .from_string(evaluate_prompts["evaluator_final_decision_v1_system"])
             .render(
                 scenario=(
-                    self.scen.get_scenario_all_desc(target_task)
+                    self.scen.get_scenario_all_desc(target_task, filtered_tag="feature")
                     if self.scen is not None
                     else "No scenario description."
                 )
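All three evaluator prompts now pass `filtered_tag="feature"` to `get_scenario_all_desc`, and the code-feedback prompt additionally forwards the new `simple_background` setting, so the scenario description is trimmed to the sections relevant to the current task, cutting prompt tokens. A sketch of what such a method might look like; the attribute names and filtering internals are assumptions, only the parameters appear in this diff:

```python
class Scenario:
    def get_scenario_all_desc(self, task=None, filtered_tag=None, simple_background=False) -> str:
        """Compose the scenario description that gets rendered into an LLM prompt."""
        if simple_background:
            # Cheap short-form background for high-volume feedback calls.
            return self.background_brief  # hypothetical attribute
        parts = [self.background]  # hypothetical attribute
        # Keep only sections tagged for this task ("feature", a model type, ...).
        for tag, desc in self.tagged_sections.items():  # hypothetical attribute
            if filtered_tag is None or tag == filtered_tag:
                parts.append(desc)
        return "\n".join(parts)
```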
@@ -234,7 +234,7 @@ def implement_one_factor(
                 implement_prompts["evolving_strategy_factor_implementation_v1_system"],
             )
             .render(
-                scenario=self.scen.get_scenario_all_desc(target_task),
+                scenario=self.scen.get_scenario_all_desc(target_task, filtered_tag="feature"),
                 queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
             )
         )
3 changes: 3 additions & 0 deletions rdagent/components/coder/factor_coder/config.py
@@ -35,6 +35,9 @@ class Config:
     v2_error_summary: bool = False
     v2_knowledge_sampler: float = 1.0

+    simple_background: bool = False
+    """Whether to use simple background information for code feedback"""
+
     file_based_execution_timeout: int = 120
     """Timeout in seconds for each factor implementation execution"""

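`simple_background` defaults to `False`, so nothing changes unless a user opts in. Assuming the settings object can be mutated at runtime (how this `Config` is bound to environment variables is not shown in the hunk), enabling it could look like:

```python
from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS

# Trade scenario detail for fewer prompt tokens in code-feedback calls.
FACTOR_IMPLEMENT_SETTINGS.simple_background = True
```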
4 changes: 2 additions & 2 deletions rdagent/components/coder/model_coder/CoSTEER/evaluators.py
@@ -83,7 +83,7 @@ def evaluate(
             .from_string(evaluate_prompts["evaluator_code_feedback"]["system"])
             .render(
                 scenario=(
-                    self.scen.get_scenario_all_desc(target_task)
+                    self.scen.get_scenario_all_desc(target_task, filtered_tag=target_task.model_type)
                     if self.scen is not None
                     else "No scenario description."
                 )
@@ -145,7 +145,7 @@ def evaluate(
             .from_string(evaluate_prompts["evaluator_final_feedback"]["system"])
             .render(
                 scenario=(
-                    self.scen.get_scenario_all_desc(target_task)
+                    self.scen.get_scenario_all_desc(target_task, filtered_tag=target_task.model_type)
                     if self.scen is not None
                     else "No scenario description."
                 )
@@ -76,7 +76,7 @@ def implement_one_model(
                 coder_prompts["evolving_strategy_model_coder"]["system"],
             )
             .render(
-                scenario=self.scen.get_scenario_all_desc(),
+                scenario=self.scen.get_scenario_all_desc(filtered_tag=target_task.model_type),
                 queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
                 current_code=current_code,
             )
@@ -13,9 +13,11 @@ valid_y = pd.Series(np.random.randint(0, 2, 8))

 model = fit(train_X, train_y, valid_X, valid_y)
 execution_model_output = predict(model, valid_X)
+
 if isinstance(execution_model_output, torch.Tensor):
     execution_model_output = execution_model_output.cpu().detach().numpy()

+
 execution_feedback_str = f"Execution successful, output numpy ndarray shape: {execution_model_output.shape}"

 pickle.dump(execution_model_output, open("execution_model_output.pkl", "wb"))
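The execution template guards against model code that returns a `torch.Tensor` instead of a NumPy array: `.cpu()` moves the tensor off the GPU, `.detach()` drops it from the autograd graph, and `.numpy()` converts it. A minimal standalone illustration of the same guard:

```python
import numpy as np
import torch


def to_numpy(output):
    # Accept either a torch.Tensor or an ndarray from user-written predict().
    if isinstance(output, torch.Tensor):
        output = output.cpu().detach().numpy()
    return output


print(to_numpy(torch.ones(8, 1)).shape)  # (8, 1)
print(to_numpy(np.zeros((8, 1))).shape)  # (8, 1)
```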
@@ -4,7 +4,7 @@

 from jinja2 import Environment, StrictUndefined

-from rdagent.components.coder.factor_coder.factor import FactorExperiment
+from rdagent.core.experiment import Experiment
 from rdagent.core.prompts import Prompts
 from rdagent.core.proposal import (
     Hypothesis,
@@ -18,10 +18,8 @@
 prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml")


-FactorHypothesis = Hypothesis
-
-
-class FactorHypothesisGen(HypothesisGen):
+class LLMHypothesisGen(HypothesisGen):
     def __init__(self, scen: Scenario):
         super().__init__(scen)
@@ -30,17 +28,17 @@ def __init__(self, scen: Scenario):
     def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: ...

     @abstractmethod
-    def convert_response(self, response: str) -> FactorHypothesis: ...
+    def convert_response(self, response: str) -> Hypothesis: ...

-    def gen(self, trace: Trace) -> FactorHypothesis:
+    def gen(self, trace: Trace) -> Hypothesis:
         context_dict, json_flag = self.prepare_context(trace)

         system_prompt = (
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["hypothesis_gen"]["system_prompt"])
             .render(
-                targets="factors",
-                scenario=self.scen.get_scenario_all_desc(),
+                targets=self.targets,
+                scenario=self.scen.get_scenario_all_desc(filtered_tag="hypothesis_and_experiment"),
                 hypothesis_output_format=context_dict["hypothesis_output_format"],
                 hypothesis_specification=context_dict["hypothesis_specification"],
             )
@@ -49,7 +47,7 @@ def gen(self, trace: Trace) -> FactorHypothesis:
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["hypothesis_gen"]["user_prompt"])
             .render(
-                targets="factors",
+                targets=self.targets,
                 hypothesis_and_feedback=context_dict["hypothesis_and_feedback"],
                 RAG=context_dict["RAG"],
             )
@@ -62,29 +60,47 @@ def gen(self, trace: Trace) -> FactorHypothesis:
         return hypothesis


-class FactorHypothesis2Experiment(Hypothesis2Experiment[FactorExperiment]):
+class FactorHypothesisGen(LLMHypothesisGen):
+    def __init__(self, scen: Scenario):
+        super().__init__(scen)
+        self.targets = "factors"
+
+
+class ModelHypothesisGen(LLMHypothesisGen):
+    def __init__(self, scen: Scenario):
+        super().__init__(scen)
+        self.targets = "model tuning"
+
+
+class FactorAndModelHypothesisGen(FactorHypothesisGen):
+    def __init__(self, scen: Scenario):
+        super().__init__(scen)
+        self.targets = "feature engineering and model building"
+
+
+class LLMHypothesis2Experiment(Hypothesis2Experiment[Experiment]):
     @abstractmethod
     def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, bool]: ...

     @abstractmethod
-    def convert_response(self, response: str, trace: Trace) -> FactorExperiment: ...
+    def convert_response(self, response: str, trace: Trace) -> Experiment: ...

-    def convert(self, hypothesis: Hypothesis, trace: Trace) -> FactorExperiment:
+    def convert(self, hypothesis: Hypothesis, trace: Trace) -> Experiment:
         context, json_flag = self.prepare_context(hypothesis, trace)
         system_prompt = (
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["hypothesis2experiment"]["system_prompt"])
             .render(
-                targets="factors",
-                scenario=trace.scen.get_scenario_all_desc(),
+                targets=self.targets,
+                scenario=trace.scen.get_scenario_all_desc(filtered_tag="hypothesis_and_experiment"),
                 experiment_output_format=context["experiment_output_format"],
             )
         )
         user_prompt = (
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["hypothesis2experiment"]["user_prompt"])
             .render(
-                targets="factors",
+                targets=self.targets,
                 target_hypothesis=context["target_hypothesis"],
                 hypothesis_and_feedback=context["hypothesis_and_feedback"],
                 target_list=context["target_list"],
@@ -95,3 +111,21 @@ def convert(self, hypothesis: Hypothesis, trace: Trace) -> FactorExperiment:
         resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag)

         return self.convert_response(resp, trace)
+
+
+class FactorHypothesis2Experiment(LLMHypothesis2Experiment):
+    def __init__(self):
+        super().__init__()
+        self.targets = "factors"
+
+
+class ModelHypothesis2Experiment(LLMHypothesis2Experiment):
+    def __init__(self):
+        super().__init__()
+        self.targets = "model tuning"
+
+
+class FactorAndModelHypothesis2Experiment(LLMHypothesis2Experiment):
+    def __init__(self):
+        super().__init__()
+        self.targets = "feature engineering and model building"