Enabling automation of experiments running v1.0 #469

Open

wants to merge 10 commits into base: main
12 changes: 10 additions & 2 deletions .gitignore
@@ -117,6 +117,7 @@ venv/
ENV/
env.bak/
venv.bak/
.huaxia_env
Contributor:

What is this for? Is it necessary?

Collaborator Author:

Thanks for the feedback! Was about to hide this.

# Spyder project settings
.spyderproject
@@ -151,7 +152,7 @@ reports/
# git_ignore_folder
git_ignore_folder/

#cache
# cache
*cache*/
*cache.json

@@ -169,4 +170,11 @@ mlruns/

# shell script
*.out
*.sh

# Logs
*.log
logs/
log/

# Ignore results directory
RD-Agent/rdagent/scenarios/kaggle/automated_evaluation/results/
136 changes: 136 additions & 0 deletions rdagent/scenarios/kaggle/automated_evaluation/eval.sh
@@ -0,0 +1,136 @@
#!/bin/bash

Contributor:

What is this file for?

# Comments
cat << "EOF" > /dev/null
Experiment Setup Types:
1. DS-Agent Mini-Case
2. RD-Agent Basic
3. RD-Agent Pro
4. RD-Agent Max

Each setup has specific configurations for:
- base_model (4o|mini|4o)
- rag_param (No|Simple|Advanced)
- if_MAB (True|False)
- if_feature_selection (True|False)
- if_hypothesis_proposal (True|False)
EOF

# Get current time and script directory
SCRIPT_PATH="$(realpath "$0")"
SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
current_time=$(date +"%Y%m%d_%H%M%S")
export SCRIPT_DIR
export current_time

# Parse command line arguments
PARALLEL=1
CONF_PATH=./
COMPETITION=""
SETUP_TYPE=""

while getopts ":sc:k:t:" opt; do
    case $opt in
        s)
            echo "Disable parallel running (run experiments serially)" >&2
            PARALLEL=0
            ;;
        c)
            echo "Setting conf path $OPTARG" >&2
            CONF_PATH=$OPTARG
            ;;
        k)
            echo "Setting Kaggle competition $OPTARG" >&2
            COMPETITION=$OPTARG
            ;;
        t)
            echo "Setting setup type $OPTARG" >&2
            SETUP_TYPE=$OPTARG
            ;;
        \?)
            echo "Invalid option: -$OPTARG" >&2
            exit 1
            ;;
    esac
done

# Validate required parameters
if [ -z "$COMPETITION" ] || [ -z "$SETUP_TYPE" ]; then
    echo "Error: Competition (-k) and setup type (-t) are required"
    exit 1
fi

# Create necessary directories
mkdir -p "${SCRIPT_DIR}/results/${current_time}"
mkdir -p "${SCRIPT_DIR}/logs/${current_time}"

# Configure experiment based on setup type
configure_experiment() {
    local setup=$1
    case $setup in
        "mini-case")
            echo "if_using_vector_rag=True" > "${SCRIPT_DIR}/override.env"
            echo "if_using_graph_rag=False" >> "${SCRIPT_DIR}/override.env"
            echo "if_action_choosing_based_on_UCB=True" >> "${SCRIPT_DIR}/override.env"
            echo "model_feature_selection_coder=True" >> "${SCRIPT_DIR}/override.env"
            echo "hypothesis_gen=False" >> "${SCRIPT_DIR}/override.env"
            ;;
        "basic")
            echo "if_using_vector_rag=False" > "${SCRIPT_DIR}/override.env"
            echo "if_using_graph_rag=False" >> "${SCRIPT_DIR}/override.env"
            echo "if_action_choosing_based_on_UCB=False" >> "${SCRIPT_DIR}/override.env"
            echo "model_feature_selection_coder=True" >> "${SCRIPT_DIR}/override.env"
            echo "hypothesis_gen=True" >> "${SCRIPT_DIR}/override.env"
            ;;
        "pro")
            echo "if_using_vector_rag=True" > "${SCRIPT_DIR}/override.env"
            echo "if_using_graph_rag=False" >> "${SCRIPT_DIR}/override.env"
            echo "if_action_choosing_based_on_UCB=True" >> "${SCRIPT_DIR}/override.env"
            echo "model_feature_selection_coder=True" >> "${SCRIPT_DIR}/override.env"
            echo "hypothesis_gen=True" >> "${SCRIPT_DIR}/override.env"
            ;;
        "max")
            echo "if_using_vector_rag=True" > "${SCRIPT_DIR}/override.env"
            echo "if_using_graph_rag=True" >> "${SCRIPT_DIR}/override.env"
            echo "if_action_choosing_based_on_UCB=True" >> "${SCRIPT_DIR}/override.env"
            echo "model_feature_selection_coder=True" >> "${SCRIPT_DIR}/override.env"
            echo "hypothesis_gen=True" >> "${SCRIPT_DIR}/override.env"
            ;;
        *)
            echo "Unknown setup type: $setup" >&2
            exit 1
            ;;
    esac
}

# Execute experiment
run_experiment() {
    local setup_type=$1
    local competition=$2

    configure_experiment "$setup_type"

    # Run the main experiment loop
    python -m rdagent.app.kaggle.loop \
        --competition "$competition" \
        --setup "$setup_type" \
        --result_path "${SCRIPT_DIR}/results/${current_time}/result.json" \
        >> "${SCRIPT_DIR}/logs/${current_time}/experiment.log" 2>&1

    # Store experiment setup and results
    cat > "${SCRIPT_DIR}/results/${current_time}/experiment_info.json" << EOF
{
    "setup": {
        "competition": "$competition",
        "setup_type": "$setup_type",
        "timestamp": "$current_time"
    },
    "results": $(cat "${SCRIPT_DIR}/results/${current_time}/result.json")
}
EOF
}

# Remove the temporary override.env whenever the script exits
trap 'rm -f "${SCRIPT_DIR}/override.env"' EXIT

# Run the experiment
run_experiment "$SETUP_TYPE" "$COMPETITION"

echo "Experiment completed. Results are stored in ${SCRIPT_DIR}/results/${current_time}"
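Each branch of `configure_experiment` rewrites `override.env` with one ablation preset. A minimal standalone sketch of that pattern, writing the "pro" preset to a temporary directory (the temp directory and the `write_preset` helper are illustrative, not part of the PR):

```shell
# Illustrative sketch: materialize the "pro" preset into an override.env
# file, the way configure_experiment does (output directory is a temp dir).
OUT_DIR="$(mktemp -d)"

write_preset() {
    case "$1" in
        "pro")
            echo "if_using_vector_rag=True" > "$OUT_DIR/override.env"
            echo "if_using_graph_rag=False" >> "$OUT_DIR/override.env"
            echo "if_action_choosing_based_on_UCB=True" >> "$OUT_DIR/override.env"
            echo "model_feature_selection_coder=True" >> "$OUT_DIR/override.env"
            echo "hypothesis_gen=True" >> "$OUT_DIR/override.env"
            ;;
    esac
}

write_preset pro
cat "$OUT_DIR/override.env"
```

The first `echo` truncates the file with `>`; the rest append with `>>`, so re-running a preset always starts from a clean file.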

@@ -0,0 +1,8 @@
{
Contributor:

Why do you submit this file?

Collaborator Author:

I thought since this was not to be merged I could show how the json is organised for now.

I will hide it in the next commit.
"setup": {
"competition": "sf-crime",
"setup_type": "mini-case",
"timestamp": "20241107_051618"
},
"results":
}
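The `run_experiment` heredoc in eval.sh splices `result.json` into this wrapper via command substitution; note that an empty `"results"` value, as in the sample above, is not valid JSON. A small sketch of the same assembly, checked with Python's `json` module (the temp directory and the dummy `score` value are assumptions):

```shell
# Sketch: assemble an experiment_info.json the way eval.sh does, then
# verify it parses as JSON (temp dir and dummy result are assumptions).
TMP="$(mktemp -d)"
echo '{"score": 0.42}' > "$TMP/result.json"

# Unquoted EOF lets $(cat ...) expand inside the heredoc.
cat > "$TMP/experiment_info.json" << EOF
{
    "setup": {
        "competition": "sf-crime",
        "setup_type": "mini-case",
        "timestamp": "20241107_051618"
    },
    "results": $(cat "$TMP/result.json")
}
EOF

python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$TMP/experiment_info.json" \
    && echo "valid JSON"
```

If `result.json` is missing or empty, the substitution produces the invalid `"results":` shown above, so the experiment run must succeed before this file is assembled.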
9 changes: 9 additions & 0 deletions scripts/exp/ablation/README.md
@@ -0,0 +1,9 @@
# Introduction

| name | .env | desc |
| -- | -- | -- |
| full | full.env | enable all features |
| mini-case | mini-case.env | enable the mini-case setup |



5 changes: 5 additions & 0 deletions scripts/exp/ablation/env/basic.env
@@ -0,0 +1,5 @@
if_using_vector_rag=False
Contributor:

The environment variable names should be capitalized.

if_using_graph_rag=False
if_action_choosing_based_on_UCB=False
model_feature_selection_coder=True
hypothesis_gen=True
1 change: 1 addition & 0 deletions scripts/exp/ablation/env/full.env
@@ -0,0 +1 @@

5 changes: 5 additions & 0 deletions scripts/exp/ablation/env/max.env
@@ -0,0 +1,5 @@
if_using_vector_rag=True
if_using_graph_rag=True
if_action_choosing_based_on_UCB=True
model_feature_selection_coder=True
hypothesis_gen=True
5 changes: 5 additions & 0 deletions scripts/exp/ablation/env/mini-case.env
@@ -0,0 +1,5 @@
if_using_vector_rag=True
if_using_graph_rag=False
if_action_choosing_based_on_UCB=True
model_feature_selection_coder=True
hypothesis_gen=False
5 changes: 5 additions & 0 deletions scripts/exp/ablation/env/pro.env
@@ -0,0 +1,5 @@
if_using_vector_rag=True
if_using_graph_rag=False
if_action_choosing_based_on_UCB=True
model_feature_selection_coder=True
hypothesis_gen=True
3 changes: 3 additions & 0 deletions scripts/exp/tools/README.md
@@ -0,0 +1,3 @@
The tools in this directory provide the following general features:
- collecting envs and run each
- collect results and generate summary
68 changes: 68 additions & 0 deletions scripts/exp/tools/collect.py
@@ -0,0 +1,68 @@
import json
import os
from datetime import datetime


def collect_results(dir_path) -> list[dict]:
    summary = []
    for root, _, files in os.walk(dir_path):
        for file in files:
            if file.endswith("_result.json"):
                config_name = file.replace("_result.json", "")
                with open(os.path.join(root, file), "r") as f:
                    data = json.load(f)
                # Extract both CV and Kaggle submission results
                summary.append({
                    "config": config_name,
                    "cv_results": data.get("cv_score"),
                    "kaggle_score": data.get("kaggle_score"),
                    "trace": data.get("trace", {}),
                })
    return summary


def generate_summary(results, output_path):
    summary = {
        "configs": {},
        "best_cv_result": {"config": None, "score": None},
        "best_kaggle_result": {"config": None, "score": None},
        "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
    }

    for result in results:
        config = result["config"]
        metrics = {
            "cv_score": result["cv_results"],
            "kaggle_score": result["kaggle_score"],
            "iterations": len(result["trace"].get("steps", [])),
            "best_model": result["trace"].get("best_model"),
        }
        summary["configs"][config] = metrics

        # Update best CV result
        if metrics["cv_score"] is not None and (
            summary["best_cv_result"]["score"] is None
            or metrics["cv_score"] > summary["best_cv_result"]["score"]
        ):
            summary["best_cv_result"].update({"config": config, "score": metrics["cv_score"]})

        # Update best Kaggle result
        if metrics["kaggle_score"] is not None and (
            summary["best_kaggle_result"]["score"] is None
            or metrics["kaggle_score"] > summary["best_kaggle_result"]["score"]
        ):
            summary["best_kaggle_result"].update({"config": config, "score": metrics["kaggle_score"]})

    with open(output_path, "w") as f:
        json.dump(summary, f, indent=4)


if __name__ == "__main__":
    exp_dir = os.getenv("EXP_DIR")
    if exp_dir is None:
        raise SystemExit("EXP_DIR environment variable must be set")
    result_dir = os.path.join(exp_dir, "results")
    results = collect_results(result_dir)
    summary_path = os.path.join(result_dir, "summary.json")
    generate_summary(results, summary_path)
    print(f"Summary generated successfully at {summary_path}")
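`collect_results` walks a results directory looking for files named `<config>_result.json`. A sketch of the layout it expects, fabricated in a temp directory (the config names and dummy scores are assumptions for illustration):

```shell
# Sketch of the results layout collect.py scans: one <config>_result.json
# per ablation config under $EXP_DIR/results (paths/scores are assumptions).
EXP_DIR="$(mktemp -d)"
mkdir -p "$EXP_DIR/results"

for cfg in basic pro max mini-case; do
    printf '{"cv_score": 0.5, "kaggle_score": 0.6, "trace": {}}\n' \
        > "$EXP_DIR/results/${cfg}_result.json"
done

ls "$EXP_DIR/results"
```

Against such a directory, `EXP_DIR="$EXP_DIR" python scripts/exp/tools/collect.py` would then write `results/summary.json` with per-config metrics and the best CV/Kaggle scores.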

51 changes: 51 additions & 0 deletions scripts/exp/tools/run_envs.sh
@@ -0,0 +1,51 @@
#!/bin/sh
cat << "EOF" > /dev/null
Given a directory with *.env files. Run each one.

usage for example:

1) directly run command without extra shared envs
./run_envs.sh -d <dir_to_*.envfiles> -j <number of parallel process> -- <command>

2) load shared envs `.env` before running command with different envs.
dotenv run -- ./run_envs.sh -d <dir_to_*.envfiles> -j <number of parallel process> -- <command>

EOF

# Function to display usage
usage() {
    echo "Usage: $0 -d <dir_to_*.envfiles> -j <number of parallel process> -- <command>"
    exit 1
}

# Parse command line arguments
while getopts "d:j:" opt; do
    case $opt in
        d) DIR=$OPTARG ;;
        j) JOBS=$OPTARG ;;
        *) usage ;;
    esac
done

# Shift past the parsed options to get the command
shift $((OPTIND - 1))

# Check if directory and jobs are set
if [ -z "$DIR" ] || [ -z "$JOBS" ] || [ $# -eq 0 ]; then
    usage
fi

COMMAND="$@"

# Before running commands
echo "Running experiments with following env files:"
find "$DIR" -name "*.env" -exec echo "{}" \;

# Export and run each .env file in parallel
find "$DIR" -name "*.env" | xargs -n 1 -P "$JOBS" -I {} sh -c "
    set -a
    . {}
    set +a
    $COMMAND
"
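The `set -a` / `. file` / `set +a` idiom above is what makes every variable in an env file visible to the command's environment. A minimal standalone demonstration (the temp file and variable name are just examples):

```shell
# Demonstrate the export idiom used by run_envs.sh: `set -a` marks every
# variable assigned while sourcing as exported (temp file is an example).
TMP_ENV="$(mktemp)"
echo 'if_using_vector_rag=True' > "$TMP_ENV"

set -a
. "$TMP_ENV"
set +a

# Child processes now inherit the variable:
sh -c 'echo "child sees: $if_using_vector_rag"'
```

Without `set -a`, sourcing the file would set the variable only in the current shell, and child processes launched for each experiment would not see it.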

27 changes: 27 additions & 0 deletions scripts/exp/tools/test_system.sh
@@ -0,0 +1,27 @@
#!/bin/bash

# Test directory setup
TEST_DIR="test_run"
mkdir -p "$TEST_DIR/results"
mkdir -p "$TEST_DIR/logs"

# Test 1: Environment loading verification
echo "Testing environment loading..."
./scripts/exp/tools/run_envs.sh -d scripts/exp/ablation/env -j 1 -- env | grep "if_using"

# Test 2: Run actual experiments
echo "Running experiments with different configurations..."
./scripts/exp/tools/run_envs.sh -d scripts/exp/ablation/env -j 4 -- \
    python -m rdagent.app.kaggle.loop \
    --competition "titanic" \
    --result_path "${TEST_DIR}/results/\$(basename {} .env)_result.json"

# Test 3: Result collection
echo "Collecting and analyzing results..."
EXP_DIR="$TEST_DIR" python scripts/exp/tools/collect.py

# Display results location (the cleanup below deletes it; comment it out to inspect)
echo "Test results available at: $TEST_DIR"

# Cleanup
rm -rf "$TEST_DIR"