feat: kaggle refactor (#489)
* init trial

* Add spec info

* auto unzip mlebench prepared data for our scenario

* successfully run example

* successfully run main

* simplify load training

* extract load_from_raw_data

* split the files (still buggy)
It should stop at ~20 epochs and reach the end

* some changes

* Fix bug to run example

* (success) runs up to the feature step

* refine model and ensemble

* add metrics in ens.py

* update README & spec.md

* ens change

* fix ens bug

* Delete rdagent/scenarios/kaggle/tpl_ex/aerial-cactus-identification/train.py

* add template_path in KG_conf

* fix test kaggle

* CI

* make test_import not check kaggle template codes

---------

Co-authored-by: Bowen Xian <[email protected]>
you-n-g and XianBW authored Nov 20, 2024
1 parent a9caab7 commit 1b057d0
Showing 184 changed files with 588 additions and 11 deletions.
3 changes: 3 additions & 0 deletions rdagent/app/kaggle/conf.py
@@ -44,6 +44,9 @@ class Config:
     competition: str = ""
     """Kaggle competition name, e.g., 'sf-crime'"""
 
+    template_path: str = "rdagent/scenarios/kaggle/experiment/templates"
+    """Kaggle competition base templates path"""
+
     local_data_path: str = ""
     """Folder storing Kaggle competition data"""
8 changes: 6 additions & 2 deletions rdagent/scenarios/kaggle/experiment/kaggle_experiment.py
@@ -38,7 +38,9 @@ class KGModelExperiment(ModelExperiment[ModelTask, KGFBWorkspace, ModelFBWorkspace]):
     def __init__(self, *args, source_feature_size: int = None, **kwargs) -> None:
         super().__init__(*args, **kwargs)
         self.experiment_workspace = KGFBWorkspace(
-            template_folder_path=Path(__file__).parent / f"{KAGGLE_IMPLEMENT_SETTING.competition}_template"
+            template_folder_path=Path(__file__).resolve()
+            / Path(KAGGLE_IMPLEMENT_SETTING.template_path).resolve()
+            / KAGGLE_IMPLEMENT_SETTING.competition
         )
         if len(self.based_experiments) > 0:
             self.experiment_workspace.inject_code(**self.based_experiments[-1].experiment_workspace.code_dict)
@@ -62,7 +64,9 @@ class KGFactorExperiment(FeatureExperiment[FactorTask, KGFBWorkspace, FactorFBWorkspace]):
     def __init__(self, *args, source_feature_size: int = None, **kwargs) -> None:
         super().__init__(*args, **kwargs)
         self.experiment_workspace = KGFBWorkspace(
-            template_folder_path=Path(__file__).parent / f"{KAGGLE_IMPLEMENT_SETTING.competition}_template"
+            template_folder_path=Path(__file__).resolve()
+            / Path(KAGGLE_IMPLEMENT_SETTING.template_path).resolve()
+            / KAGGLE_IMPLEMENT_SETTING.competition
         )
         if len(self.based_experiments) > 0:
             self.experiment_workspace.inject_code(**self.based_experiments[-1].experiment_workspace.code_dict)
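A note on the new path arithmetic: when the right-hand operand of `/` is an absolute path, `pathlib` discards everything to its left, so `Path(__file__).resolve() / Path(template_path).resolve()` reduces to the resolved `template_path`, i.e. the relative default is resolved against the current working directory. A minimal sketch of this behavior (hypothetical paths):

```python
from pathlib import Path

# pathlib ignores earlier segments once an absolute segment appears:
assert Path("/repo/rdagent/file.py") / Path("/cwd/templates") == Path("/cwd/templates")

# So the new expression is equivalent to:
#   Path(KAGGLE_IMPLEMENT_SETTING.template_path).resolve() / KAGGLE_IMPLEMENT_SETTING.competition
```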
4 changes: 1 addition & 3 deletions rdagent/scenarios/kaggle/experiment/workspace.py
@@ -26,11 +26,10 @@
 
 
 class KGFBWorkspace(FBWorkspace):
-    def __init__(self, template_folder_path: Path, *args, entry="python train.py", **kwargs) -> None:
+    def __init__(self, template_folder_path: Path, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
         self.inject_code_from_folder(template_folder_path)
         self.data_description: List[Tuple[str, int]] = []
-        self.entry = entry  # this is for debugging (you may want to change it into `sleep 1000`)
 
     @property
     def model_description(self) -> dict[str, str]:
@@ -86,7 +85,6 @@ def execute(self, run_env: dict = {}, *args, **kwargs) -> str:
 
         execute_log = kgde.run(
             local_path=str(self.workspace_path),
-            entry=self.entry,
             env=run_env,
             running_extra_volume=running_extra_volume,
         )
2 changes: 1 addition & 1 deletion rdagent/scenarios/kaggle/kaggle_crawler.py
@@ -122,7 +122,7 @@ def download_data(competition: str, local_path: str = KAGGLE_IMPLEMENT_SETTING.local_data_path
         f"/bin/sh -c 'cp -r ./zip_files/{competition}/prepared/public/* ./{competition}'", local_path=local_path
     )
     mleb_env.run(
-        f"/bin/sh -c 'cp -r ./zip_files/{competition}/prepared/private/test.csv ./{competition}/valid.csv'",
+        f'/bin/sh -c \'for zip_file in ./{competition}/*.zip; do dir_name="${{zip_file%.zip}}"; mkdir -p "$dir_name"; unzip -o "$zip_file" -d "$dir_name"; done\'',
         local_path=local_path,
     )
     # NOTE:
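The new shell one-liner extracts every prepared `*.zip` into a directory named after the archive. A minimal Python equivalent of that loop, for illustration only (the repository actually runs the shell command inside the mle-bench environment):

```python
import zipfile
from pathlib import Path


def unzip_prepared_data(competition_dir: Path) -> None:
    """Extract every `<name>.zip` in the competition folder into a `<name>/` directory."""
    for zip_path in competition_dir.glob("*.zip"):
        target = zip_path.with_suffix("")  # "${zip_file%.zip}": strip the .zip suffix
        target.mkdir(parents=True, exist_ok=True)  # mkdir -p "$dir_name"
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(target)  # unzip -o "$zip_file" -d "$dir_name" (overwrites)
```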
43 changes: 43 additions & 0 deletions rdagent/scenarios/kaggle/tpl_ex/aerial-cactus-identification/README.md
@@ -0,0 +1,43 @@
# Motivation of the example
We use a runnable, concrete example to demonstrate what a project should look like after being generated by a large language model.


# Content example and the workflow

> NOTE: the `README.md` itself is not generated by the LLM; the remaining content is generated by the LLM.

## Extra input information beyond the competition information

[[../meta/spec.md]]
- [ ] TODO

## Step0: Specification generation

- Generate the specification
  [[spec.md]]
  - [ ] TODO: perfect
- Generate the data-loading code
  [[load_data.py]]

- Why do we merge these two steps together?
  - Successfully running `load_data.py` is a form of verification of `spec.md`.


## Step1: Write the feature engineering code
- We can generate files like [[feat01.py]] that match the pattern `feat.*\.py`.

## Step2: Model training
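- [[main.py]] imports a `model_workflow` function from a generated `model01.py`, which is not part of this excerpt. Below is a hypothetical minimal sketch matching the interface that [[main.py]] expects (the logistic-regression body is an illustration, not the generated model):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression


def model_workflow(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_val: np.ndarray,
    y_val: np.ndarray,
    X_test: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
    """Train one model and return probability predictions for validation and test data."""

    def flat(X: np.ndarray) -> np.ndarray:
        # Flatten (N, H, W, C) uint8 images to (N, H*W*C) floats in [0, 1]
        return X.reshape(len(X), -1).astype(np.float32) / 255.0

    # y_val is available for early stopping / model selection; unused in this sketch
    model = LogisticRegression(max_iter=1000)
    model.fit(flat(X_train), y_train)
    val_pred = model.predict_proba(flat(X_val))[:, 1]
    test_pred = model.predict_proba(flat(X_test))[:, 1]
    return val_pred, test_pred
```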


## Step3: Ensemble and decision
- Generate `ens_and_decision` (see [[ens.py]]).
- Why do we generate scores in the ensemble phase?
  - The ensemble step has tasks that overlap heavily with scoring:
    - ensembling usually checks each model's performance before combining them;
    - so an additional step to record that performance is easy to add here.

## Step4: Build workflow

[[main.py]]
55 changes: 55 additions & 0 deletions rdagent/scenarios/kaggle/tpl_ex/aerial-cactus-identification/ens.py
@@ -0,0 +1,55 @@
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score


def ens_and_decision(test_pred_l: list[np.ndarray], val_pred_l: list[np.ndarray], val_label: np.ndarray) -> np.ndarray:
    """
    Handle the following:
    1) Ensemble predictions using a weighted average, weighted by each model's validation AUROC.
    2) Make the final decision after ensembling (convert the predictions to binary form).

    Parameters
    ----------
    test_pred_l : list[np.ndarray]
        List of predictions on the test data.
    val_pred_l : list[np.ndarray]
        List of predictions on the validation data.
    val_label : np.ndarray
        True labels of the validation data.

    Returns
    -------
    np.ndarray
        Binary predictions on the test data.
    """
    # Score each model's validation predictions with AUROC
    scores = []
    for val_pred in val_pred_l:
        scores.append(roc_auc_score(val_label, val_pred))

    # Normalize the scores to get weights
    total_score = sum(scores)
    weights = [score / total_score for score in scores]

    # Weighted average of test predictions
    weighted_test_pred = np.zeros_like(test_pred_l[0])
    for weight, test_pred in zip(weights, test_pred_l):
        weighted_test_pred += weight * test_pred

    weighted_valid_pred = np.zeros_like(val_pred_l[0])
    for weight, val_pred in zip(weights, val_pred_l):
        weighted_valid_pred += weight * val_pred

    weighted_valid_pred_score = roc_auc_score(val_label, weighted_valid_pred)

    # Record per-model and ensemble scores so later steps can inspect performance
    scores_df = pd.DataFrame(
        {
            "Model": list(range(len(val_pred_l))) + ["weighted_average_ensemble"],
            "AUROC": scores + [weighted_valid_pred_score],
        }
    )
    scores_df.to_csv("scores.csv", index=False)

    pred_binary_l = [0 if value < 0.50 else 1 for value in weighted_test_pred]
    return np.array(pred_binary_l)
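A toy usage sketch with made-up numbers (both models rank the validation labels perfectly here, so their weights come out equal; `scores.csv` is written as a side effect):

```python
import numpy as np

val_label = np.array([1, 0, 1, 0])
val_pred_l = [np.array([0.9, 0.2, 0.8, 0.3]), np.array([0.7, 0.4, 0.6, 0.2])]
test_pred_l = [np.array([0.6, 0.1]), np.array([0.8, 0.3])]

# Both validation AUROCs are 1.0 -> weights 0.5 and 0.5.
# Weighted test predictions are [0.7, 0.2] -> thresholded at 0.5 -> [1, 0].
print(ens_and_decision(test_pred_l, val_pred_l, val_label))  # [1 0]
```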
55 changes: 55 additions & 0 deletions rdagent/scenarios/kaggle/tpl_ex/aerial-cactus-identification/feat01.py
@@ -0,0 +1,55 @@
import numpy as np


def feat_eng(
    X: np.ndarray,
    y: np.ndarray | None = None,
    X_fit: np.ndarray | None = None,
    y_fit: np.ndarray | None = None,
    param: object | None = None,
) -> tuple[np.ndarray, np.ndarray | None, object]:
    """
    Perform feature engineering on the input data.

    Parameters:
    - X: np.ndarray
        The input data to be transformed. A concrete example could be:
        array([[[[207, 194, 203],
                 ...,
                 [191, 183, 164],
                 [176, 168, 149],
                 [181, 173, 152]]]], dtype=uint8)
    - y: np.ndarray | None
        The target data. A concrete example could be:
        array([1, 0, 1, 0, 1, 1, ..., ])
    - X_fit: np.ndarray | None
        Data for fitting the transformation parameters.
    - y_fit: np.ndarray | None
        Target data for fitting.
    - param: object | None
        Pre-fitted parameters for transformation.

    Returns:
    - transformed_data: np.ndarray
        Transformed data.
    - transformed_target: np.ndarray | None
        Transformed target data.
    - fitted_param: object
        Fitted parameters.

    Notes:
    - Some preprocessing (e.g., data selection) is based on y.

    Typical usage:

    .. code-block:: python

        X_transformed, y_transformed, fitted_param = feat_eng(X, y, X, y)
        X_test_transformed, _, _ = feat_eng(X_test, param=fitted_param)
    """
    # This is an example of an identity feature transformation.
    # We don't change the content of the data, but we demonstrate the typical workflow of feature engineering.
    if param is None:
        # Fit transformation parameters from X_fit and y_fit (the identity transform needs none)
        pass
    # Use the fitted parameters to transform the data X, y
    return X, y, param
82 changes: 82 additions & 0 deletions rdagent/scenarios/kaggle/tpl_ex/aerial-cactus-identification/load_data.py
@@ -0,0 +1,82 @@
"""
Load competition data to uniform format
"""

import os

import numpy as np
import pandas as pd
from PIL import Image


def load_test_images(folder):
images = []
filenames = []
for filename in os.listdir(folder):
img = Image.open(os.path.join(folder, filename))
if img is not None:
images.append(np.array(img))
filenames.append(filename)
return np.array(images), filenames


def load_images_and_labels(csv_file, image_folder):
images = []
labels = []
df = pd.read_csv(csv_file)
for idx, row in df.iterrows():
img = Image.open(os.path.join(image_folder, row["id"]))
if img is not None:
images.append(np.array(img))
labels.append(row["has_cactus"])
return np.array(images), np.array(labels)


def load_from_raw_data() -> tuple[np.ndarray, np.ndarray, np.ndarray, list[str]]:
"""
load raw data from disk to get data in uniform data
Return:
X: np.array
a concrete example could be:
.. code-block:: text
array([[[[207, 194, 203],
...,
[191, 183, 164],
[176, 168, 149],
[181, 173, 152]]]], dtype=uint8)
y: np.array
a concrete example could be:
.. code-block:: python
array([1, 0, 1, 0, 1, 1, ..., ])
X_test: np.array
a concrete example is similar to `X`.
test_ids: the id representing the image. it is used to generate the submission file
a concrete example could be:
.. code-block:: python
['1398ad045aa57aee5f38e7661e9d49e8.jpg',
'0051207eb794887c619341090de84b50.jpg',
'a8202dd82c42e252bef921ada7607b6c.jpg',
'76c329ff9e3c5036b616f4e88ebba814.jpg',
...]
"""
X, y = load_images_and_labels("/kaggle/input/train.csv", "/kaggle/input/train/")

test_folder = "/kaggle/input/test/"
X_test, test_filenames = load_test_images(test_folder)
# Store filenames separately
test_ids = [os.path.basename(filename).replace(".tif", "") for filename in test_filenames]
return X, y, X_test, test_ids
37 changes: 37 additions & 0 deletions rdagent/scenarios/kaggle/tpl_ex/aerial-cactus-identification/main.py
@@ -0,0 +1,37 @@
from load_data import load_from_raw_data
from sklearn.model_selection import train_test_split

# Load data
train_images, train_labels, test_images, test_ids = load_from_raw_data()


# Feature engineering
from feat01 import feat_eng

train_images, train_labels, train_param = feat_eng(train_images, train_labels)
test_images, _, _ = feat_eng(test_images, param=train_param)


# (Cross) validation split
train_images, validation_images, train_labels, validation_labels = train_test_split(
    train_images, train_labels, test_size=0.1, random_state=42
)


# Model workflow
from model01 import model_workflow

val_pred, test_pred = model_workflow(train_images, train_labels, validation_images, validation_labels, test_images)


# Ensemble
from ens import ens_and_decision

pred_binary = ens_and_decision([test_pred], [val_pred], validation_labels)


# Save the submission file
with open("submission.csv", "w") as csv_file:
    csv_file.write("id,has_cactus\n")
    for tid, prediction in zip(test_ids, pred_binary):
        csv_file.write(f"{tid},{prediction}\n")
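The hand-written CSV loop above could equivalently be done with pandas (a sketch assuming the same two columns):

```python
import pandas as pd

pd.DataFrame({"id": test_ids, "has_cactus": pred_binary}).to_csv("submission.csv", index=False)
```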