-
Notifications
You must be signed in to change notification settings - Fork 6
/
train_and_evaluate_mcan_img_only.sh
executable file
·105 lines (76 loc) · 2.88 KB
/
train_and_evaluate_mcan_img_only.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env bash
# Interactive driver for the image-only MCAN model (configs/mcan_img_only.yml).
#
# All paths are derived from the directory the script is launched from:
#   CURRENT_DIR = launch directory
#   CODE_DIR    = parent of CURRENT_DIR (the script also cd's into it)
#   PROJECT_DIR = parent of CODE_DIR; data/ and models/hwu_models/ live here
#
# Prompts (empty answer selects the default):
#   GPU id      -> GPU_ID      (default "0 1 2 3"; space-separated list)
#   model name  -> MODEL_NAME  (default "mcan_img_only")
#
# SA: be careful about the lr in the config

# Activate the 'visdialch' environment. A failure is reported but is not
# fatal, matching the previous behaviour of carrying on regardless.
source activate visdialch ||
  echo "Warning: could not activate the 'visdialch' environment" >&2

# Common paths
export CURRENT_DIR=${PWD}
# Assign and export separately so a failing dirname is not masked by export.
PARENT_DIR="$(dirname "$CURRENT_DIR")"
export PARENT_DIR
# Everything below uses paths relative to the code directory, so a failed cd
# must stop the script instead of running from the wrong place.
cd "$PARENT_DIR" || {
  echo "Error: cannot change directory to $PARENT_DIR" >&2
  exit 1
}
export CODE_DIR=$PARENT_DIR
export CONFIG_DIR=$CODE_DIR/configs
PROJECT_DIR="$(dirname "$PARENT_DIR")"
export PROJECT_DIR
## SA: todo if config also from
export CONFIG_YML=$CONFIG_DIR/mcan_img_only.yml
export DATA_DIR=$PROJECT_DIR/data
export MODEL_DIR=$PROJECT_DIR/models/hwu_models/

# -r keeps backslashes in the answers literal.
read -r -p "Enter the GPU id (as 0/1/2): " GPU_ID
GPU_ID=${GPU_ID:-"0 1 2 3"}
read -r -p "Enter the model name: " MODEL_NAME
export MODEL_NAME=${MODEL_NAME:-mcan_img_only}

# Per-model output directory; logs and checkpoints are addressed under it.
export SAVE_MODEL_DIR=$MODEL_DIR/$MODEL_NAME
mkdir -p "$SAVE_MODEL_DIR" || {
  echo "Error: cannot create directory $SAVE_MODEL_DIR" >&2
  exit 1
}
echo "Model saved in:  $SAVE_MODEL_DIR"
echo "Running on gpus :  $GPU_ID"
# ---- Optional training phase -------------------------------------------
# Asks whether to train (default 0 = skip). When training is requested it
# also asks for:
#   finetune only?  1 (default) -> PHASE="finetuning", anything else -> "both"
#   checkpoint no.  (default 19) -> checkpoint_<no>.pth inside SAVE_MODEL_DIR
# train.py's stdout is appended to TRAIN_LOG_FILE. If train.py fails the
# script stops with its exit status, so evaluation never runs after a failed
# training run.
read -r -p "Is train: (1 - Yes, 0 - no): " IS_TRAIN
IS_TRAIN=${IS_TRAIN:-0}
if [[ "$IS_TRAIN" == 1 ]]; then
  echo "Training"
  CURRENT_DATE=$(date)
  echo "Current time : $CURRENT_DATE"
  export TRAIN_LOG_FILE=$SAVE_MODEL_DIR/train_logs_${MODEL_NAME}.txt

  read -r -p "Is finetune only: (1 - Yes, 0 - no): " IS_FINETUNE
  IS_FINETUNE=${IS_FINETUNE:-1}
  if [[ "$IS_FINETUNE" == 1 ]]; then
    export PHASE="finetuning"
  else
    export PHASE="both"
  fi

  ## SA: todo checkpointing for all
  read -r -p "Enter the checkpoint finetune number: " CHECKPOINT_FINETUNE_NUM
  CHECKPOINT_FINETUNE_NUM=${CHECKPOINT_FINETUNE_NUM:-19}
  export CHECKPOINT_FINETUNE_PATH=$SAVE_MODEL_DIR/checkpoint_${CHECKPOINT_FINETUNE_NUM}.pth

  # GPU_ID may hold several space-separated ids; split it into an array so
  # each id is passed as its own argument while every path stays quoted.
  read -r -a train_gpu_ids <<< "$GPU_ID"

  echo "Training on local"
  # provide more ids for multi-GPU execution other args...
  python train.py \
    --train-json "$DATA_DIR/visdial_1.0_train.json" \
    --val-json "$DATA_DIR/visdial_1.0_val.json" \
    --val-dense-json "$DATA_DIR/visdial_1.0_val_dense_annotations.json" \
    --train-dense-json "$DATA_DIR/visdial_1.0_train_dense_annotations.json" \
    --save-dirpath "$SAVE_MODEL_DIR" \
    --config-yml "$CONFIG_YML" \
    --validate \
    --load_finetune_pthpath "$CHECKPOINT_FINETUNE_PATH" \
    --phase "$PHASE" \
    --data_dir "$DATA_DIR" \
    --gpu-ids "${train_gpu_ids[@]}" >> "$TRAIN_LOG_FILE"
  train_status=$?
  if (( train_status != 0 )); then
    echo "Error: train.py exited with status $train_status; skipping evaluation" >&2
    exit "$train_status"
  fi
fi
# ---- Evaluation ---------------------------------------------------------
# Prompts (empty answer selects the default):
#   test checkpoint -> checkpoint_<answer>.pth in SAVE_MODEL_DIR
#                      (default "best_ndcg")
#   split           -> passed to evaluate.py as --split (default "test")
# RANKS_PATH is handed to evaluate.py as --save-ranks-path, and evaluate.py's
# stdout is appended to LOG_PATH; both names embed the split and checkpoint.
read -r -p "Enter the test checkpoint number: " CHECKPOINT_TEST_NUM
CHECKPOINT_TEST_NUM=${CHECKPOINT_TEST_NUM:-best_ndcg}
export CHECKPOINT_TEST_PATH=$SAVE_MODEL_DIR/checkpoint_${CHECKPOINT_TEST_NUM}.pth

read -r -p "Enter split type as (val or test): " SPLIT
SPLIT=${SPLIT:-"test"}
export RANKS_PATH=$SAVE_MODEL_DIR/ranks_${SPLIT}_${CHECKPOINT_TEST_NUM}.json
export LOG_PATH=$SAVE_MODEL_DIR/evaluate_${SPLIT}_${CHECKPOINT_TEST_NUM}.log

CURRENT_DATE=$(date)
CURRENT_TIME=$(date +"%T")
echo "Current time : $CURRENT_TIME $CURRENT_DATE"

# GPU_ID may hold several space-separated ids; split it into an array so
# each id is passed as its own argument while every path stays quoted.
read -r -a eval_gpu_ids <<< "$GPU_ID"

# Kept as the final command so the script's exit status is evaluate.py's.
python evaluate.py \
  --val-json "$DATA_DIR/visdial_1.0_val.json" \
  --val-dense-json "$DATA_DIR/visdial_1.0_val_dense_annotations.json" \
  --test-json "$DATA_DIR/visdial_1.0_test.json" \
  --config-yml "$CONFIG_YML" \
  --load-pthpath "$CHECKPOINT_TEST_PATH" \
  --split "$SPLIT" \
  --save-ranks-path "$RANKS_PATH" \
  --save-dirpath "$SAVE_MODEL_DIR" \
  --data_dir "$DATA_DIR" \
  --gpu-ids "${eval_gpu_ids[@]}" >> "$LOG_PATH"