Skip to content

Commit

Permalink
⚗️ Style Transfer: score hp changes
Browse files Browse the repository at this point in the history
  • Loading branch information
simonmeoni committed Oct 18, 2024
1 parent eebebb0 commit 754acb2
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 14 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-export CUDA_VISIBLE_DEVICES=1
+export CUDA_VISIBLE_DEVICES=$1
python style_transfer/run_rb_gen.py model.name=meta-llama/Llama-3.2-3B-Instruct \
model.peft_config.target_modules='["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]' \
dataset.name=bio-datasets/mimic_style_transfer \
Expand All @@ -10,5 +10,6 @@ python style_transfer/run_rb_gen.py model.name=meta-llama/Llama-3.2-3B-Instruct
dataset.sft_dataset=null \
sft.training_args.eval_steps=30 \
score.train.train_size=0.3 \
-dpo.training_args.num_train_epochs=80 \
-dpo.percentile=70
+dpo.training_args.num_train_epochs=40 \
+dpo.percentile=70 \
+score.batch_size=64
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-export CUDA_VISIBLE_DEVICES=1
+export CUDA_VISIBLE_DEVICES=$1
python style_transfer/run_rb_gen.py model.name=meta-llama/Llama-3.2-3B-Instruct \
model.peft_config.target_modules='["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]' \
dataset.name=bio-datasets/mimic_style_transfer \
Expand All @@ -10,5 +10,6 @@ python style_transfer/run_rb_gen.py model.name=meta-llama/Llama-3.2-3B-Instruct
sft.training_args.eval_steps=30 \
score.train.train_size=0.3 \
dataset.sft_dataset.size=300 \
-dpo.training_args.num_train_epochs=80 \
-dpo.percentile=70
+dpo.training_args.num_train_epochs=40 \
+dpo.percentile=70 \
+score.batch_size=64
7 changes: 4 additions & 3 deletions lib/style-transfer/bash/experiment/rb_gen/az/llama3.2-3b.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=$1
python style_transfer/run_rb_gen.py model.name=meta-llama/Llama-3.2-3B-Instruct \
model.peft_config.target_modules='["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]' \
dataset.name=bio-datasets/mimic_style_transfer \
Expand All @@ -10,5 +10,6 @@ python style_transfer/run_rb_gen.py model.name=meta-llama/Llama-3.2-3B-Instruct
dataset.sft_dataset=null \
sft.training_args.eval_steps=30 \
score.train.train_size=0.3 \
-dpo.training_args.num_train_epochs=80 \
-dpo.percentile=70
+dpo.training_args.num_train_epochs=40 \
+dpo.percentile=70 \
+score.batch_size=64
2 changes: 1 addition & 1 deletion lib/style-transfer/bash/experiment/rb_gen/az/test-azure.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=$1
python style_transfer/run_rb_gen.py
8 changes: 4 additions & 4 deletions lib/style-transfer/configs/rb_gen/score/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ model:
model_name_or_path: "sentence-transformers/all-mpnet-base-v2"

train:
-warmup_steps: 50
-use_ground_truth: false
-epochs: 1
-train_size: 0.5
+warmup_steps: 10
+use_ground_truth: true
+epochs: 5
+train_size: 0.3
loss:
_target_: sentence_transformers.losses.ContrastiveTensionLoss
_partial_: true

0 comments on commit 754acb2

Please sign in to comment.