Released a llama2-13B model trained on a Japanese multi-turn dataset

I trained llama2-13B on a Japanese multi-turn dataset using MeZO. To support the new dataset, I modified tasks.py, templates.py, and mezo.sh. First, the ChatDataset class added to tasks.py:
# Added inside MeZO's tasks.py, where Dataset, Sample, and load_dataset are already in scope
class ChatDataset(Dataset):
    metric_name = "f1"
    generation = True

    def __init__(self, subtask=None, **kwargs) -> None:
        self.load_dataset()

    def load_dataset(self):
        dataset = load_dataset("shi3z/Japanese_Wikipedia_Conversation")
        # Hold out the first 100 conversations for validation; train on the rest
        train_examples = dataset["train"]["conversations"][100:]
        valid_examples = dataset["train"]["conversations"][:100]
        train_samples = [self.build_sample(example, idx) for idx, example in enumerate(train_examples)]
        valid_samples = [self.build_sample(example, idx) for idx, example in enumerate(valid_examples)]
        self.samples = {"train": train_samples, "valid": valid_samples}

    # For generative tasks there are no fixed candidates
    def build_sample(self, example, idx):
        # All turns except the last become the prompt; the final speaker tag
        # is left open so the model completes the last utterance
        s = ""
        for line in example[:-1]:
            s += line["from"] + ": " + line["value"] + "\n"
        lastline = example[-1]
        s += lastline["from"] + ": "
        return Sample(
            id=idx,
            data={"text": s},
            candidates=None,
            correct_candidate=[lastline["value"]],
        )

    def get_template(self, template_version=0):
        return {0: ChatTemplate}[template_version]()
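To illustrate what build_sample produces, here is a made-up conversation in the dataset's ShareGPT-style "from"/"value" format (the speaker tags are an assumption about the data):

# Hypothetical two-turn conversation in the dataset's format
example = [
    {"from": "human", "value": "富士山の高さを教えてください。"},
    {"from": "gpt", "value": "富士山の標高は3,776メートルです。"},
]

# build_sample(example, 0) yields a Sample where:
#   data["text"]      == "human: 富士山の高さを教えてください。\ngpt: "
#   correct_candidate == ["富士山の標高は3,776メートルです。"]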
For reference, the existing DROPTemplate in templates.py, which the new template follows:

class DROPTemplate(Template):
    def encode(self, sample):
        question = sample.data['question'].strip()
        context = sample.data['context']
        return f"Passage: {context}\nQuestion: {question}\nAnswer:"

    def verbalize(self, sample, candidate):
        question = sample.data['question'].strip()
        context = sample.data['context']
        answer = sample.data['answers'][0]  # there are multiple answers; the prompt only takes the first one
        return f"Passage: {context}\nQuestion: {question}\nAnswer: {answer}\n"

    def encode_sfc(self, sample):
        raise NotImplementedError

    def verbalize_sfc(self, sample, candidate):
        raise NotImplementedError
The new ChatTemplate just passes the pre-built conversation text through:

class ChatTemplate(Template):
    def encode(self, sample):
        # The full prompt was already assembled in build_sample
        return sample.data["text"]

    def verbalize(self, sample, candidate):
        # Append the gold response so the loss is computed on it
        return f'{sample.data["text"]}{candidate}\n'

    def encode_sfc(self, sample):
        raise NotImplementedError

    def verbalize_sfc(self, sample, candidate):
        raise NotImplementedError
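A rough sketch of how these pieces fit together at train/eval time (untested; method names follow MeZO's framework):

ds = ChatDataset()
template = ds.get_template()
sample = ds.samples["valid"][0]

prompt = template.encode(sample)           # ends with the open speaker tag, e.g. "...\ngpt: "
target = sample.correct_candidate[0]
full = template.verbalize(sample, target)  # prompt + gold response + "\n"
# metric_name = "f1" means generations are scored against correct_candidate with F1

Finally, the modified mezo.sh, which adds a Chat case so the task is handled as generation rather than classification: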
MODEL=${MODEL:-facebook/opt-1.3b}
MODEL_NAME=(${MODEL//\// })
MODEL_NAME="${MODEL_NAME[-1]}"
BS=${BS:-16}
LR=${LR:-1e-5}
EPS=${EPS:-1e-3}
SEED=${SEED:-0}
TRAIN=${TRAIN:-1000}
DEV=${DEV:-500}
EVAL=${EVAL:-1000}
STEPS=${STEPS:-20000}
EVAL_STEPS=${EVAL_STEPS:-4000}
MODE=${MODE:-ft}
EXTRA_ARGS=""
if [ "$MODE" == "prefix" ]; then
EXTRA_ARGS="--prefix_tuning --num_prefix 5 --no_reparam --prefix_init_by_real_act"
elif [ "$MODE" == "lora" ]; then
EXTRA_ARGS="--lora"
fi
TAG=mezo-$MODE-$STEPS-$BS-$LR-$EPS-$SEED
TASK_ARGS=""
case $TASK in
    # For Copa, ReCoRD, SQuAD, DROP, we set --train_as_classification False; for others, set this flag to True
    CB) # It has <1000 training examples. Only use 100 for dev
        DEV=100
        ;;
    Copa) # It has <1000 training examples. Only use 100 for dev
        DEV=100
        TASK_ARGS="--train_as_classification False"
        ;;
    ReCoRD)
        TASK_ARGS="--train_as_classification False"
        ;;
    DROP)
        TASK_ARGS="--train_as_classification False"
        ;;
    SQuAD)
        TASK_ARGS="--train_as_classification False"
        ;;
    Chat) # The new Japanese multi-turn task is also generative
        TASK_ARGS="--train_as_classification False"
        ;;
esac
echo $TAG
echo "BS: $BS"
echo "LR: $LR"
echo "EPS: $EPS"
echo "SEED: $SEED"
echo "TRAIN/EVAL STEPS: $STEPS/$EVAL_STEPS"
echo "MODE: $MODE"
echo "Extra args: $EXTRA_ARGS $TASK_ARGS"
python run.py \
    --model_name $MODEL \
    --task_name $TASK \
    --output_dir result/$TASK-${MODEL_NAME}-$TAG --tag $TAG --train_set_seed $SEED --num_train $TRAIN --num_dev $DEV --num_eval $EVAL --logging_steps 10 \
    --max_steps $STEPS \
    --trainer zo --load_float16 \
    --learning_rate $LR --zo_eps $EPS --per_device_train_batch_size $BS --lr_scheduler_type "constant" \
    --load_best_model_at_end --evaluation_strategy steps --save_strategy steps --save_total_limit 1 \
    --eval_steps $EVAL_STEPS --save_steps $EVAL_STEPS \
    --train_as_classification \
    $EXTRA_ARGS \
    $TASK_ARGS \
    "$@"
The command line used for training:
$ MODEL=meta-llama/Llama-2-13b-chat-hf TASK=Chat MODE=ft LR=1e-3 EPS=1e-1 nohup bash mezo.sh&
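For comparison, a LoRA run would only need MODE=lora; the learning rate and epsilon below are illustrative placeholders, not tuned values:

$ MODEL=meta-llama/Llama-2-13b-chat-hf TASK=Chat MODE=lora LR=1e-4 EPS=1e-3 nohup bash mezo.sh &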
Training took about 30 hours.
The resulting model is up on Hugging Face, and it appears to have trained reasonably well.
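A minimal way to try it with transformers might look like this (the repo id is a placeholder; substitute the actual one on Hugging Face):

# Minimal inference sketch; the model id below is a placeholder
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-name/llama2-13b-ja-chat-mezo"  # placeholder: replace with the published repo
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)

# Prompt in the same "speaker: utterance" format used during training
prompt = "human: 日本の首都はどこですか?\ngpt: "
inputs = tok(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)
print(tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))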