Commit e5e873e

added usage recipe

1 parent ef06d44

1 file changed: 44 additions, 0 deletions. New file contents:
defaults: ../../sft.yaml
sft:
  max_num_steps: 350
  val_period: 20
  val_global_batch_size: 128
  val_micro_batch_size: 2
checkpointing:
  checkpoint_dir: results/sft-tmblog-llama3.1-8b-lora-megatron
  save_period: 20
policy:
  model_name: meta-llama/Llama-3.1-8B
  tokenizer:
    name: meta-llama/Llama-3.1-8B-Instruct
    chat_template: default
  dtensor_cfg:
    enabled: false
  megatron_cfg:
    enabled: true
    lora_cfg:
      enabled: true
      dim: 128
      alpha: 128
  optimizer:
    kwargs:
      lr: 2.0e-05
      weight_decay: 0.01
      eps: 1.0e-08
  train_global_batch_size: 128
  max_total_sequence_length: 4096
  make_sequence_length_divisible_by: 2
data:
  dataset_name: tulu3_sft_mixture
  add_generation_prompt: true
  seed: 42
logger:
  log_dir: logs/sft-tmblog-llama3.1-8b
  tensorboard_enabled: false
  wandb:
    project: nemo-rl
    name: sft-tmblog-llama3.1-8b
  tensorboard:
    log_dir: tb_logs-sft-dev-tulu3
cluster:
  gpus_per_node: 8
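
For reference, a recipe override like this one is typically consumed by NeMo RL's SFT entry point. A minimal launch sketch, assuming the file is saved at a hypothetical path recipes/sft-tmblog-llama3.1-8b-lora-megatron.yaml (the commit does not show the filename) and using the repository's examples/run_sft.py script; the logger.wandb_enabled override is an illustrative key=value pair, not part of the committed file:

    # Run LoRA SFT with the recipe above on a single 8-GPU node.
    # The config path is hypothetical; point it at wherever this file lives.
    uv run python examples/run_sft.py \
        --config recipes/sft-tmblog-llama3.1-8b-lora-megatron.yaml \
        logger.wandb_enabled=true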
