DriCo-adapters / args.json
anonymous-24421's picture
Upload DriCo LoRA adapters
8a0e578 verified
{
"data_dir": "dataset/actor/",
"only_actor": "all",
"base_model": "Qwen/Qwen2.5-7B-Instruct",
"output_dir": "out/",
"gamma": 0.99,
"tau": 0.005,
"beta_dpo": 1,
"beta_q": 1.0,
"beta_sft": 0.5,
"num_candidates": 3,
"k_next": 3,
"actor_gen_max_new": 30,
"next_gen_max_new": 30,
"gen_temperature": 1.5,
"beta_format_penalty": 1.0,
"format_penalty_value": -4.0,
"total_steps": 200000,
"sft_warmup_steps": 2000,
"q_pretrain_steps": 2000,
"valid_every_steps": 2000,
"valid_n_batches": 20,
"eval_every_steps": 245645,
"save_every_steps": 2000,
"batch_size": 8,
"grad_accum_steps": 1,
"lr": 3e-05,
"lr_q": 0.0003,
"weight_decay": 0.01,
"warmup_ratio": 0.001,
"max_grad_norm": 1.0,
"max_length": 2048,
"use_chat_template": false,
"valid_ratio": 0.0,
"log_every": 5,
"color_log": true,
"seed": 42,
"lora_r": 16,
"lora_alpha": 32,
"lora_r_q": 16,
"lora_alpha_q": 32,
"lora_dropout": 0.05,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"fp16": false,
"bf16": true,
"gradient_checkpointing": false,
"layout": "new_env",
"eval_verbose": false,
"gpu_id": 0,
"test_prompt_dir": "prompts/test/",
"wandb": true,
"wandb_project": "overcooked-qdpo",
"wandb_run_name": "3phase-sft-q-dpo"
}