Commit d513eb6 (verified) · Parent(s): 31dc204
ilacunza committed: Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+wandb/offline-run-20250402_134739-nwi5m2nq/files/output.log filter=lfs diff=lfs merge=lfs -text
+wandb/offline-run-20250402_134739-nwi5m2nq/run-nwi5m2nq.wandb filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,30 @@
+{
+  "_name_or_path": "/gpfs/projects/bsc88/text/models/instruction-tuning/models/base_models_with_special_tokens/iberianLLM_7B_xdogeCPT_34k",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.2",
+  "use_cache": true,
+  "vocab_size": 256000
+}
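
The config above describes a Llama-architecture decoder (32 layers, hidden size 4096, grouped-query attention with 8 KV heads, 256k vocabulary). A minimal sanity-check sketch with transformers — the local directory name is an assumption, not part of this commit:

```python
from transformers import AutoConfig

# Hypothetical local checkout of this repo; the path is an assumption.
config = AutoConfig.from_pretrained("./iberianLLM_7B_xdogeCPT_34k_instruct-v1.0")

# Grouped-query attention: 32 query heads share 8 KV heads, i.e. 4 query heads per KV head.
assert config.num_attention_heads // config.num_key_value_heads == 4
# head_dim * num_attention_heads recovers the hidden size: 128 * 32 = 4096.
assert config.head_dim * config.num_attention_heads == config.hidden_size
print(config.model_type, config.vocab_size)  # llama 256000
```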
generation_config.json ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.40.2"
+}
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b38716c54f507511b7d41e7e1471e5593058a6b093c901d633a0df8ac8bbf327
+size 4982973048
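
Each shard diff here is a Git LFS pointer file (spec version, sha256 oid, byte size), not the weights themselves. A sketch, assuming you have downloaded the actual shard locally, of verifying it against the pointer above:

```python
import hashlib

# Not part of the repo: verify a downloaded shard against its LFS pointer.
# The local filename is an assumption about where you saved it.
def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

print(verify_lfs_object(
    "model-00001-of-00004.safetensors",
    "b38716c54f507511b7d41e7e1471e5593058a6b093c901d633a0df8ac8bbf327",
    4982973048,
))
```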
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:232a0aa2eeff088c94f2f783057a5db561c3c027fa4453bffffcadeabe44806e
+size 4995660232
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5ec5007d9bd490602f9e5a4f217464b363279d05b8e57761927f7bed326cf76
+size 3460482936
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2219f41f755a621731c33555c7d40b82c8f9a33464a5304fc0f1bc39a7ba0707
+size 2097152128
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+{
+  "metadata": {
+    "total_size": 15536234496
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00004-of-00004.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.norm.weight": "model-00003-of-00004.safetensors"
+  }
+}
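
The index's weight_map routes each tensor name to the shard that stores it (note the split layers: layer 22's attention projections live in shard 2 while its MLP sits in shard 3, and layer 8 straddles shards 1 and 2). A sketch of resolving one tensor through the map with the safetensors library, assuming the shards have been downloaded locally:

```python
import json
from safetensors import safe_open

# Files assumed local; resolve a tensor name to its shard, then read it.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.22.self_attn.k_proj.weight"  # stored in shard 2, per the map above
shard = index["weight_map"][name]
with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)
print(shard, tuple(tensor.shape))
```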
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e38708bcb6da1791b245492580b4acc2738ffb0270490b06cd319c5aecafa61
+size 21011862
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3019afe457a8082afa56cb2983f3e7e8ead0655c3c408c13dddcb5984c13e3a
+size 4918006
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 8192,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
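
The chat_template above is ChatML-style: each turn is wrapped in <|im_start|>role ... <|im_end|>, and an assistant header is appended when a generation prompt is requested. A sketch of rendering it — the local repo path is an assumption:

```python
from transformers import AutoTokenizer

# Hypothetical local checkout of this repo.
tokenizer = AutoTokenizer.from_pretrained("./iberianLLM_7B_xdogeCPT_34k_instruct-v1.0")

messages = [{"role": "user", "content": "Hola!"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>user
# Hola!<|im_end|>
# <|im_start|>assistant
```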
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
@@ -0,0 +1,27 @@
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Configure stats pid to 1538306
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Loading settings from /home/bsc/bsc088369/.config/wandb/settings
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Loading settings from /gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1/wandb/settings
+2025-04-02 13:47:39,397 WARNING MainThread:1538306 [wandb_setup.py:_flush():76] Unknown environment variable: WANDB_CONFIG_DIR
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'root_dir': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0', 'init_timeout': '600', 'project': 'instruction-tuning', 'run_name': 'iberianLLM_7B_xdogeCPT_34k_instruct-v1.0_18503701', 'mode': 'offline'}
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'fastchat/train/train.py', 'program_abspath': '/gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1/fastchat/train/train.py', 'program': '/gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1/fastchat/train/train.py'}
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:_log_setup():520] Logging user logs to /gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0/wandb/offline-run-20250402_134739-nwi5m2nq/logs/debug.log
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:_log_setup():521] Logging internal logs to /gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0/wandb/offline-run-20250402_134739-nwi5m2nq/logs/debug-internal.log
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():560] calling init triggers
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
+config: {}
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():610] starting backend
+2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():614] setting up manager
+2025-04-02 13:47:39,398 INFO MainThread:1538306 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-04-02 13:47:39,400 INFO MainThread:1538306 [wandb_init.py:init():622] backend started and connected
+2025-04-02 13:47:39,403 INFO MainThread:1538306 [wandb_init.py:init():711] updated telemetry
+2025-04-02 13:47:39,500 INFO MainThread:1538306 [wandb_init.py:init():744] communicating run to backend with 600.0 second timeout
+2025-04-02 13:47:39,504 INFO MainThread:1538306 [wandb_init.py:init():795] starting run threads in backend
+2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_console_start():2374] atexit reg
+2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_redirect():2229] redirect: wrap_raw
+2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_redirect():2294] Wrapping output streams.
+2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_redirect():2319] Redirects installed.
+2025-04-02 13:47:40,892 INFO MainThread:1538306 [wandb_init.py:init():838] run started, returning control to user process
+2025-04-02 13:47:40,893 INFO MainThread:1538306 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 256000, 'max_position_embeddings': 8192, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/base_models_with_special_tokens/iberianLLM_7B_xdogeCPT_34k', 'transformers_version': '4.40.2', 'head_dim': 128, 'mlp_bias': False, 'model_type': 'llama', 'output_dir': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0/runs/Apr02_13-46-41_as03r3b01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1.0, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 15, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 0.01, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': 'deepspeed_configs/ds_type3_config_autombs.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': 5.0, 'optim_target_modules': None, 'cache_dir': None, 'model_max_length': 8192}
+2025-04-03 05:48:27,018 WARNING MsgRouterThr:1538306 [router.py:message_loop():77] message_loop has been closed
wandb/offline-run-20250402_134739-nwi5m2nq/files/config.yaml ADDED
@@ -0,0 +1,660 @@
+wandb_version: 1
+
+_wandb:
+  desc: null
+  value:
+    python_version: 3.9.16
+    cli_version: 0.17.0
+    framework: huggingface
+    huggingface_version: 4.40.2
+    is_jupyter_run: false
+    is_kaggle_kernel: false
+    start_time: 1743594459
+    t:
+      1:
+      - 1
+      - 11
+      - 49
+      - 51
+      - 55
+      - 71
+      - 98
+      - 105
+      2:
+      - 1
+      - 11
+      - 49
+      - 51
+      - 55
+      - 71
+      - 98
+      - 105
+      3:
+      - 4
+      - 7
+      - 23
+      - 37
+      - 42
+      - 62
+      - 66
+      4: 3.9.16
+      5: 0.17.0
+      6: 4.40.2
+      8:
+      - 5
+      9:
+        1: transformers_trainer
+      13: linux-x86_64
+      m:
+      - 1: train/global_step
+        6:
+        - 3
+vocab_size:
+  desc: null
+  value: 256000
+max_position_embeddings:
+  desc: null
+  value: 8192
+hidden_size:
+  desc: null
+  value: 4096
+intermediate_size:
+  desc: null
+  value: 11008
+num_hidden_layers:
+  desc: null
+  value: 32
+num_attention_heads:
+  desc: null
+  value: 32
+num_key_value_heads:
+  desc: null
+  value: 8
+hidden_act:
+  desc: null
+  value: silu
+initializer_range:
+  desc: null
+  value: 0.02
+rms_norm_eps:
+  desc: null
+  value: 1.0e-05
+pretraining_tp:
+  desc: null
+  value: 1
+use_cache:
+  desc: null
+  value: false
+rope_theta:
+  desc: null
+  value: 10000.0
+rope_scaling:
+  desc: null
+  value: null
+attention_bias:
+  desc: null
+  value: false
+attention_dropout:
+  desc: null
+  value: 0.0
+return_dict:
+  desc: null
+  value: true
+output_hidden_states:
+  desc: null
+  value: false
+output_attentions:
+  desc: null
+  value: false
+torchscript:
+  desc: null
+  value: false
+torch_dtype:
+  desc: null
+  value: bfloat16
+use_bfloat16:
+  desc: null
+  value: false
+tf_legacy_loss:
+  desc: null
+  value: false
+pruned_heads:
+  desc: null
+  value: {}
+tie_word_embeddings:
+  desc: null
+  value: false
+chunk_size_feed_forward:
+  desc: null
+  value: 0
+is_encoder_decoder:
+  desc: null
+  value: false
+is_decoder:
+  desc: null
+  value: false
+cross_attention_hidden_size:
+  desc: null
+  value: null
+add_cross_attention:
+  desc: null
+  value: false
+tie_encoder_decoder:
+  desc: null
+  value: false
+max_length:
+  desc: null
+  value: 20
+min_length:
+  desc: null
+  value: 0
+do_sample:
+  desc: null
+  value: false
+early_stopping:
+  desc: null
+  value: false
+num_beams:
+  desc: null
+  value: 1
+num_beam_groups:
+  desc: null
+  value: 1
+diversity_penalty:
+  desc: null
+  value: 0.0
+temperature:
+  desc: null
+  value: 1.0
+top_k:
+  desc: null
+  value: 50
+top_p:
+  desc: null
+  value: 1.0
+typical_p:
+  desc: null
+  value: 1.0
+repetition_penalty:
+  desc: null
+  value: 1.0
+length_penalty:
+  desc: null
+  value: 1.0
+no_repeat_ngram_size:
+  desc: null
+  value: 0
+encoder_no_repeat_ngram_size:
+  desc: null
+  value: 0
+bad_words_ids:
+  desc: null
+  value: null
+num_return_sequences:
+  desc: null
+  value: 1
+output_scores:
+  desc: null
+  value: false
+return_dict_in_generate:
+  desc: null
+  value: false
+forced_bos_token_id:
+  desc: null
+  value: null
+forced_eos_token_id:
+  desc: null
+  value: null
+remove_invalid_values:
+  desc: null
+  value: false
+exponential_decay_length_penalty:
+  desc: null
+  value: null
+suppress_tokens:
+  desc: null
+  value: null
+begin_suppress_tokens:
+  desc: null
+  value: null
+architectures:
+  desc: null
+  value:
+  - LlamaForCausalLM
+finetuning_task:
+  desc: null
+  value: null
+id2label:
+  desc: null
+  value:
+    '0': LABEL_0
+    '1': LABEL_1
+label2id:
+  desc: null
+  value:
+    LABEL_0: 0
+    LABEL_1: 1
+tokenizer_class:
+  desc: null
+  value: null
+prefix:
+  desc: null
+  value: null
+bos_token_id:
+  desc: null
+  value: 1
+pad_token_id:
+  desc: null
+  value: null
+eos_token_id:
+  desc: null
+  value: 2
+sep_token_id:
+  desc: null
+  value: null
+decoder_start_token_id:
+  desc: null
+  value: null
+task_specific_params:
+  desc: null
+  value: null
+problem_type:
+  desc: null
+  value: null
+_name_or_path:
+  desc: null
+  value: /gpfs/projects/bsc88/text/models/instruction-tuning/models/base_models_with_special_tokens/iberianLLM_7B_xdogeCPT_34k
+transformers_version:
+  desc: null
+  value: 4.40.2
+head_dim:
+  desc: null
+  value: 128
+mlp_bias:
+  desc: null
+  value: false
+model_type:
+  desc: null
+  value: llama
+output_dir:
+  desc: null
+  value: /gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0
+overwrite_output_dir:
+  desc: null
+  value: false
+do_train:
+  desc: null
+  value: false
+do_eval:
+  desc: null
+  value: true
+do_predict:
+  desc: null
+  value: false
+evaluation_strategy:
+  desc: null
+  value: steps
+prediction_loss_only:
+  desc: null
+  value: false
+per_device_train_batch_size:
+  desc: null
+  value: 1
+per_device_eval_batch_size:
+  desc: null
+  value: 2
+per_gpu_train_batch_size:
+  desc: null
+  value: null
+per_gpu_eval_batch_size:
+  desc: null
+  value: null
+gradient_accumulation_steps:
+  desc: null
+  value: 8
+eval_accumulation_steps:
+  desc: null
+  value: null
+eval_delay:
+  desc: null
+  value: 0
+learning_rate:
+  desc: null
+  value: 1.0e-05
+weight_decay:
+  desc: null
+  value: 0.0
+adam_beta1:
+  desc: null
+  value: 0.9
+adam_beta2:
+  desc: null
+  value: 0.999
+adam_epsilon:
+  desc: null
+  value: 1.0e-08
+max_grad_norm:
+  desc: null
+  value: 1.0
+num_train_epochs:
+  desc: null
+  value: 2.0
+max_steps:
+  desc: null
+  value: -1
+lr_scheduler_type:
+  desc: null
+  value: cosine
+lr_scheduler_kwargs:
+  desc: null
+  value: {}
+warmup_ratio:
+  desc: null
+  value: 0.03
+warmup_steps:
+  desc: null
+  value: 0
+log_level:
+  desc: null
+  value: passive
+log_level_replica:
+  desc: null
+  value: warning
+log_on_each_node:
+  desc: null
+  value: true
+logging_dir:
+  desc: null
+  value: /gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0/runs/Apr02_13-46-41_as03r3b01
+logging_strategy:
+  desc: null
+  value: steps
+logging_first_step:
+  desc: null
+  value: false
+logging_steps:
+  desc: null
+  value: 1.0
+logging_nan_inf_filter:
+  desc: null
+  value: true
+save_strategy:
+  desc: null
+  value: epoch
+save_steps:
+  desc: null
+  value: 500
+save_total_limit:
+  desc: null
+  value: 15
+save_safetensors:
+  desc: null
+  value: true
+save_on_each_node:
+  desc: null
+  value: false
+save_only_model:
+  desc: null
+  value: false
+no_cuda:
+  desc: null
+  value: false
+use_cpu:
+  desc: null
+  value: false
+use_mps_device:
+  desc: null
+  value: false
+seed:
+  desc: null
+  value: 42
+data_seed:
+  desc: null
+  value: null
+jit_mode_eval:
+  desc: null
+  value: false
+use_ipex:
+  desc: null
+  value: false
+bf16:
+  desc: null
+  value: true
+fp16:
+  desc: null
+  value: false
+fp16_opt_level:
+  desc: null
+  value: O1
+half_precision_backend:
+  desc: null
+  value: auto
+bf16_full_eval:
+  desc: null
+  value: false
+fp16_full_eval:
+  desc: null
+  value: false
+tf32:
+  desc: null
+  value: null
+local_rank:
+  desc: null
+  value: 0
+ddp_backend:
+  desc: null
+  value: null
+tpu_num_cores:
+  desc: null
+  value: null
+tpu_metrics_debug:
+  desc: null
+  value: false
+debug:
+  desc: null
+  value: []
+dataloader_drop_last:
+  desc: null
+  value: false
+eval_steps:
+  desc: null
+  value: 0.01
+dataloader_num_workers:
+  desc: null
+  value: 0
+dataloader_prefetch_factor:
+  desc: null
+  value: null
+past_index:
+  desc: null
+  value: -1
+run_name:
+  desc: null
+  value: /gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0
+disable_tqdm:
+  desc: null
+  value: false
+remove_unused_columns:
+  desc: null
+  value: true
+label_names:
+  desc: null
+  value: null
+load_best_model_at_end:
+  desc: null
+  value: false
+metric_for_best_model:
+  desc: null
+  value: null
+greater_is_better:
+  desc: null
+  value: null
+ignore_data_skip:
+  desc: null
+  value: false
+fsdp:
+  desc: null
+  value: []
+fsdp_min_num_params:
+  desc: null
+  value: 0
+fsdp_config:
+  desc: null
+  value:
+    min_num_params: 0
+    xla: false
+    xla_fsdp_v2: false
+    xla_fsdp_grad_ckpt: false
+fsdp_transformer_layer_cls_to_wrap:
+  desc: null
+  value: null
+accelerator_config:
+  desc: null
+  value:
+    split_batches: false
+    dispatch_batches: null
+    even_batches: true
+    use_seedable_sampler: true
+    gradient_accumulation_kwargs: null
+deepspeed:
+  desc: null
+  value: deepspeed_configs/ds_type3_config_autombs.json
+label_smoothing_factor:
+  desc: null
+  value: 0.0
+optim:
+  desc: null
+  value: adamw_torch
+optim_args:
+  desc: null
+  value: null
+adafactor:
+  desc: null
+  value: false
+group_by_length:
+  desc: null
+  value: false
+length_column_name:
+  desc: null
+  value: length
+report_to:
+  desc: null
+  value:
+  - wandb
+ddp_find_unused_parameters:
+  desc: null
+  value: null
+ddp_bucket_cap_mb:
+  desc: null
+  value: null
+ddp_broadcast_buffers:
+  desc: null
+  value: null
+dataloader_pin_memory:
+  desc: null
+  value: true
+dataloader_persistent_workers:
+  desc: null
+  value: false
+skip_memory_metrics:
+  desc: null
+  value: true
+use_legacy_prediction_loop:
+  desc: null
+  value: false
+push_to_hub:
+  desc: null
+  value: false
+resume_from_checkpoint:
+  desc: null
+  value: null
+hub_model_id:
+  desc: null
+  value: null
+hub_strategy:
+  desc: null
+  value: every_save
+hub_token:
+  desc: null
+  value: <HUB_TOKEN>
+hub_private_repo:
+  desc: null
+  value: false
+hub_always_push:
+  desc: null
+  value: false
+gradient_checkpointing:
+  desc: null
+  value: true
+gradient_checkpointing_kwargs:
+  desc: null
+  value: null
+include_inputs_for_metrics:
+  desc: null
+  value: false
+eval_do_concat_batches:
+  desc: null
+  value: true
+fp16_backend:
+  desc: null
+  value: auto
+push_to_hub_model_id:
+  desc: null
+  value: null
+push_to_hub_organization:
+  desc: null
+  value: null
+push_to_hub_token:
+  desc: null
+  value: <PUSH_TO_HUB_TOKEN>
+mp_parameters:
+  desc: null
+  value: ''
+auto_find_batch_size:
+  desc: null
+  value: false
+full_determinism:
+  desc: null
+  value: false
+torchdynamo:
+  desc: null
+  value: null
+ray_scope:
+  desc: null
+  value: last
+ddp_timeout:
+  desc: null
+  value: 1800
+torch_compile:
+  desc: null
+  value: false
+torch_compile_backend:
+  desc: null
+  value: null
+torch_compile_mode:
+  desc: null
+  value: null
+dispatch_batches:
+  desc: null
+  value: null
+split_batches:
+  desc: null
+  value: null
+include_tokens_per_second:
+  desc: null
+  value: false
+include_num_input_tokens_seen:
+  desc: null
+  value: false
+neftune_noise_alpha:
+  desc: null
+  value: 5.0
+optim_target_modules:
+  desc: null
+  value: null
+cache_dir:
+  desc: null
+  value: null
+model_max_length:
+  desc: null
+  value: 8192
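
The offline run's config.yaml stores each hyperparameter as a {desc, value} pair. A sketch of reading it back and computing the effective batch size (per-device batch 1 × gradient accumulation 8, times however many GPUs the job used, which this file alone does not record):

```python
import yaml

# Assumes the offline run directory from this commit is present locally.
with open("wandb/offline-run-20250402_134739-nwi5m2nq/files/config.yaml") as f:
    cfg = yaml.safe_load(f)

micro = cfg["per_device_train_batch_size"]["value"]  # 1
accum = cfg["gradient_accumulation_steps"]["value"]  # 8
print(f"effective batch size = {micro * accum} x world_size")
```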
wandb/offline-run-20250402_134739-nwi5m2nq/files/output.log ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a248c9fea501a6a130276aafd24f843ac8b443700db305febfa7ef503ec703cd
+size 62978542
wandb/offline-run-20250402_134739-nwi5m2nq/files/wandb-metadata.json ADDED
@@ -0,0 +1,932 @@
+ {
+ "os": "Linux-5.14.0-284.30.1.el9_2.x86_64-x86_64-with-glibc2.34",
+ "python": "3.9.16",
+ "heartbeatAt": "2025-04-02T11:47:39.624181",
+ "startedAt": "2025-04-02T11:47:39.383806",
+ "docker": null,
+ "cuda": null,
+ "args": [
+ "--deepspeed",
+ "deepspeed_configs/ds_type3_config_autombs.json",
+ "--model_name_or_path",
+ "/gpfs/projects/bsc88/text/models/instruction-tuning/models/base_models_with_special_tokens/iberianLLM_7B_xdogeCPT_34k",
+ "--preprocess_conv_template",
+ "bsc_chat_template_system_0.5",
+ "--neftune_noise_alpha",
+ "5",
+ "--data_paths",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_ca-en_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_ca-es_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_ca_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_en-ca_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_en-es_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_en-pt_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_en_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_es-ca_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_es-en_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_es-eu_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_es-gl_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_es_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_eu-es_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_eu_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_gl-es_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_gl_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_pt-en_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/exp6.1_sampling_pt_v0.1.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/new_alpaca-cleaned_en.json",
+ "/gpfs/projects/bsc88/data/04-instruction-samplings/system-prompt-experiments/exp6.1_iberian-sp/new_open-orca-system-prompt_en.json",
+ "--bf16",
+ "True",
+ "--output_dir",
+ "/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0",
+ "--num_train_epochs",
+ "2",
+ "--per_device_train_batch_size",
+ "1",
+ "--per_device_eval_batch_size",
+ "2",
+ "--gradient_accumulation_steps",
+ "8",
+ "--evaluation_strategy",
+ "steps",
+ "--eval_steps",
+ "0.01",
+ "--save_strategy",
+ "epoch",
+ "--save_total_limit",
+ "15",
+ "--learning_rate",
+ "1e-5",
+ "--weight_decay",
+ "0.",
+ "--warmup_ratio",
+ "0.03",
+ "--lr_scheduler_type",
+ "cosine",
+ "--logging_steps",
+ "1",
+ "--model_max_length",
+ "8192",
+ "--gradient_checkpointing",
+ "True",
+ "--lazy_preprocess",
+ "True",
+ "--add_chat_template",
+ "True",
+ "--local_rank",
+ "0",
+ "--report_to",
+ "wandb"
+ ],
+ "state": "running",
+ "program": "/gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1/fastchat/train/train.py",
+ "codePathLocal": "fastchat/train/train.py",
+ "codePath": "fastchat/train/train.py",
+ "git": {
+ "remote": "https://github.com/langtech-bsc/it-chat-v1.git",
+ "commit": "019a772ed26e859b133058d581696f57f0fa613e"
+ },
+ "email": "[email protected]",
+ "root": "/gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1",
+ "host": "as03r3b01",
+ "username": "bsc088369",
+ "executable": "/gpfs/projects/bsc88/text/environments/instruction-tuning_inference_mn5_python3.9_20241107/bin/python",
+ "cpu_count": 80,
+ "cpu_count_logical": 160,
+ "cpu_freq": {
+ "current": 3699.94764375,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ "cpu_freq_per_core": [
+ {
+ "current": 3133.761,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3699.997,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3699.997,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3697.752,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.001,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ },
+ {
+ "current": 3700.0,
+ "min": 800.0,
+ "max": 3700.0
+ }
+ ],
+ "disk": {
+ "/": {
+ "total": 65243.953125,
+ "used": 15974.58984375
+ }
+ },
+ "gpu": "NVIDIA H100",
+ "gpu_count": 4,
+ "gpu_devices": [
+ {
+ "name": "NVIDIA H100",
+ "memory_total": 68416438272
+ },
+ {
+ "name": "NVIDIA H100",
+ "memory_total": 68416438272
+ },
+ {
+ "name": "NVIDIA H100",
+ "memory_total": 68416438272
+ },
+ {
+ "name": "NVIDIA H100",
+ "memory_total": 68416438272
+ }
+ ],
+ "memory": {
+ "total": 503.48146057128906
+ }
+ }
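wandb-metadata.json above records the host environment (4x NVIDIA H100, 80 physical cores, ~503 GiB RAM) and the full argv passed to the training script. A small sketch, assuming the run-directory paths used in this commit, that reconstructs an approximation of the recorded invocation (note the surrounding deepspeed/SLURM launcher itself is not captured in "args"):

```python
import json
import shlex

META = "wandb/offline-run-20250402_134739-nwi5m2nq/files/wandb-metadata.json"
with open(META) as f:
    meta = json.load(f)

# Rebuild the command wandb saw: interpreter + training script + argv.
cmd = [meta["executable"], meta["program"], *meta["args"]]
print(shlex.join(cmd))
```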
wandb/offline-run-20250402_134739-nwi5m2nq/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"train/loss": 0.7396, "train/grad_norm": 1.615809772493014, "train/learning_rate": 0.0, "train/epoch": 1.9995261786306564, "train/global_step": 2110, "_timestamp": 1743652092.048609, "_runtime": 57632.64785003662, "_step": 2205, "eval/loss": 1.0688345432281494, "eval/runtime": 63.103, "eval/samples_per_second": 43.231, "eval/steps_per_second": 0.681, "train_runtime": 57633.4394, "train_samples_per_second": 9.374, "train_steps_per_second": 0.037, "total_flos": 7068238416445440.0, "train_loss": 0.9307103937271082, "_wandb": {"runtime": 57646}}
wandb/offline-run-20250402_134739-nwi5m2nq/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/offline-run-20250402_134739-nwi5m2nq/logs/debug.log ADDED
@@ -0,0 +1,27 @@
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Current SDK version is 0.17.0
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Configure stats pid to 1538306
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Loading settings from /home/bsc/bsc088369/.config/wandb/settings
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Loading settings from /gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1/wandb/settings
+ 2025-04-02 13:47:39,397 WARNING MainThread:1538306 [wandb_setup.py:_flush():76] Unknown environment variable: WANDB_CONFIG_DIR
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'root_dir': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0', 'init_timeout': '600', 'project': 'instruction-tuning', 'run_name': 'iberianLLM_7B_xdogeCPT_34k_instruct-v1.0_18503701', 'mode': 'offline'}
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'fastchat/train/train.py', 'program_abspath': '/gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1/fastchat/train/train.py', 'program': '/gpfs/projects/bsc88/text/models/instruction-tuning/it-chat-v1/fastchat/train/train.py'}
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:_log_setup():520] Logging user logs to /gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0/wandb/offline-run-20250402_134739-nwi5m2nq/logs/debug.log
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:_log_setup():521] Logging internal logs to /gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0/wandb/offline-run-20250402_134739-nwi5m2nq/logs/debug-internal.log
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():560] calling init triggers
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():567] wandb.init called with sweep_config: {}
+ config: {}
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():610] starting backend
+ 2025-04-02 13:47:39,397 INFO MainThread:1538306 [wandb_init.py:init():614] setting up manager
+ 2025-04-02 13:47:39,398 INFO MainThread:1538306 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2025-04-02 13:47:39,400 INFO MainThread:1538306 [wandb_init.py:init():622] backend started and connected
+ 2025-04-02 13:47:39,403 INFO MainThread:1538306 [wandb_init.py:init():711] updated telemetry
+ 2025-04-02 13:47:39,500 INFO MainThread:1538306 [wandb_init.py:init():744] communicating run to backend with 600.0 second timeout
+ 2025-04-02 13:47:39,504 INFO MainThread:1538306 [wandb_init.py:init():795] starting run threads in backend
+ 2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_console_start():2374] atexit reg
+ 2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_redirect():2229] redirect: wrap_raw
+ 2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_redirect():2294] Wrapping output streams.
+ 2025-04-02 13:47:40,849 INFO MainThread:1538306 [wandb_run.py:_redirect():2319] Redirects installed.
+ 2025-04-02 13:47:40,892 INFO MainThread:1538306 [wandb_init.py:init():838] run started, returning control to user process
+ 2025-04-02 13:47:40,893 INFO MainThread:1538306 [wandb_run.py:_config_callback():1376] config_cb None None {'vocab_size': 256000, 'max_position_embeddings': 8192, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/base_models_with_special_tokens/iberianLLM_7B_xdogeCPT_34k', 'transformers_version': '4.40.2', 'head_dim': 128, 'mlp_bias': False, 'model_type': 'llama', 'output_dir': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 2, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 2.0, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0/runs/Apr02_13-46-41_as03r3b01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 1.0, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': 15, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 
'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 0.01, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/gpfs/projects/bsc88/text/models/instruction-tuning/models/checkpoints/iberoLLM_april25/iberianLLM_7B_xdogeCPT_34k_instruct-v1.0', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}, 'deepspeed': 'deepspeed_configs/ds_type3_config_autombs.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': 5.0, 'optim_target_modules': None, 'cache_dir': None, 'model_max_length': 8192}
+ 2025-04-03 05:48:27,018 WARNING MsgRouterThr:1538306 [router.py:message_loop():77] message_loop has been closed
wandb/offline-run-20250402_134739-nwi5m2nq/run-nwi5m2nq.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:477cbc8e46ae66274b7508f2abc3b45976e10b9cbe1b5c0468a9a6b966408b63
+ size 115718034
wandb/offline-run-20250402_134739-nwi5m2nq/run-nwi5m2nq.wandb.synced ADDED
File without changes