Upload folder using huggingface_hub
Browse files- config.json +0 -0
- model-00001-of-00002.safetensors +2 -2
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +25 -73
- recipe.yaml +1 -1
config.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d35795482399c22b20165089b381739017b132adeb3208e74f9808da6700ae61
|
| 3 |
+
size 5000680840
|
model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:889d27f737530f2c0b5ab3187f74e33024baa5d25a6362cc15e69f0428c6bfe9
|
| 3 |
+
size 3311230472
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00002-of-00002.safetensors",
|
|
@@ -556,9 +556,7 @@
|
|
| 556 |
"model.layers.0.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 557 |
"model.layers.0.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 558 |
"model.layers.0.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 559 |
-
"model.layers.0.mlp.shared_expert_gate.
|
| 560 |
-
"model.layers.0.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 561 |
-
"model.layers.0.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 562 |
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 563 |
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 564 |
"model.layers.0.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -1126,9 +1124,7 @@
|
|
| 1126 |
"model.layers.1.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 1127 |
"model.layers.1.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 1128 |
"model.layers.1.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 1129 |
-
"model.layers.1.mlp.shared_expert_gate.
|
| 1130 |
-
"model.layers.1.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 1131 |
-
"model.layers.1.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 1132 |
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 1133 |
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 1134 |
"model.layers.1.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -1696,9 +1692,7 @@
|
|
| 1696 |
"model.layers.10.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 1697 |
"model.layers.10.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 1698 |
"model.layers.10.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 1699 |
-
"model.layers.10.mlp.shared_expert_gate.
|
| 1700 |
-
"model.layers.10.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 1701 |
-
"model.layers.10.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 1702 |
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 1703 |
"model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 1704 |
"model.layers.10.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -2266,9 +2260,7 @@
|
|
| 2266 |
"model.layers.11.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 2267 |
"model.layers.11.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 2268 |
"model.layers.11.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 2269 |
-
"model.layers.11.mlp.shared_expert_gate.
|
| 2270 |
-
"model.layers.11.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 2271 |
-
"model.layers.11.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 2272 |
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 2273 |
"model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 2274 |
"model.layers.11.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -2836,9 +2828,7 @@
|
|
| 2836 |
"model.layers.12.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 2837 |
"model.layers.12.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 2838 |
"model.layers.12.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 2839 |
-
"model.layers.12.mlp.shared_expert_gate.
|
| 2840 |
-
"model.layers.12.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 2841 |
-
"model.layers.12.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 2842 |
"model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 2843 |
"model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 2844 |
"model.layers.12.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -3406,9 +3396,7 @@
|
|
| 3406 |
"model.layers.13.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 3407 |
"model.layers.13.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 3408 |
"model.layers.13.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 3409 |
-
"model.layers.13.mlp.shared_expert_gate.
|
| 3410 |
-
"model.layers.13.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 3411 |
-
"model.layers.13.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 3412 |
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 3413 |
"model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 3414 |
"model.layers.13.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -3976,9 +3964,7 @@
|
|
| 3976 |
"model.layers.14.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 3977 |
"model.layers.14.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 3978 |
"model.layers.14.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 3979 |
-
"model.layers.14.mlp.shared_expert_gate.
|
| 3980 |
-
"model.layers.14.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 3981 |
-
"model.layers.14.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 3982 |
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 3983 |
"model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 3984 |
"model.layers.14.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -4546,9 +4532,7 @@
|
|
| 4546 |
"model.layers.15.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 4547 |
"model.layers.15.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 4548 |
"model.layers.15.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 4549 |
-
"model.layers.15.mlp.shared_expert_gate.
|
| 4550 |
-
"model.layers.15.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 4551 |
-
"model.layers.15.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 4552 |
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 4553 |
"model.layers.15.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 4554 |
"model.layers.15.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -5116,9 +5100,7 @@
|
|
| 5116 |
"model.layers.16.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 5117 |
"model.layers.16.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 5118 |
"model.layers.16.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 5119 |
-
"model.layers.16.mlp.shared_expert_gate.
|
| 5120 |
-
"model.layers.16.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 5121 |
-
"model.layers.16.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 5122 |
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 5123 |
"model.layers.16.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 5124 |
"model.layers.16.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -5686,9 +5668,7 @@
|
|
| 5686 |
"model.layers.17.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 5687 |
"model.layers.17.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 5688 |
"model.layers.17.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 5689 |
-
"model.layers.17.mlp.shared_expert_gate.
|
| 5690 |
-
"model.layers.17.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 5691 |
-
"model.layers.17.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 5692 |
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 5693 |
"model.layers.17.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 5694 |
"model.layers.17.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -6256,9 +6236,7 @@
|
|
| 6256 |
"model.layers.18.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 6257 |
"model.layers.18.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 6258 |
"model.layers.18.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 6259 |
-
"model.layers.18.mlp.shared_expert_gate.
|
| 6260 |
-
"model.layers.18.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 6261 |
-
"model.layers.18.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 6262 |
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 6263 |
"model.layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 6264 |
"model.layers.18.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -6826,9 +6804,7 @@
|
|
| 6826 |
"model.layers.19.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 6827 |
"model.layers.19.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 6828 |
"model.layers.19.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 6829 |
-
"model.layers.19.mlp.shared_expert_gate.
|
| 6830 |
-
"model.layers.19.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 6831 |
-
"model.layers.19.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 6832 |
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 6833 |
"model.layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 6834 |
"model.layers.19.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -7396,9 +7372,7 @@
|
|
| 7396 |
"model.layers.2.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 7397 |
"model.layers.2.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 7398 |
"model.layers.2.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 7399 |
-
"model.layers.2.mlp.shared_expert_gate.
|
| 7400 |
-
"model.layers.2.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 7401 |
-
"model.layers.2.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 7402 |
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 7403 |
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 7404 |
"model.layers.2.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -7966,9 +7940,7 @@
|
|
| 7966 |
"model.layers.20.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 7967 |
"model.layers.20.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 7968 |
"model.layers.20.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 7969 |
-
"model.layers.20.mlp.shared_expert_gate.
|
| 7970 |
-
"model.layers.20.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 7971 |
-
"model.layers.20.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 7972 |
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 7973 |
"model.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 7974 |
"model.layers.20.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -8536,9 +8508,7 @@
|
|
| 8536 |
"model.layers.21.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 8537 |
"model.layers.21.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 8538 |
"model.layers.21.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 8539 |
-
"model.layers.21.mlp.shared_expert_gate.
|
| 8540 |
-
"model.layers.21.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 8541 |
-
"model.layers.21.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 8542 |
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 8543 |
"model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 8544 |
"model.layers.21.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -9106,9 +9076,7 @@
|
|
| 9106 |
"model.layers.22.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 9107 |
"model.layers.22.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 9108 |
"model.layers.22.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 9109 |
-
"model.layers.22.mlp.shared_expert_gate.
|
| 9110 |
-
"model.layers.22.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 9111 |
-
"model.layers.22.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 9112 |
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 9113 |
"model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 9114 |
"model.layers.22.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -9676,9 +9644,7 @@
|
|
| 9676 |
"model.layers.23.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 9677 |
"model.layers.23.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 9678 |
"model.layers.23.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 9679 |
-
"model.layers.23.mlp.shared_expert_gate.
|
| 9680 |
-
"model.layers.23.mlp.shared_expert_gate.weight_scale": "model-00002-of-00002.safetensors",
|
| 9681 |
-
"model.layers.23.mlp.shared_expert_gate.weight_shape": "model-00002-of-00002.safetensors",
|
| 9682 |
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 9683 |
"model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 9684 |
"model.layers.23.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
@@ -10246,9 +10212,7 @@
|
|
| 10246 |
"model.layers.3.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 10247 |
"model.layers.3.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 10248 |
"model.layers.3.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 10249 |
-
"model.layers.3.mlp.shared_expert_gate.
|
| 10250 |
-
"model.layers.3.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 10251 |
-
"model.layers.3.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 10252 |
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 10253 |
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 10254 |
"model.layers.3.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -10816,9 +10780,7 @@
|
|
| 10816 |
"model.layers.4.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 10817 |
"model.layers.4.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 10818 |
"model.layers.4.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 10819 |
-
"model.layers.4.mlp.shared_expert_gate.
|
| 10820 |
-
"model.layers.4.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 10821 |
-
"model.layers.4.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 10822 |
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 10823 |
"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 10824 |
"model.layers.4.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -11386,9 +11348,7 @@
|
|
| 11386 |
"model.layers.5.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 11387 |
"model.layers.5.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 11388 |
"model.layers.5.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 11389 |
-
"model.layers.5.mlp.shared_expert_gate.
|
| 11390 |
-
"model.layers.5.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 11391 |
-
"model.layers.5.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 11392 |
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 11393 |
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 11394 |
"model.layers.5.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -11956,9 +11916,7 @@
|
|
| 11956 |
"model.layers.6.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 11957 |
"model.layers.6.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 11958 |
"model.layers.6.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 11959 |
-
"model.layers.6.mlp.shared_expert_gate.
|
| 11960 |
-
"model.layers.6.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 11961 |
-
"model.layers.6.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 11962 |
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 11963 |
"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 11964 |
"model.layers.6.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -12526,9 +12484,7 @@
|
|
| 12526 |
"model.layers.7.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 12527 |
"model.layers.7.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 12528 |
"model.layers.7.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 12529 |
-
"model.layers.7.mlp.shared_expert_gate.
|
| 12530 |
-
"model.layers.7.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 12531 |
-
"model.layers.7.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 12532 |
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 12533 |
"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 12534 |
"model.layers.7.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -13096,9 +13052,7 @@
|
|
| 13096 |
"model.layers.8.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 13097 |
"model.layers.8.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 13098 |
"model.layers.8.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 13099 |
-
"model.layers.8.mlp.shared_expert_gate.
|
| 13100 |
-
"model.layers.8.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 13101 |
-
"model.layers.8.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 13102 |
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 13103 |
"model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 13104 |
"model.layers.8.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
@@ -13666,9 +13620,7 @@
|
|
| 13666 |
"model.layers.9.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 13667 |
"model.layers.9.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 13668 |
"model.layers.9.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 13669 |
-
"model.layers.9.mlp.shared_expert_gate.
|
| 13670 |
-
"model.layers.9.mlp.shared_expert_gate.weight_scale": "model-00001-of-00002.safetensors",
|
| 13671 |
-
"model.layers.9.mlp.shared_expert_gate.weight_shape": "model-00001-of-00002.safetensors",
|
| 13672 |
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 13673 |
"model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 13674 |
"model.layers.9.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 8310237312
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00002-of-00002.safetensors",
|
|
|
|
| 556 |
"model.layers.0.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 557 |
"model.layers.0.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 558 |
"model.layers.0.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 559 |
+
"model.layers.0.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 560 |
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 561 |
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 562 |
"model.layers.0.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 1124 |
"model.layers.1.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 1125 |
"model.layers.1.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 1126 |
"model.layers.1.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 1127 |
+
"model.layers.1.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 1128 |
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 1129 |
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 1130 |
"model.layers.1.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 1692 |
"model.layers.10.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 1693 |
"model.layers.10.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 1694 |
"model.layers.10.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 1695 |
+
"model.layers.10.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 1696 |
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 1697 |
"model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 1698 |
"model.layers.10.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 2260 |
"model.layers.11.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 2261 |
"model.layers.11.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 2262 |
"model.layers.11.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 2263 |
+
"model.layers.11.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 2264 |
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 2265 |
"model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 2266 |
"model.layers.11.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 2828 |
"model.layers.12.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 2829 |
"model.layers.12.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 2830 |
"model.layers.12.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 2831 |
+
"model.layers.12.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 2832 |
"model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 2833 |
"model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 2834 |
"model.layers.12.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 3396 |
"model.layers.13.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 3397 |
"model.layers.13.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 3398 |
"model.layers.13.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 3399 |
+
"model.layers.13.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 3400 |
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 3401 |
"model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 3402 |
"model.layers.13.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 3964 |
"model.layers.14.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 3965 |
"model.layers.14.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 3966 |
"model.layers.14.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 3967 |
+
"model.layers.14.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 3968 |
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 3969 |
"model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 3970 |
"model.layers.14.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 4532 |
"model.layers.15.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 4533 |
"model.layers.15.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 4534 |
"model.layers.15.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 4535 |
+
"model.layers.15.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 4536 |
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 4537 |
"model.layers.15.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 4538 |
"model.layers.15.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 5100 |
"model.layers.16.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 5101 |
"model.layers.16.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 5102 |
"model.layers.16.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 5103 |
+
"model.layers.16.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 5104 |
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 5105 |
"model.layers.16.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 5106 |
"model.layers.16.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 5668 |
"model.layers.17.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 5669 |
"model.layers.17.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 5670 |
"model.layers.17.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 5671 |
+
"model.layers.17.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 5672 |
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 5673 |
"model.layers.17.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 5674 |
"model.layers.17.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 6236 |
"model.layers.18.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 6237 |
"model.layers.18.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 6238 |
"model.layers.18.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 6239 |
+
"model.layers.18.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 6240 |
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 6241 |
"model.layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 6242 |
"model.layers.18.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 6804 |
"model.layers.19.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 6805 |
"model.layers.19.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 6806 |
"model.layers.19.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 6807 |
+
"model.layers.19.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 6808 |
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 6809 |
"model.layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 6810 |
"model.layers.19.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 7372 |
"model.layers.2.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 7373 |
"model.layers.2.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 7374 |
"model.layers.2.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 7375 |
+
"model.layers.2.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 7376 |
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 7377 |
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 7378 |
"model.layers.2.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 7940 |
"model.layers.20.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 7941 |
"model.layers.20.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 7942 |
"model.layers.20.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 7943 |
+
"model.layers.20.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 7944 |
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 7945 |
"model.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 7946 |
"model.layers.20.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 8508 |
"model.layers.21.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 8509 |
"model.layers.21.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 8510 |
"model.layers.21.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 8511 |
+
"model.layers.21.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 8512 |
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 8513 |
"model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 8514 |
"model.layers.21.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 9076 |
"model.layers.22.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 9077 |
"model.layers.22.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 9078 |
"model.layers.22.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 9079 |
+
"model.layers.22.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 9080 |
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 9081 |
"model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 9082 |
"model.layers.22.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 9644 |
"model.layers.23.mlp.shared_expert.up_proj.weight_packed": "model-00002-of-00002.safetensors",
|
| 9645 |
"model.layers.23.mlp.shared_expert.up_proj.weight_scale": "model-00002-of-00002.safetensors",
|
| 9646 |
"model.layers.23.mlp.shared_expert.up_proj.weight_shape": "model-00002-of-00002.safetensors",
|
| 9647 |
+
"model.layers.23.mlp.shared_expert_gate.weight": "model-00002-of-00002.safetensors",
|
|
|
|
|
|
|
| 9648 |
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
| 9649 |
"model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
| 9650 |
"model.layers.23.self_attn.k_proj.weight_packed": "model-00002-of-00002.safetensors",
|
|
|
|
| 10212 |
"model.layers.3.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 10213 |
"model.layers.3.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 10214 |
"model.layers.3.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 10215 |
+
"model.layers.3.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 10216 |
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 10217 |
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 10218 |
"model.layers.3.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 10780 |
"model.layers.4.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 10781 |
"model.layers.4.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 10782 |
"model.layers.4.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 10783 |
+
"model.layers.4.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 10784 |
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 10785 |
"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 10786 |
"model.layers.4.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 11348 |
"model.layers.5.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 11349 |
"model.layers.5.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 11350 |
"model.layers.5.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 11351 |
+
"model.layers.5.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 11352 |
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 11353 |
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 11354 |
"model.layers.5.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 11916 |
"model.layers.6.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 11917 |
"model.layers.6.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 11918 |
"model.layers.6.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 11919 |
+
"model.layers.6.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 11920 |
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 11921 |
"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 11922 |
"model.layers.6.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 12484 |
"model.layers.7.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 12485 |
"model.layers.7.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 12486 |
"model.layers.7.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 12487 |
+
"model.layers.7.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 12488 |
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 12489 |
"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 12490 |
"model.layers.7.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 13052 |
"model.layers.8.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 13053 |
"model.layers.8.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 13054 |
"model.layers.8.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 13055 |
+
"model.layers.8.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 13056 |
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 13057 |
"model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 13058 |
"model.layers.8.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
|
|
|
| 13620 |
"model.layers.9.mlp.shared_expert.up_proj.weight_packed": "model-00001-of-00002.safetensors",
|
| 13621 |
"model.layers.9.mlp.shared_expert.up_proj.weight_scale": "model-00001-of-00002.safetensors",
|
| 13622 |
"model.layers.9.mlp.shared_expert.up_proj.weight_shape": "model-00001-of-00002.safetensors",
|
| 13623 |
+
"model.layers.9.mlp.shared_expert_gate.weight": "model-00001-of-00002.safetensors",
|
|
|
|
|
|
|
| 13624 |
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 13625 |
"model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
| 13626 |
"model.layers.9.self_attn.k_proj.weight_packed": "model-00001-of-00002.safetensors",
|
recipe.yaml
CHANGED
|
@@ -3,4 +3,4 @@ DEFAULT_stage:
|
|
| 3 |
GPTQModifier:
|
| 4 |
scheme: W4A16
|
| 5 |
targets: Linear
|
| 6 |
-
ignore: [lm_head, 're:.*mlp.gate$']
|
|
|
|
| 3 |
GPTQModifier:
|
| 4 |
scheme: W4A16
|
| 5 |
targets: Linear
|
| 6 |
+
ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
|