snezhanata committed on
Commit 5f84c02 · verified · 1 Parent(s): 9446310

Upload MinistralForCausalLM

config.json CHANGED
@@ -1,17 +1,56 @@
 {
   "architectures": [
-    "MistralForCausalLM"
+    "MinistralForCausalLM"
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 1,
+  "dtype": "bfloat16",
   "eos_token_id": 2,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention"
+  ],
   "max_position_embeddings": 32768,
-  "model_type": "mistral",
+  "model_type": "ministral",
   "num_attention_heads": 32,
   "num_hidden_layers": 36,
   "num_key_value_heads": 8,
@@ -19,8 +58,7 @@
   "rope_theta": 100000000.0,
   "sliding_window": 32768,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.52.4",
+  "transformers_version": "4.57.0",
   "use_cache": true,
   "vocab_size": 131072
 }
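The new "layer_types" list encodes a strict 4-layer period: one full-attention layer followed by three sliding-window layers, repeated across all 36 hidden layers. A minimal sketch for checking that pattern against the uploaded file (assumes a local checkout containing this config.json; the script itself is not part of the commit):

import json

with open("config.json") as f:
    cfg = json.load(f)

# Rebuild the expected pattern: layers 0, 4, 8, ... use full attention,
# every other layer uses sliding-window attention.
expected = [
    "full_attention" if i % 4 == 0 else "sliding_attention"
    for i in range(cfg["num_hidden_layers"])
]

assert cfg["architectures"] == ["MinistralForCausalLM"]
assert cfg["model_type"] == "ministral"
assert cfg["layer_types"] == expected
print(f"{len(expected)} layers, 1 full / 3 sliding per period - OK")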
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.52.4"
+  "transformers_version": "4.57.0"
 }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c54ea93c343608170f1fff82483c151649c93b12351d79f566b1bc76d46217e
+oid sha256:375f7ea202d2981377fb48eab97cb151b85a8e7e58236b3f1fb37c673924d48f
 size 4983007904
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8739b6deabcd3e6dcd7ed50bbbdc5aa7789092e219e419900ab614727df23ad1
+oid sha256:132faa2a11957be0e5240633597917e78f6c26512805fe608b0c36d9f33229c3
 size 4999836776
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fda17c543dadcfb5f41a2947b5dc3afc804c298c112c2938d129e36e6bb17590
+oid sha256:e748256b0517c34b7349cd3fba5c524a5585f707cb5f2fa883e84bbbc5122c62
 size 4983067960
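The .safetensors entries above are Git LFS pointer files: only the oid (the SHA-256 of the stored blob) changes, while the byte sizes stay identical, so the shards were re-serialized with the same layout. A hedged sketch for re-checking a fully downloaded shard against its pointer (file name and expected values copied from the first shard's diff):

import hashlib
import os

path = "model-00001-of-00004.safetensors"  # assumes the real shard, not the LFS pointer

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

# Expected values for this commit, taken from the pointer diff above.
print("oid sha256:", h.hexdigest())    # 375f7ea202d2981377fb48eab97cb151b85a8e7e58236b3f1fb37c673924d48f
print("size:", os.path.getsize(path))  # 4983007904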
model.safetensors.index.json CHANGED
@@ -1,5 +1,6 @@
 {
   "metadata": {
+    "total_parameters": 8019808256,
     "total_size": 16039616512
   },
   "weight_map": {