danielhanchen committed on
Commit
623c397
·
verified ·
1 Parent(s): ab7e7ad

Upload folder using huggingface_hub

Browse files
chat_template.jinja CHANGED
@@ -1,3 +1,4 @@
 
1
  {%- macro render_content(msg) -%}
2
  {%- set c = msg.get('content') -%}
3
  {%- if c is string -%}
@@ -48,8 +49,8 @@
48
  {%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
49
  {%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
50
 
51
- {%- if tools -%}
52
- <|im_system|>tool_declare<|im_middle|>{{ tools | tojson | replace(', ', ',') | replace(': ', ':') }}<|im_end|>
53
  {%- endif -%}
54
 
55
  {%- for message in hist_msgs -%}
@@ -93,4 +94,5 @@
93
 
94
  {%- if add_generation_prompt -%}
95
  <|im_assistant|>assistant<|im_middle|>
96
- {%- endif -%}
 
 
1
+ {# Unsloth template fixes #}
2
  {%- macro render_content(msg) -%}
3
  {%- set c = msg.get('content') -%}
4
  {%- if c is string -%}
 
49
  {%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
50
  {%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
51
 
52
+ {%- if tools -%}{%- set tools_json = tools|tojson -%}{%- set tools_json = tools_json.split(': ') | join(':') -%}{%- set tools_json = tools_json.split(', ') | join(',') -%}
53
+ <|im_system|>tool_declare<|im_middle|>{{ tools_json }}<|im_end|>
54
  {%- endif -%}
55
 
56
  {%- for message in hist_msgs -%}
 
94
 
95
  {%- if add_generation_prompt -%}
96
  <|im_assistant|>assistant<|im_middle|>
97
+ {%- endif -%}
98
+ {# Copyright 2025-present Unsloth. Apache 2.0 License. #}
config.json CHANGED
@@ -1,7 +1,5 @@
1
  {
2
  "_attn_implementation_autoset": false,
3
- "_name_or_path": "",
4
- "add_cross_attention": false,
5
  "architectures": [
6
  "DeepseekV3ForCausalLM"
7
  ],
@@ -13,66 +11,31 @@
13
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
14
  },
15
  "aux_loss_alpha": 0.001,
16
- "bad_words_ids": null,
17
- "begin_suppress_tokens": null,
18
  "bos_token_id": 163584,
19
- "chunk_size_feed_forward": 0,
20
- "cross_attention_hidden_size": null,
21
- "decoder_start_token_id": null,
22
- "diversity_penalty": 0.0,
23
- "do_sample": false,
24
- "early_stopping": false,
25
- "encoder_no_repeat_ngram_size": 0,
26
  "eos_token_id": 163586,
27
  "ep_size": 1,
28
- "exponential_decay_length_penalty": null,
29
- "finetuning_task": null,
30
  "first_k_dense_replace": 1,
31
- "forced_bos_token_id": null,
32
- "forced_eos_token_id": null,
33
  "hidden_act": "silu",
34
  "hidden_size": 7168,
35
- "id2label": {
36
- "0": "LABEL_0",
37
- "1": "LABEL_1"
38
- },
39
  "initializer_range": 0.02,
40
  "intermediate_size": 18432,
41
- "is_decoder": false,
42
- "is_encoder_decoder": false,
43
  "kv_lora_rank": 512,
44
- "label2id": {
45
- "LABEL_0": 0,
46
- "LABEL_1": 1
47
- },
48
- "length_penalty": 1.0,
49
- "max_length": 20,
50
  "max_position_embeddings": 262144,
51
- "min_length": 0,
52
- "model_type": "kimi_k2",
53
  "moe_intermediate_size": 2048,
54
  "moe_layer_freq": 1,
55
  "n_group": 1,
56
  "n_routed_experts": 384,
57
  "n_shared_experts": 1,
58
- "no_repeat_ngram_size": 0,
59
  "norm_topk_prob": true,
60
  "num_attention_heads": 64,
61
- "num_beam_groups": 1,
62
- "num_beams": 1,
63
  "num_experts_per_tok": 8,
64
  "num_hidden_layers": 61,
65
  "num_key_value_heads": 64,
66
  "num_nextn_predict_layers": 0,
67
- "num_return_sequences": 1,
68
- "output_attentions": false,
69
- "output_hidden_states": false,
70
- "output_scores": false,
71
  "pad_token_id": 163839,
72
- "prefix": null,
73
  "pretraining_tp": 1,
74
- "problem_type": null,
75
- "pruned_heads": {},
76
  "q_lora_rank": 1536,
77
  "qk_nope_head_dim": 128,
78
  "qk_rope_head_dim": 64,
@@ -109,10 +72,6 @@
109
  "quant_method": "compressed-tensors",
110
  "quantization_status": "compressed"
111
  },
112
- "remove_invalid_values": false,
113
- "repetition_penalty": 1.0,
114
- "return_dict": true,
115
- "return_dict_in_generate": false,
116
  "rms_norm_eps": 1e-05,
117
  "rope_scaling": {
118
  "beta_fast": 1.0,
@@ -126,24 +85,12 @@
126
  "rope_theta": 50000.0,
127
  "routed_scaling_factor": 2.827,
128
  "scoring_func": "sigmoid",
129
- "sep_token_id": null,
130
  "seq_aux": true,
131
- "suppress_tokens": null,
132
- "task_specific_params": null,
133
- "temperature": 1.0,
134
- "tf_legacy_loss": false,
135
- "tie_encoder_decoder": false,
136
  "tie_word_embeddings": false,
137
- "tokenizer_class": null,
138
- "top_k": 50,
139
- "top_p": 1.0,
140
  "topk_group": 1,
141
  "topk_method": "noaux_tc",
142
- "torch_dtype": "bfloat16",
143
- "torchscript": false,
144
- "transformers_version": "4.51.3",
145
- "typical_p": 1.0,
146
- "use_bfloat16": false,
147
  "use_cache": true,
148
  "v_head_dim": 128,
149
  "vocab_size": 163840
 
1
  {
2
  "_attn_implementation_autoset": false,
 
 
3
  "architectures": [
4
  "DeepseekV3ForCausalLM"
5
  ],
 
11
  "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
12
  },
13
  "aux_loss_alpha": 0.001,
 
 
14
  "bos_token_id": 163584,
15
+ "torch_dtype": "bfloat16",
 
 
 
 
 
 
16
  "eos_token_id": 163586,
17
  "ep_size": 1,
 
 
18
  "first_k_dense_replace": 1,
 
 
19
  "hidden_act": "silu",
20
  "hidden_size": 7168,
 
 
 
 
21
  "initializer_range": 0.02,
22
  "intermediate_size": 18432,
 
 
23
  "kv_lora_rank": 512,
 
 
 
 
 
 
24
  "max_position_embeddings": 262144,
25
+ "model_type": "deepseek_v3",
 
26
  "moe_intermediate_size": 2048,
27
  "moe_layer_freq": 1,
28
  "n_group": 1,
29
  "n_routed_experts": 384,
30
  "n_shared_experts": 1,
 
31
  "norm_topk_prob": true,
32
  "num_attention_heads": 64,
 
 
33
  "num_experts_per_tok": 8,
34
  "num_hidden_layers": 61,
35
  "num_key_value_heads": 64,
36
  "num_nextn_predict_layers": 0,
 
 
 
 
37
  "pad_token_id": 163839,
 
38
  "pretraining_tp": 1,
 
 
39
  "q_lora_rank": 1536,
40
  "qk_nope_head_dim": 128,
41
  "qk_rope_head_dim": 64,
 
72
  "quant_method": "compressed-tensors",
73
  "quantization_status": "compressed"
74
  },
 
 
 
 
75
  "rms_norm_eps": 1e-05,
76
  "rope_scaling": {
77
  "beta_fast": 1.0,
 
85
  "rope_theta": 50000.0,
86
  "routed_scaling_factor": 2.827,
87
  "scoring_func": "sigmoid",
 
88
  "seq_aux": true,
 
 
 
 
 
89
  "tie_word_embeddings": false,
 
 
 
90
  "topk_group": 1,
91
  "topk_method": "noaux_tc",
92
+ "transformers_version": "4.57.1",
93
+ "unsloth_fixed": true,
 
 
 
94
  "use_cache": true,
95
  "v_head_dim": 128,
96
  "vocab_size": 163840
figures/kimi-logo.png CHANGED

Git LFS Details

  • SHA256: 4a80f64242bf907940765adc7bcf340c28dd83334b07c5503792a26495d1933b
  • Pointer size: 130 Bytes
  • Size of remote file: 88 kB
special_tokens_map.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_end|>",
4
+ "<|im_user|>",
5
+ "<|im_assistant|>",
6
+ "<|start_header_id|>",
7
+ "<|end_header_id|>",
8
+ "[EOT]",
9
+ "<|im_system|>",
10
+ "<|im_middle|>"
11
+ ],
12
+ "bos_token": {
13
+ "content": "[BOS]",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": {
27
+ "content": "[PAD]",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "unk_token": {
34
+ "content": "[UNK]",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ }
40
+ }
tokenizer_config.json CHANGED
@@ -163,18 +163,20 @@
163
  "<|im_system|>",
164
  "<|im_middle|>"
165
  ],
 
 
 
 
 
 
166
  "bos_token": "[BOS]",
167
  "clean_up_tokenization_spaces": false,
168
  "eos_token": "[EOS]",
169
  "extra_special_tokens": {},
170
- "model_max_length": 1000000000000000019884624838656,
171
  "pad_token": "[PAD]",
 
172
  "tokenizer_class": "TikTokenTokenizer",
173
  "unk_token": "[UNK]",
174
- "auto_map": {
175
- "AutoTokenizer": [
176
- "tokenization_kimi.TikTokenTokenizer",
177
- null
178
- ]
179
- }
180
  }
 
163
  "<|im_system|>",
164
  "<|im_middle|>"
165
  ],
166
+ "auto_map": {
167
+ "AutoTokenizer": [
168
+ "tokenization_kimi.TikTokenTokenizer",
169
+ null
170
+ ]
171
+ },
172
  "bos_token": "[BOS]",
173
  "clean_up_tokenization_spaces": false,
174
  "eos_token": "[EOS]",
175
  "extra_special_tokens": {},
176
+ "model_max_length": 262144,
177
  "pad_token": "[PAD]",
178
+ "padding_side": "left",
179
  "tokenizer_class": "TikTokenTokenizer",
180
  "unk_token": "[UNK]",
181
+ "chat_template": "{# Unsloth template fixes #}\n{%- macro render_content(msg) -%}\n {%- set c = msg.get('content') -%}\n {%- if c is string -%}\n {{ c }}\n {%- elif c is not none -%}\n {% for content in c -%}\n {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}\n <|media_start|>image<|media_content|><|media_pad|><|media_end|>\n {% else -%}\n {{ content['text'] }}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n{%- endmacro -%}\n\n{% macro set_roles(message) -%}\n {%- set role_name = message.get('name') or message['role'] -%}\n {%- if message['role'] == 'user' -%}\n <|im_user|>{{role_name}}<|im_middle|>\n {%- elif message['role'] == 'assistant' -%}\n <|im_assistant|>{{role_name}}<|im_middle|>\n {%- else -%}\n <|im_system|>{{role_name}}<|im_middle|>\n {%- endif -%}\n{%- endmacro -%}\n\n\n{%- macro render_toolcalls(message) -%}\n <|tool_calls_section_begin|>\n {%- for tool_call in message['tool_calls'] -%}\n {%- set formatted_id = tool_call['id'] -%}\n <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>\n {%- endfor -%}\n <|tool_calls_section_end|>\n{%- endmacro -%}\n\n\n{# Find last non-tool-call assisitant message #}\n{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}\n{%- for idx in range(messages|length-1, -1, -1) -%}\n {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}\n {%- set ns.last_non_tool_call_assistant_msg = idx -%}\n {%- break -%}\n {%- endif -%}\n{%- endfor -%}\n\n{# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}\n{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}\n{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}\n\n{%- if tools -%}{%- set tools_json = tools|tojson -%}{%- set 
tools_json = tools_json.split(': ') | join(':') -%}{%- set tools_json = tools_json.split(', ') | join(',') -%}\n <|im_system|>tool_declare<|im_middle|>{{ tools_json }}<|im_end|>\n{%- endif -%}\n\n{%- for message in hist_msgs -%}\n {%- if loop.first and messages[0]['role'] != 'system' -%}\n <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>\n {%- endif -%}\n {{set_roles(message)}}\n {%- if message['role'] == 'assistant' -%}\n <think></think>{{render_content(message)}}\n {%- if message.get('tool_calls') -%}\n {{render_toolcalls(message)}}\n {%- endif -%}\n {%- elif message['role'] == 'tool' -%}\n {%- set tool_call_id = message.tool_call_id -%}\n ## Return of {{ tool_call_id }}\n{{render_content(message)}}\n {%- elif message['content'] is not none -%}\n {{render_content(message)}}\n {%- endif -%}\n <|im_end|>\n{%- endfor -%}\n\n{%- for message in suffix_msgs -%}\n {{set_roles(message)}}\n {%- if message['role'] == 'assistant' -%}\n {%- set rc = message.get('reasoning_content', '') -%}\n <think>{{rc}}</think>{{render_content(message)}}\n {%- if message.get('tool_calls') -%}\n {{render_toolcalls(message)}}\n {%- endif -%}\n {%- elif message['role'] == 'tool' -%}\n {%- set tool_call_id = message.tool_call_id -%}\n ## Return of {{ tool_call_id }}\n{{render_content(message)}}\n {%- elif message['content'] is not none -%}\n {{render_content(message)}}\n {%- endif -%}\n <|im_end|>\n{%- endfor -%}\n\n\n{%- if add_generation_prompt -%}\n <|im_assistant|>assistant<|im_middle|>\n{%- endif -%}\n{# Copyright 2025-present Unsloth. Apache 2.0 License. #}"
 
 
 
 
 
182
  }