hbx committed on
Commit
91f38a3
·
verified ·
1 Parent(s): 0a2de43

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,36 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/fig1_aime24_curves_added.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,9 +1,188 @@
1
  ---
2
- license: apache-2.0
 
3
  datasets:
4
  - BytedTsinghua-SIA/DAPO-Math-17k
5
  language:
6
  - en
7
  base_model:
8
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
9
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: mit
3
+ library_name: transformers
4
  datasets:
5
  - BytedTsinghua-SIA/DAPO-Math-17k
6
  language:
7
  - en
8
  base_model:
9
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
10
+ pipeline_tag: text-generation
11
+ ---
12
+
13
+ <div align="center">
14
+ <span style="font-family: default; font-size: 1.5em;">AscentRL: Simplicity at Scale</span>
15
+ <div>
16
+ 🚀 Competitive RL Performance Without Complex Techniques 🌟
17
+ </div>
18
+ </div>
19
+
20
+ <br>
21
+
22
+ <div align="center" style="line-height: 1;">
23
+ <a href="[YOUR_GITHUB_REPO]" style="margin: 2px;">
24
+ <img alt="Code" src="https://img.shields.io/badge/GitHub-100000?style=for-the-badge&logo=github&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
25
+ </a>
26
+ <a href="[YOUR_BLOG_LINK]" target="_blank" style="margin: 2px;">
27
+ <img alt="Notion" src="https://img.shields.io/badge/Notion-%23000000.svg?style=for-the-badge&logo=notion&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
28
+ </a>
29
+ </div>
30
+
31
+
32
+
33
+
34
+ ## Overview
35
+
36
+ **AscentRL** demonstrates that competitive reinforcement learning performance for small language models doesn't require complex multi-stage pipelines or sophisticated stabilization techniques. Using a minimal recipe with single-stage training and fixed hyperparameters, we achieve state-of-the-art results on mathematical reasoning tasks.
37
+
38
+ We release two models:
39
+ - **AscentRL-1.5B-Weak**: Trained from DeepSeek-R1-Distill-Qwen-1.5B
40
+ - **AscentRL-1.5B-Strong**: Trained from OpenMath-Nemotron-1.5B
41
+
42
+ Both models use identical hyperparameters without per-model tuning, demonstrating the robustness of our approach.
43
+
44
+ ![The AIME24 performance curve for scaling from a weak base DeepSeek-R1-Distill-Qwen-1.5B and a strong base OpenMath-Nemotron-1.5B over thousands of steps.](./assets/fig1_aime24_curves_added.png)
45
+
46
+ ## Key Highlights
47
+
48
+ ✨ **Simplicity**: Single-stage training with fixed hyperparameters—no multi-stage pipelines, no dynamic schedules, no specialized stabilization techniques
49
+
50
+ 📈 **Stability**: Smooth, monotonic improvement over 4,000+ training steps without collapses or oscillations
51
+
52
+ 🎯 **Performance**: State-of-the-art results at 1.5B scale, matching or exceeding more complex approaches
53
+
54
+ 💰 **Efficiency**: Comparable or better performance with less compute than multi-stage methods
55
+
56
+ 🔓 **Open**: Complete evaluation scripts and model weights released
57
+
58
+ ## Performance
59
+
60
+ ### AscentRL-1.5B-Weak (Based on DeepSeek-R1-Distill-Qwen-1.5B)
61
+
62
+ | Model | AIME24 (@32) | AIME25 (@32) | AMC23 (@32) | MATH-500 (@4) | Minerva (@4) | OlympiadBench (@4) | HMMT25 (@32) | BRUMO25 (@32) | CMIMC25 (@32) | Avg |
63
+ | ------------------------ | ------------ | ------------ | ----------- | ------------- | ------------ | ------------------ | ------------ | ------------- | ------------- | --------- |
64
+ | DeepSeek-R1-Distill-1.5B | 29.90 | 22.40 | 63.82 | 84.90 | 34.65 | 45.95 | 13.44 | 30.94 | 12.89 | 37.65 |
65
+ | DeepScaleR-1.5B-Preview | 40.21 | 28.65 | 73.83 | 89.30 | 39.34 | 52.79 | 18.96 | 40.00 | 21.00 | 44.88 |
66
+ | ProRL-V2 | 51.87 | 35.73 | 88.75 | 92.00 | 49.03 | **67.84** | 19.38 | 47.29 | **25.86** | 53.08 |
67
+ | BroRL | **57.50** | 36.88 | / | **92.14** | 49.08 | 61.54 | / | / | / | / |
68
+ | AscentRL-1.5B-Weak | 52.29 | **37.19** | **91.02** | 91.55 | **51.47** | 66.77 | **21.98** | **52.71** | 25.63 | **54.51** |
69
+
70
+ The real question is whether our simplicity comes at a computational cost. It doesn't. We use roughly half of ProRL-V2's compute budget while relying on a single-stage recipe with fixed hyperparameters. BroRL requires 4.9× more compute than our approach by increasing rollouts to 512 per example, essentially exhaustively exploring the solution space. Our approach achieves competitive performance without this computational overhead.
71
+
72
+ | | w/ Dynamic Sampling? | Training Steps | Train Batch Size | Rollout N | Max Context Length | Estimated Total Token Budget |
73
+ | ---------------------------- | ------------------------------- | -------------- | ---------------- | ------------ | ------------------ | ------------------------------------------------------------ |
74
+ | DeepScaleR-1.5B-Preview | ❌ | 1,750 | 128 | 8 | 8k → 16k → 24k | $(1040\times8k + 480\times16k + 230\times24k) \times 128\times 8 \approx2.2\times10^6k$ |
75
+ | ProRL-V1 | ✅ Filter Ratio $\approx 50\%$ | 2,450 | 256 | 16 → 32 → 16 | 8k → 16k | $\frac 1 {50\%}(1700\times16\times8k + 550\times32\times8k + 200\times16\times16k) \times 256\approx2.1\times10^8k$ |
76
+ | ProRL-V2 (Based on ProRL-V1) | ✅ Filter Ratio $\approx 50\%$ | +1,000 | 256 | 16 → 32 → 16 | 8k → 16k → 8k | $\frac 1 {50\%}(1700\times16\times8k + 550\times32\times8k + 200\times16\times16k + 1000\times16\times8k) \times 256\approx2.8\times10^8k$ |
77
+ | BroRL (Based on ProRL-V2) | ✅ Filter Ratio $\approx 50\%$ | +191 | 128 | 512 | 16k | $\frac 1 {50\%}[(1700\times16\times8k + 550\times32\times8k + 200\times16\times16k + 1000\times16\times8k) \times 256 + 191\times512\times16k\times128]\approx6.8\times10^8k$ |
78
+ | AscentRL-1.5B-Weak | ❌ | 4,380 | 256 | 8 | 16k | $4380\times256\times 8\times 16k \approx1.4\times10^8k$ |
79
+
80
+ **Note on dynamic sampling**: Models marked with ✅ use dynamic sampling to filter examples. Following [POLARIS](https://honorable-payment-890.notion.site/POLARIS-A-POst-training-recipe-for-scaling-reinforcement-Learning-on-Advanced-ReasonIng-modelS-1dfa954ff7c38094923ec7772bf447a1), we estimate a 50% filter ratio for DeepSeek-R1-Distill-Qwen-1.5B using dynamic sampling, as rollouts often contain many trivial/hard cases (e.g., 8/8 or 0/8 correct rollouts). Even assuming no filtering (i.e., 0% ratio), our compute use remains comparable or even lower, making our estimates conservative.
81
+
82
+ ### AscentRL-1.5B-Strong (Based on OpenMath-Nemotron-1.5B)
83
+
84
+ | Model | AIME24 (@32) | AIME25 (@32) | AMC23 (@32) | MATH-500 (@4) | Minerva (@4) | OlympiadBench (@4) | HMMT25 (@32) | BRUMO25 (@32) | CMIMC25 (@32) | Avg |
85
+ | ---------------------- | ------------ | ------------ | ----------- | ------------- | ------------ | ------------------ | ------------ | ------------- | ------------- | --------- |
86
+ | OpenMath-Nemotron-1.5B | 58.75 | 48.44 | 90.55 | 92.40 | 26.93 | 71.70 | 30.10 | 61.67 | 30.08 | 56.74 |
87
+ | QUESTA-Nemotron-1.5B | **71.56** | 62.08 | 93.44 | 92.95 | **32.08** | 72.28 | **40.94** | **67.50** | 41.48 | 63.81 |
88
+ | AscentRL-1.5B-Strong | 69.69 | **62.92** | **96.02** | **94.15** | 30.24 | **76.59** | 40.63 | 66.88 | **41.72** | **64.32** |
89
+
90
+ We achieve 64.32% average, slightly outperforming QuestA's 63.81% and leading on five of nine benchmarks. The gap is narrow, which makes sense—both approaches are pushing the boundaries of what's achievable at 1.5B scale. The key difference is in how we get there. We use less compute while achieving slightly better average performance without designing a complex curriculum as used in QuestA.
91
+
92
+ | | w/ Dynamic Sampling? | Training Steps | Train Batch Size | Rollout N | Max Context Length | Estimated Total Token Budget |
93
+ | -------------------- | -------------------------------- | -------------- | ---------------- | --------- | ------------------ | ------------------------------------------------------------ |
94
+ | QUESTA-Nemotron-1.5B | ✅ Filter Ratio $\approx 50\%$ | 2,000 | 128 | 16 | 32k | $\frac 1 {50\%}\times2000\times128 \times 16\times 32k \approx2.6\times10^8k$ |
95
+ | AscentRL-1.5B-Strong | ❌ | 3,440 | 256 | 8 | 16k | $3440\times256 \times 8\times 16k \approx1.1\times10^8k$ |
96
+
97
+ ## Training Recipe
98
+
99
+ Our approach is deliberately minimal:
100
+
101
+ **Core Algorithm**: Standard GRPO with binary outcome rewards
102
+ - **Reward**: Simple DAPO verifier (string-matching, no SymPy)
103
+ - **Training**: Single-stage, no curriculum or stage transitions
104
+ - **Hyperparameters**: Fixed throughout (no adaptive schedules)
105
+ - **Data**: DAPO-Math-17k without filtering or dynamic sampling
106
+ - **Length Control**: 16K context cap (no explicit penalties)
107
+ - **Stabilization**: Only "clip higher" for gradient stability
108
+
109
+ For detailed hyperparameters and a comparison of our training techniques with other methods, please refer to our blog.
110
+
111
+ ## Training Data
112
+
113
+ We train on [DAPO-Math-17k](https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k), a curated dataset of mathematical problems. **No offline difficulty filtering or online dynamic sampling is used.**
114
+
115
+ ## Usage
116
+
117
+ ### Basic Inference
118
+ ```python
119
+ from transformers import AutoModelForCausalLM, AutoTokenizer
120
+
121
+ model_name = "hbx/AscentRL-1.5B-Strong" # or AscentRL-1.5B-Weak
122
+ model = AutoModelForCausalLM.from_pretrained(
123
+ model_name,
124
+ torch_dtype="auto",
125
+ device_map="auto"
126
+ )
127
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
128
+
129
+ prompt = """<problem>
130
+
131
+ Please reason step by step, and put your final answer within \\boxed{}."""
132
+
133
+ messages = [{"role": "user", "content": prompt}]
134
+ text = tokenizer.apply_chat_template(
135
+ messages,
136
+ tokenize=False,
137
+ add_generation_prompt=True
138
+ )
139
+
140
+ inputs = tokenizer([text], return_tensors="pt").to(model.device)
141
+ outputs = model.generate(
142
+ **inputs,
143
+ max_new_tokens=16384,
144
+ temperature=0.7,
145
+ top_p=0.9,
146
+ do_sample=True
147
+ )
148
+
149
+ response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
150
+ print(response)
151
+ ```
152
+
153
+ ### Batch Inference with vLLM
154
+ ```python
155
+ from vllm import LLM, SamplingParams
156
+
157
+ llm = LLM(
158
+ model="hbx/AscentRL-1.5B-Strong",
159
+ tensor_parallel_size=1,
160
+ max_model_len=32768
161
+ )
162
+
163
+ sampling_params = SamplingParams(
164
+ temperature=0.7,
165
+ top_p=0.9,
166
+ max_tokens=16384,
167
+ )
168
+
169
+ problems = [...] # Your list of problems
170
+ responses = llm.generate(problems, sampling_params)
171
+ ```
172
+
173
+ ## Reproduction
174
+
175
+ We provide evaluation scripts based on [POLARIS](https://github.com/ChenxinAn-fdu/POLARIS). The evaluation script is available at [TODO](TODO).
176
+
177
+ ## Citation
178
+
179
+ ```bibtex
180
+ @misc{he2025ascentrl,
181
+ title = {TODO},
182
+ author = {TODO},
183
+ year = {2025},
184
+ month = {Nov},
185
+ day = {1},
186
+ note = {First published on Notion},
187
+ url = {https://TODO}
188
+ }
assets/fig1_aime24_curves_added.png ADDED

Git LFS Details

  • SHA256: 3fb93b4ec962967c62fb4a8d720a0936d321b470757473c69246633a199d315d
  • Pointer size: 131 Bytes
  • Size of remote file: 338 kB
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151643,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1536,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 8960,
12
+ "max_position_embeddings": 131072,
13
+ "max_window_layers": 21,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 2,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 10000,
21
+ "sliding_window": 4096,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.51.3",
25
+ "use_cache": true,
26
+ "use_mrope": false,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 151936
29
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151646,
4
+ "eos_token_id": 151643,
5
+ "do_sample": true,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.39.3"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bedcdf243c4e2e633fa04c05617809cf6a1bbc1a07221609035f6347efecffb
3
+ size 3554214752
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|begin▁of▁sentence|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|end▁of▁sentence|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": true,
22
+ "model_max_length": 16384,
23
+ "pad_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<|end▁of▁sentence|>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ "sp_model_kwargs": {},
32
+ "unk_token": null,
33
+ "tokenizer_class": "LlamaTokenizerFast",
34
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}"
35
+ }