Model save

Browse files

Files changed (8) hide show

README.md +73 -0
adapter_model.safetensors +1 -1
all_results.json +13 -0
eval_results.json +8 -0
runs/Jan20_08-25-38_98f107f1aa39/events.out.tfevents.1705739248.98f107f1aa39.3125.0 +2 -2
runs/Jan20_08-25-38_98f107f1aa39/events.out.tfevents.1705742423.98f107f1aa39.3125.1 +3 -0
train_results.json +8 -0
trainer_state.json +82 -0

README.md ADDED Viewed

	@@ -0,0 +1,73 @@

+---
+license: apache-2.0
+library_name: peft
+tags:
+- trl
+- sft
+- generated_from_trainer
+datasets:
+- generator
+base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+model-index:
+- name: Tukan-1.1B-Chat-reasoning-sft
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Tukan-1.1B-Chat-reasoning-sft
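+This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the `generator` dataset (this appears to be an auto-generated placeholder name; the actual training dataset is not recorded in this card).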
+It achieves the following results on the evaluation set:
+- Loss: 1.2578
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 10
+- eval_batch_size: 5
+- seed: 42
+- distributed_type: multi-GPU
+- gradient_accumulation_steps: 20
+- total_train_batch_size: 200
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.3338        | 0.39  | 10   | 1.2857          |
+| 1.2798        | 0.79  | 20   | 1.2591          |
+### Framework versions
+- Transformers 4.36.2
+- Pytorch 2.2.0a0+gitd925d94
+- Datasets 2.14.6
+- Tokenizers 0.15.0
+- PEFT 0.6.1

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7e494117026edd1aacae562e3f0932aee3ef1dc7e0ce13cc8ca034c58addb84
 size 26361536

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d5c30abc9154a5b5efc6dcf098c3c34f710ac50858779a43d10ef4cf10684d1
 size 26361536

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 0.98,
+    "eval_loss": 1.257819414138794,
+    "eval_runtime": 24.0321,
+    "eval_samples": 662,
+    "eval_samples_per_second": 5.368,
+    "eval_steps_per_second": 1.082,
+    "train_loss": 1.3208525276184082,
+    "train_runtime": 3151.0527,
+    "train_samples": 25778,
+    "train_samples_per_second": 1.613,
+    "train_steps_per_second": 0.008
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 0.98,
+    "eval_loss": 1.257819414138794,
+    "eval_runtime": 24.0321,
+    "eval_samples": 662,
+    "eval_samples_per_second": 5.368,
+    "eval_steps_per_second": 1.082
+}

runs/Jan20_08-25-38_98f107f1aa39/events.out.tfevents.1705739248.98f107f1aa39.3125.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ba0b300a06abfb6513f8347aa0ffcc0efc5859c846364a1345523fc243992f6
-size 5823

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f0abc33dd94f5bb1f1470cda2b72ab3bd44564d0ae18280f283c5b58208e6e4
+size 6325

runs/Jan20_08-25-38_98f107f1aa39/events.out.tfevents.1705742423.98f107f1aa39.3125.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2fef11cac704354cca00d9acaab1f8be2ad705461974fc3411e3919fd027f1
+size 354

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 0.98,
+    "train_loss": 1.3208525276184082,
+    "train_runtime": 3151.0527,
+    "train_samples": 25778,
+    "train_samples_per_second": 1.613,
+    "train_steps_per_second": 0.008
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9823182711198428,
+  "eval_steps": 10,
+  "global_step": 25,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.04,
+      "learning_rate": 4.980286753286195e-05,
+      "loss": 1.4539,
+      "step": 1
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 4.522542485937369e-05,
+      "loss": 1.4374,
+      "step": 5
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 3.272542485937369e-05,
+      "loss": 1.3338,
+      "step": 10
+    },
+    {
+      "epoch": 0.39,
+      "eval_loss": 1.285738229751587,
+      "eval_runtime": 24.0251,
+      "eval_samples_per_second": 5.369,
+      "eval_steps_per_second": 1.082,
+      "step": 10
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 1.7274575140626318e-05,
+      "loss": 1.286,
+      "step": 15
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 4.7745751406263165e-06,
+      "loss": 1.2798,
+      "step": 20
+    },
+    {
+      "epoch": 0.79,
+      "eval_loss": 1.259059190750122,
+      "eval_runtime": 24.0291,
+      "eval_samples_per_second": 5.368,
+      "eval_steps_per_second": 1.082,
+      "step": 20
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 0.0,
+      "loss": 1.2639,
+      "step": 25
+    },
+    {
+      "epoch": 0.98,
+      "step": 25,
+      "total_flos": 6.39647288959959e+16,
+      "train_loss": 1.3208525276184082,
+      "train_runtime": 3151.0527,
+      "train_samples_per_second": 1.613,
+      "train_steps_per_second": 0.008
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 25,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 20,
+  "total_flos": 6.39647288959959e+16,
+  "train_batch_size": 10,
+  "trial_name": null,
+  "trial_params": null
+}