{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.20118343195266272,
  "eval_steps": 17,
  "global_step": 34,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005917159763313609,
      "grad_norm": 0.6224875040916811,
      "learning_rate": 3.75e-07,
      "loss": 2.2194,
      "step": 1
    },
    {
      "epoch": 0.005917159763313609,
      "eval_loss": 2.1284265518188477,
      "eval_runtime": 197.2728,
      "eval_samples_per_second": 0.938,
      "eval_steps_per_second": 0.081,
      "step": 1
    },
    {
      "epoch": 0.011834319526627219,
      "grad_norm": 0.699774954026372,
      "learning_rate": 7.5e-07,
      "loss": 2.2098,
      "step": 2
    },
    {
      "epoch": 0.01775147928994083,
      "grad_norm": 0.8978069601988599,
      "learning_rate": 1.125e-06,
      "loss": 2.0771,
      "step": 3
    },
    {
      "epoch": 0.023668639053254437,
      "grad_norm": 0.603433372430734,
      "learning_rate": 1.5e-06,
      "loss": 2.1207,
      "step": 4
    },
    {
      "epoch": 0.029585798816568046,
      "grad_norm": 0.6705451670098455,
      "learning_rate": 1.875e-06,
      "loss": 2.0443,
      "step": 5
    },
    {
      "epoch": 0.03550295857988166,
      "grad_norm": 0.5685871807834867,
      "learning_rate": 2.25e-06,
      "loss": 2.0938,
      "step": 6
    },
    {
      "epoch": 0.04142011834319527,
      "grad_norm": 0.6813185340138558,
      "learning_rate": 2.6250000000000003e-06,
      "loss": 2.0113,
      "step": 7
    },
    {
      "epoch": 0.047337278106508875,
      "grad_norm": 1.461724410712979,
      "learning_rate": 3e-06,
      "loss": 1.9878,
      "step": 8
    },
    {
      "epoch": 0.05325443786982249,
      "grad_norm": 0.4806729570541462,
      "learning_rate": 2.999785830935332e-06,
      "loss": 2.1648,
      "step": 9
    },
    {
      "epoch": 0.05917159763313609,
      "grad_norm": 0.6450195095319927,
      "learning_rate": 2.999143405285129e-06,
      "loss": 2.1168,
      "step": 10
    },
    {
      "epoch": 0.0650887573964497,
      "grad_norm": 0.5199179785432192,
      "learning_rate": 2.998072967649747e-06,
      "loss": 2.1624,
      "step": 11
    },
    {
      "epoch": 0.07100591715976332,
      "grad_norm": 0.5888842243430149,
      "learning_rate": 2.9965749255929686e-06,
      "loss": 2.0793,
      "step": 12
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 0.5762628204395337,
      "learning_rate": 2.9946498494868224e-06,
      "loss": 2.1745,
      "step": 13
    },
    {
      "epoch": 0.08284023668639054,
      "grad_norm": 0.4611886576496398,
      "learning_rate": 2.9922984722944177e-06,
      "loss": 2.1977,
      "step": 14
    },
    {
      "epoch": 0.08875739644970414,
      "grad_norm": 0.4176129104372226,
      "learning_rate": 2.9895216892908717e-06,
      "loss": 2.1568,
      "step": 15
    },
    {
      "epoch": 0.09467455621301775,
      "grad_norm": 0.9682295199118589,
      "learning_rate": 2.9863205577224416e-06,
      "loss": 2.181,
      "step": 16
    },
    {
      "epoch": 0.10059171597633136,
      "grad_norm": 0.4087372617870282,
      "learning_rate": 2.982696296403978e-06,
      "loss": 2.2208,
      "step": 17
    },
    {
      "epoch": 0.10059171597633136,
      "eval_loss": 2.090203285217285,
      "eval_runtime": 197.1425,
      "eval_samples_per_second": 0.938,
      "eval_steps_per_second": 0.081,
      "step": 17
    },
    {
      "epoch": 0.10650887573964497,
      "grad_norm": 0.6944983731430622,
      "learning_rate": 2.9786502852548696e-06,
      "loss": 1.9687,
      "step": 18
    },
    {
      "epoch": 0.11242603550295859,
      "grad_norm": 0.42170107190665923,
      "learning_rate": 2.9741840647736478e-06,
      "loss": 2.1707,
      "step": 19
    },
    {
      "epoch": 0.11834319526627218,
      "grad_norm": 0.5031933608002007,
      "learning_rate": 2.9692993354514477e-06,
      "loss": 1.838,
      "step": 20
    },
    {
      "epoch": 0.1242603550295858,
      "grad_norm": 0.401561752549318,
      "learning_rate": 2.9639979571245524e-06,
      "loss": 2.1108,
      "step": 21
    },
    {
      "epoch": 0.1301775147928994,
      "grad_norm": 0.4745388099037245,
      "learning_rate": 2.958281948266274e-06,
      "loss": 2.0944,
      "step": 22
    },
    {
      "epoch": 0.13609467455621302,
      "grad_norm": 0.3966815146465992,
      "learning_rate": 2.952153485218428e-06,
      "loss": 2.1954,
      "step": 23
    },
    {
      "epoch": 0.14201183431952663,
      "grad_norm": 0.38324183695928576,
      "learning_rate": 2.9456149013627003e-06,
      "loss": 2.0669,
      "step": 24
    },
    {
      "epoch": 0.14792899408284024,
      "grad_norm": 0.45238869305755414,
      "learning_rate": 2.9386686862322264e-06,
      "loss": 2.0082,
      "step": 25
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 0.43587746677118455,
      "learning_rate": 2.9313174845637132e-06,
      "loss": 2.0851,
      "step": 26
    },
    {
      "epoch": 0.15976331360946747,
      "grad_norm": 0.3280269395122322,
      "learning_rate": 2.9235640952904685e-06,
      "loss": 2.1769,
      "step": 27
    },
    {
      "epoch": 0.16568047337278108,
      "grad_norm": 0.3364191479742792,
      "learning_rate": 2.91541147047672e-06,
      "loss": 2.1024,
      "step": 28
    },
    {
      "epoch": 0.17159763313609466,
      "grad_norm": 0.36098339761423204,
      "learning_rate": 2.9068627141936344e-06,
      "loss": 1.9672,
      "step": 29
    },
    {
      "epoch": 0.17751479289940827,
      "grad_norm": 0.39425457725953256,
      "learning_rate": 2.897921081337456e-06,
      "loss": 2.0966,
      "step": 30
    },
    {
      "epoch": 0.1834319526627219,
      "grad_norm": 0.6235048469037876,
      "learning_rate": 2.8885899763902215e-06,
      "loss": 2.0379,
      "step": 31
    },
    {
      "epoch": 0.1893491124260355,
      "grad_norm": 0.5041187119001387,
      "learning_rate": 2.878872952123519e-06,
      "loss": 1.969,
      "step": 32
    },
    {
      "epoch": 0.1952662721893491,
      "grad_norm": 0.5682354341756487,
      "learning_rate": 2.8687737082457906e-06,
      "loss": 1.9563,
      "step": 33
    },
    {
      "epoch": 0.20118343195266272,
      "grad_norm": 0.45352123274250616,
      "learning_rate": 2.8582960899936856e-06,
      "loss": 1.9577,
      "step": 34
    },
    {
      "epoch": 0.20118343195266272,
      "eval_loss": 2.0715060234069824,
      "eval_runtime": 197.0,
      "eval_samples_per_second": 0.939,
      "eval_steps_per_second": 0.081,
      "step": 34
    }
  ],
  "logging_steps": 1,
  "max_steps": 169,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 34,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8316801515520.0,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}