{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.954646181119811, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 6.249924149859892e-05, "loss": 2.9841, "step": 2500 }, { "epoch": 0.07, "eval_loss": 2.880531072616577, "eval_runtime": 5041.8037, "eval_samples_per_second": 11.934, "eval_steps_per_second": 2.984, "step": 2500 }, { "epoch": 0.15, "learning_rate": 6.249460454641857e-05, "loss": 2.9159, "step": 5000 }, { "epoch": 0.15, "eval_loss": 2.843543291091919, "eval_runtime": 5041.0801, "eval_samples_per_second": 11.936, "eval_steps_per_second": 2.984, "step": 5000 }, { "epoch": 0.22, "learning_rate": 6.248574782066793e-05, "loss": 2.8874, "step": 7500 }, { "epoch": 0.22, "eval_loss": 2.8224122524261475, "eval_runtime": 5041.8102, "eval_samples_per_second": 11.934, "eval_steps_per_second": 2.984, "step": 7500 }, { "epoch": 0.3, "learning_rate": 6.24726796015593e-05, "loss": 2.867, "step": 10000 }, { "epoch": 0.3, "eval_loss": 2.805563449859619, "eval_runtime": 5045.7151, "eval_samples_per_second": 11.925, "eval_steps_per_second": 2.981, "step": 10000 }, { "epoch": 0.37, "learning_rate": 6.245539895286441e-05, "loss": 2.8462, "step": 12500 }, { "epoch": 0.44, "learning_rate": 6.243390820601403e-05, "loss": 2.8343, "step": 15000 }, { "epoch": 0.52, "learning_rate": 6.240821026044725e-05, "loss": 2.8232, "step": 17500 }, { "epoch": 0.59, "learning_rate": 6.237830858322031e-05, "loss": 2.8154, "step": 20000 }, { "epoch": 0.59, "eval_loss": 2.7630834579467773, "eval_runtime": 5044.7761, "eval_samples_per_second": 11.927, "eval_steps_per_second": 2.982, "step": 20000 }, { "epoch": 0.66, "learning_rate": 6.234420720853886e-05, "loss": 2.8085, "step": 22500 }, { "epoch": 0.74, "learning_rate": 6.230591073721361e-05, "loss": 2.7959, "step": 25000 }, { "epoch": 0.81, "learning_rate": 6.22634243360397e-05, "loss": 2.7934, "step": 27500 }, { "epoch": 0.89, "learning_rate": 6.221675373709958e-05, "loss": 2.7856, "step": 30000 }, { "epoch": 0.89, "eval_loss": 2.7356297969818115, "eval_runtime": 5045.7013, "eval_samples_per_second": 11.925, "eval_steps_per_second": 2.981, "step": 30000 }, { "epoch": 0.96, "learning_rate": 6.216590523698961e-05, "loss": 2.7796, "step": 32500 }, { "epoch": 1.03, "learning_rate": 6.211090854583099e-05, "loss": 2.7579, "step": 35000 }, { "epoch": 1.11, "learning_rate": 6.205172705145689e-05, "loss": 2.7374, "step": 37500 }, { "epoch": 1.18, "learning_rate": 6.198836374494218e-05, "loss": 2.7324, "step": 40000 }, { "epoch": 1.18, "eval_loss": 2.7184464931488037, "eval_runtime": 5041.2928, "eval_samples_per_second": 11.935, "eval_steps_per_second": 2.984, "step": 40000 }, { "epoch": 1.26, "learning_rate": 6.192087786506709e-05, "loss": 2.7345, "step": 42500 }, { "epoch": 1.33, "learning_rate": 6.18492540023217e-05, "loss": 2.7306, "step": 45000 }, { "epoch": 1.4, "learning_rate": 6.177350181988941e-05, "loss": 2.7291, "step": 47500 }, { "epoch": 1.48, "learning_rate": 6.169363153792874e-05, "loss": 2.7255, "step": 50000 }, { "epoch": 1.48, "eval_loss": 2.7048535346984863, "eval_runtime": 5061.5104, "eval_samples_per_second": 11.888, "eval_steps_per_second": 2.972, "step": 50000 }, { "epoch": 1.55, "learning_rate": 6.160961950708177e-05, "loss": 2.725, "step": 52500 }, { "epoch": 1.63, "learning_rate": 6.152154427075951e-05, "loss": 2.7212, "step": 55000 }, { "epoch": 1.7, "learning_rate": 6.142938492793726e-05, "loss": 2.7196, "step": 57500 }, { "epoch": 1.77, "learning_rate": 6.133315391235702e-05, "loss": 2.7169, "step": 60000 }, { "epoch": 1.77, "eval_loss": 2.6941745281219482, "eval_runtime": 5046.2528, "eval_samples_per_second": 11.924, "eval_steps_per_second": 2.981, "step": 60000 }, { "epoch": 1.85, "learning_rate": 6.12328232645584e-05, "loss": 2.7174, "step": 62500 }, { "epoch": 1.92, "learning_rate": 6.112848678433687e-05, "loss": 2.7153, "step": 65000 }, { "epoch": 1.99, "learning_rate": 6.102011922724016e-05, "loss": 2.7117, "step": 67500 }, { "epoch": 2.07, "learning_rate": 6.090778098663474e-05, "loss": 2.6718, "step": 70000 }, { "epoch": 2.07, "eval_loss": 2.6850435733795166, "eval_runtime": 5045.0709, "eval_samples_per_second": 11.926, "eval_steps_per_second": 2.982, "step": 70000 }, { "epoch": 2.14, "learning_rate": 6.0791397277177804e-05, "loss": 2.6687, "step": 72500 }, { "epoch": 2.22, "learning_rate": 6.0671027969511556e-05, "loss": 2.6724, "step": 75000 }, { "epoch": 2.29, "learning_rate": 6.0546638755690396e-05, "loss": 2.6703, "step": 77500 }, { "epoch": 2.36, "learning_rate": 6.041839805391616e-05, "loss": 2.6717, "step": 80000 }, { "epoch": 2.36, "eval_loss": 2.67754864692688, "eval_runtime": 5038.9811, "eval_samples_per_second": 11.941, "eval_steps_per_second": 2.985, "step": 80000 }, { "epoch": 2.44, "learning_rate": 6.028617152972819e-05, "loss": 2.6733, "step": 82500 }, { "epoch": 2.51, "learning_rate": 6.0150027570214874e-05, "loss": 2.6751, "step": 85000 }, { "epoch": 2.59, "learning_rate": 6.000998454333341e-05, "loss": 2.6738, "step": 87500 }, { "epoch": 2.66, "learning_rate": 5.9866061343086405e-05, "loss": 2.6747, "step": 90000 }, { "epoch": 2.66, "eval_loss": 2.6707887649536133, "eval_runtime": 5038.9349, "eval_samples_per_second": 11.941, "eval_steps_per_second": 2.985, "step": 90000 }, { "epoch": 2.73, "learning_rate": 5.971821747960996e-05, "loss": 2.676, "step": 92500 }, { "epoch": 2.81, "learning_rate": 5.9566591173123494e-05, "loss": 2.6739, "step": 95000 }, { "epoch": 2.88, "learning_rate": 5.941120747883403e-05, "loss": 2.6756, "step": 97500 }, { "epoch": 2.95, "learning_rate": 5.925196295535967e-05, "loss": 2.6714, "step": 100000 }, { "epoch": 2.95, "eval_loss": 2.663831949234009, "eval_runtime": 5038.524, "eval_samples_per_second": 11.942, "eval_steps_per_second": 2.986, "step": 100000 } ], "max_steps": 676900, "num_train_epochs": 20, "total_flos": 2.418000801633927e+18, "trial_name": null, "trial_params": null }