{ "best_metric": 0.8068910256410257, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-pos-ud-Tamil-TTB/checkpoint-1000", "epoch": 269.2307692307692, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.69, "learning_rate": 8e-05, "loss": 1.509, "step": 100 }, { "epoch": 15.38, "learning_rate": 7.946308724832215e-05, "loss": 0.274, "step": 200 }, { "epoch": 23.08, "learning_rate": 7.89261744966443e-05, "loss": 0.0679, "step": 300 }, { "epoch": 30.77, "learning_rate": 7.838926174496645e-05, "loss": 0.0307, "step": 400 }, { "epoch": 38.46, "learning_rate": 7.78523489932886e-05, "loss": 0.025, "step": 500 }, { "epoch": 38.46, "eval_accuracy": 0.7892628205128205, "eval_loss": 1.4348915815353394, "eval_runtime": 0.3441, "eval_samples_per_second": 232.482, "eval_steps_per_second": 29.06, "step": 500 }, { "epoch": 46.15, "learning_rate": 7.731543624161075e-05, "loss": 0.018, "step": 600 }, { "epoch": 53.85, "learning_rate": 7.677852348993288e-05, "loss": 0.0152, "step": 700 }, { "epoch": 61.54, "learning_rate": 7.624161073825503e-05, "loss": 0.0113, "step": 800 }, { "epoch": 69.23, "learning_rate": 7.570469798657718e-05, "loss": 0.0081, "step": 900 }, { "epoch": 76.92, "learning_rate": 7.516778523489933e-05, "loss": 0.0092, "step": 1000 }, { "epoch": 76.92, "eval_accuracy": 0.8068910256410257, "eval_loss": 1.4408690929412842, "eval_runtime": 0.3248, "eval_samples_per_second": 246.316, "eval_steps_per_second": 30.79, "step": 1000 }, { "epoch": 84.62, "learning_rate": 7.463087248322148e-05, "loss": 0.0069, "step": 1100 }, { "epoch": 92.31, "learning_rate": 7.409395973154362e-05, "loss": 0.0067, "step": 1200 }, { "epoch": 100.0, "learning_rate": 7.355704697986577e-05, "loss": 0.0069, "step": 1300 }, { "epoch": 107.69, "learning_rate": 7.302013422818792e-05, "loss": 0.0053, "step": 1400 }, { "epoch": 115.38, "learning_rate": 7.248322147651007e-05, "loss": 0.0046, "step": 1500 }, { "epoch": 115.38, "eval_accuracy": 0.780448717948718, "eval_loss": 1.9734928607940674, "eval_runtime": 0.3276, "eval_samples_per_second": 244.204, "eval_steps_per_second": 30.525, "step": 1500 }, { "epoch": 123.08, "learning_rate": 7.194630872483222e-05, "loss": 0.0049, "step": 1600 }, { "epoch": 130.77, "learning_rate": 7.140939597315438e-05, "loss": 0.0045, "step": 1700 }, { "epoch": 138.46, "learning_rate": 7.087248322147653e-05, "loss": 0.0052, "step": 1800 }, { "epoch": 146.15, "learning_rate": 7.033557046979866e-05, "loss": 0.004, "step": 1900 }, { "epoch": 153.85, "learning_rate": 6.979865771812081e-05, "loss": 0.0036, "step": 2000 }, { "epoch": 153.85, "eval_accuracy": 0.7996794871794872, "eval_loss": 1.8069559335708618, "eval_runtime": 0.3276, "eval_samples_per_second": 244.205, "eval_steps_per_second": 30.526, "step": 2000 }, { "epoch": 161.54, "learning_rate": 6.926174496644296e-05, "loss": 0.0057, "step": 2100 }, { "epoch": 169.23, "learning_rate": 6.87248322147651e-05, "loss": 0.0055, "step": 2200 }, { "epoch": 176.92, "learning_rate": 6.818791946308725e-05, "loss": 0.0049, "step": 2300 }, { "epoch": 184.62, "learning_rate": 6.76510067114094e-05, "loss": 0.0021, "step": 2400 }, { "epoch": 192.31, "learning_rate": 6.711409395973155e-05, "loss": 0.0042, "step": 2500 }, { "epoch": 192.31, "eval_accuracy": 0.7780448717948718, "eval_loss": 1.728083610534668, "eval_runtime": 0.3279, "eval_samples_per_second": 243.965, "eval_steps_per_second": 30.496, "step": 2500 }, { "epoch": 200.0, "learning_rate": 6.65771812080537e-05, "loss": 0.0044, "step": 2600 }, { "epoch": 207.69, "learning_rate": 6.604026845637585e-05, "loss": 0.0035, "step": 2700 }, { "epoch": 215.38, "learning_rate": 6.5503355704698e-05, "loss": 0.0032, "step": 2800 }, { "epoch": 223.08, "learning_rate": 6.496644295302014e-05, "loss": 0.0033, "step": 2900 }, { "epoch": 230.77, "learning_rate": 6.442953020134228e-05, "loss": 0.0027, "step": 3000 }, { "epoch": 230.77, "eval_accuracy": 0.8004807692307693, "eval_loss": 1.849981665611267, "eval_runtime": 0.3277, "eval_samples_per_second": 244.159, "eval_steps_per_second": 30.52, "step": 3000 }, { "epoch": 238.46, "learning_rate": 6.389261744966443e-05, "loss": 0.0027, "step": 3100 }, { "epoch": 246.15, "learning_rate": 6.335570469798657e-05, "loss": 0.0017, "step": 3200 }, { "epoch": 253.85, "learning_rate": 6.281879194630872e-05, "loss": 0.0016, "step": 3300 }, { "epoch": 261.54, "learning_rate": 6.228187919463087e-05, "loss": 0.0016, "step": 3400 }, { "epoch": 269.23, "learning_rate": 6.174496644295302e-05, "loss": 0.0022, "step": 3500 }, { "epoch": 269.23, "eval_accuracy": 0.8060897435897436, "eval_loss": 1.7650340795516968, "eval_runtime": 0.3308, "eval_samples_per_second": 241.835, "eval_steps_per_second": 30.229, "step": 3500 }, { "epoch": 269.23, "step": 3500, "total_flos": 1.768647275040768e+16, "train_loss": 0.05915338551998139, "train_runtime": 490.6715, "train_samples_per_second": 978.251, "train_steps_per_second": 30.57 } ], "max_steps": 15000, "num_train_epochs": 1154, "total_flos": 1.768647275040768e+16, "trial_name": null, "trial_params": null }