{ "best_metric": 1.0816909074783325, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq/checkpoint-2114", "epoch": 7.0, "eval_steps": 500, "global_step": 7399, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.3965729475021362, "learning_rate": 4.865067829457365e-05, "loss": 1.7995, "step": 1057 }, { "epoch": 1.0, "eval_bleu": 0.3157163520866108, "eval_loss": 1.1331889629364014, "eval_rouge1": 0.45516049357610144, "eval_rouge2": 0.2017132883532014, "eval_rougeL": 0.453144508008762, "eval_runtime": 20.354, "eval_samples_per_second": 103.812, "eval_steps_per_second": 13.02, "step": 1057 }, { "epoch": 2.0, "grad_norm": 1.2999993562698364, "learning_rate": 4.609011627906977e-05, "loss": 0.5906, "step": 2114 }, { "epoch": 2.0, "eval_bleu": 0.34996698643265867, "eval_loss": 1.0816909074783325, "eval_rouge1": 0.5070888923629823, "eval_rouge2": 0.24860353236577704, "eval_rougeL": 0.5048066808619132, "eval_runtime": 18.5352, "eval_samples_per_second": 113.999, "eval_steps_per_second": 14.297, "step": 2114 }, { "epoch": 3.0, "grad_norm": 1.494644284248352, "learning_rate": 4.3529554263565894e-05, "loss": 0.484, "step": 3171 }, { "epoch": 3.0, "eval_bleu": 0.3592334262176334, "eval_loss": 1.1038055419921875, "eval_rouge1": 0.5329660042734197, "eval_rouge2": 0.2737288088550716, "eval_rougeL": 0.530301732660456, "eval_runtime": 11.0163, "eval_samples_per_second": 191.807, "eval_steps_per_second": 24.055, "step": 3171 }, { "epoch": 4.0, "grad_norm": 2.169060230255127, "learning_rate": 4.096899224806201e-05, "loss": 0.4017, "step": 4228 }, { "epoch": 4.0, "eval_bleu": 0.3664876357280857, "eval_loss": 1.1117204427719116, "eval_rouge1": 0.5470701344699284, "eval_rouge2": 0.29154846104100973, "eval_rougeL": 0.5448816975696349, "eval_runtime": 11.4837, "eval_samples_per_second": 184.001, "eval_steps_per_second": 23.076, "step": 4228 }, { "epoch": 5.0, "grad_norm": 1.5656139850616455, "learning_rate": 3.840843023255814e-05, "loss": 0.3357, "step": 5285 }, { "epoch": 5.0, "eval_bleu": 0.36951226807058296, "eval_loss": 1.1360507011413574, "eval_rouge1": 0.5552416771197572, "eval_rouge2": 0.300977455987009, "eval_rougeL": 0.5533455418855034, "eval_runtime": 7.2292, "eval_samples_per_second": 292.287, "eval_steps_per_second": 36.657, "step": 5285 }, { "epoch": 6.0, "grad_norm": 1.9493229389190674, "learning_rate": 3.5847868217054265e-05, "loss": 0.2832, "step": 6342 }, { "epoch": 6.0, "eval_bleu": 0.37209002173126987, "eval_loss": 1.1256883144378662, "eval_rouge1": 0.5606510219414631, "eval_rouge2": 0.3089222128154705, "eval_rougeL": 0.5584217258440263, "eval_runtime": 7.0098, "eval_samples_per_second": 301.433, "eval_steps_per_second": 37.804, "step": 6342 }, { "epoch": 7.0, "grad_norm": 1.3939344882965088, "learning_rate": 3.328730620155039e-05, "loss": 0.2404, "step": 7399 }, { "epoch": 7.0, "eval_bleu": 0.37399333863753975, "eval_loss": 1.1580852270126343, "eval_rouge1": 0.5622192023319419, "eval_rouge2": 0.3102334124697639, "eval_rougeL": 0.5600347361241877, "eval_runtime": 7.2059, "eval_samples_per_second": 293.233, "eval_steps_per_second": 36.776, "step": 7399 }, { "epoch": 7.0, "step": 7399, "total_flos": 3864770445312000.0, "train_loss": 0.5907337587255773, "train_runtime": 1774.4968, "train_samples_per_second": 95.261, "train_steps_per_second": 11.913 } ], "logging_steps": 500, "max_steps": 21140, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3864770445312000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }