|
{ |
|
"best_metric": 1.0816909074783325, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq/checkpoint-2114", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 7399, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.3965729475021362, |
|
"learning_rate": 4.865067829457365e-05, |
|
"loss": 1.7995, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.3157163520866108, |
|
"eval_loss": 1.1331889629364014, |
|
"eval_rouge1": 0.45516049357610144, |
|
"eval_rouge2": 0.2017132883532014, |
|
"eval_rougeL": 0.453144508008762, |
|
"eval_runtime": 20.354, |
|
"eval_samples_per_second": 103.812, |
|
"eval_steps_per_second": 13.02, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.2999993562698364, |
|
"learning_rate": 4.609011627906977e-05, |
|
"loss": 0.5906, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.34996698643265867, |
|
"eval_loss": 1.0816909074783325, |
|
"eval_rouge1": 0.5070888923629823, |
|
"eval_rouge2": 0.24860353236577704, |
|
"eval_rougeL": 0.5048066808619132, |
|
"eval_runtime": 18.5352, |
|
"eval_samples_per_second": 113.999, |
|
"eval_steps_per_second": 14.297, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.494644284248352, |
|
"learning_rate": 4.3529554263565894e-05, |
|
"loss": 0.484, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.3592334262176334, |
|
"eval_loss": 1.1038055419921875, |
|
"eval_rouge1": 0.5329660042734197, |
|
"eval_rouge2": 0.2737288088550716, |
|
"eval_rougeL": 0.530301732660456, |
|
"eval_runtime": 11.0163, |
|
"eval_samples_per_second": 191.807, |
|
"eval_steps_per_second": 24.055, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.169060230255127, |
|
"learning_rate": 4.096899224806201e-05, |
|
"loss": 0.4017, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.3664876357280857, |
|
"eval_loss": 1.1117204427719116, |
|
"eval_rouge1": 0.5470701344699284, |
|
"eval_rouge2": 0.29154846104100973, |
|
"eval_rougeL": 0.5448816975696349, |
|
"eval_runtime": 11.4837, |
|
"eval_samples_per_second": 184.001, |
|
"eval_steps_per_second": 23.076, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.5656139850616455, |
|
"learning_rate": 3.840843023255814e-05, |
|
"loss": 0.3357, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.36951226807058296, |
|
"eval_loss": 1.1360507011413574, |
|
"eval_rouge1": 0.5552416771197572, |
|
"eval_rouge2": 0.300977455987009, |
|
"eval_rougeL": 0.5533455418855034, |
|
"eval_runtime": 7.2292, |
|
"eval_samples_per_second": 292.287, |
|
"eval_steps_per_second": 36.657, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.9493229389190674, |
|
"learning_rate": 3.5847868217054265e-05, |
|
"loss": 0.2832, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.37209002173126987, |
|
"eval_loss": 1.1256883144378662, |
|
"eval_rouge1": 0.5606510219414631, |
|
"eval_rouge2": 0.3089222128154705, |
|
"eval_rougeL": 0.5584217258440263, |
|
"eval_runtime": 7.0098, |
|
"eval_samples_per_second": 301.433, |
|
"eval_steps_per_second": 37.804, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.3939344882965088, |
|
"learning_rate": 3.328730620155039e-05, |
|
"loss": 0.2404, |
|
"step": 7399 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.37399333863753975, |
|
"eval_loss": 1.1580852270126343, |
|
"eval_rouge1": 0.5622192023319419, |
|
"eval_rouge2": 0.3102334124697639, |
|
"eval_rougeL": 0.5600347361241877, |
|
"eval_runtime": 7.2059, |
|
"eval_samples_per_second": 293.233, |
|
"eval_steps_per_second": 36.776, |
|
"step": 7399 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 7399, |
|
"total_flos": 3864770445312000.0, |
|
"train_loss": 0.5907337587255773, |
|
"train_runtime": 1774.4968, |
|
"train_samples_per_second": 95.261, |
|
"train_steps_per_second": 11.913 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3864770445312000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|