{ "best_metric": 0.5494681000709534, "best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e3l57-l/checkpoint-10500", "epoch": 1.1198805460750854, "eval_steps": 500, "global_step": 10500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05332764505119454, "grad_norm": 414.62841796875, "learning_rate": 4.911120591581342e-07, "loss": 0.3812, "step": 500 }, { "epoch": 0.05332764505119454, "eval_loss": 1.1163015365600586, "eval_runtime": 61.8962, "eval_samples_per_second": 15.946, "eval_steps_per_second": 2.003, "step": 500 }, { "epoch": 0.10665529010238908, "grad_norm": 34.08418655395508, "learning_rate": 4.822241183162685e-07, "loss": 0.2683, "step": 1000 }, { "epoch": 0.10665529010238908, "eval_loss": 0.9684447646141052, "eval_runtime": 61.821, "eval_samples_per_second": 15.965, "eval_steps_per_second": 2.006, "step": 1000 }, { "epoch": 0.1599829351535836, "grad_norm": 350.4574890136719, "learning_rate": 4.733361774744027e-07, "loss": 0.2119, "step": 1500 }, { "epoch": 0.1599829351535836, "eval_loss": 0.9099885821342468, "eval_runtime": 61.9606, "eval_samples_per_second": 15.929, "eval_steps_per_second": 2.001, "step": 1500 }, { "epoch": 0.21331058020477817, "grad_norm": 0.01787523180246353, "learning_rate": 4.6444823663253695e-07, "loss": 0.1889, "step": 2000 }, { "epoch": 0.21331058020477817, "eval_loss": 0.8620074987411499, "eval_runtime": 63.2824, "eval_samples_per_second": 15.597, "eval_steps_per_second": 1.959, "step": 2000 }, { "epoch": 0.2666382252559727, "grad_norm": 118.66907501220703, "learning_rate": 4.5556029579067116e-07, "loss": 0.2071, "step": 2500 }, { "epoch": 0.2666382252559727, "eval_loss": 0.791786253452301, "eval_runtime": 63.7932, "eval_samples_per_second": 15.472, "eval_steps_per_second": 1.944, "step": 2500 }, { "epoch": 0.3199658703071672, "grad_norm": 7.21087323185543e-09, "learning_rate": 4.4667235494880547e-07, "loss": 0.1588, "step": 3000 }, { "epoch": 0.3199658703071672, "eval_loss": 0.7657254338264465, "eval_runtime": 62.4738, "eval_samples_per_second": 15.799, "eval_steps_per_second": 1.985, "step": 3000 }, { "epoch": 0.37329351535836175, "grad_norm": 0.0014836661284789443, "learning_rate": 4.377844141069397e-07, "loss": 0.1718, "step": 3500 }, { "epoch": 0.37329351535836175, "eval_loss": 0.7610095739364624, "eval_runtime": 63.9118, "eval_samples_per_second": 15.443, "eval_steps_per_second": 1.94, "step": 3500 }, { "epoch": 0.42662116040955633, "grad_norm": 3.2739710807800293, "learning_rate": 4.2889647326507393e-07, "loss": 0.1113, "step": 4000 }, { "epoch": 0.42662116040955633, "eval_loss": 0.7458393573760986, "eval_runtime": 62.1803, "eval_samples_per_second": 15.873, "eval_steps_per_second": 1.994, "step": 4000 }, { "epoch": 0.47994880546075086, "grad_norm": 1.788787841796875, "learning_rate": 4.2000853242320814e-07, "loss": 0.1313, "step": 4500 }, { "epoch": 0.47994880546075086, "eval_loss": 0.7168156504631042, "eval_runtime": 62.1477, "eval_samples_per_second": 15.882, "eval_steps_per_second": 1.995, "step": 4500 }, { "epoch": 0.5332764505119454, "grad_norm": 4.031916250823997e-05, "learning_rate": 4.1112059158134245e-07, "loss": 0.1649, "step": 5000 }, { "epoch": 0.5332764505119454, "eval_loss": 0.7019046545028687, "eval_runtime": 62.2092, "eval_samples_per_second": 15.866, "eval_steps_per_second": 1.993, "step": 5000 }, { "epoch": 0.58660409556314, "grad_norm": 3.883213139488362e-06, "learning_rate": 4.0223265073947665e-07, "loss": 0.1245, "step": 5500 }, { "epoch": 0.58660409556314, "eval_loss": 0.6812178492546082, "eval_runtime": 62.492, "eval_samples_per_second": 15.794, "eval_steps_per_second": 1.984, "step": 5500 }, { "epoch": 0.6399317406143344, "grad_norm": 80.17021942138672, "learning_rate": 3.933447098976109e-07, "loss": 0.1286, "step": 6000 }, { "epoch": 0.6399317406143344, "eval_loss": 0.6501584649085999, "eval_runtime": 62.5236, "eval_samples_per_second": 15.786, "eval_steps_per_second": 1.983, "step": 6000 }, { "epoch": 0.693259385665529, "grad_norm": 0.0001418297761119902, "learning_rate": 3.8445676905574517e-07, "loss": 0.1076, "step": 6500 }, { "epoch": 0.693259385665529, "eval_loss": 0.6153982281684875, "eval_runtime": 62.2927, "eval_samples_per_second": 15.845, "eval_steps_per_second": 1.991, "step": 6500 }, { "epoch": 0.7465870307167235, "grad_norm": 5.804526495012396e-07, "learning_rate": 3.755688282138794e-07, "loss": 0.1477, "step": 7000 }, { "epoch": 0.7465870307167235, "eval_loss": 0.6118016839027405, "eval_runtime": 63.1588, "eval_samples_per_second": 15.627, "eval_steps_per_second": 1.963, "step": 7000 }, { "epoch": 0.7999146757679181, "grad_norm": 2.66489315032959, "learning_rate": 3.6668088737201363e-07, "loss": 0.1315, "step": 7500 }, { "epoch": 0.7999146757679181, "eval_loss": 0.601563036441803, "eval_runtime": 62.9163, "eval_samples_per_second": 15.688, "eval_steps_per_second": 1.971, "step": 7500 }, { "epoch": 0.8532423208191127, "grad_norm": 851.2547607421875, "learning_rate": 3.5779294653014783e-07, "loss": 0.1413, "step": 8000 }, { "epoch": 0.8532423208191127, "eval_loss": 0.584888756275177, "eval_runtime": 62.6735, "eval_samples_per_second": 15.748, "eval_steps_per_second": 1.979, "step": 8000 }, { "epoch": 0.9065699658703071, "grad_norm": 6.041275628376752e-05, "learning_rate": 3.4890500568828214e-07, "loss": 0.124, "step": 8500 }, { "epoch": 0.9065699658703071, "eval_loss": 0.5766209363937378, "eval_runtime": 63.3493, "eval_samples_per_second": 15.58, "eval_steps_per_second": 1.957, "step": 8500 }, { "epoch": 0.9598976109215017, "grad_norm": 3.0901598930358887, "learning_rate": 3.4001706484641635e-07, "loss": 0.1215, "step": 9000 }, { "epoch": 0.9598976109215017, "eval_loss": 0.5558739304542542, "eval_runtime": 63.0862, "eval_samples_per_second": 15.645, "eval_steps_per_second": 1.966, "step": 9000 }, { "epoch": 1.0132252559726962, "grad_norm": 2509.93212890625, "learning_rate": 3.311291240045506e-07, "loss": 0.131, "step": 9500 }, { "epoch": 1.0132252559726962, "eval_loss": 0.563326358795166, "eval_runtime": 62.3607, "eval_samples_per_second": 15.827, "eval_steps_per_second": 1.988, "step": 9500 }, { "epoch": 1.0665529010238908, "grad_norm": 32.05735778808594, "learning_rate": 3.2224118316268486e-07, "loss": 0.0348, "step": 10000 }, { "epoch": 1.0665529010238908, "eval_loss": 0.553093671798706, "eval_runtime": 62.9895, "eval_samples_per_second": 15.669, "eval_steps_per_second": 1.969, "step": 10000 }, { "epoch": 1.1198805460750854, "grad_norm": 0.004324838053435087, "learning_rate": 3.133532423208191e-07, "loss": 0.0687, "step": 10500 }, { "epoch": 1.1198805460750854, "eval_loss": 0.5494681000709534, "eval_runtime": 62.3211, "eval_samples_per_second": 15.837, "eval_steps_per_second": 1.99, "step": 10500 } ], "logging_steps": 500, "max_steps": 28128, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3778182204932520.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }