{ "best_metric": 0.5568962693214417, "best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e4l57-l/checkpoint-18500", "epoch": 2.138917032534054, "eval_steps": 500, "global_step": 19000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.056287290329843524, "grad_norm": 150.1642303466797, "learning_rate": 4.929640887087696e-07, "loss": 0.3486, "step": 500 }, { "epoch": 0.056287290329843524, "eval_loss": 1.1265727281570435, "eval_runtime": 123.7806, "eval_samples_per_second": 15.948, "eval_steps_per_second": 1.995, "step": 500 }, { "epoch": 0.11257458065968705, "grad_norm": 366.6913146972656, "learning_rate": 4.859281774175391e-07, "loss": 0.2733, "step": 1000 }, { "epoch": 0.11257458065968705, "eval_loss": 0.9742079377174377, "eval_runtime": 125.8047, "eval_samples_per_second": 15.691, "eval_steps_per_second": 1.963, "step": 1000 }, { "epoch": 0.16886187098953057, "grad_norm": 555.5016479492188, "learning_rate": 4.788922661263087e-07, "loss": 0.1851, "step": 1500 }, { "epoch": 0.16886187098953057, "eval_loss": 0.9162159562110901, "eval_runtime": 126.7606, "eval_samples_per_second": 15.573, "eval_steps_per_second": 1.949, "step": 1500 }, { "epoch": 0.2251491613193741, "grad_norm": 0.423260897397995, "learning_rate": 4.7185635483507824e-07, "loss": 0.1973, "step": 2000 }, { "epoch": 0.2251491613193741, "eval_loss": 0.8716973662376404, "eval_runtime": 127.6955, "eval_samples_per_second": 15.459, "eval_steps_per_second": 1.934, "step": 2000 }, { "epoch": 0.2814364516492176, "grad_norm": 6.840139389038086, "learning_rate": 4.6482044354384774e-07, "loss": 0.1881, "step": 2500 }, { "epoch": 0.2814364516492176, "eval_loss": 0.830047070980072, "eval_runtime": 128.0771, "eval_samples_per_second": 15.413, "eval_steps_per_second": 1.929, "step": 2500 }, { "epoch": 0.33772374197906113, "grad_norm": 729.8377685546875, "learning_rate": 4.577845322526173e-07, "loss": 0.1695, "step": 3000 }, { "epoch": 0.33772374197906113, "eval_loss": 0.8265627026557922, "eval_runtime": 128.052, "eval_samples_per_second": 15.416, "eval_steps_per_second": 1.929, "step": 3000 }, { "epoch": 0.39401103230890466, "grad_norm": 7.96999938756926e-06, "learning_rate": 4.507486209613869e-07, "loss": 0.155, "step": 3500 }, { "epoch": 0.39401103230890466, "eval_loss": 0.7721038460731506, "eval_runtime": 128.1871, "eval_samples_per_second": 15.399, "eval_steps_per_second": 1.927, "step": 3500 }, { "epoch": 0.4502983226387482, "grad_norm": 0.006555848754942417, "learning_rate": 4.4371270967015645e-07, "loss": 0.1216, "step": 4000 }, { "epoch": 0.4502983226387482, "eval_loss": 0.7490401864051819, "eval_runtime": 127.5516, "eval_samples_per_second": 15.476, "eval_steps_per_second": 1.936, "step": 4000 }, { "epoch": 0.5065856129685917, "grad_norm": 0.0004515685432124883, "learning_rate": 4.36676798378926e-07, "loss": 0.1531, "step": 4500 }, { "epoch": 0.5065856129685917, "eval_loss": 0.7533182501792908, "eval_runtime": 127.4797, "eval_samples_per_second": 15.485, "eval_steps_per_second": 1.938, "step": 4500 }, { "epoch": 0.5628729032984352, "grad_norm": 4.4633176003117114e-05, "learning_rate": 4.2964088708769556e-07, "loss": 0.1154, "step": 5000 }, { "epoch": 0.5628729032984352, "eval_loss": 0.747530460357666, "eval_runtime": 127.8675, "eval_samples_per_second": 15.438, "eval_steps_per_second": 1.932, "step": 5000 }, { "epoch": 0.6191601936282788, "grad_norm": 4.158839702606201, "learning_rate": 4.2260497579646517e-07, "loss": 0.1407, "step": 5500 }, { "epoch": 0.6191601936282788, "eval_loss": 0.724854588508606, "eval_runtime": 125.1629, "eval_samples_per_second": 15.771, "eval_steps_per_second": 1.973, "step": 5500 }, { "epoch": 0.6754474839581223, "grad_norm": 0.748630166053772, "learning_rate": 4.155690645052347e-07, "loss": 0.1447, "step": 6000 }, { "epoch": 0.6754474839581223, "eval_loss": 0.7091771960258484, "eval_runtime": 125.2549, "eval_samples_per_second": 15.76, "eval_steps_per_second": 1.972, "step": 6000 }, { "epoch": 0.7317347742879657, "grad_norm": 2.8120924980612472e-05, "learning_rate": 4.0853315321400427e-07, "loss": 0.1146, "step": 6500 }, { "epoch": 0.7317347742879657, "eval_loss": 0.7105826735496521, "eval_runtime": 127.7786, "eval_samples_per_second": 15.449, "eval_steps_per_second": 1.933, "step": 6500 }, { "epoch": 0.7880220646178093, "grad_norm": 1.4741635823156685e-07, "learning_rate": 4.014972419227738e-07, "loss": 0.171, "step": 7000 }, { "epoch": 0.7880220646178093, "eval_loss": 0.6978534460067749, "eval_runtime": 125.2567, "eval_samples_per_second": 15.76, "eval_steps_per_second": 1.972, "step": 7000 }, { "epoch": 0.8443093549476528, "grad_norm": 0.002975167240947485, "learning_rate": 3.944613306315434e-07, "loss": 0.1584, "step": 7500 }, { "epoch": 0.8443093549476528, "eval_loss": 0.6923494935035706, "eval_runtime": 125.3384, "eval_samples_per_second": 15.749, "eval_steps_per_second": 1.971, "step": 7500 }, { "epoch": 0.9005966452774964, "grad_norm": 9.225498797604814e-05, "learning_rate": 3.8742541934031293e-07, "loss": 0.1384, "step": 8000 }, { "epoch": 0.9005966452774964, "eval_loss": 0.6872902512550354, "eval_runtime": 128.114, "eval_samples_per_second": 15.408, "eval_steps_per_second": 1.928, "step": 8000 }, { "epoch": 0.9568839356073399, "grad_norm": 1.9474915902151224e-08, "learning_rate": 3.803895080490825e-07, "loss": 0.1243, "step": 8500 }, { "epoch": 0.9568839356073399, "eval_loss": 0.6788680553436279, "eval_runtime": 125.0525, "eval_samples_per_second": 15.785, "eval_steps_per_second": 1.975, "step": 8500 }, { "epoch": 1.0131712259371835, "grad_norm": 5.165647506713867, "learning_rate": 3.7335359675785204e-07, "loss": 0.1102, "step": 9000 }, { "epoch": 1.0131712259371835, "eval_loss": 0.6788864135742188, "eval_runtime": 124.7777, "eval_samples_per_second": 15.82, "eval_steps_per_second": 1.98, "step": 9000 }, { "epoch": 1.069458516267027, "grad_norm": 0.0005840375670231879, "learning_rate": 3.6631768546662164e-07, "loss": 0.0523, "step": 9500 }, { "epoch": 1.069458516267027, "eval_loss": 0.6628187894821167, "eval_runtime": 127.8018, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.933, "step": 9500 }, { "epoch": 1.1257458065968704, "grad_norm": 0.001191094284877181, "learning_rate": 3.592817741753912e-07, "loss": 0.0617, "step": 10000 }, { "epoch": 1.1257458065968704, "eval_loss": 0.6507942080497742, "eval_runtime": 125.484, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.968, "step": 10000 }, { "epoch": 1.1820330969267139, "grad_norm": 0.0003092484548687935, "learning_rate": 3.5224586288416075e-07, "loss": 0.0598, "step": 10500 }, { "epoch": 1.1820330969267139, "eval_loss": 0.6480408906936646, "eval_runtime": 125.7661, "eval_samples_per_second": 15.696, "eval_steps_per_second": 1.964, "step": 10500 }, { "epoch": 1.2383203872565574, "grad_norm": 5.508670710696606e-07, "learning_rate": 3.452099515929303e-07, "loss": 0.0782, "step": 11000 }, { "epoch": 1.2383203872565574, "eval_loss": 0.6403237581253052, "eval_runtime": 128.3947, "eval_samples_per_second": 15.374, "eval_steps_per_second": 1.924, "step": 11000 }, { "epoch": 1.294607677586401, "grad_norm": 0.1470593810081482, "learning_rate": 3.3817404030169986e-07, "loss": 0.0616, "step": 11500 }, { "epoch": 1.294607677586401, "eval_loss": 0.6376460790634155, "eval_runtime": 125.5391, "eval_samples_per_second": 15.724, "eval_steps_per_second": 1.968, "step": 11500 }, { "epoch": 1.3508949679162445, "grad_norm": 1.8021532014245167e-05, "learning_rate": 3.3113812901046946e-07, "loss": 0.0723, "step": 12000 }, { "epoch": 1.3508949679162445, "eval_loss": 0.6378474831581116, "eval_runtime": 125.5914, "eval_samples_per_second": 15.718, "eval_steps_per_second": 1.967, "step": 12000 }, { "epoch": 1.407182258246088, "grad_norm": 0.00013483635848388076, "learning_rate": 3.2410221771923896e-07, "loss": 0.0459, "step": 12500 }, { "epoch": 1.407182258246088, "eval_loss": 0.6378260850906372, "eval_runtime": 128.2725, "eval_samples_per_second": 15.389, "eval_steps_per_second": 1.926, "step": 12500 }, { "epoch": 1.4634695485759315, "grad_norm": 4.757418707868055e-07, "learning_rate": 3.170663064280085e-07, "loss": 0.0571, "step": 13000 }, { "epoch": 1.4634695485759315, "eval_loss": 0.6366899609565735, "eval_runtime": 125.6986, "eval_samples_per_second": 15.704, "eval_steps_per_second": 1.965, "step": 13000 }, { "epoch": 1.5197568389057752, "grad_norm": 0.00028420978924259543, "learning_rate": 3.1003039513677807e-07, "loss": 0.0499, "step": 13500 }, { "epoch": 1.5197568389057752, "eval_loss": 0.6286123394966125, "eval_runtime": 127.9165, "eval_samples_per_second": 15.432, "eval_steps_per_second": 1.931, "step": 13500 }, { "epoch": 1.5760441292356187, "grad_norm": 2.1999912291903456e-07, "learning_rate": 3.0299448384554767e-07, "loss": 0.0679, "step": 14000 }, { "epoch": 1.5760441292356187, "eval_loss": 0.634914755821228, "eval_runtime": 125.0396, "eval_samples_per_second": 15.787, "eval_steps_per_second": 1.975, "step": 14000 }, { "epoch": 1.6323314195654621, "grad_norm": 0.00023505109129473567, "learning_rate": 2.959585725543172e-07, "loss": 0.0554, "step": 14500 }, { "epoch": 1.6323314195654621, "eval_loss": 0.623322069644928, "eval_runtime": 125.2429, "eval_samples_per_second": 15.761, "eval_steps_per_second": 1.972, "step": 14500 }, { "epoch": 1.6886187098953056, "grad_norm": 11.346132278442383, "learning_rate": 2.889226612630868e-07, "loss": 0.0617, "step": 15000 }, { "epoch": 1.6886187098953056, "eval_loss": 0.6115825772285461, "eval_runtime": 127.8813, "eval_samples_per_second": 15.436, "eval_steps_per_second": 1.931, "step": 15000 }, { "epoch": 1.744906000225149, "grad_norm": 308.43804931640625, "learning_rate": 2.8188674997185633e-07, "loss": 0.0561, "step": 15500 }, { "epoch": 1.744906000225149, "eval_loss": 0.5951005220413208, "eval_runtime": 125.2491, "eval_samples_per_second": 15.761, "eval_steps_per_second": 1.972, "step": 15500 }, { "epoch": 1.8011932905549926, "grad_norm": 6.91558055265773e-09, "learning_rate": 2.7485083868062594e-07, "loss": 0.0672, "step": 16000 }, { "epoch": 1.8011932905549926, "eval_loss": 0.5809568166732788, "eval_runtime": 125.2557, "eval_samples_per_second": 15.76, "eval_steps_per_second": 1.972, "step": 16000 }, { "epoch": 1.8574805808848363, "grad_norm": 2.2250075915053458e-07, "learning_rate": 2.678149273893955e-07, "loss": 0.0812, "step": 16500 }, { "epoch": 1.8574805808848363, "eval_loss": 0.5691242218017578, "eval_runtime": 128.3032, "eval_samples_per_second": 15.385, "eval_steps_per_second": 1.925, "step": 16500 }, { "epoch": 1.9137678712146797, "grad_norm": 1.1724663972854614, "learning_rate": 2.6077901609816504e-07, "loss": 0.0614, "step": 17000 }, { "epoch": 1.9137678712146797, "eval_loss": 0.5643230080604553, "eval_runtime": 125.7191, "eval_samples_per_second": 15.702, "eval_steps_per_second": 1.965, "step": 17000 }, { "epoch": 1.9700551615445232, "grad_norm": 0.00030226208036765456, "learning_rate": 2.5374310480693454e-07, "loss": 0.0462, "step": 17500 }, { "epoch": 1.9700551615445232, "eval_loss": 0.5657238364219666, "eval_runtime": 124.9923, "eval_samples_per_second": 15.793, "eval_steps_per_second": 1.976, "step": 17500 }, { "epoch": 2.026342451874367, "grad_norm": 1.949754704355655e-07, "learning_rate": 2.4670719351570415e-07, "loss": 0.0565, "step": 18000 }, { "epoch": 2.026342451874367, "eval_loss": 0.5607953667640686, "eval_runtime": 127.7187, "eval_samples_per_second": 15.456, "eval_steps_per_second": 1.934, "step": 18000 }, { "epoch": 2.0826297422042104, "grad_norm": 14.2212553024292, "learning_rate": 2.396712822244737e-07, "loss": 0.0549, "step": 18500 }, { "epoch": 2.0826297422042104, "eval_loss": 0.5568962693214417, "eval_runtime": 125.0503, "eval_samples_per_second": 15.786, "eval_steps_per_second": 1.975, "step": 18500 }, { "epoch": 2.138917032534054, "grad_norm": 1.2045973539352417, "learning_rate": 2.3263537093324326e-07, "loss": 0.0245, "step": 19000 }, { "epoch": 2.138917032534054, "eval_loss": 0.5595241785049438, "eval_runtime": 124.8527, "eval_samples_per_second": 15.811, "eval_steps_per_second": 1.978, "step": 19000 } ], "logging_steps": 500, "max_steps": 35532, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6835768432291260.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }