nlparabic committed on
Commit
8a5211b
1 Parent(s): 34d1123

End of training

Browse files
README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.1581
22
- - Bleu: 0.3740
23
- - Rouge1: 0.5622
24
- - Rouge2: 0.3102
25
- - Rougel: 0.5600
26
 
27
  ## Model description
28
 
 
18
 
19
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 1.0817
22
+ - Bleu: 0.3500
23
+ - Rouge1: 0.5071
24
+ - Rouge2: 0.2486
25
+ - Rougel: 0.5048
26
 
27
  ## Model description
28
 
all_results.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
- "epoch": 13.0,
3
- "eval_bleu": 0.23046108076912605,
4
- "eval_loss": 0.5856799483299255,
5
- "eval_rouge1": 0.552383162149263,
6
- "eval_rouge2": 0.28740939784429775,
7
- "eval_rougeL": 0.5497007000948397,
8
- "eval_runtime": 7.0212,
9
- "eval_samples": 2117,
10
- "eval_samples_per_second": 301.517,
11
- "eval_steps_per_second": 37.743,
12
- "perplexity": 1.796211901795729,
13
- "total_flos": 7192716410880000.0,
14
- "train_loss": 0.055393123952289666,
15
- "train_runtime": 1209.9717,
16
- "train_samples": 8470,
17
- "train_samples_per_second": 140.003,
18
- "train_steps_per_second": 17.505
19
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "eval_bleu": 0.34996698643265867,
4
+ "eval_loss": 1.0816909074783325,
5
+ "eval_rouge1": 0.5070888923629823,
6
+ "eval_rouge2": 0.24860353236577704,
7
+ "eval_rougeL": 0.5048066808619132,
8
+ "eval_runtime": 7.2625,
9
+ "eval_samples": 2113,
10
+ "eval_samples_per_second": 290.948,
11
+ "eval_steps_per_second": 36.489,
12
+ "perplexity": 2.9496629437803548,
13
+ "total_flos": 3864770445312000.0,
14
+ "train_loss": 0.5907337587255773,
15
+ "train_runtime": 1774.4968,
16
+ "train_samples": 8452,
17
+ "train_samples_per_second": 95.261,
18
+ "train_steps_per_second": 11.913
19
  }
egy_training_log.txt CHANGED
@@ -158,3 +158,5 @@ INFO:root:Epoch 6.0: Train Loss = 0.3357, Eval Loss = 1.1360507011413574
158
  INFO:absl:Using default tokenizer.
159
  INFO:root:Epoch 7.0: Train Loss = 0.2832, Eval Loss = 1.1256883144378662
160
  INFO:absl:Using default tokenizer.
 
 
 
158
  INFO:absl:Using default tokenizer.
159
  INFO:root:Epoch 7.0: Train Loss = 0.2832, Eval Loss = 1.1256883144378662
160
  INFO:absl:Using default tokenizer.
161
+ INFO:__main__:*** Evaluate ***
162
+ INFO:absl:Using default tokenizer.
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 13.0,
3
- "eval_bleu": 0.23046108076912605,
4
- "eval_loss": 0.5856799483299255,
5
- "eval_rouge1": 0.552383162149263,
6
- "eval_rouge2": 0.28740939784429775,
7
- "eval_rougeL": 0.5497007000948397,
8
- "eval_runtime": 7.0212,
9
- "eval_samples": 2117,
10
- "eval_samples_per_second": 301.517,
11
- "eval_steps_per_second": 37.743,
12
- "perplexity": 1.796211901795729
13
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "eval_bleu": 0.34996698643265867,
4
+ "eval_loss": 1.0816909074783325,
5
+ "eval_rouge1": 0.5070888923629823,
6
+ "eval_rouge2": 0.24860353236577704,
7
+ "eval_rougeL": 0.5048066808619132,
8
+ "eval_runtime": 7.2625,
9
+ "eval_samples": 2113,
10
+ "eval_samples_per_second": 290.948,
11
+ "eval_steps_per_second": 36.489,
12
+ "perplexity": 2.9496629437803548
13
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 13.0,
3
- "total_flos": 7192716410880000.0,
4
- "train_loss": 0.055393123952289666,
5
- "train_runtime": 1209.9717,
6
- "train_samples": 8470,
7
- "train_samples_per_second": 140.003,
8
- "train_steps_per_second": 17.505
9
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "total_flos": 3864770445312000.0,
4
+ "train_loss": 0.5907337587255773,
5
+ "train_runtime": 1774.4968,
6
+ "train_samples": 8452,
7
+ "train_samples_per_second": 95.261,
8
+ "train_steps_per_second": 11.913
9
  }
train_vs_val_loss.png CHANGED
trainer_state.json CHANGED
@@ -1,272 +1,158 @@
1
  {
2
- "best_metric": 0.5856799483299255,
3
- "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq/checkpoint-3177",
4
- "epoch": 13.0,
5
  "eval_steps": 500,
6
- "global_step": 13767,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.9141271114349365,
14
- "learning_rate": 4.864845261121857e-05,
15
- "loss": 1.8421,
16
- "step": 1059
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_bleu": 0.19309252278222033,
21
- "eval_loss": 0.6647776365280151,
22
- "eval_rouge1": 0.48057112484546394,
23
- "eval_rouge2": 0.21418316037711305,
24
- "eval_rougeL": 0.4779283146059673,
25
- "eval_runtime": 7.1769,
26
- "eval_samples_per_second": 294.974,
27
- "eval_steps_per_second": 36.924,
28
- "step": 1059
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 1.218380093574524,
33
- "learning_rate": 4.608800773694391e-05,
34
- "loss": 0.591,
35
- "step": 2118
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_bleu": 0.22252011703416463,
40
- "eval_loss": 0.6045418977737427,
41
- "eval_rouge1": 0.5287371210505303,
42
- "eval_rouge2": 0.2614938115780706,
43
- "eval_rougeL": 0.5264748671435004,
44
- "eval_runtime": 9.5179,
45
- "eval_samples_per_second": 222.424,
46
- "eval_steps_per_second": 27.842,
47
- "step": 2118
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 1.0960309505462646,
52
- "learning_rate": 4.3527562862669246e-05,
53
- "loss": 0.4845,
54
- "step": 3177
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_bleu": 0.23046108076912605,
59
- "eval_loss": 0.5856799483299255,
60
- "eval_rouge1": 0.552383162149263,
61
- "eval_rouge2": 0.28740939784429775,
62
- "eval_rougeL": 0.5497007000948397,
63
- "eval_runtime": 10.8015,
64
- "eval_samples_per_second": 195.992,
65
- "eval_steps_per_second": 24.534,
66
- "step": 3177
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.3864747285842896,
71
- "learning_rate": 4.096711798839459e-05,
72
- "loss": 0.4018,
73
- "step": 4236
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_bleu": 0.2415877566741423,
78
- "eval_loss": 0.5871986746788025,
79
- "eval_rouge1": 0.5715504611231429,
80
- "eval_rouge2": 0.3055462547357787,
81
- "eval_rougeL": 0.5693046274769865,
82
- "eval_runtime": 13.9766,
83
- "eval_samples_per_second": 151.468,
84
- "eval_steps_per_second": 18.96,
85
- "step": 4236
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 1.157429575920105,
90
- "learning_rate": 3.840667311411992e-05,
91
- "loss": 0.3351,
92
- "step": 5295
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_bleu": 0.24406004584501667,
97
- "eval_loss": 0.5902981162071228,
98
- "eval_rouge1": 0.5779981254861084,
99
- "eval_rouge2": 0.317524337500259,
100
- "eval_rougeL": 0.5758829591426086,
101
- "eval_runtime": 9.689,
102
- "eval_samples_per_second": 218.496,
103
- "eval_steps_per_second": 27.351,
104
- "step": 5295
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.5503590106964111,
109
- "learning_rate": 3.5846228239845266e-05,
110
- "loss": 0.2824,
111
- "step": 6354
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_bleu": 0.25233205020508037,
116
- "eval_loss": 0.5987895131111145,
117
- "eval_rouge1": 0.5857067061237097,
118
- "eval_rouge2": 0.3285041572893456,
119
- "eval_rougeL": 0.5837783686526783,
120
- "eval_runtime": 49.8895,
121
- "eval_samples_per_second": 42.434,
122
- "eval_steps_per_second": 5.312,
123
- "step": 6354
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 1.2503788471221924,
128
- "learning_rate": 3.3285783365570597e-05,
129
- "loss": 0.2395,
130
- "step": 7413
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_bleu": 0.25714921193093904,
135
- "eval_loss": 0.6094574332237244,
136
- "eval_rouge1": 0.5876661421958642,
137
- "eval_rouge2": 0.33302273991869746,
138
- "eval_rougeL": 0.5860531719581312,
139
- "eval_runtime": 23.7105,
140
- "eval_samples_per_second": 89.285,
141
- "eval_steps_per_second": 11.176,
142
- "step": 7413
143
  },
144
  {
145
- "epoch": 8.0,
146
- "grad_norm": 1.2774244546890259,
147
- "learning_rate": 3.072533849129594e-05,
148
- "loss": 0.206,
149
- "step": 8472
150
- },
151
- {
152
- "epoch": 8.0,
153
- "eval_bleu": 0.25980262957911116,
154
- "eval_loss": 0.6177937388420105,
155
- "eval_rouge1": 0.5921019219570411,
156
- "eval_rouge2": 0.33750981785667133,
157
- "eval_rougeL": 0.5897405302098737,
158
- "eval_runtime": 8.9943,
159
- "eval_samples_per_second": 235.37,
160
- "eval_steps_per_second": 29.463,
161
- "step": 8472
162
- },
163
- {
164
- "epoch": 9.0,
165
- "grad_norm": 1.7185566425323486,
166
- "learning_rate": 2.8164893617021275e-05,
167
- "loss": 0.1782,
168
- "step": 9531
169
- },
170
- {
171
- "epoch": 9.0,
172
- "eval_bleu": 0.2614863236371811,
173
- "eval_loss": 0.629348874092102,
174
- "eval_rouge1": 0.5946629982518206,
175
- "eval_rouge2": 0.3436011935268504,
176
- "eval_rougeL": 0.5923928551586273,
177
- "eval_runtime": 7.5598,
178
- "eval_samples_per_second": 280.035,
179
- "eval_steps_per_second": 35.054,
180
- "step": 9531
181
- },
182
- {
183
- "epoch": 10.0,
184
- "grad_norm": 1.2046067714691162,
185
- "learning_rate": 2.5604448742746616e-05,
186
- "loss": 0.1569,
187
- "step": 10590
188
- },
189
- {
190
- "epoch": 10.0,
191
- "eval_bleu": 0.2622683837036058,
192
- "eval_loss": 0.6383969187736511,
193
- "eval_rouge1": 0.5956308067865153,
194
- "eval_rouge2": 0.3446481345143322,
195
- "eval_rougeL": 0.5936155042273001,
196
- "eval_runtime": 6.9925,
197
- "eval_samples_per_second": 302.752,
198
- "eval_steps_per_second": 37.898,
199
- "step": 10590
200
- },
201
- {
202
- "epoch": 11.0,
203
- "grad_norm": 1.339342474937439,
204
- "learning_rate": 2.3044003868471954e-05,
205
- "loss": 0.14,
206
- "step": 11649
207
- },
208
- {
209
- "epoch": 11.0,
210
- "eval_bleu": 0.26262679811001677,
211
- "eval_loss": 0.6476752161979675,
212
- "eval_rouge1": 0.5934893482225323,
213
- "eval_rouge2": 0.34369593409547294,
214
- "eval_rougeL": 0.5911903400030549,
215
- "eval_runtime": 20.541,
216
- "eval_samples_per_second": 103.062,
217
- "eval_steps_per_second": 12.901,
218
- "step": 11649
219
- },
220
- {
221
- "epoch": 12.0,
222
- "grad_norm": 0.8063332438468933,
223
- "learning_rate": 2.0483558994197295e-05,
224
- "loss": 0.1273,
225
- "step": 12708
226
- },
227
- {
228
- "epoch": 12.0,
229
- "eval_bleu": 0.26337346373364584,
230
- "eval_loss": 0.6561474800109863,
231
- "eval_rouge1": 0.5959828528791276,
232
- "eval_rouge2": 0.34512909829817506,
233
- "eval_rougeL": 0.5936516463492448,
234
- "eval_runtime": 7.0944,
235
- "eval_samples_per_second": 298.404,
236
- "eval_steps_per_second": 37.353,
237
- "step": 12708
238
- },
239
- {
240
- "epoch": 13.0,
241
- "grad_norm": 1.136061668395996,
242
- "learning_rate": 1.7923114119922633e-05,
243
- "loss": 0.1177,
244
- "step": 13767
245
- },
246
- {
247
- "epoch": 13.0,
248
- "eval_bleu": 0.2637881399114867,
249
- "eval_loss": 0.6630375385284424,
250
- "eval_rouge1": 0.5976052885688896,
251
- "eval_rouge2": 0.3469216711027344,
252
- "eval_rougeL": 0.5951145249152312,
253
- "eval_runtime": 7.126,
254
- "eval_samples_per_second": 297.08,
255
- "eval_steps_per_second": 37.188,
256
- "step": 13767
257
- },
258
- {
259
- "epoch": 13.0,
260
- "step": 13767,
261
- "total_flos": 7192716410880000.0,
262
- "train_loss": 0.055393123952289666,
263
- "train_runtime": 1209.9717,
264
- "train_samples_per_second": 140.003,
265
- "train_steps_per_second": 17.505
266
  }
267
  ],
268
  "logging_steps": 500,
269
- "max_steps": 21180,
270
  "num_input_tokens_seen": 0,
271
  "num_train_epochs": 20,
272
  "save_steps": 500,
@@ -291,7 +177,7 @@
291
  "attributes": {}
292
  }
293
  },
294
- "total_flos": 7192716410880000.0,
295
  "train_batch_size": 8,
296
  "trial_name": null,
297
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.0816909074783325,
3
+ "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq/checkpoint-2114",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 7399,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 1.3965729475021362,
14
+ "learning_rate": 4.865067829457365e-05,
15
+ "loss": 1.7995,
16
+ "step": 1057
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_bleu": 0.3157163520866108,
21
+ "eval_loss": 1.1331889629364014,
22
+ "eval_rouge1": 0.45516049357610144,
23
+ "eval_rouge2": 0.2017132883532014,
24
+ "eval_rougeL": 0.453144508008762,
25
+ "eval_runtime": 20.354,
26
+ "eval_samples_per_second": 103.812,
27
+ "eval_steps_per_second": 13.02,
28
+ "step": 1057
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 1.2999993562698364,
33
+ "learning_rate": 4.609011627906977e-05,
34
+ "loss": 0.5906,
35
+ "step": 2114
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_bleu": 0.34996698643265867,
40
+ "eval_loss": 1.0816909074783325,
41
+ "eval_rouge1": 0.5070888923629823,
42
+ "eval_rouge2": 0.24860353236577704,
43
+ "eval_rougeL": 0.5048066808619132,
44
+ "eval_runtime": 18.5352,
45
+ "eval_samples_per_second": 113.999,
46
+ "eval_steps_per_second": 14.297,
47
+ "step": 2114
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 1.494644284248352,
52
+ "learning_rate": 4.3529554263565894e-05,
53
+ "loss": 0.484,
54
+ "step": 3171
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_bleu": 0.3592334262176334,
59
+ "eval_loss": 1.1038055419921875,
60
+ "eval_rouge1": 0.5329660042734197,
61
+ "eval_rouge2": 0.2737288088550716,
62
+ "eval_rougeL": 0.530301732660456,
63
+ "eval_runtime": 11.0163,
64
+ "eval_samples_per_second": 191.807,
65
+ "eval_steps_per_second": 24.055,
66
+ "step": 3171
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 2.169060230255127,
71
+ "learning_rate": 4.096899224806201e-05,
72
+ "loss": 0.4017,
73
+ "step": 4228
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_bleu": 0.3664876357280857,
78
+ "eval_loss": 1.1117204427719116,
79
+ "eval_rouge1": 0.5470701344699284,
80
+ "eval_rouge2": 0.29154846104100973,
81
+ "eval_rougeL": 0.5448816975696349,
82
+ "eval_runtime": 11.4837,
83
+ "eval_samples_per_second": 184.001,
84
+ "eval_steps_per_second": 23.076,
85
+ "step": 4228
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 1.5656139850616455,
90
+ "learning_rate": 3.840843023255814e-05,
91
+ "loss": 0.3357,
92
+ "step": 5285
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_bleu": 0.36951226807058296,
97
+ "eval_loss": 1.1360507011413574,
98
+ "eval_rouge1": 0.5552416771197572,
99
+ "eval_rouge2": 0.300977455987009,
100
+ "eval_rougeL": 0.5533455418855034,
101
+ "eval_runtime": 7.2292,
102
+ "eval_samples_per_second": 292.287,
103
+ "eval_steps_per_second": 36.657,
104
+ "step": 5285
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 1.9493229389190674,
109
+ "learning_rate": 3.5847868217054265e-05,
110
+ "loss": 0.2832,
111
+ "step": 6342
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_bleu": 0.37209002173126987,
116
+ "eval_loss": 1.1256883144378662,
117
+ "eval_rouge1": 0.5606510219414631,
118
+ "eval_rouge2": 0.3089222128154705,
119
+ "eval_rougeL": 0.5584217258440263,
120
+ "eval_runtime": 7.0098,
121
+ "eval_samples_per_second": 301.433,
122
+ "eval_steps_per_second": 37.804,
123
+ "step": 6342
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 1.3939344882965088,
128
+ "learning_rate": 3.328730620155039e-05,
129
+ "loss": 0.2404,
130
+ "step": 7399
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_bleu": 0.37399333863753975,
135
+ "eval_loss": 1.1580852270126343,
136
+ "eval_rouge1": 0.5622192023319419,
137
+ "eval_rouge2": 0.3102334124697639,
138
+ "eval_rougeL": 0.5600347361241877,
139
+ "eval_runtime": 7.2059,
140
+ "eval_samples_per_second": 293.233,
141
+ "eval_steps_per_second": 36.776,
142
+ "step": 7399
143
  },
144
  {
145
+ "epoch": 7.0,
146
+ "step": 7399,
147
+ "total_flos": 3864770445312000.0,
148
+ "train_loss": 0.5907337587255773,
149
+ "train_runtime": 1774.4968,
150
+ "train_samples_per_second": 95.261,
151
+ "train_steps_per_second": 11.913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  }
153
  ],
154
  "logging_steps": 500,
155
+ "max_steps": 21140,
156
  "num_input_tokens_seen": 0,
157
  "num_train_epochs": 20,
158
  "save_steps": 500,
 
177
  "attributes": {}
178
  }
179
  },
180
+ "total_flos": 3864770445312000.0,
181
  "train_batch_size": 8,
182
  "trial_name": null,
183
  "trial_params": null