AlekseyKorshuk commited on
Commit
a8807a1
1 Parent(s): 1c8741c

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/og-buda")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11co51jr/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/og-buda")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2ic775kv/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1g4193mx) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1g4193mx/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -35,7 +35,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.10.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
35
  }
36
  },
37
  "torch_dtype": "float32",
38
+ "transformers_version": "4.11.1",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.6418354511260986, "eval_runtime": 10.1368, "eval_samples_per_second": 22.69, "eval_steps_per_second": 2.861, "epoch": 4.0}
 
1
+ {"eval_loss": 1.5331557989120483, "eval_runtime": 11.1146, "eval_samples_per_second": 21.233, "eval_steps_per_second": 2.699, "epoch": 5.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87d33c0069e8b72372c9e6dfc7f9a8187c137ac978f25b032d134b7ef5fd064f
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6ca1185ef4c01671071a96d4a6bc08a99dfb851bc262d5b345d99c44c8777b8
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3160fb582d67d179d5ee50aa5804c6359d28643e90fcd54e9bd4c89655dfc68
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b59554fe6cde8204954f2fd3c802193fa21f23c03e5477bc320159740bc767
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:911581e98e184bd3b5eb8b1cf8a6bf2e4ec0ff4f88f7ea521640e2dd07b94f03
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc420fc59da46155e275e30ee4f33e49901084326e259ee2daba1b1bc168d40
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dbbbccbd02e27dbdb79311ae0bdd36d65163767a8d048823783db7230b9c01f
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:151664dce0a47953ee30b57a0b85f89039a9cddc1f9c94e479fa890f7dfb89e4
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1b342b9aa08984a7d22e3d2af7c55d45d34bb20259c62266df710a4afe559ae
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5b7a99e2b2ebcfb7b31d39b7daabc1659b5909216616127ab13d8f6b3cc088
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.6418354511260986,
3
- "best_model_checkpoint": "output/og-buda/checkpoint-600",
4
- "epoch": 4.0,
5
- "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -766,11 +766,199 @@
766
  "eval_samples_per_second": 22.942,
767
  "eval_steps_per_second": 2.893,
768
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
769
  }
770
  ],
771
- "max_steps": 600,
772
- "num_train_epochs": 4,
773
- "total_flos": 625141186560000.0,
774
  "trial_name": null,
775
  "trial_params": null
776
  }
 
1
  {
2
+ "best_metric": 1.5331557989120483,
3
+ "best_model_checkpoint": "output/og-buda/checkpoint-750",
4
+ "epoch": 5.0,
5
+ "global_step": 750,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
766
  "eval_samples_per_second": 22.942,
767
  "eval_steps_per_second": 2.893,
768
  "step": 600
769
+ },
770
+ {
771
+ "epoch": 4.03,
772
+ "learning_rate": 0.00013682420202226357,
773
+ "loss": 1.7067,
774
+ "step": 605
775
+ },
776
+ {
777
+ "epoch": 4.07,
778
+ "learning_rate": 0.0001357009254103391,
779
+ "loss": 1.7111,
780
+ "step": 610
781
+ },
782
+ {
783
+ "epoch": 4.1,
784
+ "learning_rate": 0.0001338424770178476,
785
+ "loss": 1.6736,
786
+ "step": 615
787
+ },
788
+ {
789
+ "epoch": 4.13,
790
+ "learning_rate": 0.00013126921839428241,
791
+ "loss": 1.7152,
792
+ "step": 620
793
+ },
794
+ {
795
+ "epoch": 4.17,
796
+ "learning_rate": 0.00012800934269961248,
797
+ "loss": 1.7257,
798
+ "step": 625
799
+ },
800
+ {
801
+ "epoch": 4.2,
802
+ "learning_rate": 0.00012409856581412142,
803
+ "loss": 1.7207,
804
+ "step": 630
805
+ },
806
+ {
807
+ "epoch": 4.23,
808
+ "learning_rate": 0.00011957973502774922,
809
+ "loss": 1.6457,
810
+ "step": 635
811
+ },
812
+ {
813
+ "epoch": 4.27,
814
+ "learning_rate": 0.00011450235959621773,
815
+ "loss": 1.713,
816
+ "step": 640
817
+ },
818
+ {
819
+ "epoch": 4.3,
820
+ "learning_rate": 0.00010892206830726369,
821
+ "loss": 1.7242,
822
+ "step": 645
823
+ },
824
+ {
825
+ "epoch": 4.33,
826
+ "learning_rate": 0.00010290000000000009,
827
+ "loss": 1.6386,
828
+ "step": 650
829
+ },
830
+ {
831
+ "epoch": 4.37,
832
+ "learning_rate": 9.650213371499996e-05,
833
+ "loss": 1.6539,
834
+ "step": 655
835
+ },
836
+ {
837
+ "epoch": 4.4,
838
+ "learning_rate": 8.97985658141213e-05,
839
+ "loss": 1.7034,
840
+ "step": 660
841
+ },
842
+ {
843
+ "epoch": 4.43,
844
+ "learning_rate": 8.286274199009828e-05,
845
+ "loss": 1.6681,
846
+ "step": 665
847
+ },
848
+ {
849
+ "epoch": 4.47,
850
+ "learning_rate": 7.577065258016099e-05,
851
+ "loss": 1.6742,
852
+ "step": 670
853
+ },
854
+ {
855
+ "epoch": 4.5,
856
+ "learning_rate": 6.860000000000003e-05,
857
+ "loss": 1.687,
858
+ "step": 675
859
+ },
860
+ {
861
+ "epoch": 4.53,
862
+ "learning_rate": 6.14293474198391e-05,
863
+ "loss": 1.7066,
864
+ "step": 680
865
+ },
866
+ {
867
+ "epoch": 4.57,
868
+ "learning_rate": 5.433725800990179e-05,
869
+ "loss": 1.6622,
870
+ "step": 685
871
+ },
872
+ {
873
+ "epoch": 4.6,
874
+ "learning_rate": 4.740143418587876e-05,
875
+ "loss": 1.6647,
876
+ "step": 690
877
+ },
878
+ {
879
+ "epoch": 4.63,
880
+ "learning_rate": 4.069786628500011e-05,
881
+ "loss": 1.6251,
882
+ "step": 695
883
+ },
884
+ {
885
+ "epoch": 4.67,
886
+ "learning_rate": 3.429999999999996e-05,
887
+ "loss": 1.6732,
888
+ "step": 700
889
+ },
890
+ {
891
+ "epoch": 4.7,
892
+ "learning_rate": 2.8277931692736372e-05,
893
+ "loss": 1.5917,
894
+ "step": 705
895
+ },
896
+ {
897
+ "epoch": 4.73,
898
+ "learning_rate": 2.2697640403782324e-05,
899
+ "loss": 1.5938,
900
+ "step": 710
901
+ },
902
+ {
903
+ "epoch": 4.77,
904
+ "learning_rate": 1.762026497225081e-05,
905
+ "loss": 1.7145,
906
+ "step": 715
907
+ },
908
+ {
909
+ "epoch": 4.8,
910
+ "learning_rate": 1.3101434185878628e-05,
911
+ "loss": 1.6437,
912
+ "step": 720
913
+ },
914
+ {
915
+ "epoch": 4.83,
916
+ "learning_rate": 9.190657300387574e-06,
917
+ "loss": 1.5361,
918
+ "step": 725
919
+ },
920
+ {
921
+ "epoch": 4.87,
922
+ "learning_rate": 5.930781605717611e-06,
923
+ "loss": 1.5239,
924
+ "step": 730
925
+ },
926
+ {
927
+ "epoch": 4.9,
928
+ "learning_rate": 3.3575229821524373e-06,
929
+ "loss": 1.656,
930
+ "step": 735
931
+ },
932
+ {
933
+ "epoch": 4.93,
934
+ "learning_rate": 1.4990745896609297e-06,
935
+ "loss": 1.6033,
936
+ "step": 740
937
+ },
938
+ {
939
+ "epoch": 4.97,
940
+ "learning_rate": 3.757979777364447e-07,
941
+ "loss": 1.6277,
942
+ "step": 745
943
+ },
944
+ {
945
+ "epoch": 5.0,
946
+ "learning_rate": 0.0,
947
+ "loss": 1.5745,
948
+ "step": 750
949
+ },
950
+ {
951
+ "epoch": 5.0,
952
+ "eval_loss": 1.5331557989120483,
953
+ "eval_runtime": 11.0656,
954
+ "eval_samples_per_second": 21.327,
955
+ "eval_steps_per_second": 2.711,
956
+ "step": 750
957
  }
958
  ],
959
+ "max_steps": 750,
960
+ "num_train_epochs": 5,
961
+ "total_flos": 781001883648000.0,
962
  "trial_name": null,
963
  "trial_params": null
964
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:241f4fc691c28aaa2ddb496ddc8870ce626761c7412d518a7188ac1aaea6de47
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09831668545d53e1902c5d27a1cebb81afa6b22d9d6f902498224fa69e5221ac
3
+ size 2863