מודלי AI מסודרים +מטאדאטה למודל

NHLOCAL · Sep 3, 2024 · af4a4b9 · af4a4b9
1 parent 7992351
commit af4a4b9
Show file tree

Hide file tree

Showing 16 changed files with 737 additions and 3 deletions.
diff --git a/machine-learn/creating_model_git.py b/machine-learn/creating_model_git.py
@@ -118,5 +118,10 @@ def custom_tokenizer(nlp):
 
 # Load the best model before saving with the final name
 nlp = spacy.load(best_model_path)
-nlp.meta['name'] = 'find_singer_heb'
+nlp.meta['name'] = 'singer_ner_he'
+nlp.meta['description'] = 'Model for recognizing singer names in Hebrew song titles'
+nlp.meta['author'] = 'nhlocal'
+nlp.meta['email'] = '[email protected]'
+nlp.meta['license'] = 'MIT'
+nlp.meta['tags'] = ['NER', 'Hebrew', 'Singer', 'Named Entity Recognition', 'Text Classification']
 nlp.to_disk(model_name)
diff --git a/src/core/ai_models.py b/src/core/ai_models.py
@@ -27,15 +27,15 @@ def __init__(self, logger=None):
     def load_models(self):
         # Load the NER model
         try:
-            model_name = r"C:\Users\משתמש\Documents\GitHub\Singles-Sorter-ml\machine-learn\custom_ner_model23git"
+            model_name = 'models/singer_ner_he'
             self.nlp = load(model_name)
             self.logger.debug(f"Loaded NER model: {model_name}")
         except Exception as e:
             self.logger.error(f"Failed to load NER model: {str(e)}")
 
         # Load the sklearn model
         try:
-            model_path = r'C:\Users\משתמש\Documents\GitHub\Singles-Sorter-ml\machine-learn\music_classification\model_creation\music_classifier.pkl'
+            model_path = 'models/music_classifier.pkl'
             with open(model_path, 'rb') as model_file:
                 self.sklearn_model = pickle.load(model_file)
             self.logger.debug("Loaded sklearn model successfully")

diff --git a/src/core/models/add_metadata.py b/src/core/models/add_metadata.py
@@ -0,0 +1,16 @@
+import spacy
+
+# Load the existing model (spacy.load takes a package name or a path)
+nlp = spacy.load("singer_ner_he")
+
+# Update the metadata
+nlp.meta['version'] = '25.1'
+nlp.meta['description'] = 'Model for recognizing singer names in Hebrew song titles'
+nlp.meta['author'] = 'nhlocal'
+nlp.meta['email'] = '[email protected]'
+nlp.meta['license'] = 'MIT'
+nlp.meta['tags'] = ['NER', 'Hebrew', 'Singer', 'Named Entity Recognition', 'Text Classification']
+
+# Save the model again, to the "singer_ner_he" directory
+nlp.to_disk("singer_ner_he")
+print("the model with metadata saving to disk!")
diff --git a/src/core/models/music_classifier.pkl b/src/core/models/music_classifier.pkl
diff --git a/src/core/models/singer_ner_he/config.cfg b/src/core/models/singer_ner_he/config.cfg
@@ -0,0 +1,130 @@
+[paths]
+train = null
+dev = null
+vectors = null
+init_tok2vec = null
+
+[system]
+seed = 0
+gpu_allocator = null
+
+[nlp]
+lang = "he"
+pipeline = ["ner"]
+disabled = []
+before_creation = null
+after_creation = null
+after_pipeline_creation = null
+batch_size = 1000
+tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
+vectors = {"@vectors":"spacy.Vectors.v1"}
+
+[components]
+
+[components.ner]
+factory = "ner"
+incorrect_spans_key = null
+moves = null
+scorer = {"@scorers":"spacy.ner_scorer.v1"}
+update_with_oracle_cut_size = 100
+
+[components.ner.model]
+@architectures = "spacy.TransitionBasedParser.v2"
+state_type = "ner"
+extra_state_tokens = false
+hidden_width = 64
+maxout_pieces = 2
+use_upper = true
+nO = null
+
+[components.ner.model.tok2vec]
+@architectures = "spacy.HashEmbedCNN.v2"
+pretrained_vectors = null
+width = 96
+depth = 4
+embed_size = 2000
+window_size = 1
+maxout_pieces = 3
+subword_features = true
+
+[corpora]
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+gold_preproc = false
+max_length = 0
+limit = 0
+augmenter = null
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+gold_preproc = false
+max_length = 0
+limit = 0
+augmenter = null
+
+[training]
+seed = ${system.seed}
+gpu_allocator = ${system.gpu_allocator}
+dropout = 0.1
+accumulate_gradient = 1
+patience = 1600
+max_epochs = 0
+max_steps = 20000
+eval_frequency = 200
+frozen_components = []
+annotating_components = []
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+before_to_disk = null
+before_update = null
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+get_length = null
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+t = 0.0
+
+[training.logger]
+@loggers = "spacy.ConsoleLogger.v1"
+progress_bar = false
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = false
+eps = 0.00000001
+learn_rate = 0.001
+
+[training.score_weights]
+ents_f = 1.0
+ents_p = 0.0
+ents_r = 0.0
+ents_per_type = null
+
+[pretraining]
+
+[initialize]
+vectors = ${paths.vectors}
+init_tok2vec = ${paths.init_tok2vec}
+vocab_data = null
+lookups = null
+before_init = null
+after_init = null
+
+[initialize.components]
+
+[initialize.tokenizer]
diff --git a/src/core/models/singer_ner_he/iteration_data.json b/src/core/models/singer_ner_he/iteration_data.json
@@ -0,0 +1,167 @@
+{
+  "0": {
+    "ner": 12760.009605653426
+  },
+  "1": {
+    "ner": 8170.318919559713
+  },
+  "2": {
+    "ner": 6891.266782209631
+  },
+  "3": {
+    "ner": 6025.393886494811
+  },
+  "4": {
+    "ner": 5811.96771290383
+  },
+  "5": {
+    "ner": 5512.271173835842
+  },
+  "6": {
+    "ner": 5296.490141148527
+  },
+  "7": {
+    "ner": 5235.260896612553
+  },
+  "8": {
+    "ner": 5046.740624489254
+  },
+  "9": {
+    "ner": 4973.830577046772
+  },
+  "10": {
+    "ner": 5017.859998598908
+  },
+  "11": {
+    "ner": 4900.961947907638
+  },
+  "12": {
+    "ner": 4838.758090039101
+  },
+  "13": {
+    "ner": 4886.875202931668
+  },
+  "14": {
+    "ner": 4714.624255783186
+  },
+  "15": {
+    "ner": 4738.327280185656
+  },
+  "16": {
+    "ner": 4565.160664788347
+  },
+  "17": {
+    "ner": 4583.78275722923
+  },
+  "18": {
+    "ner": 4495.220605149534
+  },
+  "19": {
+    "ner": 4534.810179279405
+  },
+  "20": {
+    "ner": 4500.498598112403
+  },
+  "21": {
+    "ner": 4424.467961309918
+  },
+  "22": {
+    "ner": 4418.58057027781
+  },
+  "23": {
+    "ner": 4177.6422341950165
+  },
+  "24": {
+    "ner": 4473.728806059217
+  },
+  "25": {
+    "ner": 4482.807123740337
+  },
+  "26": {
+    "ner": 4295.435655848486
+  },
+  "27": {
+    "ner": 4296.125647655365
+  },
+  "28": {
+    "ner": 4211.3802648941555
+  },
+  "29": {
+    "ner": 4226.050618910973
+  },
+  "30": {
+    "ner": 4343.9247733939055
+  },
+  "31": {
+    "ner": 4251.641707995066
+  },
+  "32": {
+    "ner": 4284.022606448415
+  },
+  "33": {
+    "ner": 4354.118602595073
+  },
+  "34": {
+    "ner": 4372.461272659416
+  },
+  "35": {
+    "ner": 4378.70702104278
+  },
+  "36": {
+    "ner": 4245.001379737945
+  },
+  "37": {
+    "ner": 4270.430685805459
+  },
+  "38": {
+    "ner": 4208.291241037127
+  },
+  "39": {
+    "ner": 4250.659736461786
+  },
+  "40": {
+    "ner": 4201.661238122778
+  },
+  "41": {
+    "ner": 4174.294246218296
+  },
+  "42": {
+    "ner": 4317.815189482398
+  },
+  "43": {
+    "ner": 4206.800198851955
+  },
+  "44": {
+    "ner": 4211.050399543626
+  },
+  "45": {
+    "ner": 4164.989329134082
+  },
+  "46": {
+    "ner": 4169.060926826847
+  },
+  "47": {
+    "ner": 4246.892852349701
+  },
+  "48": {
+    "ner": 4376.177885754265
+  },
+  "49": {
+    "ner": 4144.422442224529
+  },
+  "50": {
+    "ner": 4229.421797165746
+  },
+  "51": {
+    "ner": 4100.578560124174
+  },
+  "52": {
+    "ner": 4273.158234436391
+  },
+  "53": {
+    "ner": 4141.029963890384
+  },
+  "54": {
+    "ner": 4119.760375687587
+  }
+}
diff --git a/src/core/models/singer_ner_he/meta.json b/src/core/models/singer_ner_he/meta.json
@@ -0,0 +1,40 @@
+{
+  "lang":"he",
+  "name":"find_singer_heb",
+  "version":"25.1",
+  "spacy_version":">=3.7.5,<3.8.0",
+  "description":"Model for recognizing singer names in Hebrew song titles",
+  "author":"nhlocal",
+  "email":"[email protected]",
+  "url":"",
+  "license":"MIT",
+  "spacy_git_version":"a6d0fc360",
+  "vectors":{
+    "width":0,
+    "vectors":0,
+    "keys":0,
+    "name":null,
+    "mode":"default"
+  },
+  "labels":{
+    "ner":[
+      "SINGER"
+    ]
+  },
+  "pipeline":[
+    "ner"
+  ],
+  "components":[
+    "ner"
+  ],
+  "disabled":[
+
+  ],
+  "tags":[
+    "NER",
+    "Hebrew",
+    "Singer",
+    "Named Entity Recognition",
+    "Text Classification"
+  ]
+}