Skip to content

Commit

Permalink
מודלי AI מסודרים +מטאדאטה למודל
Browse files Browse the repository at this point in the history
  • Loading branch information
NHLOCAL committed Sep 3, 2024
1 parent 7992351 commit af4a4b9
Show file tree
Hide file tree
Showing 16 changed files with 737 additions and 3 deletions.
7 changes: 6 additions & 1 deletion machine-learn/creating_model_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,10 @@ def custom_tokenizer(nlp):

# Load the best model before saving with the final name
nlp = spacy.load(best_model_path)
nlp.meta['name'] = 'find_singer_heb'
nlp.meta['name'] = 'singer_ner_he'
nlp.meta['description'] = 'Model for recognizing singer names in Hebrew song titles'
nlp.meta['author'] = 'nhlocal'
nlp.meta['email'] = '[email protected]'
nlp.meta['license'] = 'MIT'
nlp.meta['tags'] = ['NER', 'Hebrew', 'Singer', 'Named Entity Recognition', 'Text Classification']
nlp.to_disk(model_name)
4 changes: 2 additions & 2 deletions src/core/ai_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ def __init__(self, logger=None):
def load_models(self):
# Load the NER model
try:
model_name = r"C:\Users\משתמש\Documents\GitHub\Singles-Sorter-ml\machine-learn\custom_ner_model23git"
model_name = 'models/singer_ner_he'
self.nlp = load(model_name)
self.logger.debug(f"Loaded NER model: {model_name}")
except Exception as e:
self.logger.error(f"Failed to load NER model: {str(e)}")

# Load the sklearn model
try:
model_path = r'C:\Users\משתמש\Documents\GitHub\Singles-Sorter-ml\machine-learn\music_classification\model_creation\music_classifier.pkl'
model_path = 'models/music_classifier.pkl'
with open(model_path, 'rb') as model_file:
self.sklearn_model = pickle.load(model_file)
self.logger.debug("Loaded sklearn model successfully")
Expand Down
16 changes: 16 additions & 0 deletions src/core/models/add_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import spacy

# Load the existing model identified by "singer_ner_he".
nlp = spacy.load("singer_ner_he")

# Metadata fields to record on the model. The dict preserves insertion
# order, so the fields are written to nlp.meta in the order listed here.
model_metadata = {
    'version': '25.1',
    'description': 'Model for recognizing singer names in Hebrew song titles',
    'author': 'nhlocal',
    'email': '[email protected]',
    'license': 'MIT',
    'tags': ['NER', 'Hebrew', 'Singer', 'Named Entity Recognition', 'Text Classification'],
}

# Update the model's metadata one field at a time.
for meta_key, meta_value in model_metadata.items():
    nlp.meta[meta_key] = meta_value

# Save the model back to disk under the same name, then report completion.
nlp.to_disk("singer_ner_he")
print("the model with metadata saving to disk!")
Binary file added src/core/models/music_classifier.pkl
Binary file not shown.
130 changes: 130 additions & 0 deletions src/core/models/singer_ner_he/config.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
[paths]
train = null
dev = null
vectors = null
init_tok2vec = null

[system]
seed = 0
gpu_allocator = null

[nlp]
lang = "he"
pipeline = ["ner"]
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
batch_size = 1000
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
vectors = {"@vectors":"spacy.Vectors.v1"}

[components]

[components.ner]
factory = "ner"
incorrect_spans_key = null
moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null

[components.ner.model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true

[corpora]

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
gold_preproc = false
max_length = 0
limit = 0
augmenter = null

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
gold_preproc = false
max_length = 0
limit = 0
augmenter = null

[training]
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
accumulate_gradient = 1
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 200
frozen_components = []
annotating_components = []
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
before_to_disk = null
before_update = null

[training.batcher]
@batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
get_length = null

[training.batcher.size]
@schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
t = 0.0

[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false

[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
learn_rate = 0.001

[training.score_weights]
ents_f = 1.0
ents_p = 0.0
ents_r = 0.0
ents_per_type = null

[pretraining]

[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null

[initialize.components]

[initialize.tokenizer]
167 changes: 167 additions & 0 deletions src/core/models/singer_ner_he/iteration_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
{
"0": {
"ner": 12760.009605653426
},
"1": {
"ner": 8170.318919559713
},
"2": {
"ner": 6891.266782209631
},
"3": {
"ner": 6025.393886494811
},
"4": {
"ner": 5811.96771290383
},
"5": {
"ner": 5512.271173835842
},
"6": {
"ner": 5296.490141148527
},
"7": {
"ner": 5235.260896612553
},
"8": {
"ner": 5046.740624489254
},
"9": {
"ner": 4973.830577046772
},
"10": {
"ner": 5017.859998598908
},
"11": {
"ner": 4900.961947907638
},
"12": {
"ner": 4838.758090039101
},
"13": {
"ner": 4886.875202931668
},
"14": {
"ner": 4714.624255783186
},
"15": {
"ner": 4738.327280185656
},
"16": {
"ner": 4565.160664788347
},
"17": {
"ner": 4583.78275722923
},
"18": {
"ner": 4495.220605149534
},
"19": {
"ner": 4534.810179279405
},
"20": {
"ner": 4500.498598112403
},
"21": {
"ner": 4424.467961309918
},
"22": {
"ner": 4418.58057027781
},
"23": {
"ner": 4177.6422341950165
},
"24": {
"ner": 4473.728806059217
},
"25": {
"ner": 4482.807123740337
},
"26": {
"ner": 4295.435655848486
},
"27": {
"ner": 4296.125647655365
},
"28": {
"ner": 4211.3802648941555
},
"29": {
"ner": 4226.050618910973
},
"30": {
"ner": 4343.9247733939055
},
"31": {
"ner": 4251.641707995066
},
"32": {
"ner": 4284.022606448415
},
"33": {
"ner": 4354.118602595073
},
"34": {
"ner": 4372.461272659416
},
"35": {
"ner": 4378.70702104278
},
"36": {
"ner": 4245.001379737945
},
"37": {
"ner": 4270.430685805459
},
"38": {
"ner": 4208.291241037127
},
"39": {
"ner": 4250.659736461786
},
"40": {
"ner": 4201.661238122778
},
"41": {
"ner": 4174.294246218296
},
"42": {
"ner": 4317.815189482398
},
"43": {
"ner": 4206.800198851955
},
"44": {
"ner": 4211.050399543626
},
"45": {
"ner": 4164.989329134082
},
"46": {
"ner": 4169.060926826847
},
"47": {
"ner": 4246.892852349701
},
"48": {
"ner": 4376.177885754265
},
"49": {
"ner": 4144.422442224529
},
"50": {
"ner": 4229.421797165746
},
"51": {
"ner": 4100.578560124174
},
"52": {
"ner": 4273.158234436391
},
"53": {
"ner": 4141.029963890384
},
"54": {
"ner": 4119.760375687587
}
}
40 changes: 40 additions & 0 deletions src/core/models/singer_ner_he/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"lang":"he",
"name":"find_singer_heb",
"version":"25.1",
"spacy_version":">=3.7.5,<3.8.0",
"description":"Model for recognizing singer names in Hebrew song titles",
"author":"nhlocal",
"email":"[email protected]",
"url":"",
"license":"MIT",
"spacy_git_version":"a6d0fc360",
"vectors":{
"width":0,
"vectors":0,
"keys":0,
"name":null,
"mode":"default"
},
"labels":{
"ner":[
"SINGER"
]
},
"pipeline":[
"ner"
],
"components":[
"ner"
],
"disabled":[

],
"tags":[
"NER",
"Hebrew",
"Singer",
"Named Entity Recognition",
"Text Classification"
]
}
Loading

0 comments on commit af4a4b9

Please sign in to comment.