From 68cfe485d019a568540f0e4f614d9c18d77c5501 Mon Sep 17 00:00:00 2001 From: NHLOCAL Date: Mon, 2 Sep 2024 16:15:29 +0300 Subject: [PATCH] =?UTF-8?q?=D7=92=D7=A8=D7=A1=D7=AA=20cli=20=D7=9E=D7=A9?= =?UTF-8?q?=D7=95=D7=9C=D7=91=D7=AA=20sklean?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit יצירת גרסת cli של התוכנה שמשלבת זיהוי סוג מחרוזת (זמר או אחר) לצורך שכבת זיהוי נוספת על מודל ה-NER --- ...342\200\217\342\200\217publish-cli-ml.yml" | 2 +- .../model_creation/try_model.py | 21 ++++++ src/core/singles_sorter_v5.py | 66 ++++++++++++++++++- 3 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 machine-learn/music_classification/model_creation/try_model.py diff --git "a/.github/workflows/\342\200\217\342\200\217publish-cli-ml.yml" "b/.github/workflows/\342\200\217\342\200\217publish-cli-ml.yml" index 920b0070..aca1fed9 100644 --- "a/.github/workflows/\342\200\217\342\200\217publish-cli-ml.yml" +++ "b/.github/workflows/\342\200\217\342\200\217publish-cli-ml.yml" @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | - pip install pyinstaller music_tag jibrish_to_hebrew spacy==3.7.5 + pip install pyinstaller music_tag jibrish_to_hebrew spacy==3.7.5 scikit-learn - name: Build EXE run: | diff --git a/machine-learn/music_classification/model_creation/try_model.py b/machine-learn/music_classification/model_creation/try_model.py new file mode 100644 index 00000000..ae9bb3cd --- /dev/null +++ b/machine-learn/music_classification/model_creation/try_model.py @@ -0,0 +1,21 @@ +import pickle + +# מיפוי תוויות מספריות לשמות קטגוריות +label_mapping = {0: "ARTIST", 1: "ALBUM", 2: "SONG", 3: "RANDOM"} + +# טעינת המודל +with open('music_classifier.pkl', 'rb') as f: + loaded_model = pickle.load(f) + +# פונקציה לחיזוי על מחרוזת טקסט בודדת +def classify_text(text): + # המודל מצפה לקבל רשימה של טקסטים גם אם יש טקסט אחד בלבד + prediction = loaded_model.predict([text]) + return prediction[0] # החיזוי הוא רשימה ולכן אנו מחזירים את הערך הראשון + +# הדגמת השימוש בפונקציה +text_input = input("הכנס מחרוזת טקסט לסיווג: ") +prediction = classify_text(text_input) + +# הצגת התוצאה +print(f"הקטגוריה של הטקסט היא: {label_mapping[prediction]} (זיהוי מספרי: {prediction})") diff --git a/src/core/singles_sorter_v5.py b/src/core/singles_sorter_v5.py index a039d02c..dcf59fa5 100644 --- a/src/core/singles_sorter_v5.py +++ b/src/core/singles_sorter_v5.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -__VERSION__ = '13.6' +__VERSION__ = '14.0' import os import re @@ -15,6 +15,8 @@ import datetime from spacy import load +import pickle +from sklearn import metrics class MusicSorter: @@ -51,15 +53,73 @@ def __init__(self, source_dir, target_dir=None, copy_mode=False, abc_sort=False, self.logger.error(f"Failed to load NER model: {str(e)}") self.nlp = None + # Load the sklearn model + try: + with open(r'C:\Users\משתמש\Documents\GitHub\Singles-Sorter-ml\machine-learn\music_classification\model_creation\music_classifier.pkl', 'rb') as model_file: + self.sklearn_model = pickle.load(model_file) + self.logger.info("Loaded sklearn model successfully") + except Exception as e: + self.logger.error(f"Failed to load sklearn model: {str(e)}") + self.sklearn_model = None + + + + def verify_artist_with_sklearn(self, artist_name): + if not self.sklearn_model: + self.logger.warning("sklearn model not available for verification") + return True # Assume it's an artist if model is not available + + # Prepare the input for the model + input_data = [artist_name] + + try: + # Make prediction + prediction = self.sklearn_model.predict(input_data) + probabilities = self.sklearn_model.predict_proba(input_data)[0] + + # Get the predicted class and its probability + predicted_class = prediction[0] + class_probability = probabilities[predicted_class] + + # Define class names for logging + class_names = ["ARTIST", "ALBUM", "SONG", "RANDOM"] + # Log the prediction details + self.logger.info(f"sklearn model prediction for '{artist_name}': " + f"class={class_names[predicted_class]}, " + f"probability={class_probability:.2f}") + # Check if the predicted class is "אמן" (0) + is_artist = predicted_class == 0 + + if is_artist: + self.logger.info(f"'{artist_name}' verified as an artist") + else: + self.logger.info(f"'{artist_name}' not verified as an artist. " + f"Predicted as: {class_names[predicted_class]}") + + return is_artist + + except Exception as e: + self.logger.error(f"Error during sklearn prediction for '{artist_name}': {str(e)}") + return True # Assume it's an artist if prediction fails def process_with_ner(self, text): if not self.nlp: return [] doc = self.nlp(text) - return [ent.text for ent in doc.ents if ent.label_ == "SINGER"] - + potential_artists = [ent.text for ent in doc.ents if ent.label_ == "SINGER"] + + verified_artists = [] + for artist in potential_artists: + self.logger.info(f"NER model identified potential artist: {artist}") + if self.verify_artist_with_sklearn(artist): + verified_artists.append(artist) + self.logger.info(f"sklearn model verified '{artist}' as an artist") + else: + self.logger.info(f"sklearn model rejected '{artist}' as an artist") + + return verified_artists