Skip to content

Commit

Permalink
גרסת cli משולבת sklearn
Browse files Browse the repository at this point in the history
יצירת גרסת cli של התוכנה שמשלבת זיהוי סוג מחרוזת (זמר או אחר) לצורך שכבת זיהוי נוספת על מודל ה-NER
  • Loading branch information
NHLOCAL committed Sep 2, 2024
1 parent f83742b commit 68cfe48
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/‏‏publish-cli-ml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

- name: Install dependencies
run: |
pip install pyinstaller music_tag jibrish_to_hebrew spacy==3.7.5
pip install pyinstaller music_tag jibrish_to_hebrew spacy==3.7.5 scikit-learn
- name: Build EXE
run: |
Expand Down
21 changes: 21 additions & 0 deletions machine-learn/music_classification/model_creation/try_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pickle

# Map the classifier's numeric labels to readable category names.
label_mapping = dict(enumerate(("ARTIST", "ALBUM", "SONG", "RANDOM")))

# Load the trained model; the path is relative to the current working directory.
# NOTE(review): pickle.load can execute code embedded in the file, so only
# load a model file from a trusted source.
with open('music_classifier.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Predict the label of a single text string.
def classify_text(text):
    """Return the loaded model's prediction for one text string."""
    # The model expects a list of texts even when there is only one.
    batch = [text]
    labels = loaded_model.predict(batch)
    # The prediction is a sequence with one entry per input; return the only one.
    return labels[0]

# Demo: read one string from the user and classify it.
text_input = input("הכנס מחרוזת טקסט לסיווג: ")
prediction = classify_text(text_input)

# Print the category name (looked up in label_mapping) next to the raw numeric label.
print(f"הקטגוריה של הטקסט היא: {label_mapping[prediction]} (זיהוי מספרי: {prediction})")
66 changes: 63 additions & 3 deletions src/core/singles_sorter_v5.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
__VERSION__ = '13.6'
__VERSION__ = '14.0'

import os
import re
Expand All @@ -15,6 +15,8 @@
import datetime

from spacy import load
import pickle
from sklearn import metrics

class MusicSorter:

Expand Down Expand Up @@ -51,15 +53,73 @@ def __init__(self, source_dir, target_dir=None, copy_mode=False, abc_sort=False,
self.logger.error(f"Failed to load NER model: {str(e)}")
self.nlp = None

# Load the sklearn model
try:
with open(r'C:\Users\משתמש\Documents\GitHub\Singles-Sorter-ml\machine-learn\music_classification\model_creation\music_classifier.pkl', 'rb') as model_file:
self.sklearn_model = pickle.load(model_file)
self.logger.info("Loaded sklearn model successfully")
except Exception as e:
self.logger.error(f"Failed to load sklearn model: {str(e)}")
self.sklearn_model = None



def verify_artist_with_sklearn(self, artist_name):
    """Return whether the sklearn classifier accepts ``artist_name`` as an artist.

    The check is fail-open: if no classifier is loaded, or if the class
    prediction itself raises, the name is accepted so that a classifier
    problem never discards a candidate.

    The class probability is used only for logging. It is looked up
    separately and any failure there is logged and ignored, so a model
    without ``predict_proba`` still gives a real verdict.

    Args:
        artist_name: Candidate artist name; passed to the model as a
            one-element list.

    Returns:
        True if the predicted class is 0 ("ARTIST"), or if the classifier
        is unavailable or its prediction fails; False otherwise.
    """
    model = self.sklearn_model
    # Only a missing model (None) means "not loaded"; do not rely on the
    # estimator object's truthiness.
    if model is None:
        self.logger.warning("sklearn model not available for verification")
        return True  # Assume it's an artist if model is not available

    # Index i is the readable name of numeric class label i.
    class_names = ["ARTIST", "ALBUM", "SONG", "RANDOM"]
    input_data = [artist_name]

    try:
        predicted_class = model.predict(input_data)[0]
        predicted_name = class_names[predicted_class]
    except Exception as e:
        self.logger.error(f"Error during sklearn prediction for '{artist_name}': {str(e)}")
        return True  # Assume it's an artist if prediction fails

    # Best-effort probability for the log line only.
    probability_text = "n/a"
    predict_proba = getattr(model, "predict_proba", None)
    if callable(predict_proba):
        try:
            probabilities = predict_proba(input_data)[0]
            # Probability columns follow the order of ``classes_``, which is
            # not necessarily 0..n-1, so find the column by label.
            classes = getattr(model, "classes_", None)
            if classes is None:
                column = predicted_class
            else:
                column = list(classes).index(predicted_class)
            probability_text = f"{probabilities[column]:.2f}"
        except Exception as e:
            self.logger.warning(f"Could not compute sklearn probability for '{artist_name}': {str(e)}")

    self.logger.info(f"sklearn model prediction for '{artist_name}': "
                     f"class={predicted_name}, "
                     f"probability={probability_text}")

    # Class label 0 is "ARTIST".
    is_artist = bool(predicted_class == 0)

    if is_artist:
        self.logger.info(f"'{artist_name}' verified as an artist")
    else:
        self.logger.info(f"'{artist_name}' not verified as an artist. "
                         f"Predicted as: {predicted_name}")

    return is_artist
def process_with_ner(self, text):
    """Return the singer names in ``text`` that also pass sklearn verification.

    Runs the NER model on ``text``, keeps entities labelled "SINGER", and
    then keeps only those for which ``verify_artist_with_sklearn`` returns
    a truthy value.

    Args:
        text: The string to run through the NER model.

    Returns:
        A list of verified entity texts, in the order the NER model
        produced them; an empty list if no NER model is loaded.
    """
    if not self.nlp:
        return []

    doc = self.nlp(text)

    # First pass: everything the NER model tagged as a singer.
    potential_artists = [ent.text for ent in doc.ents if ent.label_ == "SINGER"]

    # Second pass: keep only the candidates the sklearn classifier accepts.
    verified_artists = []
    for artist in potential_artists:
        self.logger.info(f"NER model identified potential artist: {artist}")
        if self.verify_artist_with_sklearn(artist):
            verified_artists.append(artist)
            self.logger.info(f"sklearn model verified '{artist}' as an artist")
        else:
            self.logger.info(f"sklearn model rejected '{artist}' as an artist")

    return verified_artists



Expand Down

0 comments on commit 68cfe48

Please sign in to comment.