-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
136 lines (110 loc) · 5.32 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import streamlit as st
import pandas as pd
import numpy as np
from preprocess import preprocess
# import joblib
import pickle
# Text-processing dependencies (the preprocessing function itself is imported from the `preprocess` module above)
import re
import nltk
import nltk
# Turn off pandas' chained-assignment warning for the whole app.
pd.options.mode.chained_assignment = None

# Restore the pickled artefacts used for prediction: the TF-IDF vectorizer
# and the aspect / polarity classifiers.
# NOTE(review): pickle.load executes arbitrary code from the file; these
# .pkl files must only ever come from a trusted source.
with open('tfidf.pkl', 'rb') as vectorizer_file:
    tfidf_vectorizer = pickle.load(vectorizer_file)
with open('LogisticAspect2.pkl', 'rb') as aspect_file:
    logistic_aspect = pickle.load(aspect_file)
with open('LogisticPolarity.pkl', 'rb') as polarity_file:
    logistic_polarity = pickle.load(polarity_file)

# Fetch the NLTK corpora (presumably required by `preprocess` -- confirm).
for _nltk_resource in ('stopwords', 'punkt', 'wordnet'):
    nltk.download(_nltk_resource)
import string
# def classify_sentiment(sentence, aspect_model, polarity_model, tfidf_vectorizer):
# # Split the input sentence into individual sentences using the full stop character
# comment = preprocess(sentence)
# # sentences = sentence.split('.')
# # Vectorize the preprocessed sentences
# X = tfidf_vectorizer.transform([comment])
# # Predict the aspects for each sentence using the aspect model
# aspect_preds = aspect_model.predict(X)
# # Classify the polarity of each sentence using the polarity model
# polarity_preds = polarity_model.predict(X)
# aspect =""
# aspect_pred = aspect_preds[0]
# if np.any(aspect_pred[0]==1): aspect+= " | Data integration"
# if np.any(aspect_pred[1]==1): aspect+= " | Marketing, Communication & Special offers"
# if np.any(aspect_pred[2]==1): aspect+= " | Technology"
# if np.any(aspect_pred[3]==1): aspect+= " | Payment and Check-out"
# if np.any(aspect_pred[4]==1): aspect+= " | Shopping experience"
# if np.any(aspect_pred[5]==1): aspect+= " | Unemployment"
# if np.any(aspect_pred[6]==1): aspect+= " | Product availability and Store design"
# if np.any(aspect_pred[7]==1): aspect+= " | Price and Value"
# if np.any(aspect_pred[8]==1): aspect+= " | General"
# if np.any(aspect_pred[9]==1): aspect+= " | Privacy and Security issues"
# if(aspect!=""): aspect = aspect[3:]
# polarity = str(polarity_preds)
# polarity = preprocess(polarity)
# if(polarity=="positi"):polarity="Positive"
# elif(polarity=="negati"):polarity="Negative"
# elif(polarity=="neutral"): polarity="Neutral"
# return aspect, polarity
def predict_aspect(text):
    """Return the aspect names predicted for a single vectorized comment.

    ``text`` is handed unchanged to ``logistic_aspect.predict``; the caller in
    this file passes the TF-IDF vector of one clause rather than a raw string.
    Only the first row of the prediction is inspected. Positions 0-9 of that
    row correspond to the fixed aspect names below, and every position whose
    value equals 1 contributes its name.

    Returns:
        The selected aspect names joined with " | ", or "" when no position
        is set.
    """
    # Order matters: the index of each name is the column it is read from.
    aspect_names = (
        "Data integration",
        "Marketing, Communication & Special offers",
        "Technology",
        "Payment and Check-out",
        "Shopping experience",
        "Unemployment",
        "Product availability and Store design",
        "Price and Value",
        "General",
        "Privacy and Security issues",
    )
    first_row = logistic_aspect.predict(text)[0]
    flagged = [
        name
        for position, name in enumerate(aspect_names)
        if np.any(first_row[position] == 1)
    ]
    return " | ".join(flagged)
def predict_polarity(vec):
    """Return a display label for the polarity predicted from ``vec``.

    The raw output of ``logistic_polarity.predict`` is converted to its string
    form and passed through ``preprocess``. The resulting token is then matched
    against "positi", "negati" and "neutral" (presumably the cleaned/stemmed
    class names -- confirm against ``preprocess``) and replaced by a
    capitalised label. A token that matches none of them is returned unchanged.
    """
    display_labels = {
        "positi": "Positive",
        "negati": "Negative",
        "neutral": "Neutral",
    }
    raw_prediction = logistic_polarity.predict(vec)
    token = preprocess(str(raw_prediction))
    return display_labels.get(token, token)
# Clause delimiters: sentence punctuation, or a contrast/alternative
# conjunction. The conjunctions are matched as whole words (\b...\b) so that
# ordinary words containing them -- "for ", "sensor ", "normal", "minor" --
# are no longer cut in half, and case-insensitively so "But"/"However" count.
_CLAUSE_SPLIT_RE = re.compile(
    r'[.!?;]|\b(?:but|except|yet|although|though|however|despite|or|nor)\b',
    re.IGNORECASE,
)


def predict_multi_aspect(text):
    """Predict aspects and polarities for every clause of a comment.

    The comment is split into clauses on sentence punctuation and on
    conjunctions such as "but", "except" or "yet". Each clause is run through
    ``preprocess``; clauses whose preprocessed form has two characters or fewer
    are skipped. The remaining clauses are vectorized with
    ``tfidf_vectorizer`` and classified by ``predict_aspect`` and
    ``predict_polarity``.

    Args:
        text: The raw comment entered by the user.

    Returns:
        A ``(aspects, polarities)`` tuple of strings, each joined with " | ".
        ``aspects`` lists every distinct non-empty aspect result once, in
        order of first appearance. ``polarities`` lists the non-empty polarity
        of every classified clause (not de-duplicated). Either string is ""
        when nothing was predicted.
    """
    aspect_labels = []
    polarity_labels = []

    # Preprocess each clause and predict its aspects and polarity.
    for clause in _CLAUSE_SPLIT_RE.split(text):
        cleaned = preprocess(clause)
        if len(cleaned) <= 2:
            # Empty or near-empty clause: nothing meaningful to classify.
            continue

        vec = tfidf_vectorizer.transform([cleaned])
        aspect = predict_aspect(vec)
        polarity = predict_polarity(vec)

        if aspect != "" and aspect not in aspect_labels:
            aspect_labels.append(aspect)
        if polarity != "":
            polarity_labels.append(polarity)

    return " | ".join(aspect_labels), " | ".join(polarity_labels)
# Create Streamlit app: title, introductory text, input box and a button
# that triggers classification of the entered comment.
st.title("Aspect-based sentiment analysis on IoT store comments")
st.write("This application was built based on the model we used to analyze data in our research. Although we still maintain the same framework as presented in the research process, some changes in the processing procedure have been made.")
st.write("Please input a short sample comment expressing your viewpoint on the experience at retail stores with IoT applications. Our model is built on English language data, so please enter an English comment to allow the model to classify it accurately.")
st.write("An example of a comment is: 'The smart cart is incredible'")

text = st.text_area("Text", height=200)
if st.button("Classify"):
    if not text.strip():
        # Without this guard an empty submission would render blank
        # "Sentiment:" / "Aspect:" lines and give the user no feedback.
        st.warning("Please enter a comment to classify.")
    else:
        aspect, polarity = predict_multi_aspect(text)
        st.write("Sentiment: ", polarity)
        st.write("Aspect: ", aspect)