-
Notifications
You must be signed in to change notification settings - Fork 0
/
SENT_DATA_FINAL.py
171 lines (143 loc) · 7.66 KB
/
SENT_DATA_FINAL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import streamlit as st
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from collections import Counter
from wordcloud import WordCloud
import numpy as np
from nltk.corpus import stopwords
import re
# Deserialize the two fitted artifacts once, when the script starts: first the
# classifier, then the vectorizer whose output predict_sentiment feeds to it.
# Both paths are relative, so they resolve against the current working directory.
model, vectorizer = joblib.load('model.pkl'), joblib.load('vectorizer.pkl')
# Text preprocessing hook
def preprocess_text(text):
    """Return *text* exactly as received.

    This is a placeholder: no cleaning or normalisation is implemented yet,
    so the value handed to the vectorizer is the caller's raw input.

    NOTE(review): any logic added here presumably has to mirror the
    preprocessing used when the vectorizer was fitted -- confirm before
    changing.
    """
    return text
# Single-review sentiment prediction
def predict_sentiment(text):
    """Classify one review and return the label the model assigns to it.

    The review goes through ``preprocess_text``, is turned into a one-row
    input by the module-level ``vectorizer``, and is scored by the
    module-level ``model``. Only the first (and only) prediction is returned.
    """
    # The vectorizer works on a collection of documents, hence the 1-item list.
    features = vectorizer.transform([preprocess_text(text)])
    predictions = model.predict(features)
    return predictions[0]
# Streamlit UI
# Labels main() looks for in the model output, in display order.
_SENTIMENT_LABELS = ('Positive', 'Negative', 'Neutral')
# Colour used for each sentiment in the word clouds.
_SENTIMENT_COLORS = {'Positive': 'green', 'Negative': 'red', 'Neutral': 'gray'}
# A word: letters/digits, optionally with inner apostrophes ("don't").
_WORD_PATTERN = re.compile(r"[^\W_]+(?:'[^\W_]+)*")


def _overall_sentiment(counts):
    """Return the label with the highest count, or 'Neutral' when the lead is shared."""
    top = max(counts.values())
    leaders = [label for label, count in counts.items() if count == top]
    return leaders[0] if len(leaders) == 1 else 'Neutral'


def _tokenize(text, stop_words):
    """Return lower-cased words of *text*, minus stop words, 1-char tokens and numbers."""
    return [
        word
        for word in _WORD_PATTERN.findall(text.lower())
        if word not in stop_words and len(word) > 1 and not word.isdigit()
    ]


def _show_top_words(label, text, stop_words, limit=10):
    """Render a bar chart of the *limit* most frequent words in *text*."""
    st.write(f'Most {label} Words:')
    top_words = Counter(_tokenize(text, stop_words)).most_common(limit)
    if not top_words:
        st.write(f'No {label.lower()} words to plot.')
        return
    st.bar_chart(pd.DataFrame(top_words, columns=['Word', 'Count']).set_index('Word'))


def _show_word_cloud(label, text):
    """Render a single-colour word cloud for *text*, or a notice when it has no usable words."""
    color = _SENTIMENT_COLORS[label]
    cloud = None
    if text:
        try:
            cloud = WordCloud(color_func=lambda *args, **kwargs: color).generate(text)
        except ValueError:
            # WordCloud raises ValueError when nothing is left to draw
            # (e.g. the text consists solely of its built-in stop words).
            cloud = None
    if cloud is None:
        st.write(f'No {label.lower()} words to plot.')
        return
    st.write(f'{label} Words:')
    st.image(cloud.to_array(), caption=f'{label} Word Cloud', use_column_width=True)


def main():
    """Run the sentiment-analysis page.

    Reads reviews (one per line) from a text area and, once the
    'Analyze Reviews' button is pressed:

    1. classifies every non-blank line with ``predict_sentiment``;
    2. reports the share of 'Positive' / 'Negative' / 'Neutral' labels and the
       overall sentiment (the most frequent label; a shared lead is 'Neutral');
    3. plots the distribution, the ten most common words per sentiment,
       a word cloud per sentiment and a combined 3D word scatter.

    Blank lines are ignored so that a trailing newline or spacing between
    reviews does not count as a review. If nothing remains, the user is asked
    to enter some reviews.
    """
    st.title('Sentiment Analysis App')
    reviews_input = st.text_area('Enter up to 10,000 reviews (one review per line):', height=200)

    if not st.button('Analyze Reviews'):
        return

    # splitlines() also copes with '\r\n'; dropping blank lines keeps them
    # from being classified and from skewing the percentages.
    stripped_lines = (line.strip() for line in (reviews_input or '').splitlines())
    reviews_list = [line for line in stripped_lines if line]
    if not reviews_list:
        st.write('Please enter some reviews.')
        return

    sentiments = [predict_sentiment(review) for review in reviews_list]

    # reviews_list is non-empty here, so the division below is safe.
    total_reviews = len(sentiments)
    counts = {label: sentiments.count(label) for label in _SENTIMENT_LABELS}
    percentages = {label: (count / total_reviews) * 100 for label, count in counts.items()}

    st.subheader('Sentiment Analysis Results:')
    for label in _SENTIMENT_LABELS:
        st.write(f'{label} Sentiment: {percentages[label]:.2f}%')
    st.write(f'Overall Sentiment: {_overall_sentiment(counts)}')

    # Sentiment distribution bar chart.
    fig, ax = plt.subplots()
    ax.bar(list(_SENTIMENT_LABELS), [percentages[label] for label in _SENTIMENT_LABELS])
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Percentage')
    ax.set_title('Sentiment Distribution')
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates; close it so
    # figures do not pile up across Streamlit reruns.
    plt.close(fig)

    # One joined text per sentiment, reused by every visualisation below.
    texts = {
        label: ' '.join(
            review for review, sentiment in zip(reviews_list, sentiments) if sentiment == label
        )
        for label in _SENTIMENT_LABELS
    }

    stop_words = set(stopwords.words('english'))
    st.subheader('Most Common Words:')
    for label in _SENTIMENT_LABELS:
        _show_top_words(label, texts[label], stop_words)

    # The heading is shown regardless of which sentiments have text.
    st.subheader('Word Clouds:')
    for label in _SENTIMENT_LABELS:
        _show_word_cloud(label, texts[label])

    # 3D word scatter combining all three sentiments.
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    for label in _SENTIMENT_LABELS:
        add_word_cloud(ax, texts[label], color=_SENTIMENT_COLORS[label])
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title('3D Word Cloud')
    st.pyplot(fig)
    plt.close(fig)
def add_word_cloud(ax, text, color='blue', max_words=100):
    """Scatter the words of *text* at random positions on a 3D axes.

    Args:
        ax: Axes object providing ``scatter(x, y, z, ...)`` and
            ``text(x, y, z, s, ...)`` (a Matplotlib 3D axes in this app).
        text: Whitespace-separated words to draw. Empty, ``None`` or
            whitespace-only text draws nothing and shows a notice instead.
        color: Colour applied to the points and their word labels.
        max_words: Upper bound on the number of words drawn, taken from the
            start of *text*. The default matches the previously fixed 100.
    """
    words = text.split()[:max_words] if text else []
    if not words:
        st.write('No words to plot for the word cloud.')
        return

    # One random coordinate in [0, 100) per word, so every point has a label
    # and every word (including the last one) gets a point.
    x, y, z = np.random.rand(3, len(words)) * 100
    ax.scatter(x, y, z, color=color)
    for xi, yi, zi, word in zip(x, y, z, words):
        ax.text(xi, yi, zi, word, color=color, fontsize=12)
# Build the page only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()