# I developed this to classify hotel reviews; it achieved an accuracy of 90%.
# It uses an ANN (deep learning) together with NLP.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Tokens that flip the polarity of the word that follows them.
_NEGATION_WORDS = frozenset(
    {"no", "not", "nor", "none", "cannot", "nothing", "nobody", "nowhere"}
)


def negate(text):
    """Prefix the word that follows a negation token with ``"not_"``.

    Negation only carries over to the single next token: once that token has
    been emitted, the flag is recomputed from that token itself.

    Args:
        text: Iterable of word tokens (e.g. the result of ``str.split()``).

    Returns:
        A new list with the same number of tokens, where every token that
        directly follows a negation token is prefixed with ``"not_"``.
    """
    result = []
    negation = False
    for word in text:
        result.append("not_" + word if negation else word)
        # Match whole tokens rather than substrings: a substring test for
        # "no" would also fire on words such as "know", "now" or "enough".
        # The "n't" suffix covers contractions when apostrophes are kept.
        negation = word in _NEGATION_WORDS or word.endswith("n't")
    return result
# Importing the dataset
# Load the tab-separated reviews file into a DataFrame.
# quoting=3 is csv.QUOTE_NONE, so quote characters are treated as plain text.
dataset = pd.read_csv('Restaurant_Reviews.tsv',delimiter="\t",quoting=3)
# Text-cleaning dependencies: regex, NLTK stopword list and Porter stemmer.
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Accumulates one cleaned, space-joined string per review.
corpus=[]
# NOTE(review): `spell` is not referenced in the visible part of this script — confirm before removing.
from autocorrect import spell
# Clean every review: keep letters only, lowercase, tokenize, mark negated
# words, drop English stopwords, stem, and re-join into a single string.
# The stemmer and the stopword set do not change between reviews, so they are
# built once instead of once per review / once per word.
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))
# Iterate over every row of the first column (rather than a fixed 1000) so
# the corpus always has as many entries as the dataset has rows.
for raw_review in dataset.iloc[:, 0]:
    review = re.sub('[^a-zA-Z]', ' ', raw_review)
    review = review.lower()
    review = review.split()
    # Negation marking must run before stopword removal, because "not" and
    # "no" are themselves in the English stopword list.
    review = negate(review)
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    review = " ".join(review)
    corpus.append(review)
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words features, capped at the 1500 most frequent terms of the corpus.
cv = CountVectorizer(max_features=1500)
X = cv.fit_transform(corpus).toarray()
# Second column of the dataset is used as the target (presumably the 0/1
# sentiment label — confirm against the data file).
y = dataset.iloc[:, 1].values
# Term -> feature-column index mapping learned by the vectorizer.
lm = cv.vocabulary_
# train_test_split lives in sklearn.model_selection; the former
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
# Feed-forward binary classifier: two ReLU hidden layers (50 and 30 units),
# each followed by dropout, and a single sigmoid output unit.
classifier = Sequential()
# Size the input layer from the training matrix instead of hard-coding 1500:
# max_features is only an upper bound, so a small corpus can produce fewer
# columns than that.
classifier.add(Dense(50, input_shape=(X_train.shape[1],), kernel_initializer='uniform', activation='relu'))
classifier.add(Dropout(rate=0.45))
classifier.add(Dense(30, kernel_initializer='uniform', activation='relu'))
classifier.add(Dropout(rate=0.45))
classifier.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
classifier.fit(X_train, y_train, batch_size=32, epochs=50)
# Predict on the held-out set, then threshold the sigmoid outputs at 0.5 and
# flatten to a 1-D array of 0/1 integers. Doing this vectorized avoids the
# hard-coded test-set size of 200 and the per-row int() conversion of arrays.
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5).astype(int).ravel()
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
#to test the model
# Run one hand-written review through the same cleaning steps used for the
# training corpus, then score it with the trained classifier.
ps = PorterStemmer()
# Build the stopword set once rather than once per word in the comprehension.
english_stopwords = set(stopwords.words('english'))
review = "i good to hang here"
review = re.sub('[^a-zA-Z]', ' ', review)
review = review.lower()
review = review.split()
review = negate(review)
review = [ps.stem(word) for word in review if word not in english_stopwords]
review = " ".join(review)
# Reuse the fitted vectorizer so the feature columns match the training data.
k = cv.transform([review]).toarray()
# tt holds the raw sigmoid output of the model for this review.
tt = classifier.predict(k)