Я использую код ниже в python и получаю ошибку, даже если читаю только одну запись из файла:
import pandas as pd
import numpy as np
from pandas import DataFrame as df
from nltk.corpus import wordnet
import string
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.tokenize import WhitespaceTokenizer
from nltk.stem import WordNetLemmatizer
reviews_df = pd.read_csv("./hoteldata/Hotel_Reviews.csv",header=0, nrows = 100)
reviews_df["review"] = reviews_df["Positive_Review"].astype(str) + reviews_df["Negative_Review"]
reviews_df["is_bad_review"] = reviews_df["Reviewer_Score"].apply(lambda x:1 if x < 5 else 0)
reviews_df = reviews_df[["review","is_bad_review"]]
reviews_df = reviews_df["review"].apply(lambda x: x.replace("No Negative","")).replace("No positive","")
def cleanText(text):
# lower case
text = text.lower()
# tokenize text and remove punctuation
text = [word.strip(string.punctuation) for word in text.split(" ")]
#remove words which contains numbers
text = [word for word in text if not any( c.isdigit() for c in word)]
#remove stop words
stop = stopwords.word("english")
text = [x for x in text if x not in stop]
# remove empty tokens
text = [t for t in text if len(t) > 0]
# pos tag text
pos_tags = pos_tag(text)
# lemmatize text
text = [WordNetLemmatizer.lemmatize(t[0],getWordnetPos(t[1])) for t in pos_tags]
def getWordnetPos(pos_tag):
if pos_tag.startwith('J'):
return wordnet.ADJ
if pos_tag.startwith('V'):
return wordnet.VERB
if pos_tag.startwith('N'):
return wordnet.NOUN
if pos_tag.startwith('R'):
return wordnet.ADV
else:
return wordnet.NOUN
# clean text data
reviews_df["review_clean"] = reviews_df["review"].apply(lambda x: cleanText(reviews_df["review"]))
Ошибка:
File "pandas\_libs\hashtable_class_helper.pxi", line 958, in pandas._libs.hashtable.Int64HashTable.get_item
TypeError: an integer is required
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/My Code/R sample/badCustomerDetection.py", line 70, in <module>
print(reviews_df["review"])
File "C:\Users\abhishek.rai\AppData\Local\Continuum\anaconda3\envs\R sample\lib\site-packages\pandas\core\series.py", line 767, in __getitem__
result = self.index.get_value(self, key)
File "C:\Users\abhishek.rai\AppData\Local\Continuum\anaconda3\envs\R sample\lib\site-packages\pandas\core\indexes\base.py", line 3118, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas\_libs\index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value
File "pandas\_libs\index.pyx", line 164, in pandas._libs.index.IndexEngine.get_loc
KeyError: 'review'
Process finished with exit code 1
Даже для одногозапись, я получаю ошибку.Ниже приведен экран печати данных.Данные присутствуют в ниже местоположения .