Получение ошибки в pandas._libs.index.get_value_box - PullRequest
0 голосов
/ 28 января 2019

Я использую код ниже в python и получаю ошибку, даже если читаю только одну запись из файла:

import pandas as pd
import numpy as np
from pandas import DataFrame as df
from nltk.corpus import wordnet
import string
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.tokenize import WhitespaceTokenizer
from nltk.stem import WordNetLemmatizer

reviews_df = pd.read_csv("./hoteldata/Hotel_Reviews.csv",header=0, nrows = 100)
reviews_df["review"] = reviews_df["Positive_Review"].astype(str) + reviews_df["Negative_Review"]
reviews_df["is_bad_review"] = reviews_df["Reviewer_Score"].apply(lambda x:1 if x < 5 else 0)
reviews_df = reviews_df[["review","is_bad_review"]]
reviews_df = reviews_df["review"].apply(lambda x: x.replace("No Negative","")).replace("No positive","")

def cleanText(text):
    # lower case
    text = text.lower()

    # tokenize text and remove punctuation
    text = [word.strip(string.punctuation) for word in text.split(" ")]

    #remove words which contains numbers
    text = [word for word in text if not any( c.isdigit() for c in word)]

    #remove stop words
    stop = stopwords.word("english")
    text = [x for x in text if x not in stop]

    # remove empty tokens
    text = [t for t in text if len(t) > 0]

    # pos tag text
    pos_tags = pos_tag(text)

    # lemmatize text
    text = [WordNetLemmatizer.lemmatize(t[0],getWordnetPos(t[1])) for t in pos_tags]

def getWordnetPos(pos_tag):
    if pos_tag.startwith('J'):
        return wordnet.ADJ
    if pos_tag.startwith('V'):
        return wordnet.VERB
    if pos_tag.startwith('N'):
        return wordnet.NOUN
    if pos_tag.startwith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

# clean text data
reviews_df["review_clean"] = reviews_df["review"].apply(lambda x: cleanText(reviews_df["review"]))

Ошибка:

File "pandas\_libs\hashtable_class_helper.pxi", line 958, in pandas._libs.hashtable.Int64HashTable.get_item
TypeError: an integer is required

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "D:/My Code/R sample/badCustomerDetection.py", line 70, in <module>
    print(reviews_df["review"])
  File "C:\Users\abhishek.rai\AppData\Local\Continuum\anaconda3\envs\R sample\lib\site-packages\pandas\core\series.py", line 767, in __getitem__
    result = self.index.get_value(self, key)
  File "C:\Users\abhishek.rai\AppData\Local\Continuum\anaconda3\envs\R sample\lib\site-packages\pandas\core\indexes\base.py", line 3118, in get_value
    tz=getattr(series.dtype, 'tz', None))
  File "pandas\_libs\index.pyx", line 106, in pandas._libs.index.IndexEngine.get_value
  File "pandas\_libs\index.pyx", line 114, in pandas._libs.index.IndexEngine.get_value
  File "pandas\_libs\index.pyx", line 164, in pandas._libs.index.IndexEngine.get_loc
KeyError: 'review'

Process finished with exit code 1

Даже для одногозапись, я получаю ошибку.Ниже приведен экран печати данных.Данные присутствуют в ниже местоположения .

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...