AttributeError: у объекта «DataFrame» нет атрибута «is_impossible»: - я получил эту ошибку при работе с файлом Json [SQUAD DATASET] - PullRequest
0 голосов
/ 10 марта 2020
from collections import Counter
import re
import numpy as np
import pandas as pd
from nltk.tokenize import word_tokenize
from keras.models import Model, load_model
from keras.layers import Input, Dense, GRU, Masking, Lambda, Bidirectional, Dropout, Reshape
from keras.preprocessing.sequence import pad_sequences
from keras import regularizers
from keras.optimizers import Adam
from keras.utils import multi_gpu_model
import keras.backend as K
import tensorflow as tf
import matplotlib.pyplot as plt

train_df_all = pd.read_json('C:\Users\privus\Desktop\Q & A\Question Answering System\Data\train_file.json').reset_index(drop=True)

dev_df_all=pd.read_json('C:\Users\privus\Desktop\Q & A\Question Answering System\Data\validation_data.json').reset_index(drop=True)



# Dataframes consisting only of 'possible' to answer questions.
train_df = train_df_all.loc[[train_df_all.is_impossible == False,["context", "question", "answer_text", "answer_start", "title"]]].reset_index(drop=True)

# This is the testing dataset.
dev_df = dev_df_all.loc[train_df_all.is_impossible == False,["context", "question", "answer_text", "answer_start", "title"]].reset_index(drop=True)

train_df.answer_start = train_df_all.answer_start.astype(int)

dev_df.answer_start = dev_df_all.answer_start.astype(int)

# Extracting training data
contexts, questions, answers, answer_start = (train_df.context.values, train_df.question.values, train_df.answer_text.values, train_df.answer_start.values)

# Array of answer end indeces in the context.
answer_end = np.array([answer_start[idx] + len(answers[idx]) for idx in range(len(answer_start))])

ОШИБКА: -

AttributeError Traceback (most recent call last) in 4 #print(train_df_all) 5 # Dataframes consisting only of 'possible' to answer questions. ----> 6 train_df = train_df_all.loc[[train_df_all.is_impossible == False,["context", "question", "answer_text", "answer_start", "title"]]].reset_index(drop=True)#.loc[train_df_all.is_impossible == False, 7 # This is the testing dataset. 8 dev_df = dev_df_all.loc[train_df_all.is_impossible == False,["context", "question", "answer_text", "answer_start", "title"]].reset_index(drop=True) #replacing is_impossibel with impossible_question #.loc[dev_df_all.is_impossible == False,

~\Anaconda3\lib\site-packages\pandas\core\generic.py in getattr(self, name) 5272 f = lambda: self._data.consolidate() 5273 cons_data = self._protect_consolidate(f) -> 5274 return self._constructor(cons_data).finalize(self) 5275 5276 @property

AttributeError: 'DataFrame' object has no attribute 'is_impossible'
...