Я новичок в Python. Я работаю в модели машинного обучения классификации спама. Я пытаюсь обработать объект модели, а затем запустить код и получить сообщение об ошибке:
EOFError: закончился ввод.
Я проверил файл рассола, это пусто. В чем причина пустого файла рассола? Мой код ниже
'code for preprocessing and training'
'''
def datapreprocessing(text):
nopunct = [char for char in text if char not in string.punctuation]
nopunct = ''.join(nopunct)
cleanwords = [word for word in nopunct.split() if word.lower() not in stopwords.words('english')]
return cleanwords
'''
'''
def training(data):
# importing dataset
#data = pd.read_csv("spam_ham_dataset.csv")
# creating new dataframe and omit unwanted columns
spam_data = data[['text', 'label_num']].copy()
# print(spam_data)
# checking for duplicates and remove them
spam_data.drop_duplicates(inplace=True)
# show the number of missing data for each column
spam_data.isnull().sum()
# Need to download stopwords
nltk.download('stopwords')
spam_data['text'].head().apply(datapreprocessing)
#convert the text into vector format
vect = CountVectorizer(analyzer = datapreprocessing)
X = vect.fit_transform(spam_data['text'])
print(X)
vec_file = "vectorizer.pickle"
with open(vec_file,'wb') as f:
saved_vec = pickle.dump(vect,f)
#Split the data into training and test data set
X_train,X_test,y_train,y_test = train_test_split(X,spam_data['label_num'],test_size= 0.2,random_state= 0)
#train the model
classifier = MultinomialNB()
classifier.fit(X_train,y_train)
pkl_filename = "pickle_model.pkl"
with open(pkl_filename,'wb') as file:
saved_model = pickle.dump(classifier,file)
y_pred = classifier.predict(X_test)
print(y_pred)
print(y_train.values)
acc_score = accuracy_score(y_test,y_pred)
con_mat = confusion_matrix(y_test,y_pred)
print(acc_score)
print(con_mat)
'''
'code for new predictions'
'''
def predict(review_spam):
print(">>>>>>>>predict>>>>>>>")
data = review_spam.message
print("message:",data)
#df = pd.DataFrame(d1,index=[0],columns = ['message','msg_type'])
#print("DataFrame:",df[['message']])
d1 = datapreprocessing(data)
print("data:",d1)
print(type(d1))
vec_file = "vectorizer.pickle"
vec_file = os.path.dirname(__file__)+"/"+vec_file
with open(vec_file, 'rb') as f:
loaded_vectorizer = pickle.load(f)
X = loaded_vectorizer.transform(d1).toarray()
print(X)
pkl_filename = "pickle_model.pkl"
pkl_filename = os.path.dirname(__file__)+"/"+pkl_filename
with open(pkl_filename, 'rb') as file:
loaded_model = pickle.load(file)
pred = loaded_model.predict(X)
print("prediction:",pred)
#return pred
'''
'''
if __name__ =='__main__':
data = pd.read_csv("spam_ham_dataset.csv")
training(data)
'''