фляга _pickle.PicklingError: - PullRequest
       1

фляга _pickle.PicklingError:

0 голосов
/ 12 февраля 2019

Я новичок в флеше, я пытаюсь реализовать мою модель классификатора текста (Bag of word) в python и развернуть ее с помощью веб-приложения фляги.но я получил ошибку при переходе на другие страницы с помощью следующего кода:

Реализация с использованием наивного байесовского классификатора, он будет отображаться на странице новостей, которая дает результат pos или neg.из колбы импорт Flask, render_template, url_for, запрос импорта панд как pd import pickle из sklearn.externals import joblib

import matplotlib.pyplot as plt
import csv
from textblob import TextBlob
import sklearn
import _pickle as cPickle
import numpy as np
from scipy.sparse.csr import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split,GridSearchCV,learning_curve
from sklearn.tree import DecisionTreeClassifier 
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib

app = Flask(__name__)



@app.route('/')
def home():
    return render_template('home.html')

@app.route('/predict',methods=['POST'])
def predict():
    messages = pd.read_csv('bitcoin_reddit.csv', usecols=["title","class"])
    messages['length'] = messages['title'].map(lambda text: len(text))

    def split_into_tokens(title):
        return TextBlob(title).words

    def split_into_lemmas(title):
        words = TextBlob(title).words.lower()
        # for each word, take its "base form" = lemma 
        return [word.lemma for word in words]

    bow_transformer = CountVectorizer(analyzer=split_into_lemmas).fit(messages['title'])
    messages_bow = bow_transformer.transform(messages['title'])

    tfidf_transformer = TfidfTransformer().fit(messages_bow) #normalization can be done with TF-IDF
    messages_tfidf = tfidf_transformer.transform(messages_bow)

    #Training Model NB
    spam_detector = MultinomialNB().fit(messages_tfidf, messages['class'])
    all_predictions = spam_detector.predict(messages_tfidf)

    msg_train, msg_test, label_train, label_test = \
    train_test_split(messages['title'], messages['class'], test_size=0.2)

    pipeline = Pipeline([
        ('bow', CountVectorizer(analyzer=split_into_lemmas)),  # strings to token integer counts
        ('tfidf', TfidfTransformer()),  # integer counts to weighted TF-IDF scores
        ('classifier', MultinomialNB())]) # train on TF-IDF vectors w/ Naive Bayes classifier


    scores = cross_val_score(pipeline,  # steps to convert raw messages into models
                             msg_train,  # training data
                             label_train,  # training labels
                             cv=10,  # split data randomly into 10 parts: 9 for training, 1 for scoring
                             scoring='accuracy')  # which scoring metric? 

    params = {
    'tfidf__use_idf': (True, False),
    'bow__analyzer': (split_into_lemmas, split_into_tokens),
    }

    grid = GridSearchCV(
    pipeline,  # pipeline from above
    params,  # parameters to tune via cross validation
    refit=True,  # fit using all available data at the end, on the best found param combination
    scoring='accuracy',  # what score are we optimizing?
    cv=StratifiedKFold(n_splits=5))  # what type of cross validation to use

    nb_detector = grid.fit(msg_train, label_train)
    predictions = nb_detector.predict(msg_test)

    joblib.dump(nb_detector, 'NB_model_bow.pkl')

    if request.method == 'POST':
        message = request.form['message']
        data = [message]
        vect = cv.transform(data).toarray()
        my_prediction = nb_detector.predict(vect)
    return render_template('result.html',prediction = my_prediction)



if __name__ == '__main__':
    app.run(debug=True)

Но я получил такую ​​ошибку

_pickle.PicklingError: Can't pickle <function predict.<locals>.split_into_lemmas at 0x000001ABF618AE18>: it's not found as __main__.predict.<locals>.split_into_lemmas
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...