Python pickle не может загрузить файл в веб-приложении Flask: AttributeError: модуль '__main__' не имеет атрибута 'tokenize'
/ 29 декабря 2018

У меня есть веб-приложение на Flask, которому требуется загрузить модель scikit-learn. Модель создается с помощью функции build_model и сохраняется в файл с именем classifier1.pkl с помощью функции save_model. Обе эти функции находятся в одном файле Python. Ниже приведено содержимое этого файла:

# import .....

def load_data(database_filepath):
    """Load the cleaned dataset.

    NOTE(review): only the summary line of this function is visible in this
    excerpt; the loading logic itself is not shown here. ``main`` unpacks the
    result of this call as ``X, Y, category_names``.

    Args:
        database_filepath: Path of the database to read from.
    """

def tokenize(text):
    """Convert the given text into a list of normalized tokens.

    Each token produced by ``word_tokenize`` is lemmatized, lower-cased and
    stripped of surrounding whitespace.

    NOTE: pickle stores functions by reference (module name + function
    name). This function is handed to ``CountVectorizer(tokenizer=tokenize)``,
    so a model pickled while this file runs as a script records it as
    ``__main__.tokenize``; whatever process unpickles the model must be able
    to resolve that name.

    Args:
        text: The raw text to tokenize.

    Returns:
        list: The cleaned tokens, in their original order.
    """
    lemmatizer = WordNetLemmatizer()

    # Collect every cleaned token; previously the cleaned token was computed
    # but never stored, so the function always returned an empty list.
    return [
        lemmatizer.lemmatize(tok).lower().strip()
        for tok in word_tokenize(text)
    ]

def build_model():
    """Construct a scikit-learn pipeline wrapped in a GridSearchCV tuner.

    The pipeline chains a ``CountVectorizer`` (using ``tokenize`` as its
    tokenizer), a ``TfidfTransformer`` and a ``MultiOutputClassifier`` around
    a ``RandomForestClassifier``. ``GridSearchCV`` searches the hyperparameter
    grid defined below.

    Returns:
        The unfitted ``GridSearchCV`` object wrapping the pipeline.
    """
    pipeline = Pipeline([
        ('vect', CountVectorizer(tokenizer=tokenize)),
        ('tfidf', TfidfTransformer()),
        ('clf', MultiOutputClassifier(RandomForestClassifier())),
    ])

    # Keys follow scikit-learn's "<step>__<param>" convention for addressing
    # parameters of nested pipeline steps.
    parameters = {
        'vect__ngram_range': ((1, 1), (1, 2)),
        'vect__max_df': (0.5, 0.75, 1.0),
        'vect__max_features': (None, 5000),
        'tfidf__use_idf': (True, False),
        'clf__estimator__n_estimators': [10, 20],
        'clf__estimator__min_samples_split': [2, 3],
    }

    model = GridSearchCV(pipeline, param_grid=parameters,
                         verbose=2, return_train_score=False, n_jobs=5)

    return model

def evaluate_model(model, X_test, Y_test, category_names):
    """Use the model to predict on the test set and print a report.

    Args:
        model: Model used to perform predictions.
        X_test: Test messages.
        Y_test: True values of the categories for corresponding messages.
        category_names: The name of each category.
    """
    Y_pred = model.predict(X_test)
    print(classification_report(Y_test, Y_pred, target_names=category_names))

def save_model(model, model_filepath):
    """Save the model in a pickle file.

    Args:
        model: Model to be saved.
        model_filepath: The file path of the saved model.
    """
    # Binary mode is required: pickle writes bytes, not text.
    with open(model_filepath, 'wb') as f:
        pickle.dump(model, f)

def main():
    """Train, evaluate and save the classifier from the command line.

    Expects exactly two command-line arguments: the database file path and
    the file path the pickled model is written to. With any other argument
    count a usage message is printed instead.
    """
    if len(sys.argv) == 3:
        database_filepath, model_filepath = sys.argv[1:]
        print('Loading data...\n    DATABASE: {}'.format(database_filepath))
        X, Y, category_names = load_data(database_filepath)
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=0.2)

        print('Building model...')
        model = build_model()

        print('Training model...')
        # The model must be fitted before it can be evaluated or usefully
        # saved; this call was garbled into the print statement above.
        model.fit(X_train, Y_train)

        print('Evaluating model...')
        evaluate_model(model, X_test, Y_test, category_names)

        print('Saving model...\n    MODEL: {}'.format(model_filepath))
        save_model(model, model_filepath)

        print('Trained model saved!')
    else:
        # NOTE(review): the example command below appears to be missing the
        # script name after "python"; confirm against the real file name.
        print('Please provide the filepath of the disaster messages database '
              'as the first argument and the filepath of the pickle file to '
              'save the model to as the second argument. \n\nExample: python '
              ' ../data/DisasterResponse.db classifier.pkl')


if __name__ == '__main__':
    main()

Модель, построенная функцией build_model(), представляет собой объект scikit-learn Pipeline, который использует функцию tokenize(text) в качестве токенизатора. Затем модель обучается и оценивается, и, наконец, сохраняется в pickle-файл.

Проблема заключается в том, что когда мой скрипт приложения загружает файл classifier1.pkl на Heroku, возникает ошибка AttributeError. Я протестировал код в своей локальной консоли — там он работает без ошибок.

Ниже приведены журналы платформы Heroku.

Traceback (most recent call last):
2018-12-29T12:14:09.796947+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/gunicorn/", line 583, in spawn_worker
2018-12-29T12:14:09.796949+00:00 app[web.1]: worker.init_process()
2018-12-29T12:14:09.796951+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/gunicorn/workers/", line 129, in init_process
2018-12-29T12:14:09.796952+00:00 app[web.1]: self.load_wsgi()
2018-12-29T12:14:09.796954+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/gunicorn/workers/", line 138, in load_wsgi
2018-12-29T12:14:09.796955+00:00 app[web.1]: self.wsgi =
2018-12-29T12:14:09.796958+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/gunicorn/app/", line 67, in wsgi
2018-12-29T12:14:09.796960+00:00 app[web.1]: self.callable = self.load()
2018-12-29T12:14:09.796961+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/gunicorn/app/", line 52, in load
2018-12-29T12:14:09.796963+00:00 app[web.1]: return self.load_wsgiapp()
2018-12-29T12:14:09.796965+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/gunicorn/app/", line 41, in load_wsgiapp
2018-12-29T12:14:09.796967+00:00 app[web.1]: return util.import_app(self.app_uri)
2018-12-29T12:14:09.796969+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/gunicorn/", line 350, in import_app
2018-12-29T12:14:09.796970+00:00 app[web.1]: __import__(module)
2018-12-29T12:14:09.796972+00:00 app[web.1]: File "/app/app/", line 37, in <module>
2018-12-29T12:14:09.796974+00:00 app[web.1]: model = joblib.load("models/classifier1.pkl")
2018-12-29T12:14:09.796976+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/sklearn/externals/joblib/", line 598, in load
2018-12-29T12:14:09.796977+00:00 app[web.1]: obj = _unpickle(fobj, filename, mmap_mode)
2018-12-29T12:14:09.796979+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/site-packages/sklearn/externals/joblib/", line 526, in _unpickle
2018-12-29T12:14:09.796980+00:00 app[web.1]: obj = unpickler.load()
2018-12-29T12:14:09.796982+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/", line 1050, in load
2018-12-29T12:14:09.796984+00:00 app[web.1]: dispatch[key[0]](self)
2018-12-29T12:14:09.796986+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/", line 1338, in load_global
2018-12-29T12:14:09.796987+00:00 app[web.1]: klass = self.find_class(module, name)
2018-12-29T12:14:09.796989+00:00 app[web.1]: File "/app/.heroku/python/lib/python3.6/", line 1392, in find_class
2018-12-29T12:14:09.796990+00:00 app[web.1]: return getattr(sys.modules[module], name)
2018-12-29T12:14:09.797163+00:00 app[web.1]: AttributeError: module '__main__' has no attribute 'tokenize'

Содержимое моего скрипта приложения:

#import ...  

# Flask application object for this web app.
app = Flask(__name__)

def tokenize(text):
    """Convert the given text into a list of normalized tokens.

    Each token produced by ``word_tokenize`` is lemmatized, lower-cased and
    stripped of surrounding whitespace.

    NOTE: pickle resolves functions by module name + function name when
    loading. Defining ``tokenize`` here only helps unpickling if the pickle
    refers to this module's name; a pickle that refers to
    ``__main__.tokenize`` will not find it when this module is imported
    rather than run as a script.

    Args:
        text: The raw text to tokenize.

    Returns:
        list: The cleaned tokens, in their original order.
    """
    lemmatizer = WordNetLemmatizer()

    # Collect every cleaned token; previously the cleaned token was computed
    # but never stored, so the function always returned an empty list.
    return [
        lemmatizer.lemmatize(tok).lower().strip()
        for tok in word_tokenize(text)
    ]

# load data
# Runs at import time: reads the 'ResponseCategory' table from the SQLite
# database into a DataFrame shared by the view functions below.
engine = create_engine('sqlite:///data/DisasterResponse.db')
df = pd.read_sql_table('ResponseCategory', engine)

# load model
# Runs at import time. NOTE: unpickling looks up every pickled function by
# module name + function name. If the model was pickled from a script run as
# __main__, the pickle refers to `__main__.tokenize`, which is not this module
# when the app is imported by a WSGI server instead of being run directly.
with open("models/classifier1.pkl", 'rb') as f:
        model = pickle.load(f)

def index():
    """Render the landing page with the plotly overview chart.

    Builds the chart specification from the module-level ``df``, serializes
    it to JSON and passes it to the ``master.html`` template.

    NOTE(review): no ``@app.route`` decorator is visible above this view in
    this excerpt; the view must be registered with ``app`` for Flask to
    serve it.

    Returns:
        The rendered ``master.html`` page.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # NOTE(review): computed but not used by any graph visible in this
    # excerpt; presumably intended for a second, per-category chart.
    cate_counts = df[df.columns[-36:]].sum()
    cate_names = list(df.columns[-36:])

    # create visuals
    # NOTE(review): the trace inside 'data' was missing from the excerpt and
    # the braces were unbalanced. A bar trace of message counts per genre is
    # assumed from the title and axis labels below; confirm against the
    # intended chart.
    graphs = [
        {
            'data': [
                {
                    'type': 'bar',
                    'x': genre_names,
                    'y': genre_counts,
                },
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count",
                },
                'xaxis': {
                    'title': "Genre",
                },
            },
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)

# web page that handles user query and displays model results

def go():
    """Classify the user's query and render the results page.

    Reads the ``query`` request argument, runs it through the module-level
    ``model`` and pairs each predicted label with a column name of ``df``
    (from the fifth column onward).

    NOTE(review): no ``@app.route`` decorator is visible above this view in
    this excerpt; the view must be registered with ``app`` for Flask to
    serve it.

    Returns:
        The rendered ``go.html`` page.
    """
    # save user input in query
    query = request.args.get('query', '')

    # use model to predict classification for query
    classification_labels = model.predict([query])[0]
    classification_results = dict(zip(df.columns[4:], classification_labels))

    # This will render the go.html Please see that file.
    # NOTE(review): the arguments of this call were missing from the excerpt.
    # The template name comes from the comment above; the keyword names are
    # assumed and must match the variables go.html actually reads.
    return render_template(
        'go.html',
        query=query,
        classification_result=classification_results,
    )

if __name__ == '__main__':
    # NOTE(review): the guarded statement was missing from the excerpt, which
    # is a syntax error. Starting Flask's built-in server with its default
    # host and port is assumed; confirm the intended host/port/debug options.
    app.run()

Структура папки моего проекта

Моя ОС - Ubuntu 18.04, версия Python - 3.6.7.
