Во время обучения я делаю это:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.externals import joblib
vectorizer = TfidfVectorizer(decode_error='ignore')
docs = ['this is a test','another test']
# tokenize and build vocab
vectorizer.fit_transform(docs)
joblib.dump(vectorizer, open("tfidf1.pkl", "wb"))
Затем во время теста:
vectorizer = TfidfVectorizer(decode_error='ignore')
vectozier = joblib.load(open("tfidf1.pkl", 'rb'))
feature_names = vectorizer.get_feature_names()
Но всегда выдает ошибку:
raise NotFittedError("Vocabulary not fitted or provided")
sklearn.exceptions.NotFittedError: Vocabulary not fitted or provided