Кодек utf-8 не может декодировать байт 0xe3 в позиции 87 word2vec gensim - PullRequest
0 голосов
/ 24 сентября 2019

У меня есть код

import time
import multiprocessing
from datetime import timedelta
from gensim.models import word2vec
# Record the wall-clock start; the value is not read again in this snippet.
start_time = time.time()
print('Training Word2Vec Model...')
# Stream the training corpus from a plain-text file.
# NOTE(review): per the traceback in this post, iterating LineSentence passes
# each line through utils.to_unicode, which decodes it as UTF-8; the
# UnicodeDecodeError (byte 0xe3) suggests data_text.txt is stored in another
# encoding -- confirm the file's real encoding and re-save it as UTF-8.
sentences = word2vec.LineSentence('data/data_text.txt')
# Train with size=300 (presumably the vector dimensionality -- verify against
# the installed gensim version) using every CPU core except one.
# NOTE(review): on a single-core machine this yields workers=0 -- confirm
# that is acceptable.
id_w2v = word2vec.Word2Vec(sentences, size=300, workers=multiprocessing.cpu_count()-1)
# Save the trained model to disk.
id_w2v.save('model_terbaru/word2vec_300.model')

Когда я обучаю модель, появляется следующая ошибка:

Traceback (most recent call last):

File "<ipython-input-10-fc7016864a34>", line 1, in <module>

        runfile('F:/pa reza/model.py', wdir='F:/pa reza')

File "C:\ProgramData\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 704, in runfile
    execfile(filename, namespace)

File "C:\ProgramData\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

File "F:/pa reza/model.py", line 13, in <module>
    iter=10)

File "C:\ProgramData\Anaconda\lib\site-packages\gensim\models\word2vec.py", line 527, in __init__
    fast_version=FAST_VERSION)

 File "C:\ProgramData\Anaconda\lib\site-packages\gensim\models\base_any2vec.py", line 335, in __init__
        self.build_vocab(sentences, trim_rule=trim_rule)

File "C:\ProgramData\Anaconda\lib\site-packages\gensim\models\base_any2vec.py", line 480, in build_vocab
    sentences, progress_per=progress_per, trim_rule=trim_rule)

File "C:\ProgramData\Anaconda\lib\site-packages\gensim\models\word2vec.py", line 1151, in scan_vocab
    for sentence_no, sentence in enumerate(sentences):

File "C:\ProgramData\Anaconda\lib\site-packages\gensim\models\word2vec.py", line 1073, in __iter__
    line = utils.to_unicode(line).split()

File "C:\ProgramData\Anaconda\lib\site-packages\gensim\utils.py", line 359, in any2unicode

return unicode(text, encoding, errors=errors)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe3 in position 87: invalid continuation byte

Помогите мне, пожалуйста.

...