Пытаюсь импортировать word_topic_vectors из пакета nlpia.
Приведённый ниже код вызывает ошибку UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 360: character maps to <undefined>.
Код:
from nlpia.book.examples.ch04_catdog_lsa_3x6x16 import word_topic_vectors
Ошибка:
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-54-4bbfbc931491> in <module>
----> 1 from nlpia.book.examples.ch04_catdog_lsa_3x6x16 import word_topic_vectors
d:\python\lib\site-packages\nlpia\book\examples\ch04_catdog_lsa_3x6x16.py in <module>
128
129 # do it all over again on a tiny portion of the corpus and vocabulary
--> 130 corpus = get_data('cats_and_dogs_sorted')[:NUM_PRETTY]
131 docs = normalize_corpus_words(corpus)
132 tfidfer = TfidfVectorizer(min_df=1, max_df=.99, stop_words=None, token_pattern=r'(?u)\b\w+\b',
d:\python\lib\site-packages\nlpia\loaders.py in get_data(name, nrows, limit)
1111 return filepaths[name]
1112 elif name in DATASET_NAME2FILENAME:
-> 1113 return read_named_csv(name, nrows=nrows)
1114 elif name in DATA_NAMES:
1115 return read_named_csv(DATA_NAMES[name], nrows=nrows)
d:\python\lib\site-packages\nlpia\loaders.py in read_named_csv(name, data_path, nrows, verbose)
1003 name = DATASET_NAME2FILENAME[name]
1004 if name.lower().endswith('.txt') or name.lower().endswith('.txt.gz'):
-> 1005 return read_text(os.path.join(data_path, name), nrows=nrows)
1006 else:
1007 return read_csv(os.path.join(data_path, name), nrows=nrows)
d:\python\lib\site-packages\nlpia\futil.py in read_text(forfn, nrows, verbose)
416 """
417 tqdm_prog = tqdm if verbose else no_tqdm
--> 418 nrows = wc(forfn, nrows=nrows) # not necessary when nrows==None
419 lines = np.empty(dtype=object, shape=nrows)
420 with ensure_open(forfn) as f:
d:\python\lib\site-packages\nlpia\futil.py in wc(f, verbose, nrows)
48 tqdm_prog = tqdm if verbose else no_tqdm
49 with ensure_open(f, mode='r') as fin:
---> 50 for i, line in tqdm_prog(enumerate(fin)):
51 if nrows is not None and i >= nrows - 1:
52 break
d:\python\lib\encodings\cp1252.py in decode(self, input, final)
21 class IncrementalDecoder(codecs.IncrementalDecoder):
22 def decode(self, input, final=False):
---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0]
24
25 class StreamWriter(Codec,codecs.StreamWriter):
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 360: character maps to <undefined>
Сведения об ОС: Windows 10
Версия Python: 3.6.0