У меня есть пример python скрипта, подобного этому
import nltk
from nltk.collocations import *
bigram_measures = nltk.collocations.BigramAssocMeasures()
finder = BigramCollocationFinder.from_words(
nltk.corpus.genesis.words('/Users/anonymous/Desktop/text.txt')
)
finder.apply_freq_filter(3)
finder.nbest(bigram_measures.pmi, 5)
Я не могу его запустить, потому что я столкнулся со следующей ошибкой:
Traceback (most recent call last):
File "text2tag.py", line 10, in <module>
nltk.corpus.genesis.words('/Users/anonymous/Desktop/text.txt')
File "/Users/anonymous/.virtualenvs/playground/lib/python3.6/site-packages/nltk/collocations.py", line 178, in from_words
for window in ngrams(words, window_size, pad_right=True):
File "/Users/anonymous/.virtualenvs/playground/lib/python3.6/site-packages/nltk/util.py", line 525, in ngrams
next_item = next(sequence)
File "/Users/anonymous/.virtualenvs/playground/lib/python3.6/site-packages/nltk/corpus/reader/util.py", line 296, in iterate_from
tokens = self.read_block(self._stream)
File "/Users/anonymous/.virtualenvs/playground/lib/python3.6/site-packages/nltk/corpus/reader/plaintext.py", line 134, in _read_word_block
words.extend(self._word_tokenizer.tokenize(stream.readline()))
File "/Users/anonymous/.virtualenvs/playground/lib/python3.6/site-packages/nltk/tokenize/regexp.py", line 133, in tokenize
return self._regexp.findall(text)
TypeError: cannot use a string pattern on a bytes-like object