Вот все установки и импорт:
!pip install wordcloud
!pip install fileupload
!pip install ipywidgets
!jupyter nbextension install --py --user fileupload
!jupyter nbextension enable --py fileupload
import wordcloud
import numpy as np
from matplotlib import pyplot as plt
from IPython.display import display
import fileupload
import io
import sys
Это виджет загрузки:
from IPython.display import display
import fileupload
# Create the file-upload widget; a change handler is attached to it further down.
uploader = fileupload.FileUploadWidget()
def _handle_upload(change):
    """Save an uploaded file to disk and publish its text as ``file_contents``.

    Intended to be registered as an observer on the upload widget. It reads
    ``change['owner']``, which must provide ``filename`` and ``data`` (the raw
    uploaded bytes).

    Side effects:
        * Writes the bytes to ``filename`` in binary mode.
        * Prints the file name and its size in kB.
        * Assigns the UTF-8 decoded text to the module-level name
          ``file_contents`` so that later cells (for example
          ``calculate_frequencies(file_contents)``) can use it. Without this
          assignment that name is never defined and those cells fail with
          ``NameError``.

    Raises:
        UnicodeDecodeError: if the uploaded bytes are not valid UTF-8.
    """
    global file_contents
    w = change['owner']
    with open(w.filename, 'wb') as f:
        f.write(w.data)
    print('Uploaded `{}` ({:.2f} kB)'.format(
        w.filename, len(w.data) / 2**10))
    # Decode last, so the file is still saved and reported even if the bytes
    # turn out not to be valid UTF-8 text.
    file_contents = w.data.decode('utf-8')
# Call _handle_upload whenever the widget's `data` attribute changes
# (i.e. after a file has been uploaded), then render the widget in the notebook.
uploader.observe(_handle_upload, names='data')
display(uploader)
Программа Wordcloud:
def calculate_frequencies(file_contents):
    """Build a word cloud image from the word frequencies of a text.

    Punctuation characters are removed, the text is split on whitespace, and
    every purely alphabetic word that is not in the stop-word list is counted.
    Matching is case-insensitive: words are lower-cased before filtering and
    counting, so "The" is dropped like "the" and "Word"/"word" share a count.

    Args:
        file_contents: The text to analyse, as a string.

    Returns:
        The rendered word cloud as returned by ``WordCloud.to_array()``.

    Note:
        If no countable word remains, the frequency table is empty and the
        error (if any) comes from ``generate_from_frequencies`` itself.
    """
    # Here is a list of punctuations and uninteresting words you can use to process your text
    # Raw string: the backslash is a literal character to strip, not an escape.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    # A set gives constant-time membership checks in the loop below.
    uninteresting_words = {
        "the", "a", "to", "if", "is", "it", "of", "and", "or", "an", "as", "i", "me", "my",
        "we", "our", "ours", "you", "your", "yours", "he", "she", "him", "his", "her", "hers", "its", "they", "them",
        "their", "what", "which", "who", "whom", "this", "that", "am", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "but", "at", "by", "with", "from", "here", "when", "where", "how",
        "all", "any", "both", "each", "few", "more", "some", "such", "no", "nor", "too", "very", "can", "will", "just",
    }
    # LEARNER CODE START HERE
    # Strip every punctuation character in a single pass.
    non_punctuation_text = file_contents.translate(
        str.maketrans('', '', punctuations))

    frequencies = {}
    for word in non_punctuation_text.split():
        # Skip tokens containing digits or other non-letter characters.
        if not word.isalpha():
            continue
        key = word.lower()
        if key in uninteresting_words:
            continue
        frequencies[key] = frequencies.get(key, 0) + 1

    # wordcloud
    cloud = wordcloud.WordCloud()
    cloud.generate_from_frequencies(frequencies)
    return cloud.to_array()
Показать наш wordcloud image:
# Display the word cloud image.
# NOTE: `file_contents` is not defined in this cell. It must already exist as a
# global holding the uploaded text; otherwise this line raises NameError.
myimage = calculate_frequencies(file_contents)
# Show the returned image array with the axes hidden.
plt.imshow(myimage, interpolation = 'nearest')
plt.axis('off')
plt.show()
Сообщение об ошибке:
NameError Traceback (most recent call last)
<ipython-input-2-fd0f708f372c> in <module>
1 # Display your wordcloud image
2
----> 3 myimage = calculate_frequencies(file_contents)
4 plt.imshow(myimage, interpolation = 'nearest')
5 plt.axis('off')
NameError: name 'file_contents' is not defined
Что я упускаю с параметром file_contents? Почему облако слов (Wordcloud) не отображается? Я пытался загрузить разные документы .txt через виджет uploader, но ни один из них не помог.