Я хочу удалить слово 'dan' в процессе фильтрации, но это не сработало. Вот мой код:
# Build the stop-word set once, before the loop: it does not depend on the
# current row, so rebuilding it for every CSV row is wasted work.
stop_words = set(stopwords.words("indonesian"))
# Extra project-specific words to drop in addition to the NLTK list.
stop_words_new = ['aku','dan','duh','hhhmmm','thn','nih','tgl',
                  'hai','jazz','bro','broo','msh','']
new_stopwords_list = stop_words.union(stop_words_new)

# Clean and tokenize the text column of every CSV row.
for row in readCSV:
    _word = []
    username = row[0]
    date = row[1]
    # Lowercase first so the later stop-word comparison is case-insensitive.
    text = row[2].lower()
    # Drop @mentions and URLs before the punctuation pass below, which would
    # otherwise strip the "@" / ":" / "/" characters these patterns rely on.
    text = re.sub(r'@[A-Za-z0-9_]+','',text)
    text = re.sub(r'http\S+', '',text)
    # Remove punctuation characters and digits via the replaceMultiple helper.
    text = replaceMultiple(text, ["!","@","#","$","%","^","&","*","(",
                                  ")","_","-","+","=","{","}","[","]",
                                  "\\","/",",",".","?","<",">",":",";",
                                  "'",'"',"~","0","1","2","3","4","5","6","7","8","9"], '')
    text = text.strip()
    nltk_tokens = nltk.word_tokenize(text)
    # NOTE(review): the step that actually filters nltk_tokens is not shown
    # here — confirm it tests membership in new_stopwords_list (the merged
    # set) on each whole token.
Слова из stop_words_new удаляются, кроме 'dan'. Почему?