См. ниже результат моего сценария: я хотел бы убрать скобки и кавычки при заполнении CSV:
# Shared TreeTagger instance created with TAGLANG='fr'; lemmatize() below
# calls tagger.tag_text() on it for every input text.
tagger = treetaggerwrapper.TreeTagger(TAGLANG='fr')
def lemmatize(text):
    """Return the lemmas of *text*, space-joined, inside a one-element list.

    The text is tagged with the module-level ``tagger`` and the raw output is
    parsed with ``treetaggerwrapper.make_tags(..., allow_extra=True)``. Only
    parsed items whose class is named ``Tag`` contribute; other item kinds
    are skipped. For each kept item the field at index 2 (the lemma slot of
    treetaggerwrapper's ``Tag`` tuple) is read, and when it holds several
    ``|``-separated alternatives only the first one is used.

    NOTE: the result is a list containing a single string, not a bare string,
    so storing it directly in a DataFrame cell keeps the list wrapper.
    """
    parsed_items = treetaggerwrapper.make_tags(
        tagger.tag_text(text), allow_extra=True
    )
    first_lemmas = [
        item[2].split("|")[0]
        for item in parsed_items
        if type(item).__name__ == "Tag"
    ]
    return [" ".join(first_lemmas)]
# lemmatize() returns a list of strings; join it so every cell holds a plain
# string instead of a list, whose repr (e.g. ['le chat']) would otherwise end
# up in the CSV with brackets and quotes.
csv_df['phrase_lemmatisée'] = csv_df['Verbatim'].apply(
    lambda text: " ".join(lemmatize(text))
)
Так что буду рад любой помощи!