Вот обходной путь: с помощью функции pos_tag из библиотеки nltk
можно определить существительные в строке:
#Import nltk modules
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
# Sample input sentence, written entirely in lower case
text: str = "mr. john is living in canada"
# Define a function that tokenizes a string and tags each token with its part of speech
def ExtractNoun(sentence):
    """Tokenize *sentence* and tag every token with its part of speech.

    Despite the name, no filtering is done here: every token is returned
    together with its tag, and the caller selects the noun tags itself.

    Args:
        sentence: The raw text to analyse.

    Returns:
        The result of ``nltk.pos_tag`` applied to the word tokens, i.e. a
        list of ``(token, tag)`` pairs in the order the tokens appear.
    """
    # Keep the raw string and the token list under separate names instead of
    # rebinding the parameter to values of different types.
    tokens = nltk.word_tokenize(sentence)
    return nltk.pos_tag(tokens)
sent = ExtractNoun(text)
# sent now holds a list of (token, tag) tuples, one per token
print(sent)
# Output: [('mr.', 'NN'), ('john', 'NN'), ('is', 'VBZ'), ('living', 'VBG'), ('in', 'IN'), ('canada', 'NN')]
# Collect the tokens whose tag is exactly 'NN'
nn = [token for token, tag in sent if tag == 'NN']
# Rebuild the sentence from its whitespace-separated words, capitalizing
# each word that also appears in the noun list.
# NOTE(review): matching is done on whitespace-split words, so a word that
# differs from its token (e.g. attached punctuation) would presumably be
# left unchanged; confirm against the tokenizer's behaviour.
text_cap = " ".join(
    word.capitalize() if word in nn else word
    for word in text.split()
)
print(text_cap)
# Output: 'Mr. John is living in Canada'
Надеюсь, что это сработает!