Я пытаюсь токенизировать строку, используя шаблон, как показано ниже.
>>> splitter = re.compile(r'((\w*)(\d*)\-\s?(\w*)(\d*)|(?x)\$?\d+(\.\d+)?(\,\d+)?|([A-Z]\.)+|(Mr)\.|(Sen)\.|(Miss)\.|.$|\w+|[^\w\s])')
>>> splitter.split("Hello! Hi, I am debating this predicament called life. Can you help me?")
Я получаю следующий вывод.Может ли кто-нибудь указать, что мне нужно исправить, пожалуйста?Я в замешательстве по поводу всей связки "Нет".Также, если есть лучший способ токенизации строки, я бы очень признателен за дополнительную помощь.
['', 'Hello', None, None, None, None, None, None, None, None, None, None, '', '!', None, None, None, None, None, None, None, None, None, None, ' ', 'Hi', None,None, None, None, None, None, None, None, None, None, '', ',', None, None, None, None, None, None, None, None, None, None, ' ', 'I', None, None, None, None, None, None, None, None, None, None, ' ', 'am', None, None, None, None, None, None,None, None, None, None, ' ', 'debating', None, None, None, None, None, None, None, None, None, None, ' ', 'this', None, None, None, None, None, None, None, None, None, None, ' ', 'predicament', None, None, None, None, None, None, None, None, None, None, ' ', 'called', None, None, None, None, None, None, None, None, None, None, ' ', 'life', None, None, None, None, None, None, None, None, None, None, '', '.', None, None, None, None, None, None, None, None, None, None, ' ', 'Can', None, None, None, None, None, None, None, None, None, None, ' ', 'you', None, None, None, None, None, None, None, None, None, None, ' ', 'help', None, None,None, None, None, None, None, None, None, None, ' ', 'me', None, None, None, None, None, None, None, None, None, None, '', '?', None, None, None, None, None, None, None, None, None, None, '']
Вывод, который мне нужен: -
['Hello', '!', 'Hi', ',', 'I', 'am', 'debating', 'this', 'predicament', 'called', 'life', '.', 'Can', 'you', 'help', 'me', '?']
Спасибо.