Я не могу исправить проблему с модулем DateFinder. Вы написали, что вам нужно решение, поэтому я подготовил для вас этот вариант. Это пока незавершённая работа, так что вы можете доработать её по мере необходимости. Кроме того, некоторые регулярные выражения можно было бы объединить, но я хотел показать их вам по отдельности. Надеюсь, этот ответ поможет вам, пока вы не найдёте другое решение, которое лучше подходит для ваших нужд.
import re
# Sample text mixing several date notations (MM/DD/YYYY, month-name dates and
# an ISO 8601 date-time with a UTC offset); used below as demo input.
string_with_dates = (
    'The stock has a 04/30/2009 great record of positive Sept 1st, 2005 earnings surprises having beaten the trade Consensus EPS estimate in each of the last '
    'four quarters In its last earnings report on March 8, 2018, Triple-S Management reported EPS of $0.6 vs.the trade Consensus of $0.24 while it beat the '
    'consensus revenue estimate by 4.93%. The next trading day will occur at 2019-02-15T12:00:00-06:30'
)
def find_dates(input):
    '''
    Extract date and date-time strings from the provided text.

    Each supported notation is searched for across the whole text, so every
    occurrence is reported (not just the first one per notation). Each match
    is printed to stdout and also collected into the returned list. A span of
    text that is matched by more than one pattern (for example "May 5, 2018",
    where "May" is both a full and an abbreviated month name) is reported
    only once.

    Matching is case-insensitive and anchored on word boundaries.

    Symbol references:
    YYYY = four-digit year
    MM = two-digit month (01=January, etc.)
    DD = two-digit day of month (01 through 31)
    hh = two digits of hour (00 through 23) (am/pm NOT allowed)
    mm = two digits of minute (00 through 59)
    ss = two digits of second (00 through 59)
    s = one or more digits representing a decimal fraction of a second
    TZD = time zone designator (Z or +hh:mm or -hh:mm)

    :param input: text to scan (the name shadows the builtin, but it is kept
        so that existing keyword callers continue to work)
    :return: list of matched date strings, ordered by pattern (in the order
        listed below) and, within one pattern, by position in the text
    '''
    full_months = (r'(?:January|February|March|April|May|June|July|August'
                   r'|September|October|November|December)')
    # "Sept?" accepts both common abbreviations, "Sep" and "Sept".
    short_months = r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sept?|Oct|Nov|Dec)'
    # " d", " dd", " dst", " ddth", ... then one separator character
    # (typically a comma), whitespace and a four-digit year.
    day_and_year = r'\s\d{1,2}(?:st|nd|rd|th)?\W\s\d{4}'

    # Raw strings keep the regex escapes (\d, \s, \W, ...) from being parsed
    # as (invalid) Python string escapes.
    date_formats = [
        # Matches date format MM/DD/YYYY
        r'\d{2}/\d{2}/\d{4}',
        # Matches date format MM-DD-YYYY
        r'\d{2}-\d{2}-\d{4}',
        # Matches date format YYYY/MM/DD
        r'\d{4}/\d{1,2}/\d{1,2}',
        # Matches ISO 8601 format (YYYY-MM-DD)
        r'\d{4}-\d{1,2}-\d{1,2}',
        # Matches ISO 8601 format YYYYMMDD
        r'\d{8}',
        # Matches full_month_name dd, YYYY or full_month_name dd[suffix], YYYY
        full_months + day_and_year,
        # Matches abbreviated_month_name dd, YYYY or
        # abbreviated_month_name dd[suffix], YYYY
        short_months + day_and_year,
        # Matches ISO 8601 format with time and time zone
        # YYYY-MM-DDThh:mm:ss[.s]TZD
        r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})',
        # Matches ISO 8601 basic format date-time in UTC
        # YYYYMMDDThhmmssZ
        r'\d{8}T\d{6}Z',
        # Matches ISO 8601 basic format date-time with numeric offset
        # YYYYMMDDThhmmss+|-hhmm
        r'\d{8}T\d{6}[+-]\d{4}',
    ]

    found = []
    seen_spans = set()
    for item in date_formats:
        date_format = re.compile(r'\b' + item + r'\b', re.IGNORECASE)
        for match in date_format.finditer(input):
            span = match.span()
            if span in seen_spans:
                # Same text already reported by an earlier pattern.
                continue
            seen_spans.add(span)
            print(match.group(0))
            found.append(match.group(0))
    return found
# Demo: scan the sample text; find_dates prints each date string it matches.
find_dates(string_with_dates)
# Output:
# 04/30/2009
# March 8, 2018
# Sept 1st, 2005
# 2019-02-15T12:00:00-06:30