На ваш вопрос уже есть несколько ответов, поэтому я решил добавить еще один. Я недавно ответил на другой вопрос по аналогичной проблеме. Я расширил код того ответа, чтобы он подходил и для вашего вопроса. Да, он длинный, но код охватывает несколько форматов дат. Вы можете расширить его по мере необходимости или объединить некоторые регулярные выражения в соответствии с вашими потребностями.
# Sample text that mixes several written date styles (month + ordinal day,
# "Nth of Month", abbreviated month with year, day + month); it is passed to
# find_dates() by the demo call at the end of the script.
string_with_dates = '''On December 18th there will be an initial meeting for the codeathon that is scheduled for the 24th of April.
Our second meeting will be on Jan 31, 2019, the third on 28th Feb and the fourth on the 4 March.'''
def find_dates(input):
    """Extract date-like substrings from the provided text.

    Every pattern in ``date_formats`` is tried, in order, against the whole
    text. For each pattern that matches, the first match is printed and
    also collected into the returned list. A single date may therefore be
    reported by more than one pattern (e.g. with and without its year).
    Matching is case-insensitive and anchored on word boundaries.

    Symbol references used in the pattern comments:
    YYYY = four-digit year
    MM = two-digit month (01=January, etc.)
    DD = two-digit day of month (01 through 31)
    hh = two digits of hour (00 through 23) (am/pm NOT allowed)
    mm = two digits of minute (00 through 59)
    ss = two digits of second (00 through 59)
    s = one or more digits representing a decimal fraction of a second
    TZD = time zone designator (Z or +hh:mm or -hh:mm)

    :param input: text to scan (the name shadows the builtin but is kept so
        existing keyword callers keep working)
    :return: list of matched date strings in pattern order; empty list when
        the text is empty/None or nothing matches
    """
    # Local import keeps the function usable even when the surrounding
    # module does not import ``re`` itself.
    import re

    if not input:
        return []

    # Shared building blocks; raw strings avoid invalid-escape warnings.
    full_month = (r'(January|February|March|April|May|June|July|August'
                  r'|September|October|November|December)')
    # 'Sept?' accepts both the "Sep" and "Sept" abbreviations.
    abbr_month = r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sept?|Oct|Nov|Dec)'
    suffix = r'(st|nd|rd|th)'

    date_formats = [
        # Matches date format MM/DD/YYYY
        r'(\d{2}\/\d{2}\/\d{4})',
        # Matches date format MM-DD-YYYY
        r'(\d{2}-\d{2}-\d{4})',
        # Matches date format YYYY/MM/DD
        r'(\d{4}\/\d{1,2}\/\d{1,2})',
        # Matches ISO 8601 format (YYYY-MM-DD)
        r'(\d{4}-\d{1,2}-\d{1,2})',
        # Matches ISO 8601 format YYYYMMDD
        r'(\d{4}\d{2}\d{2})',
        # Matches full_month_name dd, YYYY or full_month_name dd[suffixes], YYYY
        # (day is one OR two digits in both forms, so "18th, 2019" matches)
        full_month + r'\s\d{1,2}' + suffix + r'?\W\s\d{4}',
        # Matches full_month_name and dd[suffixes]
        full_month + r'\s\d{1,2}' + suffix,
        # Matches dd full_month_name
        r'\d{1,2}\s' + full_month,
        # Matches dd[suffixes] of full_month_name, YYYY
        r'\d{1,2}' + suffix + r'\sof\s' + full_month + r',\s\d{4}',
        # Matches dd[suffixes] of full_month_name
        r'\d{1,2}' + suffix + r'\sof\s' + full_month,
        # Matches dd abbreviated_month_name
        r'\d{1,2}\s' + abbr_month,
        # Matches dd[suffixes] abbreviated_month_name
        r'\d{1,2}' + suffix + r'\s' + abbr_month,
        # Matches abbreviated_month_name dd, YYYY or abbreviated_month_name dd[suffixes], YYYY
        abbr_month + r'\s\d{1,2}' + suffix + r'?\W\s\d{4}',
        # Matches abbreviated_month_name and dd[suffixes]
        abbr_month + r'\s\d{1,2}' + suffix,
        # Matches ISO 8601 format with time and time zone
        # yyyy-mm-ddThh:mm:ss[.nnnnnn]+|-hh:mm (fractional seconds optional)
        r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(\+|-)\d{2}:\d{2}',
        # Matches ISO 8601 format Datetime with timezone
        # yyyymmddThhmmssZ
        r'\d{8}T\d{6}Z',
        # Matches ISO 8601 format Datetime with timezone
        # yyyymmddThhmmss+|-hhmm
        r'\d{8}T\d{6}(\+|-)\d{4}',
    ]

    found = []
    for item in date_formats:
        # \b on both sides stops a pattern from matching inside a longer
        # token (e.g. "Mar" inside "March").
        find_date = re.search(r'\b' + item + r'\b', input, re.IGNORECASE)
        if find_date:
            print(find_date.group(0))
            found.append(find_date.group(0))
    return found
# outputs (one line per matching pattern, in pattern order):
# December 18th
# 4 March
# 24th of April
# 28th Feb
# Jan 31, 2019
# Demo: scan the sample text; find_dates prints each date string it matches.
find_dates(string_with_dates)