Вы можете сделать что-то вроде этого:
import dateparser
import calendar
import re
# Lower-cased month names taken from the calendar module.  Index 0 of each
# table is an empty string, so a month's position equals its number (1-12).
month_full = list(map(str.lower, calendar.month_name))  # ['', 'january', 'february', ...]
month_short = list(map(str.lower, calendar.month_abbr))  # ['', 'jan', 'feb', ...]
def parseDate(date_str):
    """Extract candidate dates from a filename-like string.

    Month names (full or abbreviated, in any letter case) are replaced by
    their zero-padded month number, the ``-`` and ``.`` separators are
    dropped, and the first run of 6-8 consecutive digits is taken as the
    date.  That run is then parsed once per plausible day/month ordering.

    Args:
        date_str: Text containing a date, e.g. ``"02AUG2018-Meeting.pdf"``.

    Returns:
        Normally a list of the distinct parse results whose ``minute`` is 0.
        More than one entry means the day/month order is ambiguous.  If the
        digit run has no format list (7 digits) or any parse attempt fails
        (for instance yields ``None``), the result of ``dateparser.parse``
        without explicit formats is returned instead, not wrapped in a list.

    Raises:
        IndexError: If, after the substitutions above, the text contains no
            run of at least 6 consecutive digits.
    """
    # Lower-case up front: the month tables are lower-case, so matching
    # against the raw text would never find "August" or "AUG".
    text = date_str.lower()

    # Full names go first so that "august" is consumed before the "aug" pass.
    for names in (month_full, month_short):
        # Entry 0 of each table is an empty string and must be skipped.
        for number, name in enumerate(names[1:], start=1):
            if name in text:
                text = text.replace(name, str(number).zfill(2))

    text = text.replace('-', '').replace('.', '')

    # First run of 6-8 digits; indexing raises IndexError when there is none.
    digits = re.findall(r'([0-9]{6,8})', text)[0]

    # Candidate layouts keyed by the number of digits found.
    # NOTE(review): '%M' is the strptime *minute* directive, while the 8-digit
    # list uses '%m' (month).  This looks like a typo for '%d%m%y'; it is left
    # unchanged because fixing it alters which results survive the minute
    # filter below - confirm the intended behaviour before changing it.
    date_formats = {6: ['%d%M%y', '%m%d%y'], 8: ['%d%m%Y', '%m%d%Y']}

    try:
        parsed = [
            dateparser.parse(digits, date_formats=[fmt])
            for fmt in date_formats[len(digits)]
        ]
        # Drop results that carry a minute component, then de-duplicate.
        return list({result for result in parsed if result.minute == 0})
    except Exception:
        # Reached for a 7-digit run (KeyError) or when a parse attempt gives
        # an object without ``minute`` (AttributeError); fall back to
        # dateparser's own format detection.
        return dateparser.parse(digits)
# Demo: run the parser over sample file names written in several date layouts.
dates = [
    "19022019_tasks.pdf",
    "Meeting_Minutes 12-19-18.pdf",
    "tasks_for-August-22-2018-Special-Meeting",
    "27092018_minutes.pdf",
    "02AUG2018-Meeting.pdf",
    "Minutes_meeting_08.02.2018.pdf",
    "file020719.pdf",
]
for filename in dates:
    print(filename)
    # The parsed result is followed by one blank line to separate entries.
    print('===>', parseDate(filename), end='\n\n')
Вывод:
19022019_tasks.pdf
===> [datetime.datetime(2019, 2, 19, 0, 0)]
Meeting_Minutes 12-19-18.pdf
===> [datetime.datetime(2018, 12, 19, 0, 0)]
tasks_for-August-22-2018-Special-Meeting
===> [datetime.datetime(2018, 8, 22, 0, 0)]
27092018_minutes.pdf
===> [datetime.datetime(2018, 9, 27, 0, 0)]
02AUG2018-Meeting.pdf
===> [datetime.datetime(2018, 2, 8, 0, 0), datetime.datetime(2018, 8, 2, 0, 0)]
Minutes_meeting_08.02.2018.pdf
===> [datetime.datetime(2018, 2, 8, 0, 0), datetime.datetime(2018, 8, 2, 0, 0)]
file020719.pdf
===> [datetime.datetime(2019, 2, 7, 0, 0)]
Обратите внимание, что в некоторых случаях вам придётся самостоятельно решить, какой из вариантов правильный (например, 08.02.2018 может означать как 8 февраля, так и 2 августа).