У меня есть датафрейм со столбцом дат, записанных либо в формате %Y-%m-%d %H:%M:%S, либо в формате %Y-%m-%d:
...
87986 1979-06-18 00:00:00
87987 1979-06-18 00:00:00
87988 1987-03-18
87989 1983-11-01
...
Я хотел бы привести их все к одному формату. Я попробовал:
df['birthdate']=pd.to_datetime(df['birthdate'].astype(str), format='%Y-%m-%d')
Но получил ошибку:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1860 try:
-> 1861 values, tz_parsed = conversion.datetime_to_datetime64(data)
1862 # If tzaware, these values represent unix timestamps, so we
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-345-d5c1036edc26> in <module>
10 return born
11
---> 12 df['birthdate']=pd.to_datetime(df['birthdate'].astype(str), format='%Y-%m-%d')
13 df["age"] = df["birthdate"].apply(calculate_age)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)
590 else:
591 from pandas import Series
--> 592 values = convert_listlike(arg._values, True, format)
593 result = Series(values, index=arg.index, name=arg.name)
594 elif isinstance(arg, (ABCDataFrame, compat.MutableMapping)):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike_datetimes(arg, box, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
300 arg, dayfirst=dayfirst, yearfirst=yearfirst,
301 utc=utc, errors=errors, require_iso8601=require_iso8601,
--> 302 allow_object=True)
303
304 if tz_parsed is not None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1864 return values.view('i8'), tz_parsed
1865 except (ValueError, TypeError):
-> 1866 raise e
1867
1868 if tz_parsed is not None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1855 dayfirst=dayfirst,
1856 yearfirst=yearfirst,
-> 1857 require_iso8601=require_iso8601
1858 )
1859 except ValueError as e:
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
ValueError: time data None doesn't match format specified
Однако ни одно из значений в моём столбце не равно None.
Тогда я попробовал:
df['birthdate']=pd.to_datetime(df['birthdate']).dt.strftime('%Y-%m-%d')
Но получил:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1860 try:
-> 1861 values, tz_parsed = conversion.datetime_to_datetime64(data)
1862 # If tzaware, these values represent unix timestamps, so we
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
OutOfBoundsDatetime Traceback (most recent call last)
<ipython-input-350-b23ba7455ab9> in <module>
----> 1 df['birthdate']=pd.to_datetime(df['birthdate']).dt.strftime('%Y-%m-%d')
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)
590 else:
591 from pandas import Series
--> 592 values = convert_listlike(arg._values, True, format)
593 result = Series(values, index=arg.index, name=arg.name)
594 elif isinstance(arg, (ABCDataFrame, compat.MutableMapping)):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike_datetimes(arg, box, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
300 arg, dayfirst=dayfirst, yearfirst=yearfirst,
301 utc=utc, errors=errors, require_iso8601=require_iso8601,
--> 302 allow_object=True)
303
304 if tz_parsed is not None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1864 return values.view('i8'), tz_parsed
1865 except (ValueError, TypeError):
-> 1866 raise e
1867
1868 if tz_parsed is not None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1855 dayfirst=dayfirst,
1856 yearfirst=yearfirst,
-> 1857 require_iso8601=require_iso8601
1858 )
1859 except ValueError as e:
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas/_libs/tslibs/np_datetime.pyx in pandas._libs.tslibs.np_datetime.check_dts_bounds()
OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 7974-04-23 00:00:00
Приложение: функция calculate_age
Всё это форматирование нужно только для того, чтобы рассчитать возраст, то есть время, прошедшее от даты рождения до текущего момента. Я написал функцию:
from datetime import datetime, date
def calculate_age(born, today=None):
    """Return the age in completed calendar years for a date of birth.

    Args:
        born: Date of birth as a ``pandas.Timestamp``, ``datetime`` or
            ``date``. Missing values (``None``, ``NaT``/``NaN`` or the
            literal string ``'NaT'``) are accepted.
        today: Reference date the age is measured against. Defaults to
            ``date.today()``.

    Returns:
        The number of full years between ``born`` and ``today`` as an
        ``int``, or ``born`` itself, unchanged, when it is a missing value.
    """
    # pd.to_datetime(..., errors='coerce') produces NaT, which compares
    # unequal to both None and the string 'NaT', so membership in
    # [None, 'NaT'] never catches it; pd.isna() does.
    if born is None or (isinstance(born, str) and born == 'NaT') or pd.isna(born):
        return born
    if today is None:
        today = date.today()
    # Timestamp/datetime values carry a time component; keep only the date.
    born_date = born.date() if isinstance(born, datetime) else born
    # Compare (month, day) instead of dividing a day count by 365.2425:
    # the division is off by one around birthdays (365 days after
    # 2001-01-01 gives 0.999 years, truncated to 0).
    had_birthday = (today.month, today.day) >= (born_date.month, born_date.day)
    return today.year - born_date.year - (0 if had_birthday else 1)
# Parse the birthdate column; errors='coerce' turns values that cannot be
# converted into NaT instead of raising.
df["birthdate"] = df["birthdate"].pipe(pd.to_datetime, errors="coerce")
# Derive the age column element-wise from the parsed birthdates.
df["age"] = df["birthdate"].apply(calculate_age)