У меня есть фрейм данных, в котором есть столбцы следующих типов: object, float 64 и int64.Я хочу изменить эти типы на те, которые я могу использовать для визуализации данных.Я уже пробовал с помощью метода astype, но я получаю ошибку.Как мне поменять эти типы на более подходящие для визуализации данных.
#Import data into a dataframe
raw_data = pd.read_csv('FuelPrices2016 -2019 ulsp.csv')
raw_data.head()
#Import data into a dataframe
raw_data = pd.read_csv('FuelPrices2016 -2019 ulsp.csv')
raw_data.head()
Date Pump price in pence/litre ULSP Duty rate in pence/litre/ULSP VAT percentage rate Unnamed: 4
0 02/01/2012 132.40 57.95 20 NaN
1 09/01/2012 132.68 57.95 20 NaN
2 16/01/2012 133.29 57.95 20 NaN
3 23/01/2012 133.72 57.95 20 NaN
4 30/01/2012 134.10 57.95 20 NaN
#Drop unnamed column
raw_b = raw_data.drop(columns=['Unnamed: 4',])
raw_b
Date Pump price in pence/litre ULSP Duty rate in pence/litre/ULSP VAT percentage rate
0 02/01/2012 132.40 57.95 20
1 09/01/2012 132.68 57.95 20
2 16/01/2012 133.29 57.95 20
3 23/01/2012 133.72 57.95 20
4 30/01/2012 134.10 57.95 20
... ... ... ... ...
396 05/08/2019 128.37 57.95 20
397 12/08/2019 128.36 57.95 20
398 19/08/2019 128.17 57.95 20
399 26/08/2019 128.22 57.95 20
400 02/09/2019 127.86 57.95 20
401 rows × 4 columns
#Describe the data
raw_b.describe()
Pump price in pence/litre ULSP Duty rate in pence/litre/ULSP VAT percentage rate
count 401.000000 4.010000e+02 401.0
mean 123.043840 5.795000e+01 20.0
std 10.175522 7.114304e-15 0.0
min 101.360000 5.795000e+01 20.0
25% 115.600000 5.795000e+01 20.0
50% 123.270000 5.795000e+01 20.0
75% 130.830000 5.795000e+01 20.0
max 142.170000 5.795000e+01 20.0
#Check the types of the columns
raw_b.dtypes
Date object
Pump price in pence/litre ULSP float64
Duty rate in pence/litre/ULSP float64
VAT percentage rate int64
dtype: object
#Change date into a date
raw_b(pd.to_datetime(['Date']))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1978 try:
-> 1979 values, tz_parsed = conversion.datetime_to_datetime64(data)
1980 # If tzaware, these values represent unix timestamps, so we
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-22-5ab1ccfbf9a4> in <module>
1 #Change date into a date
----> 2 raw_b(pd.to_datetime(['Date']))
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
206 else:
207 kwargs[new_arg_name] = new_arg_value
--> 208 return func(*args, **kwargs)
209
210 return wrapper
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)
792 result = _convert_and_box_cache(arg, cache_array, box)
793 else:
--> 794 result = convert_listlike(arg, box, format)
795 else:
796 result = convert_listlike(np.array([arg]), box, format)[0]
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike_datetimes(arg, box, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
461 errors=errors,
462 require_iso8601=require_iso8601,
--> 463 allow_object=True,
464 )
465
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1982 return values.view("i8"), tz_parsed
1983 except (ValueError, TypeError):
-> 1984 raise e
1985
1986 if tz_parsed is not None:
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\pandas\core\arrays\datetimes.py in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object)
1973 dayfirst=dayfirst,
1974 yearfirst=yearfirst,
-> 1975 require_iso8601=require_iso8601,
1976 )
1977 except ValueError as e:
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime_object()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime_object()
pandas\_libs\tslibs\parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\dateutil\parser\_parser.py in parse(timestr, parserinfo, **kwargs)
1356 return parser(parserinfo).parse(timestr, **kwargs)
1357 else:
-> 1358 return DEFAULTPARSER.parse(timestr, **kwargs)
1359
1360
~\Anaconda3\envs\py3-TF2.0\lib\site-packages\dateutil\parser\_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
647
648 if res is None:
--> 649 raise ValueError("Unknown string format:", timestr)
650
651 if len(res) == 0:
ValueError: ('Unknown string format:', 'Date')