import random  # for test data

import matplotlib.pyplot as plt
import numpy as np  # for test data
import pandas as pd
import seaborn as sns
# Build a small, reproducible test dataset: 40 rows spaced 15 minutes apart.
np.random.seed(365)  # nothing below draws from NumPy's RNG; kept so the setup matches the original
random.seed(365)
rows = 40
devices = ['846ee', '0a26e', '8a906', '6bf11', 'd3923']
data = {
    'device': [random.choice(devices) for _ in range(rows)],
    # A date string is used for the start because `datetime` is not imported
    # in this file; date_range yields the same 15-minute series that
    # bdate_range did, since the explicit freq overrides the business-day default.
    'datetime': pd.date_range('2020-07-01', freq='15min', periods=rows).tolist(),
}

# create test dataframe
df = pd.DataFrame(data)

# The 'datetime' column here is already datetime64. For a real dataframe,
# convert it first with pd.to_datetime(df['datetime']).

# Time-of-day component as datetime.time objects, truncated to whole minutes.
# 'min' is the non-deprecated spelling of the old 'T' frequency alias.
df['time'] = df['datetime'].dt.floor('min').dt.time

# Time of day as a timedelta since midnight (renders as "0 days HH:MM:SS").
df['timedelta'] = pd.to_timedelta(df['time'].astype(str))

# Time of day as a float: whole hours plus the fractional part from the minutes.
df['hours'] = df['timedelta'] / pd.Timedelta(hours=1)

# Just the integer hour.
df['hour'] = df['datetime'].dt.hour

# Show the first 15 rows, matching the table printed after this block.
# `display` only exists inside IPython/Jupyter; fall back to print elsewhere.
try:
    display(df.head(15))
except NameError:
    print(df.head(15))
- Этот вид должен прояснить разницу между методами извлечения времени.
device datetime time timedelta hours hour
0 8a906 2020-07-01 00:00:00 00:00:00 0 days 00:00:00 0.00 0
1 0a26e 2020-07-01 00:15:00 00:15:00 0 days 00:15:00 0.25 0
2 8a906 2020-07-01 00:30:00 00:30:00 0 days 00:30:00 0.50 0
3 d3923 2020-07-01 00:45:00 00:45:00 0 days 00:45:00 0.75 0
4 0a26e 2020-07-01 01:00:00 01:00:00 0 days 01:00:00 1.00 1
5 d3923 2020-07-01 01:15:00 01:15:00 0 days 01:15:00 1.25 1
6 6bf11 2020-07-01 01:30:00 01:30:00 0 days 01:30:00 1.50 1
7 d3923 2020-07-01 01:45:00 01:45:00 0 days 01:45:00 1.75 1
8 6bf11 2020-07-01 02:00:00 02:00:00 0 days 02:00:00 2.00 2
9 d3923 2020-07-01 02:15:00 02:15:00 0 days 02:15:00 2.25 2
10 0a26e 2020-07-01 02:30:00 02:30:00 0 days 02:30:00 2.50 2
11 846ee 2020-07-01 02:45:00 02:45:00 0 days 02:45:00 2.75 2
12 0a26e 2020-07-01 03:00:00 03:00:00 0 days 03:00:00 3.00 3
13 846ee 2020-07-01 03:15:00 03:15:00 0 days 03:15:00 3.25 3
14 846ee 2020-07-01 03:30:00 03:30:00 0 days 03:30:00 3.50 3
Постройте график количества записей по устройствам для каждого часа с помощью seaborn.countplot
# Bar chart of record counts per hour, with one bar colour per device.
# Relies on `plt` (matplotlib.pyplot) and `sns` (seaborn) being imported at
# the top of the file, and on `df` having the 'hour' and 'device' columns.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='hour', hue='device', ax=ax)
# Anchor the legend just outside the right edge so it does not cover the bars.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

- Используйте
seaborn.FacetGrid
- Это покажет почасовое распределение для каждого устройства.
import seaborn as sns
import matplotlib.pyplot as plt
# One histogram row per device showing how its records spread over the day.
g = sns.FacetGrid(df, row='device', height=5)
# sns.distplot is deprecated; histplot with stat='density' and kde=True is its
# documented replacement. binrange=(0, 24) with bins=24 makes every bin exactly
# one hour wide, lining the bars up with the hourly ticks set below.
g.map(sns.histplot, 'hours', bins=24, binrange=(0, 24), kde=True, stat='density')
# Show the full day on the x-axis, with a tick at every hour.
g.set(xlim=(0, 24), xticks=range(0, 25, 1))
введите описание изображения здесь