Я хочу иметь набор функций, которые могут отображать гистограмму переменной в DataFrame для столбцов, которые могут соответствовать дням, месяцам или часам. Когда я пытаюсь сделать это, я получаю графики, в которых дни или месяцы перечислены в алфавитном порядке по горизонтальной оси, когда они должны быть перечислены по времени. Как это сделать?
Мой код MWE выглядит следующим образом и предназначен для ноутбука Jupyter:
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
plt.rcParams["figure.figsize"] = (17, 10)
df = pd.DataFrame(
[
["2017-01", 1],
["2017-01", 1],
["2017-01", 1],
["2017-02", 1],
["2017-03", 1],
["2017-03", 1],
["2017-03", 1],
["2017-03", 1],
["2017-04", 1],
["2017-05", 1],
["2017-05", 1],
["2017-05", 1],
["2017-05", 1],
["2017-05", 1],
["2017-06", 1],
["2017-06", 1],
["2017-06", 1],
["2017-06", 1],
["2017-06", 1],
["2017-06", 1],
["2017-06", 1],
["2017-06", 1],
["2017-06", 1],
["2017-07", 1],
["2017-07", 1],
["2017-08", 1],
["2017-08", 1],
["2017-08", 1],
["2017-08", 1],
["2017-08", 1],
["2017-08", 1],
["2017-09", 1],
["2017-09", 1],
["2017-09", 1],
["2017-09", 1],
["2017-09", 1],
["2017-10", 1],
["2017-10", 1],
["2017-10", 1],
["2017-11", 1],
["2017-11", 1],
["2017-11", 1],
["2017-11", 1],
["2017-11", 1],
["2017-12", 1]
],
columns = ["datetime", "score"]
)
df["datetime"] = pd.to_datetime(df["datetime"])
#df["hour"] = df["datetime"].dt.hour
#df["weekday_name"] = df["datetime"].dt.weekday_name
df["month_name"] = df["datetime"].dt.strftime("%B")
df.index = df["datetime"]
del df["datetime"]
df.head()
# ----------
def histogram_hour_counts(df, variable):
"""
Create a day-long histogram of counts of the variable for each hour. It is
assumed that the DataFrame index is datetime and that the variable
`hour` exists.
"""
if not df.index.dtype in ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]:
log.error("index is not datetime")
return False
if not "hour" in df.columns:
log.error("field hour not found in DataFrame")
return False
counts = df.groupby(by = "hour")[variable].count()
counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)
def histogram_day_counts(df, variable):
"""
Create a week-long histogram of counts of the variable for each day. It is
assumed that the DataFrame index is datetime and that the variable
`weekday_name` exists.
"""
if not df.index.dtype in ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]:
log.error("index is not datetime")
return False
if not "weekday_name" in df.columns:
log.error("field weekday_name not found in DataFrame")
return False
counts = df.groupby(by = "weekday_name")[variable].count()
counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)
def histogram_month_counts(df, variable):
"""
Create a year-long histogram of counts of the variable for each month. It is
assumed that the DataFrame index is datetime and that the variable
`month_name` exists.
"""
if not df.index.dtype in ["datetime64[ns]", "<M8[ns]", ">M8[ns]"]:
log.error("index is not datetime")
return False
if not "month_name" in df.columns:
log.error("field month_name not found in DataFrame")
return False
counts = df.groupby(by = "month_name")[variable].count()
counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)
# ----------
histogram_month_counts(variable = "score", df = df)