Следующий подход состоит в том, чтобы найти моды (пики гистограммы) и их индексы, то есть значения x и y, в которых нужно нарисовать точку и подпись к ней. Он использует np.histogram
для получения количества значений в каждом бине и границ бинов, соответствующих гистограмме.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def get_random_prices(min_price=0, max_price=20):
    """Sample prices from three normal distributions.

    Draws 1000 values from each of three normal distributions centred at
    5, 7 and 11 (standard deviation 0.3 each), concatenates them and keeps
    only the values inside ``[min_price, max_price]``.

    Parameters
    ----------
    min_price, max_price : float, optional
        Inclusive bounds of the values to keep. Defaults are 0 and 20.

    Returns
    -------
    numpy.ndarray
        1-D array of the sampled values that fall within the bounds, in
        the order they were drawn.
    """
    dist1 = np.random.normal(loc=5, scale=.3, size=1000)
    dist2 = np.random.normal(loc=7, scale=.3, size=1000)
    dist3 = np.random.normal(loc=11, scale=.3, size=1000)
    all_dist = np.concatenate([dist1, dist2, dist3])
    # Combine both conditions into a single mask. Chaining two boolean
    # indexings would apply a full-length mask to the already-filtered
    # (shorter) array and raise IndexError as soon as anything is dropped.
    in_range = (all_dist >= min_price) & (all_dist <= max_price)
    return all_dist[in_range]
def get_modes(counts, min_sep=.5):
    """Find local peaks (modes) in a sequence of histogram counts.

    Walks through consecutive pairs of counts. A bin is reported as a mode
    when the next bin is strictly lower and the sequence was not already
    falling. Equal neighbours are treated as "still rising", so for a flat
    top the last bin of the plateau is the one reported.

    Assumptions / limitations:
    * counts are expected to be smoothed already; noisy counts produce
      spurious modes;
    * a mode is only recorded once a lower bin follows it, so a peak in the
      very last bin is never reported.

    Parameters
    ----------
    counts : sequence of numbers
        Histogram bin counts (e.g. the first value returned by
        ``np.histogram``).
    min_sep : float, optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    list of (count, index) tuples
        Height of each detected mode and the index of its bin in ``counts``.
    """
    modes = []
    rising = True
    for i, (prev, curr) in enumerate(zip(counts, counts[1:])):
        if curr >= prev:
            rising = True
        elif rising:
            # First drop after a rise: bin i is the peak. Read its height
            # straight from the data so that a peak in the very first bin
            # reports its real count rather than a placeholder 0.
            modes.append((prev, i))
            rising = False
    return modes
fig, ax = plt.subplots()

# Create randomly generated data.
df = pd.DataFrame({'price': get_random_prices()})

# Compute the histogram data and draw the same histogram. Pass ax explicitly
# so the bars and the peak markers are guaranteed to share one Axes.
n_bins = 40
counts, divisions = np.histogram(df['price'], bins=n_bins)
df['price'].hist(bins=n_bins, ax=ax)

# Find the peaks.
modes = get_modes(counts)

# np.histogram returns n_bins + 1 bin edges; use the midpoint of each bin so
# the marker sits over the middle of the peak bar, not on its left edge.
bin_centers = (divisions[:-1] + divisions[1:]) / 2

# Offsets keep the label away from the dot.
# TODO: derive these from the axis scale instead of hard-coding them.
offset_x, offset_y = .1, 2

# Add a dot and a label for each peak on top of the histogram.
for mode, mode_index in modes:
    peak_x = bin_centers[mode_index]
    ax.plot(peak_x, mode, 'ro')
    ax.text(peak_x + offset_x, mode + offset_y, str(mode), color='r')
Приведённый ниже график получен при modes, равном [(229, 5), (248, 15), (239, 35)]; значения np.__version__ и pd.__version__ были ('1.16.2', '0.24.2'):