На вашем первом изображении сплайн является приближением ко всем вашим точкам данных. В вашем фрагменте spline
является атрибутом, установленным для вашей строки между вашим графическим представлением ваших точек данных. Это очень разные вещи. Чтобы выполнить то, что вы ищете, вы должны более внимательно изучить вклады пользователей np8
и Matthew Drury
на других SO-сообщений и github . Вам также следует внимательно посмотреть, как рассчитываются разные сплайны. Следующий график, где оценивается естественный кубический сплайн , получен с помощью примера кода с именем Snippet 2: The whole thing
ниже. Он довольно большой, но в основном это происходит из-за функции get_natural_cubic_spline_model
из Python естественного сглаживания сплайнов . Сюжетная часть просто следует этой логике:
Фрагмент 1: Фокусируется только на сюжетной части
# data points
points = go.Scatter(
x = x,
y = y,
mode = 'markers',
name = 'iris')
# spline
line = go.Scatter(
x = df_spline['x'],
y = df_spline['y_est'],
mode = 'lines',
name = 'spline')
# gather data
data=[points, line]
# build figure
fig=go.Figure(data)
# plot
fig.show()
Сюжет:
Фрагмент 2: Все это
# imports
import plotly.express as px
import plotly.graph_objs as go
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
# sample data set
iris = px.data.iris() # iris is a pandas DataFrame
x=iris['sepal_length']
y=iris['sepal_width']
# spline using function from https://stackoverflow.com/questions/51321100/python-natural-smoothing-splines
def get_natural_cubic_spline_model(x, y, minval=None, maxval=None, n_knots=None, knots=None):
"""
Get a natural cubic spline model for the data.
For the knots, give (a) `knots` (as an array) or (b) minval, maxval and n_knots.
If the knots are not directly specified, the resulting knots are equally
space within the *interior* of (max, min). That is, the endpoints are
*not* included as knots.
Parameters
----------
x: np.array of float
The input data
y: np.array of float
The outpur data
minval: float
Minimum of interval containing the knots.
maxval: float
Maximum of the interval containing the knots.
n_knots: positive integer
The number of knots to create.
knots: array or list of floats
The knots.
Returns
--------
model: a model object
The returned model will have following method:
- predict(x):
x is a numpy array. This will return the predicted y-values.
"""
if knots:
spline = NaturalCubicSpline(knots=knots)
else:
spline = NaturalCubicSpline(max=maxval, min=minval, n_knots=n_knots)
p = Pipeline([
('nat_cubic', spline),
('regression', LinearRegression(fit_intercept=True))
])
p.fit(x, y)
return p
class AbstractSpline(BaseEstimator, TransformerMixin):
"""Base class for all spline basis expansions."""
def __init__(self, max=None, min=None, n_knots=None, n_params=None, knots=None):
if knots is None:
if not n_knots:
n_knots = self._compute_n_knots(n_params)
knots = np.linspace(min, max, num=(n_knots + 2))[1:-1]
max, min = np.max(knots), np.min(knots)
self.knots = np.asarray(knots)
@property
def n_knots(self):
return len(self.knots)
def fit(self, *args, **kwargs):
return self
class NaturalCubicSpline(AbstractSpline):
"""Apply a natural cubic basis expansion to an array.
The features created with this basis expansion can be used to fit a
piecewise cubic function under the constraint that the fitted curve is
linear *outside* the range of the knots.. The fitted curve is continuously
differentiable to the second order at all of the knots.
This transformer can be created in two ways:
- By specifying the maximum, minimum, and number of knots.
- By specifying the cutpoints directly.
If the knots are not directly specified, the resulting knots are equally
space within the *interior* of (max, min). That is, the endpoints are
*not* included as knots.
Parameters
----------
min: float
Minimum of interval containing the knots.
max: float
Maximum of the interval containing the knots.
n_knots: positive integer
The number of knots to create.
knots: array or list of floats
The knots.
"""
def _compute_n_knots(self, n_params):
return n_params
@property
def n_params(self):
return self.n_knots - 1
def transform(self, X, **transform_params):
X_spl = self._transform_array(X)
if isinstance(X, pd.Series):
col_names = self._make_names(X)
X_spl = pd.DataFrame(X_spl, columns=col_names, index=X.index)
return X_spl
def _make_names(self, X):
first_name = "{}_spline_linear".format(X.name)
rest_names = ["{}_spline_{}".format(X.name, idx)
for idx in range(self.n_knots - 2)]
return [first_name] + rest_names
def _transform_array(self, X, **transform_params):
X = X.squeeze()
try:
X_spl = np.zeros((X.shape[0], self.n_knots - 1))
except IndexError: # For arrays with only one element
X_spl = np.zeros((1, self.n_knots - 1))
X_spl[:, 0] = X.squeeze()
def d(knot_idx, x):
def ppart(t): return np.maximum(0, t)
def cube(t): return t*t*t
numerator = (cube(ppart(x - self.knots[knot_idx]))
- cube(ppart(x - self.knots[self.n_knots - 1])))
denominator = self.knots[self.n_knots - 1] - self.knots[knot_idx]
return numerator / denominator
for i in range(0, self.n_knots - 2):
X_spl[:, i+1] = (d(i, X) - d(self.n_knots - 2, X)).squeeze()
return X_spl
# spline calculations
m1=get_natural_cubic_spline_model(x, y, minval=min(x), maxval=max(x), n_knots=6)
y_est_m1=m1.predict(x)
# gather results and sort them so that the line is not messed up
df_spline=pd.DataFrame({'x':x,
'y':y,
'y_est':m1.predict(x)})
df_spline=df_spline.sort_values(by=['x'])
### PLOTLY ###
# data source
points = go.Scatter(
x = x,
y = y,
mode = 'markers',
name = 'iris')
# spline
line = go.Scatter(
x = df_spline['x'],
y = df_spline['y_est'],
mode = 'lines',
name = 'spline')
# gather data
data=[points, line]
# build figure
fig=go.Figure(data)
# plot
fig.show()