python scipy griddata не выполняет линейную интерполяцию, как ожидалось - PullRequest
0 голосов
/ 05 мая 2020

Это мои данные:

date_num,expiry_num,strike,value,interp
731988,731988,0.02501,0.0095094,0.0095094
731988,731988,0.03001,0.0091658,0.0096807
731988,731988,0.03501,0.0089164,0.009852
731988,731988,0.03751,0.0088471,0.00993765
731988,731988,0.04001,0.0088244,0.0100233
731988,731988,0.04251,0.008853,0.01010895
731988,731988,0.04501,0.00898,0.0101946
731988,731988,0.04751,0.009066,0.01028025
731988,731988,0.05001,0.0092429,0.0103659
731988,731988,0.05251,0.009458,0.01045155
731988,731988,0.05501,0.0097043,0.0105372
731988,731988,0.06001,0.010264,0.0107085
731988,731988,0.06501,0.0108798,0.0108798
731988,732018,0.02501,0.0095094,0.0095094
731988,732018,0.03001,0.0091658,0.0096807
731988,732018,0.03501,0.0089164,0.009852
731988,732018,0.03751,0.0088471,0.00993765
731988,732018,0.04001,0.0088244,0.0100233
731988,732018,0.04251,0.008853,0.01010895
731988,732018,0.04501,0.00898,0.0101946
731988,732018,0.04751,0.009066,0.01028025
731988,732018,0.05001,0.0092429,0.0103659
731988,732018,0.05251,0.009458,0.01045155
731988,732018,0.05501,0.0097043,0.0105372
731988,732018,0.06001,0.010264,0.0107085
731988,732018,0.06501,0.0108798,0.0108798
731988,732079,0.02543,0.0094153,0.0094153
731988,732079,0.03043,0.0090666,0.009585463
731988,732079,0.03543,0.0088118,0.009755625
731988,732079,0.03793,0.0087399,0.009840706
731988,732079,0.04043,0.0087152,0.009925788
731988,732079,0.04293,0.0087425,0.010010869
731988,732079,0.04543,0.0088643,0.01009595
731988,732079,0.04793,0.0089551,0.010181031
731988,732079,0.05043,0.0091326,0.010266113
731988,732079,0.05293,0.0093489,0.010351194
731988,732079,0.05543,0.0095964,0.010436275
731988,732079,0.06043,0.0101587,0.010606438
731988,732079,0.06543,0.0107766,0.0107766
731988,732170,0.02597,0.0095394,0.0095394
731988,732170,0.03097,0.0091987,0.009711525
731988,732170,0.03597,0.0089515,0.00988365
731988,732170,0.03847,0.008883,0.009969713
731988,732170,0.04097,0.008861,0.010055775
731988,732170,0.04347,0.0088902,0.010141838
731988,732170,0.04597,0.0090131,0.0102279
731988,732170,0.04847,0.0091035,0.010313963
731988,732170,0.05097,0.0092803,0.010400025
731988,732170,0.05347,0.0094953,0.010486088
731988,732170,0.05597,0.0097414,0.01057215
731988,732170,0.06097,0.0103008,0.010744275
731988,732170,0.06597,0.0109164,0.0109164
731988,732353,0.04685,0.0091422,0.0091422

А вот мой скрипт:

import pandas as pd
from scipy.interpolate import griddata
# Load the input table; presumably this is the data listed above
# (the code below reads its expiry_num, strike and value columns).
df = pd.read_csv("base_data.csv")
# Linearly interpolate value over the 2-D points (expiry_num, strike) and
# evaluate the interpolant at the very same points it was built from; the
# result is stored in a new "interp" column.
# NOTE(review): in the sample data expiry_num is ~7.3e5 while strike is ~1e-2.
# griddata has a `rescale` option intended for inputs whose dimensions differ
# by many orders of magnitude -- confirm whether rescale=True changes the
# result here.
df["interp"] = griddata(
    df[["expiry_num","strike"]].values, 
    df["value"].values,df[["expiry_num","strike"]].values, 
    method='linear')

import matplotlib.pyplot as plt
# For expiry_num == 732018 only: scatter the original values and then the
# interpolated values against strike, so the two series can be compared.
plt.scatter(df.loc[df["expiry_num"] == 732018,"strike"],df.loc[df["expiry_num"] == 732018,"value"])
plt.scatter(df.loc[df["expiry_num"] == 732018,"strike"],df.loc[df["expiry_num"] == 732018,"interp"])
plt.show()

Результат выглядит так:

введите описание изображения здесь

Почему griddata не выполняет интерполяцию?

1 Ответ

1 голос
/ 05 мая 2020
  • Эти данные кажутся одномерными, y=f(x), а не многомерными, z=f(x, y).
  • Смысл интерполяции заключается в получении новой информации на основе существующей. Поэтому интерполированных данных получается больше, чем исходных.
    • В этом примере num=41 означает, что с помощью интерполирующей функции, построенной по исходным 13 точкам, была создана 41 точка данных.
  • Кажется, что вы заинтересованы в группировке данных по expiry_num, хотя все данные в этом примере одинаковы
    • Я создам словарь (dict) из объектов DataFrame, ключами которого служат значения expiry_num
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d

# Build one (strike, value) sub-frame per expiry_num, keyed by that expiry_num.
# The frame is expected to hold the sample data without the single row of
# expiry_num = 732353.
df_dict = {}
for expiry in df.expiry_num.unique():
    df_dict[expiry] = df.loc[df.expiry_num == expiry, ['strike', 'value']]

plt.figure(figsize=(10, 10))

# Three-digit subplot codes: 221, 222, ... pick successive cells of a 2x2 grid.
subplot_code = 221
for expiry, group in df_dict.items():
    plt.subplot(subplot_code)
    subplot_code += 1

    strikes = group.strike
    values = group.value

    # Cubic interpolating function through the known points of this expiry.
    cubic = interp1d(strikes, values, kind='cubic')

    # Evenly spaced strikes across the known range; num can be any point count.
    dense_strikes = np.linspace(strikes.min(), strikes.max(), num=41, endpoint=True)

    # Interpolated values at the new strikes.
    dense_values = cubic(dense_strikes)

    # Known points as markers, interpolated curve as a dashed line.
    plt.plot(strikes, values, 'o')
    plt.plot(dense_strikes, dense_values, '--')
    plt.legend(['data', 'cubic'], loc='best')
    plt.title(f'expiry_num: {expiry}')

enter image description here

Примечания к комментариям:

  • Следующая строка:
df["interp"] = griddata(df[["expiry_num","strike"]].values, df["value"].values, df[["expiry_num","strike"]].values, method='linear')
  • ... неверна, потому что df[["expiry_num","strike"]].values не даёт никаких новых точек для вычисления и, что более важно, интерполирующая функция не зависит от expiry_num.
    • Например, если xnew = exp_732018.strike, то test = exp_732018.value
  • Для одномерных (1-D) данных griddata по умолчанию использует interp1d
from scipy.interpolate import griddata

# Rows of a single expiry, reduced to the interpolation axis and its values.
exp_732018 = df.loc[df.expiry_num == 732018, ['strike', 'value']]
known_strikes = exp_732018.strike.values
known_values = exp_732018.value.values

# 41 evenly spaced x-values across the known strike range
lowest_strike = exp_732018.strike.min()
highest_strike = exp_732018.strike.max()
xnew = np.linspace(lowest_strike, highest_strike, num=41, endpoint=True)

# 41 new y-values, linearly interpolated from the known points
test = griddata(known_strikes, known_values, xnew, method='linear')

# Interpolated points first, then the existing data drawn on top of them.
plt.scatter(xnew, test, label='griddata')
plt.scatter(known_strikes, known_values, label='existing data')
plt.legend()
plt.ylim(0.008, 0.012)
plt.show()

enter image description here

Используемые образцы данных

date_num,expiry_num,strike,value
731988,731988,0.02501,0.0095094
731988,731988,0.030010000000000002,0.009165799999999998
731988,731988,0.03501,0.0089164
731988,731988,0.03751,0.0088471
731988,731988,0.040010000000000004,0.0088244
731988,731988,0.04251,0.008853
731988,731988,0.04501,0.00898
731988,731988,0.047510000000000004,0.009066
731988,731988,0.05001,0.0092429
731988,731988,0.05251,0.009458
731988,731988,0.05501,0.009704299999999999
731988,731988,0.06001,0.010264
731988,731988,0.06501,0.010879799999999998
731988,732018,0.02501,0.0095094
731988,732018,0.030010000000000002,0.009165799999999998
731988,732018,0.03501,0.0089164
731988,732018,0.03751,0.0088471
731988,732018,0.040010000000000004,0.0088244
731988,732018,0.04251,0.008853
731988,732018,0.04501,0.00898
731988,732018,0.047510000000000004,0.009066
731988,732018,0.05001,0.0092429
731988,732018,0.05251,0.009458
731988,732018,0.05501,0.009704299999999999
731988,732018,0.06001,0.010264
731988,732018,0.06501,0.010879799999999998
731988,732079,0.02543,0.0094153
731988,732079,0.030430000000000002,0.0090666
731988,732079,0.03543,0.0088118
731988,732079,0.03793,0.0087399
731988,732079,0.04043,0.0087152
731988,732079,0.04293,0.0087425
731988,732079,0.04543,0.008864299999999999
731988,732079,0.04793,0.0089551
731988,732079,0.05043,0.009132600000000001
731988,732079,0.05293,0.009348899999999999
731988,732079,0.05542999999999999,0.0095964
731988,732079,0.06043,0.0101587
731988,732079,0.06543,0.0107766
731988,732170,0.02597,0.0095394
731988,732170,0.030969999999999998,0.0091987
731988,732170,0.03597,0.0089515
731988,732170,0.03847,0.008883
731988,732170,0.04097,0.008860999999999999
731988,732170,0.04347,0.008890200000000001
731988,732170,0.04597,0.0090131
731988,732170,0.04847,0.0091035
731988,732170,0.05097,0.0092803
731988,732170,0.053470000000000004,0.009495299999999998
731988,732170,0.055970000000000006,0.0097414
731988,732170,0.06097,0.010300799999999999
731988,732170,0.06597,0.0109164
...