Chi Squared и уменьшенный Chi Squared = 0 с весами с использованием lmfit - PullRequest
0 голосов
/ 13 мая 2018

Я использую lmfit, чтобы соответствовать рамановским данным.Я использую 7 гауссовских пиков и линейный фон (5 из этих пиков известны, а два неизвестны). Я включил массив весов (обратный квадрат stdev).Когда я включаю веса - подгонка визуально выглядит намного лучше, а построенные остатки также имеют меньшую структуру, однако значения хи-квадрат и уменьшенные значения хи-квадрат сообщаются как 0. Я не уверен, что использую взвешиваниеоцените правильно, или, возможно, неправильно интерпретируете результаты.Я скопировал мой код ниже.Большое спасибо за любую помощь

x=[ 1151.41  1155.26  1159.11  1162.96  1166.8   1170.65  1174.5   1178.35
  1182.19  1186.03  1189.88  1193.72  1197.57  1201.41  1205.25  1209.08
  1212.92  1216.77  1220.6   1224.44  1228.27  1232.11  1235.94  1239.78
  1243.61  1247.44  1251.27  1255.1   1258.94  1262.77  1266.6   1270.42
  1274.25  1278.08  1281.91  1285.73  1289.55  1293.37  1297.2   1301.02
  1304.84  1308.66  1312.48  1316.3   1320.12  1323.94  1327.75  1331.57
  1335.39  1339.2   1343.01  1346.83  1350.64  1354.45  1358.26  1362.07
  1365.88  1369.69  1373.49  1377.31  1381.11  1384.92  1388.72  1392.52
  1396.33  1400.13  1403.93  1407.73  1411.53  1415.34  1419.13  1422.93
  1426.73  1430.52  1434.32  1438.12  1441.91  1445.7   1449.49  1453.29
  1457.08  1460.87  1464.66  1468.45  1472.24  1476.03  1479.81  1483.6
  1487.38  1491.17  1494.96  1498.74]


y=[  3.34835702   3.37753701   3.46421441   3.47561446   3.52906995
   3.7430628    3.9445303    4.08069468   4.26772632   4.57141717
   4.62653264   4.95543734   5.00090221   5.03820472   4.96421951
   4.94593035   4.71712245   4.73136333   4.5449431    4.54198878
   4.68326867   4.72099151   4.79832143   4.93313301   5.23041882
   5.67948661   6.14714543   6.650372     7.289351     7.85672892
   8.52692445   9.19116051  10.00674694  10.54570038  10.87682289
  11.35534123  11.42260359  11.19421866  10.80316156  10.50632887
   9.62797798   9.1298809    8.28793321   7.73275794   7.0211812
   6.81838292   6.39480371   6.08652915   5.72148564   5.51527137
   5.21654715   5.25030395   5.28647568   5.2812212    5.36064974
   5.41940211   5.10942821   5.10866681   5.03248779   4.87465847
   4.62568491   4.60337401   4.57096411   4.56132562   4.55625841
   4.81601962   4.91783646   5.40526849   5.7393016    6.44435143
   7.27609291   8.17028904   9.53128053  10.71589492  11.74175004
  12.82955492  13.1016751   13.1616784   12.64563727  11.91554063
  10.84031597   9.74302278   8.93611092   8.01037116   7.37684127
   7.08713295   6.77556246   6.8143279    6.85773728   6.87895641
   7.02718543   7.10869694]

weights=[ 0.01244515  0.0056474   0.00926648  0.00674491  0.00553186  0.00589553
  0.00576142  0.00527653  0.00599983  0.00787368  0.00708644  0.00357355
  0.00303342  0.00916585  0.00275235  0.00328612  0.00256573  0.00370105
  0.00428572  0.00332175  0.00352448  0.00506917  0.00357058  0.00351362
  0.0040452   0.00512055  0.00217692  0.00228909  0.00237594  0.00141826
  0.00278388  0.00107677  0.00139026  0.00171414  0.0015294   0.00124306
  0.00139236  0.00160101  0.00113102  0.00125891  0.00131655  0.00172122
  0.00261752  0.00199811  0.00259736  0.00315851  0.00434901  0.00293013
  0.00338841  0.00230605  0.00272563  0.00596109  0.00420324  0.00510059
  0.00326837  0.00373911  0.00418471  0.00413105  0.00581997  0.0047461
  0.00297461  0.00713362  0.00438222  0.00304455  0.00672188  0.00444613
  0.00375129  0.00312274  0.00429026  0.00340182  0.00453605  0.00175878
  0.0021822   0.0018159   0.0014724   0.00155655  0.00093935  0.00067046
  0.00074241  0.00097113  0.0008329   0.00107734  0.00133742  0.00119752
  0.00142113  0.00174815  0.00254059  0.0033123   0.0030637   0.00280913
  0.00314901  0.00259063]

import os
import matplotlib.pyplot as plt
import pandas as pd
from lmfit.models import LinearModel
from lmfit.models import GaussianModel
import numpy as np

plotme = True


# import calibrated file
filename = 'file.csv'
data = pd.read_csv('file.csv', sep=",", index_col=[0], header=0)
samplename = os.path.splitext(os.path.basename(filename))[0]

# section out the data to use
data = data.set_index('x_ref', drop=False)
fingerprint = data[data.index > 1150]
fingerprint = fingerprint[fingerprint.index < 1500]

# define the x, y, and weights
x = x
y = y
wv = weights

lin_mod1 = LinearModel(prefix='lin1_')
pars = lin_mod1.guess(y, x=x)

gauss1 = GaussianModel(prefix='one_')
pars.update(gauss1.make_params())

pars['one_center'].set(1200)
pars['one_sigma'].set(2)
pars['one_amplitude'].set(10, min=0)

gauss2 = GaussianModel(prefix='two_')
pars.update(gauss2.make_params())

pars['two_center'].set(1275)
pars['two_sigma'].set(2)
pars['two_amplitude'].set(10, min=0)

gauss3 = GaussianModel(prefix='three_')
pars.update(gauss3.make_params())

pars['three_center'].set(1450)
pars['three_sigma'].set(2)
pars['three_amplitude'].set(10, min=0)

gauss4 = GaussianModel(prefix='unknown_')
pars.update(gauss4.make_params())

pars['unknown_center'].set(1355)
pars['unknown_sigma'].set(2)
pars['unknown_amplitude'].set(10, min=0)

gauss5 = GaussianModel(prefix='unknown2_')
pars.update(gauss5.make_params())

pars['unknown2_center'].set(1510)
pars['unknown2_sigma'].set(2)
pars['unknown2_amplitude'].set(10, min=0)

gauss6 = GaussianModel(prefix='six_')
pars.update(gauss6.make_params())

pars['six_center'].set(1550)
pars['six_sigma'].set(2)
pars['six_amplitude'].set(10, min=0)

gauss7 = GaussianModel(prefix='seven_')
pars.update(gauss7.make_params())

pars['seven_center'].set(1600)
pars['seven_sigma'].set(2)
pars['seven_amplitude'].set(10, min=0)

mod = gauss1 + gauss2 + gauss3 + gauss4 + gauss5 + gauss6 + gauss7 + lin_mod1
init = mod.eval(pars, x=x)
out = mod.fit(y, pars, x=x, weights=wv)

print(out.fit_report())

if plotme:
    plt.subplot(2, 1, 1)
    plt.plot(x, y, 'bo')
    # plt.plot(x, init, 'k--')
    plt.plot(x, out.best_fit, 'r-')

    comps = out.eval_components(x=x)
    plt.plot(x, comps['one_'], '--')
    plt.plot(x, comps['two_'], '--')
    plt.plot(x, comps['three_'], '--')
    plt.plot(x, comps['unknown_'], '--')
    plt.plot(x, comps['six_'], '--')
    plt.plot(x, comps['unknown2_'], '--')
    plt.plot(x, comps['seven_'], '--')
    plt.plot(x, comps['lin1_'], '--')

    plt.subplot(2, 1, 2)
    plt.title('residuals')
    plt.plot(x, out.residual, '-')

    plt.subplots_adjust(top=0.95, bottom=0.10, left=0.10, right=0.95, hspace=0.50, wspace=0.35)

    plt.show() 

вывод, включая веса:

[[Model]]
    (((((((Model(gaussian, prefix='one_') + Model(gaussian, prefix='two_')) + Model(gaussian, prefix='three_')) + Model(gaussian, prefix='unknown_')) + Model(gaussian, prefix='unknown2_')) + Model(gaussian, prefix='six_')) + Model(gaussian, prefix='seven_')) + Model(linear, prefix='lin1_'))
[[Fit Statistics]]
    # function evals   = 366
    # data points      = 172
    # variables        = 23
    chi-square         = 0.000
    reduced chi-square = 0.000
    Akaike info crit   = -2481.840
    Bayesian info crit = -2409.448
[[Variables]]
    lin1_intercept:       7.87347446 +/- 0.202758 (2.58%) (init= 16.00284)
    lin1_slope:          -0.00399367 +/- 0.000115 (2.88%) (init=-0.006952215)
    one_sigma:            21.2666157 +/- 1.423474 (6.69%) (init= 2)
    one_center:           1204.02129 +/- 1.132164 (0.09%) (init= 1200)
    one_amplitude:        102.715569 +/- 9.153523 (8.91%) (init= 10)
    one_fwhm:             50.0790521 +/- 3.352026 (6.69%)  == '2.3548200*one_sigma'
    one_height:           1.92685033 +/- 0.073258 (3.80%)  == '0.3989423*one_amplitude/max(1.e-15, one_sigma)'
    two_sigma:            25.7977895 +/- 0.871817 (3.38%) (init= 2)
    two_center:           1287.20816 +/- 1.068676 (0.08%) (init= 1275)
    two_amplitude:        527.208091 +/- 24.04663 (4.56%) (init= 10)
    two_fwhm:             60.7491508 +/- 2.052973 (3.38%)  == '2.3548200*two_sigma'
    two_height:           8.15285388 +/- 0.212054 (2.60%)  == '0.3989423*two_amplitude/max(1.e-15, two_sigma)'
    three_sigma:          19.5097449 +/- 0.798387 (4.09%) (init= 2)
    three_center:         1444.94305 +/- 0.757566 (0.05%) (init= 1450)
    three_amplitude:      518.683054 +/- 24.82361 (4.79%) (init= 10)
    three_fwhm:           45.9419376 +/- 1.880058 (4.09%)  == '2.3548200*three_sigma'
    three_height:         10.6062181 +/- 0.249698 (2.35%)  == '0.3989423*three_amplitude/max(1.e-15, three_sigma)'
    unknown_sigma:        33.8695907 +/- 4.913019 (14.51%) (init= 2)
    unknown_center:       1365.89866 +/- 2.572737 (0.19%) (init= 1355)
    unknown_amplitude:    224.557732 +/- 33.37427 (14.86%) (init= 10)
    unknown_fwhm:         79.7567896 +/- 11.56927 (14.51%)  == '2.3548200*unknown_sigma'
    unknown_height:       2.64501508 +/- 0.077333 (2.92%)  == '0.3989423*unknown_amplitude/max(1.e-15, unknown_sigma)'
    unknown2_sigma:       16.8566531 +/- 1.909319 (11.33%) (init= 2)
    unknown2_center:      1497.93496 +/- 1.297153 (0.09%) (init= 1510)
    unknown2_amplitude:   189.598971 +/- 27.39291 (14.45%) (init= 10)
    unknown2_fwhm:        39.6943839 +/- 4.496103 (11.33%)  == '2.3548200*unknown2_sigma'
    unknown2_height:      4.48719263 +/- 0.196189 (4.37%)  == '0.3989423*unknown2_amplitude/max(1.e-15, unknown2_sigma)'
    six_sigma:            22.0949199 +/- 2.010271 (9.10%) (init= 2)
    six_center:           1552.18018 +/- 1.324973 (0.09%) (init= 1550)
    six_amplitude:        333.936881 +/- 54.65493 (16.37%) (init= 10)
    six_fwhm:             52.0295593 +/- 4.733827 (9.10%)  == '2.3548200*six_sigma'
    six_height:           6.02951031 +/- 0.537614 (8.92%)  == '0.3989423*six_amplitude/max(1.e-15, six_sigma)'
    seven_sigma:          41.8944271 +/- 1.648292 (3.93%) (init= 2)
    seven_center:         1603.04249 +/- 4.014783 (0.25%) (init= 1600)
    seven_amplitude:      547.973799 +/- 48.61137 (8.87%) (init= 10)
    seven_fwhm:           98.6538348 +/- 3.881433 (3.93%)  == '2.3548200*seven_sigma'
    seven_height:         5.21811474 +/- 0.274354 (5.26%)  == '0.3989423*seven_amplitude/max(1.e-15, seven_sigma)'
[[Correlations]] (unreported correlations are <  0.100)
    C(lin1_intercept, lin1_slope)  = -0.999 
    C(seven_center, seven_amplitude)  = -0.987 
    C(unknown_sigma, unknown_amplitude)  =  0.980 
    C(unknown2_sigma, unknown2_amplitude)  =  0.971 
    C(seven_sigma, seven_center)  = -0.966 
    C(seven_sigma, seven_amplitude)  =  0.953 
    C(six_amplitude, seven_amplitude)  = -0.946 
    C(six_amplitude, seven_center)  =  0.935 
    C(one_sigma, one_amplitude)  =  0.920 
    C(six_sigma, six_amplitude)  =  0.910 
    C(two_amplitude, unknown_sigma)  = -0.900 
    C(two_amplitude, unknown_amplitude)  = -0.896 
    C(two_center, unknown_amplitude)  = -0.886 
    C(two_center, unknown_sigma)  = -0.880 
    C(six_amplitude, seven_sigma)  = -0.877 
    C(three_sigma, three_amplitude)  =  0.871 
    C(two_center, two_amplitude)  =  0.858 
    C(lin1_intercept, one_amplitude)  = -0.844 
    C(lin1_slope, one_amplitude)  =  0.839 
    C(two_sigma, two_amplitude)  =  0.826 
    C(two_sigma, unknown_center)  =  0.820 
    C(two_sigma, unknown_amplitude)  = -0.813 
    C(two_amplitude, unknown_center)  =  0.805 
    C(two_center, unknown_center)  =  0.799 
    C(six_sigma, seven_amplitude)  = -0.784 
    C(two_sigma, unknown_sigma)  = -0.764 
    C(lin1_intercept, one_sigma)  = -0.758 
    C(six_sigma, seven_center)   =  0.754 
    C(lin1_slope, one_sigma)     =  0.754 
    C(three_amplitude, unknown2_amplitude)  = -0.748 
    C(two_sigma, two_center)     =  0.737 
    C(unknown_center, unknown_amplitude)  = -0.731 
    C(three_amplitude, unknown2_sigma)  = -0.717 
    C(three_center, unknown2_amplitude)  = -0.713 
    C(three_center, unknown2_sigma)  = -0.705 
    C(three_sigma, unknown2_amplitude)  = -0.696 
    C(unknown_sigma, unknown_center)  = -0.688 
    C(unknown2_amplitude, six_sigma)  = -0.682 
    C(unknown2_amplitude, six_center)  =  0.681 
    C(unknown2_sigma, six_center)  =  0.675 
    C(six_sigma, seven_sigma)    = -0.671 
    C(three_sigma, unknown_sigma)  = -0.662 
    C(three_amplitude, unknown_sigma)  = -0.660 
    C(unknown2_sigma, six_sigma)  = -0.653 
    C(three_sigma, unknown2_sigma)  = -0.643 
    C(three_sigma, unknown_amplitude)  = -0.640 
    C(three_amplitude, unknown_amplitude)  = -0.631 
    C(three_center, three_amplitude)  =  0.621 
    C(one_center, two_sigma)     = -0.620 
    C(three_sigma, three_center)  =  0.548 
    C(two_amplitude, three_amplitude)  =  0.542 
    C(unknown2_sigma, six_amplitude)  = -0.539 
    C(unknown2_amplitude, six_amplitude)  = -0.533 
    C(two_amplitude, three_sigma)  =  0.518 
    C(two_center, three_amplitude)  =  0.517 
    C(one_amplitude, two_sigma)  = -0.497 
    C(two_center, three_sigma)   =  0.494 
    C(one_sigma, two_sigma)      = -0.484 
    C(unknown2_center, six_sigma)  = -0.483 
    C(one_center, two_amplitude)  = -0.468 
    C(one_amplitude, unknown_amplitude)  =  0.442 
    C(one_sigma, one_center)     =  0.431 
    C(one_center, one_amplitude)  =  0.422 
    C(three_sigma, unknown2_center)  =  0.416 
    C(one_center, unknown_center)  = -0.414 
    C(one_sigma, unknown_amplitude)  =  0.408 
    C(two_sigma, three_amplitude)  =  0.406 
    C(three_center, six_center)  = -0.404 
    C(three_amplitude, six_center)  = -0.398 
    C(two_sigma, three_sigma)    =  0.383 
    C(unknown2_center, six_amplitude)  = -0.374 
    C(one_center, unknown_amplitude)  =  0.364 
    C(three_amplitude, unknown2_center)  =  0.358 
    C(three_center, six_sigma)   =  0.356 
    C(one_amplitude, unknown_center)  = -0.349 
    C(three_amplitude, six_sigma)  =  0.348 
    C(three_sigma, six_center)   = -0.340 
    C(one_center, unknown_sigma)  =  0.337 
    C(unknown_sigma, unknown2_amplitude)  =  0.333 
    C(unknown_amplitude, unknown2_amplitude)  =  0.332 
    C(one_sigma, unknown_center)  = -0.329 
    C(one_amplitude, unknown_sigma)  =  0.324 
    C(unknown2_sigma, seven_amplitude)  =  0.321 
    C(unknown2_amplitude, seven_amplitude)  =  0.309 
    C(three_center, six_amplitude)  =  0.301 
    C(one_sigma, unknown_sigma)  =  0.300 
    C(unknown2_sigma, seven_center)  = -0.297 
    C(three_amplitude, six_amplitude)  =  0.296 
    C(three_sigma, six_sigma)    =  0.294 
    C(unknown_sigma, unknown2_sigma)  =  0.291 
    C(unknown_amplitude, unknown2_sigma)  =  0.288 
    C(lin1_intercept, unknown_amplitude)  = -0.286 
    C(lin1_slope, unknown_amplitude)  =  0.284 
    C(unknown2_amplitude, seven_center)  = -0.278 
    C(three_center, unknown2_center)  =  0.277 
    C(one_sigma, two_amplitude)  = -0.276 
    C(one_amplitude, two_amplitude)  = -0.275 
    C(unknown2_center, seven_amplitude)  =  0.272 
    C(unknown2_center, six_center)  =  0.265 
    C(unknown2_center, seven_center)  = -0.254 
    C(three_sigma, six_amplitude)  =  0.252 
    C(unknown2_sigma, seven_sigma)  =  0.249 
    C(two_amplitude, unknown2_amplitude)  = -0.245 
    C(two_center, unknown2_amplitude)  = -0.238 
    C(unknown_sigma, unknown2_center)  = -0.237 
    C(three_amplitude, unknown_center)  =  0.237 
    C(unknown_amplitude, unknown2_center)  = -0.228 
    C(unknown2_amplitude, seven_sigma)  =  0.225 
    C(six_sigma, six_center)     = -0.220 
    C(two_amplitude, unknown2_sigma)  = -0.215 
    C(six_center, seven_center)  =  0.212 
    C(one_amplitude, seven_sigma)  =  0.211 
    C(one_center, two_center)    = -0.208 
    C(two_center, unknown2_sigma)  = -0.207 
    C(unknown2_center, seven_sigma)  =  0.207 
    C(lin1_intercept, seven_sigma)  = -0.205 
    C(six_center, seven_sigma)   = -0.205 
    C(six_center, seven_amplitude)  = -0.205 
    C(one_amplitude, two_center)  = -0.201 
    C(one_amplitude, seven_amplitude)  =  0.190 
    C(lin1_intercept, two_sigma)  =  0.189 
    C(one_sigma, seven_sigma)    =  0.188 
    C(lin1_slope, two_sigma)     = -0.188 
    C(two_sigma, unknown2_amplitude)  = -0.187 
    C(lin1_slope, seven_sigma)   =  0.186 
    C(lin1_intercept, seven_amplitude)  = -0.185 
    C(two_amplitude, unknown2_center)  =  0.183 
    C(two_center, unknown2_center)  =  0.173 
    C(one_center, three_amplitude)  = -0.173 
    C(one_sigma, seven_amplitude)  =  0.170 
    C(three_center, seven_amplitude)  = -0.170 
    C(lin1_slope, seven_amplitude)  =  0.169 
    C(one_sigma, two_center)     = -0.166 
    C(three_amplitude, seven_amplitude)  = -0.166 
    C(lin1_intercept, unknown_sigma)  = -0.164 
    C(lin1_slope, unknown_sigma)  =  0.163 
    C(three_sigma, unknown_center)  =  0.162 
    C(two_sigma, unknown2_sigma)  = -0.161 
    C(three_center, seven_center)  =  0.160 
    C(one_center, three_sigma)   = -0.157 
    C(three_amplitude, seven_center)  =  0.156 
    C(lin1_intercept, unknown_center)  =  0.146 
    C(lin1_slope, unknown_center)  = -0.146 
    C(one_amplitude, seven_center)  = -0.145 
    C(three_sigma, seven_amplitude)  = -0.145 
    C(unknown_amplitude, six_sigma)  = -0.141 
    C(unknown_amplitude, six_amplitude)  = -0.137 
    C(unknown_sigma, six_center)  =  0.136 
    C(lin1_intercept, seven_center)  =  0.135 
    C(three_center, unknown_center)  = -0.135 
    C(three_sigma, seven_center)  =  0.134 
    C(unknown_sigma, six_sigma)  = -0.133 
    C(three_center, seven_sigma)  = -0.132 
    C(two_sigma, unknown2_center)  =  0.132 
    C(three_amplitude, seven_sigma)  = -0.130 
    C(one_sigma, seven_center)   = -0.130 
    C(unknown_amplitude, six_center)  =  0.126 
    C(one_amplitude, three_sigma)  = -0.123 
    C(one_amplitude, six_amplitude)  = -0.122 
    C(unknown_sigma, six_amplitude)  = -0.122 
    C(lin1_slope, seven_center)  = -0.121 
    C(unknown_amplitude, seven_amplitude)  =  0.118 
    C(three_sigma, seven_sigma)  = -0.116 
    C(unknown_amplitude, seven_sigma)  =  0.114 
    C(one_sigma, three_sigma)    = -0.113 
    C(unknown2_center, unknown2_amplitude)  =  0.111 
    C(one_sigma, six_amplitude)  = -0.109 
    C(unknown2_sigma, unknown2_center)  =  0.109 
    C(one_amplitude, unknown2_amplitude)  =  0.107 
    C(two_amplitude, six_center)  = -0.107 
    C(lin1_intercept, six_amplitude)  =  0.105 
    C(lin1_intercept, two_center)  =  0.101 

components and residuals stacked with weights included in fit

1 Ответ

0 голосов
/ 13 мая 2018

Отчет о статистике хи-квадрат ограничен 3 цифрами в используемой вами версии.Я считаю, что это было исправлено в самой последней версии (0.9.9) и определенно исправлено в последней версии для разработчиков.Вы также можете распечатать значения с большей точностью:

print("chi-square = %14.8g, reduced chi-square = %14.8g" % (out.chisqr, out.redchi))

Но также: я думаю, что использование весов "обратного квадрата стандартного числа" может оказаться не тем, что вам нужно.Для ясности, веса являются мультипликативным коэффициентом, применяемым к (модель данных), и тогда хи-квадрат будет равен «Сумма [(модель данных) * веса] ** 2».Поэтому я думаю, что использование «weight = 1./(stddev of data)» (то есть не в квадрате) является наиболее распространенным подходом для данных, которые, как ожидается, будут нормально распределены, и ваши данные комбинационного рассеяния, вероятно, по крайней мере вощущение отсутствия доминирующего сигнала путем подсчета небольших чисел (где предпочтительна статистика Poission).

Надеюсь, это поможет.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...