Как выполнить итерацию индекса строки и найти группу с максимальным определителем из набора данных в python? - PullRequest
1 голос
/ 28 сентября 2019

Я хочу найти в моём наборе данных 7 строк (получить значения их индекса), которые вместе дают максимальный определитель |Matrix.T * Matrix|, где Matrix.T * Matrix — матрица 6 на 6 (так как столбцов у меня 6).

Набор данных:

Serial_no,A,B,C,D,E,F
1,0.379,-0.588,-1.69,-0.0135,0.083,-0.0297
2,-0.144,0.278,0.354,-0.000672,-0.0228,0.014
3,0.295,-0.157,-1.63,-0.00451,0.0778,-0.00969
4,0.371,-0.623,-4.98,-0.000253,0.0872,-0.0109
5,0.369,-3.11,-8.3,-0.0000105,0.0871,-0.0327
6,0.369,-0.899,-7.19,-0.0000177,0.0872,-0.0109
7,0.383,-1.04,-2.76,-0.00418,0.089,-0.033
8,0.369,-1.04,-8.3,-0.00000263,0.0871,-0.0109
9,-0.124,0.421,0.679,0.00246,-0.0216,0.0133
10,0.37,2.15,-17.1,0.000244,0.0871,0.0109
11,0.369,5.61,-14.9,0.0000352,0.0872,0.0327
12,0.369,1.45,-11.6,-0.000000963,0.0872,0.0109
13,0.369,3.53,-9.41,-0.00000186,0.0872,0.0327
14,0.369,6.44,-17.2,0.000513,0.0872,0.0327
15,-0.11,-2.57,4.11,-0.000127,-0.0209,-0.0131
16,-0.11,-2.76,4.43,-0.000606,-0.0211,-0.0132
17,0.37,0.761,-6.09,0.0000571,0.0871,0.0109
18,0.3678,1.45,-3.88,0.00209,0.0865,0.0325
19,0.381,-2.46,-19.4,-0.00274,0.0874,-0.0111
20,0.369,4.36,-11.6,-0.000003,0.0872,0.0327
21,-0.111,-1.74,2.79,0.000000903,-0.0209,-0.0131
22,-0.111,-1.91,3.05,-0.000000953,-0.0209,-0.0131
23,0.368,2.28,-6.09,0.000164,0.0871,0.0327
24,-0.11,-0.913,1.46,-0.0000412,-0.0209,-0.0131
25,-0.111,-1.08,1.73,-0.0000101,-0.0209,-0.0131
26,-0.144,-0.278,0.354,0.000672,-0.0228,-0.014
27,0.344,-0.344,-2.76,-0.00202,0.0877,-0.0107
28,0.369,3.11,-8.3,0.0000105,0.0871,0.0327
29,0.383,1.04,-2.76,0.00418,0.089,0.033
30,-0.124,-0.421,0.679,-0.00246,-0.0216,-0.0133
import pandas as pd
import numpy as np

# Load the dataset from CSV and use the Serial_no column as the row index.
df = pd.read_csv('Dataset.csv').set_index('Serial_no')
# X selects every row and every column of df.
X = df.iloc[:, :]

Я пробовал это, но не получил значения индексов строк матрицы:

N = 7  # number of consecutive rows taken for each candidate window


def my_det(df, i):
    """Return det(M.T @ M) for the window of N consecutive rows starting at i.

    ``df`` is sliced positionally with ``df.iloc[i:i + N]``; if fewer than
    N rows remain after position ``i`` the slice is simply shorter.
    """
    window = df.iloc[i:i + N]
    gram = window.T @ window
    return np.linalg.det(gram)

# Evaluate every window of N consecutive rows.  The last full window starts
# at position len(df) - N, so the range has to stop at len(df) - N + 1;
# stopping at len(df) - N would silently skip that final window.
all_det = [my_det(df, start) for start in range(len(df) - N + 1)]

# np.argmax gives the zero-based position of the first row of the best
# window; it is a position into df, not a Serial_no label.
print(np.argmax(all_det))
# 7  (value reported in the original post, which did not scan the last window)

# Largest determinant found among the consecutive-row windows.
print(np.max(all_det))

набор данных:

enter image description here

1 Ответ

1 голос
/ 29 сентября 2019

Я не мог устоять. Вот решение с использованием формул Лапласа и Коши — Бине, а также решение полным перебором (грубой силой) для проверки.

Пример прогона:

 [55.67740601  0.61391687]

Этот вывод показывает, что на вашем примере 30x6 полный перебор и «умное» решение дали один и тот же ответ (иначе сработал бы assert в коде ниже), причём полный перебор занял 55,7 секунды, а «умное» решение — 0,6 секунды.

Код:

import numpy as np
import pandas as pd
import itertools as it
import io

def brute_force(A, k=7):
    """Return the k row indices of A that maximise det(A[s].T @ A[s]).

    Every k-element subset of row positions is tried in the order produced
    by ``itertools.combinations``; on ties the first subset encountered wins.

    Parameters
    ----------
    A : numpy array indexed as ``A[rows]`` with a list of row positions.
    k : number of rows per subset (defaults to 7, the original fixed size).

    Returns
    -------
    list of int
        Zero-based row positions of the best subset, in ascending order.

    Raises
    ------
    ValueError
        If ``k`` is larger than the number of rows, so no subset exists.
    """
    n_rows = len(A)
    if k > n_rows:
        raise ValueError(
            "cannot choose {} rows from a matrix with only {} rows".format(
                k, n_rows))

    def gram_det(rows):
        # Slice once and reuse the submatrix for both factors.
        sub = A[rows]
        return np.linalg.det(sub.T @ sub)

    return max(map(list, it.combinations(range(n_rows), k)), key=gram_det)

def smart(A):
    """Return the 7 row positions of A that maximise det(M.T @ M).

    Rather than evaluating each 7-row subset on its own, the minors over
    the leading columns are built up one column at a time with Laplace
    expansion (1x1, 2x2, ... 6x6), and the determinant of M.T @ M for every
    7-row subset is then obtained from the squared 6x6 minors via the
    Cauchy-Binet formula.

    Parameters
    ----------
    A : 2-D numpy array indexed as ``A[row, col]``.  Only columns 0..5 are
        read.  NOTE(review): presumably requires at least 7 rows; nothing
        here validates the shape.

    Returns
    -------
    1-D integer array of length 7 holding zero-based row positions into A.
    NOTE(review): appears to be in ascending order - confirm.
    """
    N = len(A)
    # binomial coefficients
    # Row 0 is 0..N and each later row is the running sum of the previous
    # one, so bcs[j, i] == C(i + j, j + 1) (hockey-stick identity).
    bcs = np.zeros((7,N+1),int)
    # determinants for jxj minors j=1..6
    # The 1x1 minors are just the entries of column 0.
    dets = [A[:,0]]
    bcs[0] = np.arange(N+1)
    # subsets of size j for j=1..7
    # Level 0 is the single row 0..N (a view of bcs[0] with a leading axis).
    combs = [bcs[None,0]]
    for j in range(1,7):
        # extend bcs, combs
        bcs[j] = bcs[j-1].cumsum()
        # One column per (j+1)-row subset: bcs[j, -j-1] == C(N, j+1).
        # Entries are stored with a per-row offset removed; the lookups
        # below add np.arange(...) back to obtain actual row positions.
        combs.append(np.zeros((j+1,bcs[j,-j-1]),int))
        # Row 0: put a 1 at each block boundary, then cumsum so the value
        # steps up by one from block to block.
        combs[-1][0,bcs[j,1:-j-1]] = 1
        combs[-1][0] = combs[-1][0].cumsum()
        # Remaining rows of block i: copy the first bcs[j-1, i+1] columns of
        # the previous level's table (that count equals the block width).
        for i in range(N-j):
            combs[-1][1:,bcs[j,i]:bcs[j,i+1]] = combs[-2][:,:bcs[j-1,i+1]]
        # use Laplace's formula to compute j+1xj+1 minors
        # the tricky bit is getting the indexing right...
        # NOTE(review): idxs[r, c] is presumably the position, within the
        # previous level's list, of the j-row subset obtained from subset c
        # by leaving out one of its rows - confirm against a small case.
        idxs = np.zeros((j+1,bcs[j,-j-1]),int)
        idxs[1:] = bcs[np.arange(j)[::-1,None],combs[-1][:-1]+1].cumsum(0)
        idxs[-2::-1] += bcs[np.arange(j)[:,None],combs[-1][-1:0:-1]].cumsum(0)
        if j<6:
            # Expand along column j: entries A[row, j] of each subset, with
            # alternating signs, times the matching jxj minors; the einsum
            # sums the products over the j+1 rows of each subset.
            dets.append(np.einsum('ji,ji->i',(A[combs[-1]+np.arange(j,-1,-1)[:,None],j])*(-1)**np.arange(j+1)[:,None],dets[-1][idxs]))
        else:
            # use Cauchy-Binet to calculate the M^T M determinants
            # For a 7x6 M, det(M^T M) is the sum of the squared 6x6 minors
            # over the 7 ways of dropping one row.
            d62 = np.square(dets[-1])
            d7 = d62[idxs].sum(0)
            # find index of max, translate back to subset
            # Reverse the stored rows and add 0..6 to undo the offset encoding.
            return combs[-1][::-1,d7.argmax()]+np.arange(7)

# Inline copy of the question's 30-row, 6-column dataset in CSV form; the
# first column (Serial_no) is the 1-based row label.
a = """
Serial_no,A,B,C,D,E,F
1,0.379,-0.588,-1.69,-0.0135,0.083,-0.0297
2,-0.144,0.278,0.354,-0.000672,-0.0228,0.014
3,0.295,-0.157,-1.63,-0.00451,0.0778,-0.00969
4,0.371,-0.623,-4.98,-0.000253,0.0872,-0.0109
5,0.369,-3.11,-8.3,-0.0000105,0.0871,-0.0327
6,0.369,-0.899,-7.19,-0.0000177,0.0872,-0.0109
7,0.383,-1.04,-2.76,-0.00418,0.089,-0.033
8,0.369,-1.04,-8.3,-0.00000263,0.0871,-0.0109
9,-0.124,0.421,0.679,0.00246,-0.0216,0.0133
10,0.37,2.15,-17.1,0.000244,0.0871,0.0109
11,0.369,5.61,-14.9,0.0000352,0.0872,0.0327
12,0.369,1.45,-11.6,-0.000000963,0.0872,0.0109
13,0.369,3.53,-9.41,-0.00000186,0.0872,0.0327
14,0.369,6.44,-17.2,0.000513,0.0872,0.0327
15,-0.11,-2.57,4.11,-0.000127,-0.0209,-0.0131
16,-0.11,-2.76,4.43,-0.000606,-0.0211,-0.0132
17,0.37,0.761,-6.09,0.0000571,0.0871,0.0109
18,0.3678,1.45,-3.88,0.00209,0.0865,0.0325
19,0.381,-2.46,-19.4,-0.00274,0.0874,-0.0111
20,0.369,4.36,-11.6,-0.000003,0.0872,0.0327
21,-0.111,-1.74,2.79,0.000000903,-0.0209,-0.0131
22,-0.111,-1.91,3.05,-0.000000953,-0.0209,-0.0131
23,0.368,2.28,-6.09,0.000164,0.0871,0.0327
24,-0.11,-0.913,1.46,-0.0000412,-0.0209,-0.0131
25,-0.111,-1.08,1.73,-0.0000101,-0.0209,-0.0131
26,-0.144,-0.278,0.354,0.000672,-0.0228,-0.014
27,0.344,-0.344,-2.76,-0.00202,0.0877,-0.0107
28,0.369,3.11,-8.3,0.0000105,0.0871,0.0327
29,0.383,1.04,-2.76,0.00418,0.089,0.033
30,-0.124,-0.421,0.679,-0.00246,-0.0216,-0.0133
"""

from time import perf_counter as pc

# Parse the inline CSV; the first column becomes the row index.
df = pd.read_csv(io.StringIO(a), index_col=0)

# Take a timestamp before, between and after the two solvers so that the
# consecutive differences are the run time of each one.
T = [pc()]
xb = brute_force(df.to_numpy())
T += [pc()]
xs = smart(df.to_numpy())
T += [pc()]

# Both approaches must select the same rows.
assert (xs == xb).all()
# Prints [brute-force seconds, smart seconds].
print(np.diff(T))
...