Просто чтобы обновить эту ветку, если у кого-то еще есть подобная проблема. Я думаю, что после нескольких пересмотров кода я нашел правильное решение для этого.
def filterCoincidence(self, AT1, AT2, AT3, window = 0.05e-3):
'''
Filters the dataset with arbitrary different data rates on different channels to coincident samples.
The coincidence is checked with regard to a time window specified as argument.
- arguments:
- three times series AT1, AT2 and AT3 (arrival times of particles in my case)
- window size (50 microseconds as default setting)
- output: indices of combined samples
'''
start_time = datetime.datetime.now()
AT_list = [AT1, AT2, AT3]
# take the shortest period of time
min_EndArrival = np.max(AT_list)
max_BeginArrival = np.min(AT_list)
for i, col in enumerate(AT_list):
min_EndArrival = min(min_EndArrival, np.max(col))
max_BeginArrival = max(max_BeginArrival, np.min(col))
for i, col in enumerate(AT_list):
AT_list[i] = np.delete(AT_list[i], np.where((col < max_BeginArrival - window) | (col > min_EndArrival + window)))
# get channel with lowest datarate
num_points = np.zeros(len(AT_list))
datarate = np.zeros(len(AT_list))
for i, AT in enumerate(AT_list):
num_points[i] = AT.shape[0]
datarate[i] = num_points[i] / (AT[-1]-AT[0])
used_ref = np.argmin(datarate)
# process coincidence
AT_ref_val = AT_list[used_ref]
AT_list = list(np.delete(AT_list, used_ref))
overview = np.zeros( (AT_ref_val.shape[0], 3), dtype=int)
overview[:,0] = np.arange(AT_ref_val.shape[0], dtype=int)
borders = np.empty(2, dtype=object)
max_diff = np.zeros(2, dtype=int)
for i, AT in enumerate(AT_list):
neighbors_lower = np.searchsorted(AT, AT_ref_val - window, side='left')
neighbors_upper = np.searchsorted(AT, AT_ref_val + window, side='left')
borders[i] = np.transpose([neighbors_lower, neighbors_upper])
coinc_ix = np.where(np.diff(borders[i], axis=1).flatten() != 0)[0]
max_diff[i] = np.max(np.diff(borders[i], axis=1))
overview[coinc_ix, i+1] = 1
use_ix = np.where(~np.any(overview==0, axis=1))
borders[0] = borders[0][use_ix]
borders[1] = borders[1][use_ix]
overview = overview[use_ix]
# create all possible combinations refer to the reference
combinations = np.prod(max_diff)
test = np.empty((overview.shape[0]*combinations, 3), dtype=object)
for i, [ref_ix, at1, at2] in enumerate(zip(overview[:, 0], borders[0], borders[1])):
test[i * combinations:i * combinations + combinations, 0] = ref_ix
at1 = np.arange(at1[0], at1[1])
at2 = np.arange(at2[0], at2[1])
test[i*combinations:i*combinations+at1.shape[0]*at2.shape[0],1:] = np.asarray(list(itertools.product(at1, at2)))
test = test[~np.any(pd.isnull(test), axis=1)]
# check distances
ix_ref = test[:,0]
test = test[:,1:]
test = np.insert(test, used_ref, ix_ref, axis=1)
test = test.astype(int)
AT_list.insert(used_ref, AT_ref_val)
AT_mat = np.zeros(test.shape)
for i, AT in enumerate(AT_list):
AT_mat[:,i] = AT[test[:,i]]
distances = np.zeros( (test.shape[0], len(list(itertools.combinations(range(3), 2)))))
for i, AT in enumerate(itertools.combinations(range(3), 2)):
distances[:,i] = np.abs(AT_mat[:,AT[0]]-AT_mat[:,AT[1]])
ix = np.where(np.all(distances <= window, axis=1))[0]
test = test[ix,:]
distances = distances[ix,:]
# check duplicates
# use sum of differences as similarity factor
dist_sum = np.max(distances, axis=1)
unique_sorted = np.argsort([np.unique(test[:,i]).shape[0] for i in range(test.shape[1])])[::-1]
test = np.hstack([test, dist_sum.reshape(-1, 1)])
test = test[test[:,-1].argsort()]
for j in unique_sorted:
_, ix = np.unique(test[:,j], return_index=True)
test = test[ix, :]
test = test[:,:3]
test = test[test[:,used_ref].argsort()]
# check that all values are after each other
ix = np.where(np.any(np.diff(test, axis=0) > 0, axis=1))[0]
ix = np.append(ix, test.shape[0]-1)
test = test[ix,:]
print('{} coincident samples obtained in {}.'.format(test.shape[0], datetime.datetime.now()-start_time))
return test
Я уверен, что есть лучшее решение, но для меня оно работает сейчас. И я знаю, что имена переменных должны определенно выбираться с большей ясностью (например, test
), но я приведу в порядок свой код в конце своей магистерской диссертации ... возможно: -)