Простая версия ниже.
Обобщённая версия:
import numpy as np
import pandas as pd
# Number of adjacent columns that form one logical group (A.x, A.y, A.z -> "A").
# The column count must be a multiple of this, otherwise the reshape below raises.
GROUP_SIZE = 3

df = pd.DataFrame(
    [
        [0.1, 0.2, 0.2, 0.3, 0.2, 0.2],
        [0.1, 0.2, 0.2, 0.3, 0.2, 0.2],
        [0.1, 0.2, 0.2, 0.2, 0.2, 0.2],
        [0.3, 0.2, 0.2, 0.3, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.1, 0.2, 0.2],
        [0.3, 0.2, 0.2, 0.1, 0.2, 0.2],
        [0.3, 0.2, 0.2, 0.1, 0.2, 0.2],
    ],
    columns="A.x A.y A.z B.x B.y B.z".split(),
)
#    A.x  A.y  A.z  B.x  B.y  B.z
# 0  0.1  0.2  0.2  0.3  0.2  0.2
# 1  0.1  0.2  0.2  0.3  0.2  0.2
# 2  0.1  0.2  0.2  0.2  0.2  0.2
# 3  0.3  0.2  0.2  0.3  0.2  0.2
# 4  0.2  0.2  0.2  0.1  0.2  0.2
# 5  0.3  0.2  0.2  0.1  0.2  0.2
# 6  0.3  0.2  0.2  0.1  0.2  0.2

# identify repeating data: True where a cell equals the cell directly above it
diff = df.values[:-1] == df.values[1:]
# [[ True,  True,  True,  True,  True,  True],
#  [ True,  True,  True, False,  True,  True],
#  [False,  True,  True, False,  True,  True],
#  [False,  True,  True, False,  True,  True],
#  [False,  True,  True,  True,  True,  True],
#  [ True,  True,  True,  True,  True,  True]]

# the first row has nothing above it, so it is never a repeat:
# prepend an all-False row so the mask has one row per DataFrame row
allfalse = np.full((1, diff.shape[1]), False)
# [[False, False, False, False, False, False]]
diff2 = np.concatenate((allfalse, diff), axis=0)

# split the columns into consecutive groups of GROUP_SIZE
grouped = diff2.reshape(diff2.shape[0], diff2.shape[1] // GROUP_SIZE, GROUP_SIZE)
# [[[False, False, False], [False, False, False]],
#  [[ True,  True,  True], [ True,  True,  True]],
#  [[ True,  True,  True], [False,  True,  True]],
#  [[False,  True,  True], [False,  True,  True]],
#  [[False,  True,  True], [False,  True,  True]],
#  [[False,  True,  True], [ True,  True,  True]],
#  [[ True,  True,  True], [ True,  True,  True]]]

# a group counts as repeated only if every member of the group is repeated
mask = np.all(grouped, axis=2)
# [[False, False],
#  [ True,  True],
#  [ True, False],
#  [False, False],
#  [False, False],
#  [False,  True],
#  [ True,  True]]

# clear partially matching groups so each group is all-True or all-False
grouped[~mask] = False
# [[[False, False, False], [False, False, False]],
#  [[ True,  True,  True], [ True,  True,  True]],
#  [[ True,  True,  True], [False, False, False]],
#  [[False, False, False], [False, False, False]],
#  [[False, False, False], [False, False, False]],
#  [[False, False, False], [ True,  True,  True]],
#  [[ True,  True,  True], [ True,  True,  True]]]

# finally reshape back into the original (rows, columns) shape
repeated = grouped.reshape(diff2.shape[0], diff2.shape[1])
# [[False, False, False, False, False, False],
#  [ True,  True,  True,  True,  True,  True],
#  [ True,  True,  True, False, False, False],
#  [False, False, False, False, False, False],
#  [False, False, False, False, False, False],
#  [False, False, False,  True,  True,  True],
#  [ True,  True,  True,  True,  True,  True]]

# set repeating values to NaN.
# Use DataFrame.mask instead of assigning into df.values: the array returned
# by .values is not guaranteed to be a writable view of the frame (it is a
# copy for mixed dtypes and read-only under pandas Copy-on-Write), so writing
# to it may silently do nothing or raise.
df = df.mask(repeated)
#    A.x  A.y  A.z  B.x  B.y  B.z
# 0  0.1  0.2  0.2  0.3  0.2  0.2
# 1  NaN  NaN  NaN  NaN  NaN  NaN
# 2  NaN  NaN  NaN  0.2  0.2  0.2
# 3  0.3  0.2  0.2  0.3  0.2  0.2
# 4  0.2  0.2  0.2  0.1  0.2  0.2
# 5  0.3  0.2  0.2  NaN  NaN  NaN
# 6  NaN  NaN  NaN  NaN  NaN  NaN
Более простая версия:
import numpy as np
import pandas as pd
df = pd.DataFrame(
    [
        [0.1, 0.2, 0.2],
        [0.1, 0.2, 0.2],
        [0.1, 0.2, 0.2],
        [0.3, 0.2, 0.2],
        [0.2, 0.2, 0.2],
        [0.3, 0.2, 0.2],
        [0.3, 0.2, 0.2],
    ],
    columns="A.x A.y A.z".split(),
)
#    A.x  A.y  A.z
# 0  0.1  0.2  0.2
# 1  0.1  0.2  0.2
# 2  0.1  0.2  0.2
# 3  0.3  0.2  0.2
# 4  0.2  0.2  0.2
# 5  0.3  0.2  0.2
# 6  0.3  0.2  0.2

# element-wise equality of each row with the row directly above it
diff = df.values[:-1] == df.values[1:]
# [[ True,  True,  True],
#  [ True,  True,  True],
#  [False,  True,  True],
#  [False,  True,  True],
#  [False,  True,  True],
#  [ True,  True,  True]]

# collapse each row into a single flag with np.all(..., axis=1), then
# prepend False for the first row (it has no predecessor) so the mask
# length equals the number of rows in the original DF
repeated = np.insert(np.all(diff, axis=1), 0, False)
# [False,  True,  True, False, False, False,  True]

# modify df in-place.
# Assign through .loc rather than df.values: the array returned by .values
# is not guaranteed to be a writable view of the frame (copy for mixed
# dtypes, read-only under pandas Copy-on-Write). A scalar NaN is broadcast
# over the selected rows, so this works for any number of columns.
df.loc[repeated] = np.nan
#    A.x  A.y  A.z
# 0  0.1  0.2  0.2
# 1  NaN  NaN  NaN
# 2  NaN  NaN  NaN
# 3  0.3  0.2  0.2
# 4  0.2  0.2  0.2
# 5  0.3  0.2  0.2
# 6  NaN  NaN  NaN
Я уверен, что это может быть сделано красивее и эффективнее, но это шаг 2:)
Я ещё подумаю над частью с B.x … C.x и опубликую обновление.
Наслаждайтесь!