Идея - уровни соответствия a
и b
и повторение столбцов, второй уровень, используемый для DataFrame.reindex
:
np.random.seed(123)
col0 = ['Set 1','Set 1','Set 1','Set 1','Set 2','Set 2','Set 2','Set 2','Set 2','Set 2']
col1 = ['paa','paa','jaa','paa','jaa','jaa','jaa','paa','paa','paa']
a = pd.DataFrame(data = np.random.randint(6, size=(3, 10)), columns = [col0,col1])
col0 = ['Set 1','Set 1','Set 2','Set 2']
col1 = ['P1_1','P1_2','P2_1','P2_2']
b = pd.DataFrame(data = np.random.randint(3, size=(3, 4)), columns = [col0,col1])
print (a)
Set 1 Set 2
paa paa jaa paa jaa jaa jaa paa paa paa
0 5 2 4 2 1 3 2 3 1 1
1 0 1 1 0 0 1 3 5 4 0
2 0 4 1 3 2 4 2 4 0 5
print (b)
Set 1 Set 2
P1_1 P1_2 P2_1 P2_2
0 0 1 0 0
1 0 2 1 1
2 2 2 2 1
#https://stackoverflow.com/a/3391105
def repeat_to_length(s, wanted):
return (s * (wanted//len(s) + 1))[:wanted]
out = []
for lvl in a.columns.levels[0]:
colsa = a.xs(lvl, axis=1, level=0).columns.tolist()
colsb = b.xs(lvl, axis=1, level=0).columns.tolist()
lvl1 = repeat_to_length(colsb, len(colsa))
out.extend(list(zip([lvl] * len(lvl1), lvl1)))
print (out)
[('Set 1', 'P1_1'), ('Set 1', 'P1_2'), ('Set 1', 'P1_1'),
('Set 1', 'P1_2'), ('Set 2', 'P2_1'), ('Set 2', 'P2_2'),
('Set 2', 'P2_1'), ('Set 2', 'P2_2'), ('Set 2', 'P2_1'), ('Set 2', 'P2_2')]
mux = pd.MultiIndex.from_tuples(out)
print (mux)
MultiIndex([('Set 1', 'P1_1'),
('Set 1', 'P1_2'),
('Set 1', 'P1_1'),
('Set 1', 'P1_2'),
('Set 2', 'P2_1'),
('Set 2', 'P2_2'),
('Set 2', 'P2_1'),
('Set 2', 'P2_2'),
('Set 2', 'P2_1'),
('Set 2', 'P2_2')],
)
c = b.reindex(mux, axis=1)
print (c)
Set 1 Set 2
P1_1 P1_2 P1_1 P1_2 P2_1 P2_2 P2_1 P2_2 P2_1 P2_2
0 0 1 0 1 0 0 0 0 0 0
1 0 2 0 2 1 1 1 1 1 1
2 2 2 2 2 2 1 2 1 2 1