Если я вас правильно понял, вам нужно построить новый `MultiIndex` из уровня индекса `Dates` и столбца `mat`, а затем получить значения `curve` из `panel2` для этого индекса.
import pandas as pd
import numpy as np
# Demo: for every row of panel1, fetch the `curve` value of panel2 whose
# (Dates, yr) key equals the row's date and its `mat` value.
np.random.seed(12)  # fixed seed so the tables printed below are reproducible

dates = ["2017-05-02", "2017-05-03", "2017-05-04"]

# panel1: one row per (Dates, id) pair, random integers in {0, 1, 2}.
idx1 = pd.MultiIndex.from_product(
    [dates, ["id1", "id2", "id3"]],
    names=["Dates", "id"],
)
panel1 = pd.DataFrame(
    np.random.randint(3, size=(9, 2)),
    index=idx1,
    columns=["ytm", "mat"],
)

# panel2: one row per (Dates, yr) pair; the yr labels are strings.
idx2 = pd.MultiIndex.from_product(
    [dates, ["0", "1", "2"]],
    names=["Dates", "yr"],
)
curve_values = np.random.randint(3, size=9)
panel2 = pd.DataFrame(curve_values, index=idx2, columns=["curve"])

print(panel1)
#                 ytm  mat
# Dates      id
# 2017-05-02 id1    2    1
#            id2    1    2
#            id3    0    0
# 2017-05-03 id1    2    1
#            id2    0    1
#            id3    1    1
# 2017-05-04 id1    2    2
#            id2    2    0
#            id3    1    0
print(panel2)
#                curve
# Dates      yr
# 2017-05-02 0       0
#            1       1
#            2       2
# 2017-05-03 0       1
#            1       2
#            2       0
# 2017-05-04 0       1
#            1       2
#            2       0

# Build one (Dates, yr) lookup key per panel1 row. `mat` is cast to str
# because the yr level of panel2 holds string labels.
row_dates = panel1.index.get_level_values("Dates")
row_years = panel1["mat"].astype(str)
lookup_keys = pd.MultiIndex.from_arrays(
    [row_dates, row_years],
    names=["Dates", "yr"],
)

# .to_numpy() drops panel2's index so the values are assigned positionally,
# row by row, instead of being aligned on panel1's (Dates, id) index.
panel1["CDM"] = panel2.loc[lookup_keys].to_numpy()

print(panel1)
#                 ytm  mat  CDM
# Dates      id
# 2017-05-02 id1    2    1    1
#            id2    1    2    2
#            id3    0    0    0
# 2017-05-03 id1    2    1    2
#            id2    0    1    2
#            id3    1    1    2
# 2017-05-04 id1    2    2    0
#            id2    2    0    1
#            id3    1    0    1
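EDIT
Вариант, в котором `mat` и `yr` сравниваются как числа с плавающей точкой, а вместо `.loc` используется `.reindex`: для ключей, которых нет в индексе (например, `NaN` в `mat`), `.reindex` возвращает `NaN`, а не вызывает `KeyError`.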
import pandas as pd
import numpy as np
# Demo: same lookup as a (Dates, yr) -> curve mapping, but with `mat` and `yr`
# compared as floats and with .reindex, so that a missing key (here a NaN
# in `mat`) yields NaN in the result.
np.random.seed(12)  # fixed seed so the tables printed below are reproducible
idx1 = pd.MultiIndex.from_product(
    [["2017-05-02", "2017-05-03", "2017-05-04"], ["id1", "id2", "id3"]],
    names=["Dates", "id"],
)
panel1 = pd.DataFrame(
    np.random.randint(3, size=(9, 2)), index=idx1, columns=["ytm", "mat"]
)
# Cast `mat` to float *before* inserting NaN. Writing NaN straight into an
# integer column relies on silent upcasting, which pandas >= 2.1 deprecates
# (FutureWarning) and which is slated to become an error (PDEP-6).
panel1["mat"] = panel1["mat"].astype(float)
panel1.iloc[0, 1] = np.nan  # simulate a row with a missing `mat`
idx2 = pd.MultiIndex.from_product(
    [["2017-05-02", "2017-05-03", "2017-05-04"], ["0", "1", "2"]],
    names=["Dates", "yr"],
)
panel2 = pd.DataFrame(np.random.randint(3, size=9), index=idx2, columns=["curve"])
# Convert the string yr labels to floats so they compare equal to `mat`.
panel2 = panel2.rename(float, level=1)
print(panel1)
#                 ytm  mat
# Dates      id
# 2017-05-02 id1    2  NaN
#            id2    1  2.0
#            id3    0  0.0
# 2017-05-03 id1    2  1.0
#            id2    0  1.0
#            id3    1  1.0
# 2017-05-04 id1    2  2.0
#            id2    2  0.0
#            id3    1  0.0
print(panel2)
#                 curve
# Dates      yr
# 2017-05-02 0.0      0
#            1.0      1
#            2.0      2
# 2017-05-03 0.0      1
#            1.0      2
#            2.0      0
# 2017-05-04 0.0      1
#            1.0      2
#            2.0      0
# One (Dates, yr) key per panel1 row. .reindex fills keys that are absent
# from panel2 with NaN; .to_numpy() drops the index so the values are
# assigned positionally rather than aligned on panel1's (Dates, id) index.
panel1["CDM"] = panel2.reindex(
    pd.MultiIndex.from_arrays(
        [panel1.index.get_level_values(0), panel1.mat.rename("yr")]
    )
).to_numpy()
print(panel1)
#                 ytm  mat  CDM
# Dates      id
# 2017-05-02 id1    2  NaN  NaN
#            id2    1  2.0  2.0
#            id3    0  0.0  0.0
# 2017-05-03 id1    2  1.0  2.0
#            id2    0  1.0  2.0
#            id3    1  1.0  2.0
# 2017-05-04 id1    2  2.0  0.0
#            id2    2  0.0  1.0
#            id3    1  0.0  1.0