Ваш код не работает, потому что количество столбцов в `df1` и `df2` различается.
from io import StringIO
import pandas as pd
# Sample data: the first table has an extra "Oth" column that the second lacks.
x1 = """No.,col1,col2,col3,Type,Oth
123,2,5,2,MN,...
453,4,3,1,MN,...
146,7,9,4,AA,...
175,2,4,3,MN,...
643,0,0,0,NAN,...
"""
x2 = """No.,col1,col2,col3,Type
123,24,57,22,MN
453,41,39,15,MN
175,21,43,37,MN
"""
# "," is already read_csv's default separator, so it is not spelled out.
df1 = pd.read_csv(StringIO(x1))
df2 = pd.read_csv(StringIO(x2))
# Overwrite only the five columns shared with df2, on the rows typed 'MN'.
# The right-hand side is a bare array, so values are matched by position,
# not by row/column label.
df1.loc[df1["Type"] == "MN", ["No.", "col1", "col2", "col3", "Type"]] = (
    df2.to_numpy()
)
# Output:
# >>> print(df1)
# No. col1 col2 col3 Type Oth
# 0 123 24 57 22 MN ...
# 1 453 41 39 15 MN ...
# 2 146 7 9 4 AA ...
# 3 175 21 43 37 MN ...
# 4 643 0 0 0 NAN ...
Однако возникает проблема, если порядок столбцов в `df1` и `df2` различается.
# Start again from the untouched sample data.
df1 = pd.read_csv(StringIO(x1))
# Same rows as df2, but with "Type" moved to the second position.
df3 = df2[["No.", "Type", "col1", "col2", "col3"]].copy()
# The array is written column-by-position, so df3's "Type" values land in
# col1, its col1 values in col2, and so on (see the output below).
df1.loc[df1["Type"] == "MN", ["No.", "col1", "col2", "col3", "Type"]] = (
    df3.to_numpy()
)
# Output:
# >>> print(df1)
# No. col1 col2 col3 Type Oth
# 0 123 MN 24 57 22 ...
# 1 453 MN 41 39 15 ...
# 2 146 7 9 4 AA ...
# 3 175 MN 21 43 37 ...
# 4 643 0 0 0 NAN ...
Чтобы этого избежать, можно явно задать порядок столбцов:
# Re-read df1 first: the previous example overwrote its "Type" column on the
# former 'MN' rows, so without a fresh copy the mask below would select
# nothing and the output shown here could not be reproduced.
df1 = pd.read_csv(StringIO(x1), sep=",")
# Select df3's columns in exactly the order of the assignment target, so the
# positional array lines up with the target columns.
df1.loc[df1.Type == 'MN', ["No.","col1","col2","col3","Type"]] = (
    df3[["No.","col1","col2","col3","Type"]].values)
# Output:
# >>> print(df1)
# No. col1 col2 col3 Type Oth
# 0 123 24 57 22 MN ...
# 1 453 41 39 15 MN ...
# 2 146 7 9 4 AA ...
# 3 175 21 43 37 MN ...
# 4 643 0 0 0 NAN ...
Тем не менее проблема остаётся, если количество записей 'MN' в `df1` и `df2` различается:
# Fresh copy of the sample data for this example.
df1 = pd.read_csv(StringIO(x1))
# Keep only the first two rows of df2, so it has fewer 'MN' rows than df1.
df4 = df2.head(2).copy()
# Two rows of values cannot fill the three 'MN' rows selected in df1.
df1.loc[df1["Type"] == "MN", ["No.", "col1", "col2", "col3", "Type"]] = (
    df4[["No.", "col1", "col2", "col3", "Type"]].to_numpy()
)
# Error:
# ValueError: shape mismatch: value array of shape (2,) could not be broadcast to
# indexing result of shape (3,)
Итак, вам, вероятно, нужно что-то вроде этого:
# Align df2 to df1 by key instead of by position: a left merge keeps every
# df1 row and adds df2's values as col*_y (NaN where df2 has no matching row).
df = pd.merge(df1, df2, how='left', on=['No.', 'Type'])
for col in ['col1', 'col2', 'col3']:
    # Vectorized pick: df2's value on 'MN' rows, df1's original value elsewhere.
    # NOTE: an 'MN' row with no match in df2 ends up NaN; chain
    # .fillna(df[col + '_x']) if the original value should be kept instead.
    df[col] = df[col + '_y'].where(df['Type'] == 'MN', df[col + '_x'])
# Drop the helper *_x / *_y columns (and "Oth") from the result.
df = df[["No.","col1","col2","col3","Type"]]
# Output:
#>>> print(df)
# No. col1 col2 col3 Type
#0 123 24.0 57.0 22.0 MN
#1 453 41.0 39.0 15.0 MN
#2 146 7.0 9.0 4.0 AA
#3 175 21.0 43.0 37.0 MN
#4 643 0.0 0.0 0.0 NAN