Вы можете использовать:
# First, flatten the comma-separated 'Course Code' values so that each
# (WR ID, K ID) pair gets one entry per individual course code.
s = (
    df2.set_index(['WR ID', 'K ID'])['Course Code']
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    .str.split(r',\s+', expand=True)
    .stack()  # reshape the split columns into one long Series
    .reset_index(level=2, drop=True)  # drop the split-position index level
    .rename('Course Code')
)
#print (s)
# Attach the flattened course codes to the remaining columns, then collect
# the 'WR ID' / 'K ID' values of each course code into lists.
without_codes = df2.drop(columns='Course Code')
exploded = without_codes.join(s, on=['WR ID', 'K ID'])
df2 = exploded.groupby('Course Code').agg(list).reset_index()
print(df2)
Course Code WR ID K ID
0 C001 [SP-RS-03] [K006]
1 C002 [SP-RS-01, SP-RS-01] [K001, K004]
2 C003 [SP-RS-03] [K004]
3 C004 [SP-RS-01, SP-RS-03] [K001, K004]
from collections import Counter
# Flatten the nested lists, count every value with Counter, and format each
# one as 'value' (seen once) or 'valuexN' (seen N > 1 times).
def f(x):
    """Join the distinct values of nested lists, marking repeats as 'kxN'.

    x is an iterable of iterables of strings (for example a Series whose
    cells are lists). Values are emitted in first-seen order, separated by
    ', '. Every element of x must itself be iterable: a NaN cell raises
    TypeError.
    """
    counts = Counter(z for y in x for z in y)
    return ', '.join(f'{k}x{v}' if v > 1 else k for k, v in counts.items())
# Bring the per-course lists onto every row of df1 (left merge keeps all df1
# rows), then collapse to a single row per 'Student ID'.
merged = df1.merge(df2, how='left', on='Course Code')
per_student = merged.groupby('Student ID').agg({
    'Course Code': ', '.join,
    'WR ID': f,
    'K ID': f,
})
df = per_student.reset_index()
print(df)
Student ID Course Code WR ID K ID
0 1 C001, C002 SP-RS-03, SP-RS-01x2 K006, K001, K004
1 2 C003 SP-RS-03 K004
2 3 C002, C001, C004 SP-RS-01x3, SP-RS-03x2 K001x2, K004x2, K006
3 4 C004 SP-RS-01, SP-RS-03 K001, K004
ПРАВКА:
Проблема в том, что после слияния в данных появляются пропущенные значения; решение — заменить их пустыми списками:
from collections import Counter
# Flatten the nested lists, count every value with Counter, and format each
# one as 'value' (seen once) or 'valuexN' (seen N > 1 times).
def f(x):
    """Join the distinct values of nested lists, marking repeats as 'kxN'.

    x is an iterable of iterables of strings (for example a Series whose
    cells are lists). Values are emitted in first-seen order, separated by
    ', '. Empty inner lists contribute nothing; every element of x must
    itself be iterable.
    """
    counts = Counter(z for y in x for z in y)
    return ', '.join(f'{k}x{v}' if v > 1 else k for k, v in counts.items())
# Merge the per-course lists onto df1, then aggregate again per student.
df = df1.merge(df2, on='Course Code', how='left')
# Course codes with no match in df2 come out of the left merge as NaN, which
# f cannot iterate, so every non-list cell is replaced by an empty list.
# Series.apply is used per column because DataFrame.applymap is deprecated
# since pandas 2.1.
for col in ['WR ID', 'K ID']:
    df[col] = df[col].apply(lambda x: x if isinstance(x, list) else [])
df = (
    df.groupby('Student ID')
    .agg({'Course Code': ', '.join, 'WR ID': f, 'K ID': f})
    .reset_index()
)