Используйте networkx
с connected_components,
чтобы построить словарь «узел → номер компоненты связности», а затем Series.map,
чтобы создать новый столбец:
import networkx as nx
# Build an undirected graph whose edges link each record to its predecessor.
g = nx.Graph()
# A '-' in PrevNo is replaced by the row's own 'No' value, so the row
# contributes a self-loop edge and its node is still added to the graph.
df['PrevNo'] = df['PrevNo'].mask(df['PrevNo'] == '-', df['No'])
# If the placeholders are missing values (NaN) rather than '-', use instead:
# df['PrevNo'] = df['PrevNo'].fillna(df['No'])
g.add_edges_from(df[['No', 'PrevNo']].itertuples(index=False))
# nx.connected_components returns a one-shot generator; materialize it so the
# components can safely be iterated again later.
connected_components = list(nx.connected_components(g))
# Map every node to the 1-based id of the component that contains it.
node2id = {}
for cid, component in enumerate(connected_components, start=1):
    for node in component:
        node2id[node] = cid
df['UniqueID'] = df['No'].map(node2id)
print(df)
No IsRenew PrevNo UniqueID
0 IAB19 True IAB19 1
1 IAB25 False IAB25 2
2 IAB56 True IAB19 1
3 IAB22 True IAB56 1
4 IAB81 True IAB22 1
5 IAB82 True IAB82 3
6 IAB89 False IAB82 3
ОБНОВЛЕНИЕ: по второй правке вопроса — используйте defaultdict,
чтобы получить списки узлов для каждого идентификатора:
from collections import defaultdict

# Group the nodes by the 1-based id of their connected component.
# The components are recomputed from `g` because nx.connected_components
# returns a generator, which yields nothing once it has been iterated.
d = defaultdict(list)
for cid, component in enumerate(nx.connected_components(g), start=1):
    d[cid].extend(component)
print(dict(d))
{1: ['IAB56', 'IAB19', 'IAB22', 'IAB81'], 2: ['IAB25'], 3: ['IAB89', 'IAB82']}