Не используйте циклы for. Изучите внутренние и внешние соединения (inner/outer join) в базах данных — эти понятия рассматриваются в любом вводном курсе по SQL. После этого применить их к объекту DataFrame в pandas довольно просто:
#!/usr/bin/env python
import numpy as np
import pandas as pd
# Simulate a small boolean document-term matrix: rows are documents, columns
# are terms, and True marks that the term occurs in the document.
x = pd.DataFrame(np.random.normal(0, 1, (4, 4)), index=['a', 'b', 'c', 'd'], columns=['e', 'f', 'g', 'h'])
# Rebind to a fresh boolean frame instead of assigning booleans into the
# float frame in place (`x[:] = ...`), which would mix dtypes.
x = x > 0
# Example output (the data is random, so the values differ from run to run):
#        e      f      g      h
# a  False  False   True  False
# b  False  False  False   True
# c   True   True   True   True
# d  False   True   True   True

# Long ("sparse") form: one row per (document, term) pair that is present.
# The stacked boolean Series is filtered by its own values, so the result does
# not depend on stack() dropping missing entries.
stacked = x.stack()
sparse = pd.DataFrame(stacked[stacked].index.tolist(), columns=['Documents', 'Terms'])
#   Documents Terms
# 0         a     g
# 1         b     h
# 2         c     e
# 3         c     f
# 4         c     g
# 5         c     h
# 6         d     f
# 7         d     g
# 8         d     h

# Self-join on the document: every ordered pair of terms that share a document
# becomes one row (including each term paired with itself).
cooccurrences = pd.merge(sparse, sparse, how='inner', on='Documents')
#    Documents Terms_x Terms_y
# 0          a       g       g
# 1          b       h       h
# 2          c       e       e
# 3          c       e       f
# 4          c       e       g
# 5          c       e       h
# 6          c       f       e
# 7          c       f       f
# 8          c       f       g
# 9          c       f       h
# 10         c       g       e
# 11         c       g       f
# 12         c       g       g
# 13         c       g       h
# 14         c       h       e
# 15         c       h       f
# 16         c       h       g
# 17         c       h       h
# 18         d       f       f
# 19         d       f       g
# 20         d       f       h
# 21         d       g       f
# 22         d       g       g
# 23         d       g       h
# 24         d       h       f
# 25         d       h       g
# 26         d       h       h

# Remove self loops and repeat pairings such as the second tuple in
# (u, v), (v, u): a strict ordering keeps exactly one row per unordered pair.
valid = cooccurrences['Terms_x'] > cooccurrences['Terms_y']
valid_cooccurrences = cooccurrences[valid]
#    Documents Terms_x Terms_y
# 6          c       f       e
# 10         c       g       e
# 11         c       g       f
# 14         c       h       e
# 15         c       h       f
# 16         c       h       g
# 21         d       g       f
# 24         d       h       f
# 25         d       h       g

# Number of documents in which each term pair co-occurs.
counts = valid_cooccurrences.groupby(['Terms_x', 'Terms_y']).count()
#                  Documents
# Terms_x Terms_y
# f       e                1
# g       e                1
#         f                2
# h       e                1
#         f                2
#         g                2

# The set of documents behind each count. The lambda parameter is named
# `docs` so that it does not shadow the module-level frame `x`.
documents = valid_cooccurrences.groupby(['Terms_x', 'Terms_y']).aggregate(lambda docs: set(docs))
#                 Documents
# Terms_x Terms_y
# f       e             {c}
# g       e             {c}
#         f          {d, c}
# h       e             {c}
#         f          {d, c}
#         g          {d, c}