Как насчёт такого кода? Это быстрое решение от новичка, но, думаю, оно работает хорошо.
import pandas as pd
# Build the sample dataframe: seven string columns, four rows.
df = pd.DataFrame(
    data={
        'col1': ['a', 'a', 'a', 'a'],
        'col2': ['b', 'a', 'b', 'q'],
        'col3': ['c', 'c', 'c', 'q'],
        'col4': ['d', 'd', 'd', 'q'],
        'col5': ['e', 'e', 'a', 'q'],
        'col6': ['f', 'f', 'a', 'q'],
        'col7': ['g', 'g', 'g', 'q'],
    }
)


def count_almost_dups(frame, min_matches=5):
    """Count the near-duplicate rows of every row in *frame*.

    Two rows are "almost duplicates" when they hold equal values in at
    least ``min_matches`` columns (compared position by position).

    Args:
        frame: The dataframe whose rows are compared with each other.
        min_matches: Minimum number of equal columns for two rows to be
            considered almost duplicates. Defaults to 5.

    Returns:
        A list with one int per row of *frame* (in row order): the number
        of OTHER rows that are almost duplicates of that row. Empty when
        *frame* has no rows.
    """
    # Extract the cell values once instead of calling ``iloc`` per row pair.
    values = frame.to_numpy()
    counts = []
    for pos, row in enumerate(values):
        # Broadcast ``row`` against every row, then count equal cells per row.
        similar = (values == row).sum(axis=1) >= min_matches
        # A row must never be counted as its own almost-duplicate.
        similar[pos] = False
        counts.append(int(similar.sum()))
    return counts


# Number of almost-duplicate rows for each row, stored as a new column.
almost_dups = count_almost_dups(df)
df['almost_dups'] = almost_dups