Вы можете использовать pandas и сделать что-то вроде:
import pandas as pd
# Sample data standing in for the rows that would be read from the Excel file.
# Each row is (x, y, z), where z is the set whose duplicates we want to detect.
rows = [
    ("a", "b", {"a", "b"}),
    ("b", "c", {"b", "c"}),  # the comma after this tuple is required
    ("b", "a", {"a", "b"}),
]
columns = ["x", "y", "z"]
# Build a DataFrame (like a table in Excel) from the sample rows.
df = pd.DataFrame.from_records(rows, columns=columns)
# For real data, read the file instead: pd.read_excel("my_excel_file")
>>> df
x y z
0 a b {b, a}
1 b c {b, c}
2 b a {b, a}
# Now put the values you want to find duplicates of into another, hashable column
# (sets are not hashable, so they are converted to tuples).
# Sorting first makes equal sets always map to the same tuple, because set
# iteration order is not guaranteed; e.g. {'b', 'a'} becomes ('a', 'b').
# If the set elements cannot be ordered, frozenset(members) works as well.
df["dup_clmns"] = df["z"].apply(lambda members: tuple(sorted(members)))
>>> df
x y z dup_clmns
0 a b {b, a} (b, a)
1 b c {b, c} (b, c)
2 b a {b, a} (b, a)
# Now use "duplicated" or "drop_duplicates" (or whatever you need) on the helper column.
# drop_duplicates returns a new DataFrame and leaves df unchanged, so keep the result:
# rows whose "dup_clmns" value already appeared are removed, the first occurrence is kept.
deduplicated = df.drop_duplicates(subset="dup_clmns", keep="first")
# Boolean Series: True for every row whose "dup_clmns" value already appeared in an
# earlier row (i.e. the rows that drop_duplicates above would remove).
is_duplicate = df.duplicated(subset="dup_clmns")