Вероятно, подойдёт что-то вроде этого:
import pandas as pd
from itertools import groupby
# Sample input: runs are detected on 'col1' and measured with 'start'.
df = pd.DataFrame({
    'col1': ['A>G', 'C>T', 'C>T', 'G>T', 'C>T', 'A>G', 'A>G', 'A>G', 'C>T', 'C>T', 'C>T'],
    'col2': ['TCT', 'ACA', 'TCA', 'TCA', 'GCT', 'ACT', 'CTG', 'ATG', 'ACA', 'TCA', 'TCA'],
    'start': [1000, 2000, 3000, 4000, 5000, 6000, 10000, 20000, 2000, 3000, 4000],
})

# Summarise every run of two or more consecutive identical 'col1' values:
#   type   - the repeated value
#   length - number of rows in the run
#   diff   - 'start' of the last row of the run minus 'start' of the first
starts = df['start']
final = []
pos = 0  # positional index of the first row of the current run
# groupby() without a key groups *adjacent* equal items, which is exactly a
# run; iterating the column directly avoids building a Series per row.
for _, g in groupby(df['col1']):
    run_length = len(list(g))
    first_pos = pos
    last_pos = pos + run_length - 1
    if run_length > 1:
        final.append({
            'type': df['col1'].iloc[first_pos],
            'length': run_length,
            'diff': starts.iloc[last_pos] - starts.iloc[first_pos],
        })
    pos = last_pos + 1

# Pin the columns so the frame has the same shape even when no run was found,
# and so the column order does not depend on the pandas/Python version.
final = pd.DataFrame(final, columns=['diff', 'length', 'type'])
print(final)
Вывод:
    diff  length type
0   1000       2  C>T
1  14000       3  A>G
2   2000       3  C>T