import numpy as np
import pandas as pd
# Build an example DataFrame in which every "sampleN" row acts as a header
# for the analyte rows that follow it, up to the next header or end of data.
data = [
    {'analyte': 'sample1'},
    {'analyte': 'bacon', 'CAS1': 1},
    {'analyte': 'eggs', 'CAS1': 2},
    {'analyte': 'money', 'CAS1': 3, 'CAS2': 1, 'Value2': 1.11},
    {'analyte': 'shoe', 'CAS1': 4},
    {'analyte': 'boy', 'CAS1': 5},
    {'analyte': 'girl', 'CAS1': 6},
    {'analyte': 'onion', 'CAS1': 7, 'CAS2': 4, 'Value2': 6.53},
    {'analyte': 'sample2'},
    {'analyte': 'bacon', 'CAS1': 1},
    {'analyte': 'eggs', 'CAS1': 2, 'CAS2': 1, 'Value2': 7.88},
    {'analyte': 'money', 'CAS1': 3},
    {'analyte': 'shoe', 'CAS1': 4, 'CAS2': 3, 'Value2': 15.5},
    {'analyte': 'boy', 'CAS1': 5},
    {'analyte': 'girl', 'CAS1': 6},
    {'analyte': 'onion', 'CAS1': 7},
]
df = pd.DataFrame(data)

# Index labels of the sample header rows (equal to row positions here, because
# df has the default RangeIndex), followed by len(df) as an end sentinel so
# that consecutive pairs (start, stop) delimit each sample's rows.
# For the example above: s = [0, 8, 16]
is_header = df['analyte'].str.startswith('sample', na=False)
s = list(df.index[is_header]) + [len(df)]

# Split the frame into one DataFrame per sample, keyed by the sample name.
samples = {}
for i, j in zip(s, s[1:]):
    # Rows strictly between header i and the next header (or the sentinel) j,
    # renumbered from 0 so each per-sample frame has its own clean index.
    sample_df = df.iloc[i + 1:j, :].reset_index(drop=True)
    sample_name = df.iloc[i].loc['analyte']
    samples[sample_name] = sample_df

print(samples['sample2'])
# Other options:
# Alternative ways to locate the sample header rows using the CAS1 column
# instead of the analyte name. Each statement assigns the same variable, so
# they are mutually exclusive: keep only the one that matches how the header
# rows are encoded in your data (if all run, the last assignment wins).
# As with the name-based lookup, len(df) is appended as an end sentinel.

# if CAS1 cell of sample row is NaN
# (np.isnan is applied to the whole column at once; like the element-wise
# form, it only works on numeric data and raises TypeError otherwise)
sample_indices = list(df.index[np.isnan(df['CAS1'])]) + [len(df)]
# if CAS1 cell of sample row is NaN or None
sample_indices = list(df.index[df['CAS1'].isna()]) + [len(df)]
# if CAS1 cell of sample row is an empty string
sample_indices = list(df.index[df['CAS1'] == '']) + [len(df)]