Ваш DataFrame был создан, как показано ниже:
import pandas as pd
import numpy as np
# Sample input, written one source row per line; np.nan marks the empty cells.
# Row 0 carries the IDs that the later code promotes to the header.
df = pd.DataFrame(
    data=[
        [245, 245, 245, 867, 867],
        ['Reddit', np.nan, np.nan, 'Facebook', np.nan],
        ['ColumnNeeded', np.nan, 'ColumnValue', 'ColumnNeeded', 'ColumnValue'],
        ['RedditInsight', np.nan, 'C', 'FacebookInsights', 'A'],
        ['RedditText', np.nan, 'H', 'FacbookText', 'L'],
    ]
)
Ваш DataFrame выглядит следующим образом:
0 1 2 3 4
0 245 245.0 245 867 867
1 Reddit NaN NaN Facebook NaN
2 ColumnNeeded NaN ColumnValue ColumnNeeded ColumnValue
3 RedditInsight NaN C FacebookInsights A
4 RedditText NaN H FacbookText L
А теперь сам код:
# Reshape the raw frame into one record per (ID, needed item) pair with the
# columns ID / Company / ColumnNeeded / ColumnValue.

new_header = df.iloc[0]  # grab the first row (the IDs) for the header
df = df[1:]  # take the data less the header row
df.columns = new_header  # set the header row as the df header

# Drop the columns that contain nothing but NaN.
df = df.dropna(axis=1, how='all')

# Transpose: every remaining input column becomes a row labelled by its ID.
df = df.T

# The ID labels repeat after the transpose (a "needed" row followed by a
# "value" row for the same ID). Rows are therefore addressed by position,
# which removes the need to relabel the index by hand with made-up unique
# labels and to compare string prefixes of those labels.
company_pos = 0  # company name; only filled on the first row of a pair
first_detail_pos = 2  # position 1 is skipped; every later column is a detail

records = []
pending_pos = None  # position of a "needed" row still waiting for its partner
for pos, label in enumerate(df.index):
    if pending_pos is not None and df.index[pending_pos] == label:
        # Second row of the pair: emit one record per detail column.
        for col in range(first_detail_pos, df.shape[1]):
            records.append([
                # Stored as float so the ID column prints as before (245.0).
                float(label),
                df.iat[pending_pos, company_pos],
                df.iat[pending_pos, col],
                df.iat[pos, col],
            ])
        pending_pos = None
    else:
        # First row seen for this ID (an unpaired earlier row is discarded).
        pending_pos = pos

result = pd.DataFrame(
    records, columns=['ID', 'Company', 'ColumnNeeded', 'ColumnValue']
)
print(result)
Результат:
ID Company ColumnNeeded ColumnValue
0 245.0 Reddit RedditInsight C
1 245.0 Reddit RedditText H
2 867.0 Facebook FacebookInsights A
3 867.0 Facebook FacbookText L