Примечания:
- Это скребок данных по ссылке, приведенной в начале кода;не мои данные, только отформатированные мной
- У меня есть определенный набор столбцов, которые не могут быть пустыми
- У меня есть сообщение об ошибке для каждого связанного столбца
- Я пытаюсьнайти ВСЕ столбцы, которые являются NA, и назначить сообщения об ошибках соответствующей строке
- . Для целей тестирования я создал столбец комментариев, в котором должны появляться сообщения
Вопросы:
- Что не так с циклом, который я написал?
- Будет очень любезно, если вы подскажете, как мне это исправить?
Данные:
data={'Multi Engine Turbine Helicopters':["AS 355 NP","MD 902","EC 135 P2i / T2i","A109 Grand",
"Bell 429","EC 145","Bell 430 (Wheels)","EC 155B1",
"S-76C++","Bell 412 EP","AW 169","AW 139","AW 189",
"Bell 525","EC 175","AS 332 L1","AS 332 L2","EC 225",
"S-92","AW 101"],
'Base Price New - Million':[3.5,6,6,6,6,7.5,8,9.5,10,8,"Nan",13,"Nan","Nan","Nan",15,16,17,20,30],
'Direct Operating Cost / HR':["Nan",660,"Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan"],
'Op Cost / HR as on':["Nan",2007,"Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan","Nan"],
'M.A.U.W. - lbs':[5732,6250,6415,6614,6930,7904,9300,10846,11700,11900,10000,14110,18000,18000,17196,18960,20502,24251,26500,34392],
'Empty Weight - lbs':[3285,3375,3208,3510,4290,3977,5364,5774,7005,6863,"Nan",7985,"Nan","Nan",10150,9920,10410,11587,16875,"Nan"],
'Useful Load - lbs':[2447,2875,3208,3104,2640,3927,3936,5072,4695,5037,"Nan",6125,"Nan","Nan",7046,9040,10092,12664,9625,"NaN"],
'Max Sling Load - lbs':[2000, 3000,"NaN",2005,2200,3307,2800,3527,3300,4500,"Nan",5500,"Nan","Nan","Nan",9920,11020,11020,10000,"Nan"],
'Max Range - nm':[385,257,340,356,350,370,275,427,380,402,"Nan",437,"Nan",500,"Nan",454,447,443,600,540],
'Fast Cruise - knots':[120,134,140,154,142,133,143,143,155,120,145,167,145,150,"Nan",141,150,141,153,150],
'HOGE Ceiling ISA - ft':[7180,10400,6600,10000,8000,2530,6200,"Nan",3300,5200,"Nan",5500,"Nan","Nan",6000,7546,6430,1510,7000,"Nan"],
'Comments':["Nan","Nan","Max Sling Load - lbs cannot be blank","Nan","Nan","Nan","Nan","Nan","Nan","Nan",
"Base Price New - Million cannot be blank:Max Sling Load - lbs cannot be blank:Max Range - nm cannot be blank",
"Nan","Base Price New - Million cannot be blank:Useful Load - lbs cannot be blank:Max Sling Load - lbs cannot be blank:Max Range - nm cannot be blank",
"Base Price New - Million cannot be blank:Useful Load - lbs cannot be blank:Max Sling Load - lbs cannot be blank",
"Base Price New - Million cannot be blank:Max Sling Load - lbs cannot be blank:Max Range - nm cannot be blank",
"Nan","Nan","Nan","Nan","Useful Load - lbs cannot be blank:Max Sling Load - lbs cannot be blank"]
}
Код:
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 6 17:49:31 2019
@author: Arun
"DAta from http://www.helinews.com/multicomparison.php"""
import pandas as pd
import numpy as np
colsthatmustcontaindata=['Base Price New - Million','Useful Load - lbs','Max Sling Load - lbs','Max Range - nm']
colsmissingdataerrmsg=['Base Price New - Million cannot be blank','Useful Load - lbs cannot be blank','Max Sling Load - lbs cannot be blank','Max Range - nm cannot be blank']
df = pd.DataFrame(data)
print("Before NaN replacement\n")
print(df)
df1=df.replace("Nan",np.NaN)
print("\nAfter NaN replacement\n")
print(df1)
filteredDf = df1[df1[colsthatmustcontaindata].isna().any(axis=1)]
print("\nFiltered List\n")
print (filteredDf)
filteredDf['Verification Comments']=""
strs=[]
for i in colsthatmustcontaindata:
strs = np.where(filteredDf[i].isnull(),filteredDf['Verification Comments'] + " : " + str(colsthatmustcontaindata.index(i)) + " : " + colsmissingdataerrmsg[colsthatmustcontaindata.index(i)],"")
print("\nAfter the Loop\n")
print (filteredDf)