Используйте булеву индексацию (boolean indexing): преобразуйте оба столбца в числа
и отберите строки, в которых значение predictions равно значению actual плюс 1
или значению actual минус 1.
Условия объединяются оператором | (побитовое OR),
а Series.eq используется для проверки, равны ли значения:
# Display the input DataFrame; its printed form is shown right below.
print (df)
predictions actual
8013 fifteen twenty
5146 sixteen seventeen
5691 seventeen sixteen
13255 sixteen fifteen
13921 nineteen fourteen
13077 fourteen fifteen
#https://stackoverflow.com/a/493788/2901002
def _build_numwords():
    """Return the default English word -> (scale, increment) lookup table."""
    units = [
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
    ]
    tens = ["twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
    scales = ["hundred", "thousand", "million", "billion", "trillion"]

    # "and" is a no-op filler word ("one hundred and five").
    table = {"and": (1, 0)}
    for idx, word in enumerate(units):
        table[word] = (1, idx)
    # Tens start at twenty, hence start=2; this avoids registering
    # placeholder empty-string keys in the table.
    for idx, word in enumerate(tens, start=2):
        table[word] = (1, idx * 10)
    # "hundred" is 10**2; every later scale is 10**(3 * idx).
    for idx, word in enumerate(scales):
        table[word] = (10 ** (idx * 3 or 2), 0)
    return table


# Built once at import time and shared by every call that passes no table.
_DEFAULT_NUMWORDS = _build_numwords()


def text2int(textnum, numwords=None):
    """Convert an English number phrase such as "twenty one" to an int.

    Words are separated by whitespace. A token that is not found verbatim
    in the lookup table is lower-cased and split on hyphens, so
    "Twenty-One" is accepted as well as "twenty one".

    Args:
        textnum: The phrase to convert. An empty or whitespace-only string
            yields 0.
        numwords: Optional mapping of word -> (scale, increment). When
            omitted, a shared built-in English table is used. When an empty
            dict is passed, it is filled in place with the built-in table.

    Returns:
        The integer value of the phrase.

    Raises:
        ValueError: If the phrase contains a word that is not in the table.
    """
    if numwords is None:
        numwords = _DEFAULT_NUMWORDS
    elif not numwords:
        # An empty caller-supplied dict gets populated in place.
        numwords.update(_DEFAULT_NUMWORDS)

    current = result = 0
    for token in textnum.split():
        if token in numwords:
            words = [token]
        else:
            # Fall back to a normalized form: case-insensitive, hyphen-split.
            words = token.lower().split("-")
        for word in words:
            # An empty piece (e.g. from "twenty-") is never a valid word.
            if not word or word not in numwords:
                raise ValueError("Illegal word: " + token)
            scale, increment = numwords[word]
            current = current * scale + increment
            # Scales above "hundred" close the current group: flush it
            # into the running total and start a new group.
            if scale > 100:
                result += current
                current = 0
    return result + current
# Convert both word columns to integers (predictions first, then actual).
p, a = (df[col].apply(text2int) for col in ('predictions', 'actual'))
# Keep rows where the prediction is exactly one above or one below actual.
is_one_above = p.eq(a + 1)
is_one_below = p.eq(a - 1)
df1 = df[is_one_above | is_one_below]
Или:
# Same filter written with the == operator instead of Series.eq.
within_one = (p == a + 1) | (p == a - 1)
df1 = df[within_one]
print(df1)
predictions actual
5146 sixteen seventeen
5691 seventeen sixteen
13255 sixteen fifteen
13077 fourteen fifteen