Это потребовало некоторой работы, но мне удалось это сделать, хотя не думаю, что получилось в стиле Python (Pythonic).
import pandas as pd
import numpy as np
from numbers import Real
from typing import Callable
# Absolute tolerance that absolute_appr passes to appr when comparing floats.
epsilon = 1e-4
def appr(a: Real, b: Real, factor: Real) -> bool:
    """Return whether ``a`` lies within ``factor`` of ``b`` (bounds inclusive).

    Args:
        a: Value being tested.
        b: Reference value.
        factor: Allowed absolute deviation; its sign is ignored.

    Returns:
        A truthy value when ``b - |factor| <= a <= b + |factor|``. Any
        comparison involving NaN is false, so a NaN operand yields a falsy
        result.
    """
    tolerance = abs(factor)
    return b - tolerance <= a <= b + tolerance
def absolute_appr(a: Real, b: Real) -> bool:
    """Return whether ``a`` and ``b`` differ by at most ``epsilon``.

    Thin wrapper that calls ``appr`` with the module-level ``epsilon`` as
    the tolerance.

    Args:
        a: First value.
        b: Second value.

    Returns:
        Whatever ``appr(a, b, epsilon)`` returns.
    """
    return appr(a, b, epsilon)
def compare_element(e1, e2) -> bool:
    """Compare two cell values, tolerating small float differences.

    Rules, applied in order:

    * Values whose types are defined in different modules (for example a
      NumPy scalar and a builtin number) are unequal.
    * NumPy values must share the same ``dtype.kind``; otherwise a
      diagnostic is printed and they are unequal.
    * Non-NumPy values must have exactly the same type; otherwise a
      diagnostic is printed and they are unequal.
    * Floats (NumPy kind ``'f'`` or builtin ``float``) are equal when both
      are NaN or when ``absolute_appr`` accepts them.
    * Everything else is compared with ``==``.

    Args:
        e1: Value taken from the first frame.
        e2: Value taken from the second frame.

    Returns:
        A truthy value when the two values are considered equal.
    """
    module_name = type(e1).__module__
    if module_name != type(e2).__module__:
        return False

    if module_name == np.__name__:
        kind1, kind2 = e1.dtype.kind, e2.dtype.kind
        if kind1 != kind2:
            print(f'Kinds of {e1!s} ({kind1!s}) and '
                  f'{e2!s} ({kind2!s}) are different')
            return False
        if kind1 != 'f':
            return e1 == e2
    else:
        if type(e1) is not type(e2):
            print(f'Types of {e1!s} ({type(e1)!s}) and '
                  f'{e2!s} ({type(e2)!s}) are different')
            return False
        if not isinstance(e1, float):
            return e1 == e2

    # Both values are floats here. NaN never satisfies the tolerance check,
    # so a pair of NaNs is accepted explicitly -- for builtin floats as well
    # as NumPy ones, so object-dtype columns behave like float columns.
    if np.isnan(e1) and np.isnan(e2):
        return True
    return absolute_appr(e1, e2)
def compare_columns(df1: pd.DataFrame, df2: pd.DataFrame) -> bool:
    """Return whether both frames carry the same set of column labels.

    Column order is ignored. Because the labels are compared as sets,
    repeated labels within one frame are not distinguished.

    Args:
        df1: First frame.
        df2: Second frame.

    Returns:
        True when the two label sets are equal.
    """
    return set(df1.columns.tolist()) == set(df2.columns.tolist())
def normalize_dataframe(df: pd.DataFrame) -> list:
    """Put ``df`` into a canonical row order, modifying it in place.

    The index is moved into ordinary columns, the rows are sorted by every
    column (keys taken in sorted label order), and the index is then
    replaced by a fresh default one. The order of the columns themselves is
    left unchanged.

    Args:
        df: Frame to normalise. It is mutated, so pass a copy when the
            caller's data must stay untouched.

    Returns:
        The sorted list of column labels that was used as the sort key.
    """
    # Index levels become columns so that they take part in sorting.
    df.reset_index(inplace=True)
    sort_keys: list = sorted(df.columns.values.tolist())
    df.sort_values(by=sort_keys, inplace=True)
    # Drop the shuffled labels left behind by the sort.
    df.reset_index(inplace=True, drop=True)
    return sort_keys
def approx_compare_dataframe(df1: pd.DataFrame, df2: pd.DataFrame) -> bool:
    """Compare two frames, ignoring row/column order and tiny float noise.

    Both frames are copied and normalised with ``normalize_dataframe``, so
    the inputs are not modified. The frames are equal when they have the
    same column labels (``compare_columns``), the same number of rows, and
    every pair of cells at the same position after normalisation is
    accepted by ``compare_element``. On the first mismatching cell the
    location, the two values and both normalised frames are printed.

    Args:
        df1: First frame.
        df2: Second frame.

    Returns:
        True when the frames are considered equal, False otherwise.
    """
    # Work on copies: normalisation mutates its argument.
    df1 = df1.copy()
    df2 = df2.copy()
    normalize_dataframe(df1)
    col_list: list = normalize_dataframe(df2)
    if not compare_columns(df1, df2):
        return False

    # The cell loop only walks df1's rows, so without this guard surplus
    # rows in df2 would go unnoticed and missing ones would raise KeyError.
    if len(df1.index) != len(df2.index):
        print('Row counts differ: ' + str(len(df1.index)) + ' and ' +
              str(len(df2.index)))
        return False

    for col in col_list:
        # Look each column up once instead of once per cell.
        column1 = df1[col]
        column2 = df2[col]
        for row in df1.index:
            e1 = column1[row]
            e2 = column2[row]
            if not compare_element(e1, e2):
                print('On col ' + str(col) + ' and row ' + str(row))
                print(str(e1) + ' is not equal to ' + str(e2))
                print('DataFrame 1: ')
                print(df1)
                print('DataFrame 2: ')
                print(df2)
                return False
    return True
Тесты следующие:
# Fixture 1 -- the reference frame: rows keyed by (class, name), two float
# columns (one holding a NaN) and one string column.
index1 = pd.MultiIndex.from_tuples(
    [
        ('bird', 'falcon'),
        ('bird', 'parrot'),
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
    ],
    names=['class', 'name'],
)
columns1 = pd.MultiIndex.from_tuples(
    [
        ('speed', 'max'),
        ('speed', 'min'),
        ('species', 'type'),
    ],
)
df1 = pd.DataFrame(
    data=[
        (389.0, 388.0, 'fly'),
        (24.0, 23.0, 'fly'),
        (80.5, 80.0, 'run'),
        (np.nan, -1.0, 'jump'),
    ],
    index=index1,
    columns=columns1,
)
# Fixture 2 -- same rows as df1, listed in a different order (mammals first).
index2 = pd.MultiIndex.from_tuples(
    [
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
        ('bird', 'falcon'),
        ('bird', 'parrot'),
    ],
    names=['class', 'name'],
)
columns2 = pd.MultiIndex.from_tuples(
    [
        ('speed', 'max'),
        ('speed', 'min'),
        ('species', 'type'),
    ],
)
df2 = pd.DataFrame(
    data=[
        (80.5, 80.0, 'run'),
        (np.nan, -1.0, 'jump'),
        (389.0, 388.0, 'fly'),
        (24.0, 23.0, 'fly'),
    ],
    index=index2,
    columns=columns2,
)
# Fixture 3 -- df1 with one float nudged by 1e-8 (389.00000001 vs 389.0).
index3 = pd.MultiIndex.from_tuples(
    [
        ('bird', 'falcon'),
        ('bird', 'parrot'),
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
    ],
    names=['class', 'name'],
)
columns3 = pd.MultiIndex.from_tuples(
    [
        ('speed', 'max'),
        ('speed', 'min'),
        ('species', 'type'),
    ],
)
df3 = pd.DataFrame(
    data=[
        (389.00000001, 388.0, 'fly'),
        (24.0, 23.0, 'fly'),
        (80.5, 80.0, 'run'),
        (np.nan, -1.0, 'jump'),
    ],
    index=index3,
    columns=columns3,
)
# Fixture 4 -- df1's data with the two speed columns declared in swapped order.
index4 = pd.MultiIndex.from_tuples(
    [
        ('bird', 'falcon'),
        ('bird', 'parrot'),
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
    ],
    names=['class', 'name'],
)
columns4 = pd.MultiIndex.from_tuples(
    [
        ('speed', 'min'),
        ('speed', 'max'),
        ('species', 'type'),
    ],
)
df4 = pd.DataFrame(
    data=[
        (388.0, 389.0, 'fly'),
        (23.0, 24.0, 'fly'),
        (80.0, 80.5, 'run'),
        (-1.0, np.nan, 'jump'),
    ],
    index=index4,
    columns=columns4,
)
# Fixture 5 -- same layout as df4, but ('speed', 'min') holds integers
# instead of floats.
index5 = pd.MultiIndex.from_tuples(
    [
        ('bird', 'falcon'),
        ('bird', 'parrot'),
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
    ],
    names=['class', 'name'],
)
columns5 = pd.MultiIndex.from_tuples(
    [
        ('speed', 'min'),
        ('speed', 'max'),
        ('species', 'type'),
    ],
)
df5 = pd.DataFrame(
    data=[
        (388, 389.0, 'fly'),
        (23, 24.0, 'fly'),
        (80, 80.5, 'run'),
        (-1, np.nan, 'jump'),
    ],
    index=index5,
    columns=columns5,
)
# Pin the dtype explicitly rather than relying on inference alone.
df5 = df5.astype({('speed', 'min'): int})
# Fixture 6 -- df1 with one genuinely different value: the falcon's max
# speed is 388.0 instead of 389.0.
index6 = pd.MultiIndex.from_tuples(
    [
        ('bird', 'falcon'),
        ('bird', 'parrot'),
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
    ],
    names=['class', 'name'],
)
columns6 = pd.MultiIndex.from_tuples(
    [
        ('speed', 'max'),
        ('speed', 'min'),
        ('species', 'type'),
    ],
)
df6 = pd.DataFrame(
    data=[
        (388.0, 388.0, 'fly'),
        (24.0, 23.0, 'fly'),
        (80.5, 80.0, 'run'),
        (np.nan, -1.0, 'jump'),
    ],
    index=index6,
    columns=columns6,
)
# Fixture 7 -- df1 with the monkey's min speed replaced by NaN (was -1.0).
index7 = pd.MultiIndex.from_tuples(
    [
        ('bird', 'falcon'),
        ('bird', 'parrot'),
        ('mammal', 'lion'),
        ('mammal', 'monkey'),
    ],
    names=['class', 'name'],
)
columns7 = pd.MultiIndex.from_tuples(
    [
        ('speed', 'max'),
        ('speed', 'min'),
        ('species', 'type'),
    ],
)
df7 = pd.DataFrame(
    data=[
        (389.0, 388.0, 'fly'),
        (24.0, 23.0, 'fly'),
        (80.5, 80.0, 'run'),
        (np.nan, np.nan, 'jump'),
    ],
    index=index7,
    columns=columns7,
)
>>> df1
speed species
max min type
class name
bird falcon 389.0 388.0 fly
parrot 24.0 23.0 fly
mammal lion 80.5 80.0 run
monkey NaN -1.0 jump
>>> df2
speed species
max min type
class name
mammal lion 80.5 80.0 run
monkey NaN -1.0 jump
bird falcon 389.0 388.0 fly
parrot 24.0 23.0 fly
>>> df3
speed species
max min type
class name
bird falcon 389.0 388.0 fly
parrot 24.0 23.0 fly
mammal lion 80.5 80.0 run
monkey NaN -1.0 jump
>>> df4
speed species
min max type
class name
bird falcon 388.0 389.0 fly
parrot 23.0 24.0 fly
mammal lion 80.0 80.5 run
monkey -1.0 NaN jump
>>> df5
speed species
min max type
class name
bird falcon 388 389.0 fly
parrot 23 24.0 fly
mammal lion 80 80.5 run
monkey -1 NaN jump
>>> df6
speed species
max min type
class name
bird falcon 388.0 388.0 fly
parrot 24.0 23.0 fly
mammal lion 80.5 80.0 run
monkey NaN -1.0 jump
>>> df7
speed species
max min type
class name
bird falcon 389.0 388.0 fly
parrot 24.0 23.0 fly
mammal lion 80.5 80.0 run
monkey NaN NaN jump
approx_compare_dataframe(df1, df1)
True
approx_compare_dataframe(df1, df2)
True
approx_compare_dataframe(df1, df3)
True
approx_compare_dataframe(df1, df4)
True
approx_compare_dataframe(df1, df5)
Kinds of 388.0 (f) and 388 (i) are different
On col ('speed', 'min') and row 0
388.0 is not equal to 388
DataFrame 1:
class name speed species
max min type
0 bird falcon 389.0 388.0 fly
1 bird parrot 24.0 23.0 fly
2 mammal lion 80.5 80.0 run
3 mammal monkey NaN -1.0 jump
DataFrame 2:
class name speed species
min max type
0 bird falcon 388 389.0 fly
1 bird parrot 23 24.0 fly
2 mammal lion 80 80.5 run
3 mammal monkey -1 NaN jump
False
approx_compare_dataframe(df1, df6)
On col ('speed', 'max') and row 0
389.0 is not equal to 388.0
DataFrame 1:
class name speed species
max min type
0 bird falcon 389.0 388.0 fly
1 bird parrot 24.0 23.0 fly
2 mammal lion 80.5 80.0 run
3 mammal monkey NaN -1.0 jump
DataFrame 2:
class name speed species
max min type
0 bird falcon 388.0 388.0 fly
1 bird parrot 24.0 23.0 fly
2 mammal lion 80.5 80.0 run
3 mammal monkey NaN -1.0 jump
False
approx_compare_dataframe(df1, df7)
On col ('speed', 'min') and row 3
-1.0 is not equal to nan
DataFrame 1:
class name speed species
max min type
0 bird falcon 389.0 388.0 fly
1 bird parrot 24.0 23.0 fly
2 mammal lion 80.5 80.0 run
3 mammal monkey NaN -1.0 jump
DataFrame 2:
class name speed species
max min type
0 bird falcon 389.0 388.0 fly
1 bird parrot 24.0 23.0 fly
2 mammal lion 80.5 80.0 run
3 mammal monkey NaN NaN jump
False