Вы можете использовать двойной цикл for для перебора всех функций и столбцов:
# Apply every aggregate to every column of `df` and write one line per result
# to the open text file `f`. getattr() looks the Series method up by name, so
# the list of aggregates can be extended without touching the loop body.
for funcname in ['sum', 'max', 'std']:
    for i in range(df.shape[1]):
        # Index by position i so each column reports its own value.
        column = df.iloc[:, i]
        # Label the line with the aggregate that was actually computed;
        # columns are numbered from 1 in the report.
        f.write("{} of the {} column is {}\n"
                .format(funcname, i + 1, getattr(column, funcname)()))
Здесь `getattr(df, 'sum')` эквивалентно `df.sum`.
import pandas as pd
import glob
import csv
# For every CSV file in the current directory, write a plain-text report
# "output_<name>" holding the sum, max and std of each column.
files = glob.glob('*.csv')
for file in files:
    # Reports produced by an earlier run also match '*.csv'; skip them so a
    # re-run does not try to aggregate its own text output.
    if file.startswith("output_"):
        continue
    # header=None: the first row is data, columns are numbered 0..n-1.
    df = pd.read_csv(file, header=None)
    output_file_name = "output_" + file
    with open(output_file_name, 'w') as f:
        # Alternative: f.write("{}\n".format(df.describe())) writes the whole
        # summary table in one call.
        for funcname in ['sum', 'max', 'std']:
            for i in range(df.shape[1]):
                # Index by position i so each column reports its own value.
                column = df.iloc[:, i]
                # Label the line with the aggregate actually computed;
                # columns are numbered from 1 in the report.
                f.write("{} of the {} column is {}\n"
                        .format(funcname, i + 1, getattr(column, funcname)()))
Обратите внимание, что `df.describe()` представляет сводную статистику в аккуратном формате. Возможно, стоит просто вывести результат `df.describe()`:
In [26]: df = pd.DataFrame(np.random.random((10,6)))
In [27]: df
Out[27]:
0 1 2 3 4 5
0 0.791727 0.397873 0.924195 0.202464 0.789961 0.077095
1 0.920516 0.637618 0.383694 0.623393 0.328440 0.606576
2 0.844562 0.231242 0.183842 0.902065 0.286643 0.743508
3 0.411101 0.370284 0.249545 0.955745 0.561450 0.597586
4 0.185035 0.989508 0.522821 0.218888 0.569865 0.773848
5 0.196904 0.377201 0.816561 0.914657 0.482806 0.686805
6 0.809536 0.480733 0.397394 0.152101 0.645284 0.921204
7 0.004433 0.168943 0.865408 0.472513 0.188554 0.012219
8 0.534432 0.739246 0.628112 0.789579 0.268880 0.835339
9 0.701573 0.580974 0.858254 0.461687 0.493617 0.285601
In [28]: df.describe()
Out[28]:
0 1 2 3 4 5
count 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
mean 0.539982 0.497362 0.582983 0.569309 0.461550 0.553978
std 0.324357 0.246491 0.274233 0.313254 0.189960 0.318598
min 0.004433 0.168943 0.183842 0.152101 0.188554 0.012219
25% 0.250453 0.372014 0.387119 0.279588 0.297092 0.363598
50% 0.618003 0.439303 0.575466 0.547953 0.488212 0.646691
75% 0.805084 0.623457 0.847830 0.873943 0.567761 0.766263
max 0.920516 0.989508 0.924195 0.955745 0.789961 0.921204