1.csv:
33694,1298,2465341
33665,1299,20
33663,1299,8
2.csv:
53694,1398,3465341
33665,1399,20
33663,1399,8
3.csv:
13694,7778,3465341
44432,7780,20
33663,7780,8
import pandas as pd
import numpy as np
# Stack several header-less numeric CSV files into one DataFrame.
#
# pd.read_csv treats the first row of every file as its header, so each file's
# "cell number" arrives as the label of the second column.  The combined frame
# takes its column labels from the last file (in sorted order), except that the
# second label is replaced by the sum of the cell numbers of all files.
matrixFiles = ['1.csv', '2.csv', '3.csv']
dfs = []
# Maps each file's position in the sorted list (keys) to its cellNumberInt (values).
cellNumberInt_dict = {}
for i, file in enumerate(sorted(matrixFiles)):
    matrix = pd.read_csv(file)
    # The label is read as text; convert it so the cell numbers can be summed.
    cellNumberInt = np.int64(matrix.columns[1])
    cellNumberInt_dict[i] = cellNumberInt
    dfs.append(matrix)

# The last frame supplies the column labels of the combined result.
last_df = dfs[-1]
# Keep only the raw values of the earlier frames: their header labels differ
# from file to file, and pd.concat aligns on labels instead of stacking rows.
matrix_list = [df.values for df in dfs[:-1]]
if matrix_list:
    # Rebuild the earlier rows under the last frame's labels, then stack them
    # below the last frame's own rows.
    arrs = np.concatenate(matrix_list)
    new_df = pd.DataFrame(arrs, columns=last_df.columns.tolist())
    big_file = pd.concat([last_df, new_df])
else:
    # Single input file: np.concatenate raises ValueError on an empty list,
    # so there is nothing to stack.  Copy so the rename below leaves the frame
    # stored in dfs untouched, as it does in the multi-file case.
    big_file = last_df.copy()

# Replace the second column label with the total of all cell numbers.
big_file.rename(
    columns={big_file.columns.tolist()[1]: sum(cellNumberInt_dict.values())},
    inplace=True,
)
print(big_file)
13694 10474 3465341
0 44432 7780 20
1 33663 7780 8
0 33665 1299 20
1 33663 1299 8
2 33665 1399 20
3 33663 1399 8