import dask.dataframe as pd
# Column layouts of the two input files. Explicit names are passed to
# ``read_csv`` without a ``header`` argument, so every line of each file
# (including the first) is parsed as data.
FIRST_COLUMNS = ['user_ID'] + ['att%d' % n for n in range(1, 50)]
SECOND_COLUMNS = ['user_ID'] + ['att_S_%d' % n for n in range(1, 4)]


def _read_indexed(path, columns):
    """Read one ';'-separated file with dask and index it by ``user_ID``."""
    frame = pd.read_csv(
        path,
        sep=';',
        # Passed straight to dask's ``read_csv``, which splits large files
        # into partitions of roughly this many bytes.
        blocksize=10000000,
        # Keep identifiers as text so they are not coerced to numbers.
        dtype={'user_ID': str},
        names=columns,
    )
    return frame.set_index('user_ID')


def build_data_file(first_path='/home/lion/du/data/first_df',
                    second_path='/home/lion/du/data/second_df',
                    output_pattern='result*.csv'):
    """Join the two attribute files on ``user_ID`` and write the result as CSV.

    Both inputs are read as ';'-separated text with the column layouts given
    by ``FIRST_COLUMNS`` and ``SECOND_COLUMNS``, indexed by ``user_ID``, and
    merged index-to-index using the merge's default join type. The merged
    frame is written with ';' as separator.

    Args:
        first_path: Location of the file holding ``att1`` .. ``att49``.
        second_path: Location of the file holding ``att_S_1`` .. ``att_S_3``.
        output_pattern: Target passed to ``to_csv``; with dask, a ``*`` in
            the name is replaced per output partition.

    Returns:
        None. The result is only written to disk.
    """
    first_df = _read_indexed(first_path, FIRST_COLUMNS)
    second_df = _read_indexed(second_path, SECOND_COLUMNS)
    df_final = pd.merge(first_df, second_df, left_index=True, right_index=True)
    df_final.to_csv(output_pattern, sep=';')
# Script entry point: run the join only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    build_data_file()