Сравнение электронных таблиц Excel не меняет формат для новых или удаленных строк - PullRequest
0 голосов
/ 03 мая 2019

Я запускаю следующий скрипт Python для сравнения двух электронных таблиц Excel.Часть кода, которая печатает newRows и droppedRows, работает нормально.Но формат строк / ячеек Excel не изменяется для этих строк в операторах if.Я не уверен, что мне не хватает.Мои данные содержат строки, которые являются новыми и отброшены, поскольку они отображаются на моей консоли.

import pandas as pd
from pathlib import Path
​
​
def excel_diff(path_OLD, path_NEW, index_col):
​
    df_OLD = pd.read_excel(path_OLD, index_col=index_col).fillna(0)
    df_NEW = pd.read_excel(path_NEW, index_col=index_col).fillna(0)
​
    # Perform Difference
    dfDiff = df_NEW.copy()
    droppedRows = []
    newRows = []
​
    cols_OLD = df_OLD.columns
    cols_NEW = df_NEW.columns
    sharedCols = list(set(cols_OLD).intersection(cols_NEW))
​
    # Perform comparsion between rows in both spreadsheets
    for row in dfDiff.index:
        if (row in df_OLD.index) and (row in df_NEW.index):
            for col in sharedCols:
                value_OLD = df_OLD.loc[row,col]
                value_NEW = df_NEW.loc[row,col]
                if value_OLD==value_NEW:
                    dfDiff.loc[row,col] = df_NEW.loc[row,col]
                else:
                    dfDiff.loc[row,col] = ('{}→{}').format(value_OLD,value_NEW)
        else:
            newRows.append(row)

    # Perform comparison to determine rows dropped in the actual spreadsheet 
    for row in df_OLD.index:
        if row not in df_NEW.index:
            droppedRows.append(row)
            dfDiff = dfDiff.append(df_OLD.loc[row,:])
​
    dfDiff = dfDiff.sort_index().fillna('')
    print(dfDiff)
    print('\nNew Rows:     {}'.format(newRows)) ------> this works
    print('Dropped Rows: {}'.format(droppedRows)) -----> this works
​
    # Save output and format
    fname = '{} vs {}.xlsx'.format(path_OLD.stem,path_NEW.stem)
    writer = pd.ExcelWriter(fname, engine='xlsxwriter')
​
    dfDiff.to_excel(writer, sheet_name='DIFF', index=True)
    df_NEW.to_excel(writer, sheet_name=path_NEW.stem, index=True)
    df_OLD.to_excel(writer, sheet_name=path_OLD.stem, index=True)
​
    # get xlsxwriter objects
    workbook  = writer.book
    worksheet = writer.sheets['DIFF']
    #worksheet.hide_gridlines(2)
    #worksheet.set_default_row(15)
​
    # define formats
    date_fmt = workbook.add_format({'align': 'center', 'num_format': 'yyyy-mm-dd'})
    center_fmt = workbook.add_format({'align': 'center'})
    number_fmt = workbook.add_format({'align': 'center', 'num_format': '#,##0.00'})
    cur_fmt = workbook.add_format({'align': 'center', 'num_format': '$#,##0.00'})
    perc_fmt = workbook.add_format({'align': 'center', 'num_format': '0%'})
    grey_fmt = workbook.add_format({'bg_color': '#B9C1B4'})
    highlight_fmt = workbook.add_format({'font_color': '#FF0000', 'bg_color':'#B1B3B3'})
    new_fmt = workbook.add_format({'bold': True, 'italic': True })
​
    # set format over range
    ## highlight changed cells in red
    worksheet.conditional_format('A1:ZZ1000', {'type': 'text',
                                            'criteria': 'containing',
                                            'value':'→',
                                            'format': highlight_fmt})
​
    # highlight new/changed rows
    for row in range(dfDiff.shape[0]):
    #highlight new rows in green, and dropped rows in grey 
        if row+1 in newRows:
            print ('New Row')
            worksheet.set_row(row+1, 15, new_fmt) -> this does not work
            print ('New Row')
        if row+1 in droppedRows:
            worksheet.set_row(row+1, 15, grey_fmt)-> this does not work
            print ('Dropped row')
​
    # save
    writer.save()
    print('\nDone.\n')
​
​
# Main function to read in the input files to be compared
def main():
    path_OLD = Path('stat_prem_ideal_0502.xlsx')
    path_NEW = Path('stat_prem_actual_0502.xlsx')
​
    # get index col from data - the first column should always be the index column - Record_ID in this case
    df = pd.read_excel(path_NEW)
    index_col = df.columns[0]
    print('\nIndex column: {}\n'.format(index_col))
​
    excel_diff(path_OLD, path_NEW, index_col)
​
​
if __name__ == '__main__':
    main()

Вывод сценария:

New Rows:     [229, 229]
Dropped Rows: [94]

Done.

...