При сравнении двух файлов Excel как закрасить ячейки, которые есть в одном файле, но отсутствуют в другом? - PullRequest
0 голосов
/ 06 июня 2019

Моя проблема в том, что я пытаюсь сравнить два файла Excel и выделить различия между ними, а затем объединить их в третий файл Excel.

Само добавление у меня уже работает, но теперь я пытаюсь раскрасить ячейки, которые есть в одном файле Excel, но отсутствуют в другом. Например:

df1
1 10 50 90
2 20 60 100
3 30 70 110
4 40 80 120

df2
1 15
2 20
3 35
4 40

finished product
1 10 --> 15 50 90
2 20        60 100
3 30 --> 35 70 110
4 40        80 120

Последние 2 столбца должны быть раскрашены. Мне трудно понять, как отформатировать ячейки, которые есть ТОЛЬКО в одном файле Excel. Вот моя попытка решения:

# Attempt at collecting the df2 cells that have no counterpart in df1.
# The list is created with a placeholder and emptied right away, so it is empty here.
values = ["test"]
values.clear()

#if df1.shape < df2.shape:
# Visits only positions whose row index AND column index both lie beyond df1's
# shape; extra columns on rows that df1 also has (and extra rows under columns
# that df1 also has) are never reached.
for row in range(df1.shape[0],df2.shape[0]):
    for col in range (df1.shape[1],df2.shape[1]):
        # Stores the cell's value, not its (row, col) position.
        new_value = df2.iloc[row, col]
        values.append(new_value)

# NOTE(review): this prints new_value (the last cell visited), not the values
# list, and new_value is unbound if the loops above never executed.
print("Here are the new values that will be added to excel3: ", new_value)

Я знаю, что это неправильно, поскольку new_value получает сами данные из df2.iloc[row, col], но я не уверен, как получить индексы всех ячеек, которых нет в «меньшем» файле Excel. Буду благодарен за любые предложения.

Также: вот полная программа, если это кому-нибудь поможет ... (я знаю, это много)

#Author: Adam Espinosa
#This program will compare two excel files and export the differences into a third excel

#Needed packages
import pandas as pd
import numpy as np
import os
import copy

#Changes the col number into its corresponding excel col letter
def col_num(n):
    """Return the Excel column letters for a zero-based column index.

    Index 0 gives "A", 25 gives "Z", 26 gives "AA", and so on. An index
    below zero gives an empty string.
    """
    letters = []
    remaining = n + 1  # switch to one-based numbering
    while remaining > 0:
        # Subtracting one before dividing keeps the digits in the range A..Z
        # with no "zero" letter.
        remaining, digit = divmod(remaining - 1, 26)
        letters.append(chr(ord("A") + digit))
    # Letters were produced least-significant first.
    return "".join(reversed(letters))

#Puts the characters from the col_num method into a string (Could be improved)   
def char_array(cols):
    """Return the Excel column letters for every index in cols as one string.

    Each index is converted with col_num and the results are separated by a
    single space, with no leading or trailing space. An empty input gives "".
    """
    return " ".join(col_num(index) for index in cols)

print("\nExcel Comparer v1.3\n")

#Retrieve excel files for comparison: the same retry loop runs once per prompt,
#first for the smaller workbook (df1) and then for the larger one (df2)
loaded_frames = []
for prompt in (
    "Please enter the SMALLER (rows,cols) Excel file for comparison: ",
    "Please enter the LARGER (rows,cols) Excel file for comparison: ",
):
    while True:
        #the user types the name without its extension; ".xlsx" is appended here
        file = input(prompt)
        try:
            frame = pd.read_excel(file + ".xlsx")
        except FileNotFoundError:
            #only a missing file triggers another prompt; other errors propagate
            print("File not Found, please make sure this program is in the same directory as both excel files.")
        else:
            loaded_frames.append(frame)
            break
df1, df2 = loaded_frames
print("\n\nFiles compared succesfully!\n\n")

#NaN never compares equal to NaN, so NaN cells are replaced with empty strings;
#that way a cell that is NaN in both files is not reported as a difference in
#values_compared.
#np.nan is used directly: the pandas.np alias no longer exists in pandas 2.0+.
df1 = df1.replace(np.nan, '', regex = True)
df2 = df2.replace(np.nan, '', regex = True)

print("Rows of first Excel: ", df1.shape[0])
print("Rows of second Excel: ", df2.shape[0])
print("Columns of first Excel: ", df1.shape[1])
print("Columns of second Excel: ", df2.shape[1])

#original row and column counts of df1, kept for formatting the cells with color
origRow, origCol = df1.shape

#difference in row counts (offset1/offset2) and in column counts (offset3/offset4),
#taken in both directions
offset1 = df1.shape[0] - df2.shape[0]
offset2 = df2.shape[0] - df1.shape[0]
offset3 = df1.shape[1] - df2.shape[1]
offset4 = df2.shape[1] - df1.shape[1]

#The index of which column/row that the dataframe is currently on
index1 = df1.shape[0]
index2 = df2.shape[0]
index3 = df1.shape[1]
index4 = df2.shape[1]

#A mutable variable for the number of rows and cols as df.shape is not mutable
equal1 = df1.shape[0]
equal2 = df2.shape[0]
equal3 = df1.shape[1]
equal4 = df2.shape[1]

#Collect every df2 cell that lies outside df1's extent: on rows df1 also has,
#only the columns past origCol are new; on rows past origRow, every column is new.
values = []
for row in range(df2.shape[0]):
    first_new_col = 0 if row >= origRow else origCol
    for col in range(first_new_col, df2.shape[1]):
        values.append(df2.iloc[row, col])

#Print the whole list (empty when df2 adds nothing), not just the last cell visited
print("Here are the new values that will be added to excel3: ", values)

#Shapes before anything is appended; the column step below works from these
#counts even after the row step has changed one of the frames
rows1, cols1 = df1.shape
rows2, cols2 = df2.shape

#depending on which file has more rows, append the surplus rows to the shorter df
#(pd.concat is used because DataFrame.append was removed in pandas 2.0)
if rows1 > rows2:
    df2 = pd.concat([df2, df1.iloc[rows2:, :]], ignore_index = True)
elif rows1 < rows2:
    df1 = pd.concat([df1, df2.iloc[rows1:, :]], ignore_index = True)

#labels of the columns that will be copied, read after the row step
columns1 = list(df1.columns.values)
columns2 = list(df2.columns.values)

#depending on which file has more columns, copy the surplus columns (selected by
#position) into the narrower df under their label from the wider df
for position in range(cols2, cols1):
    df2[columns1[position]] = df1.iloc[:, position]
for position in range(cols1, cols2):
    df1[columns2[position]] = df2.iloc[:, position]

#Show both frames as they stand at this point
for heading, frame in (("\ndf1 values\n", df1), ("\ndf2 values\n", df2)):
    print(heading, frame)

#Report the first file's original size next to the second file's current size
second_rows, second_cols = df2.shape
print("Rows of first Excel: ", origRow)
print("Rows of second Excel: ", second_rows)
print("Columns of first Excel: ", origCol)
print("Columns of second Excel: ", second_cols)

#determines whether the files are exactly equal
documents_identical = df1.equals(df2)
print("\nAre the Documents exactly the same:", documents_identical)

#Element-wise comparison of the raw values: True where the two cells match
values_compared = df1.values == df2.values
print("\nEach cell on whether or not they're equivalent:\n", values_compared)

#Positions (row index, column index) of every cell that does not match
rows, cols = np.where(np.logical_not(values_compared))

#Prints out the rows and cols of non-equal values to console;
#columns are shown as letters and row indexes are shifted by two
print("\nThe indexes of each non-equal value:")
print("Col: [", char_array(cols), "]")
print("Row: ", (rows + 2))

#Df1 will now show the differences between the two files as "old --> new"
for row_pos, col_pos in zip(rows, cols):
    old_value = df1.iloc[row_pos, col_pos]
    new_value_here = df2.iloc[row_pos, col_pos]
    df1.iloc[row_pos, col_pos] = '{} --> {}'.format(old_value, new_value_here)

#Create a pandas excelwriter with the xlsxwriter engine
writer = pd.ExcelWriter('excel3.xlsx', engine = 'xlsxwriter')

#Writes df1 (which now carries the "old --> new" markers) to Sheet1.
#With header = True and index = False the header takes worksheet row 0, so data
#row r is written to worksheet row r + 1 and data column c to worksheet column c.
df1.to_excel(writer, sheet_name='Sheet1', index = False, header = True)

#Handles needed to attach formatting to the sheet that was just written
workbook = writer.book
worksheet = writer.sheets['Sheet1']

#Yellow background for cells whose value differs between the two files
cell_format = workbook.add_format()
cell_format.set_bg_color('yellow')

#Green background for cells that only exist beyond the first file's original extent
cell_format_new = workbook.add_format()
cell_format_new.set_bg_color('green')

#Zero-based worksheet coordinates of the last data row and last data column
#(taken from df1.shape so that frames with fewer than two rows do not raise)
data_rows, data_cols = df1.shape
last_row = data_rows
last_col = data_cols - 1

if data_cols > 0:
    #Changed cells contain the "-->" marker, which includes the text "->"
    worksheet.conditional_format(0, 0, last_row, last_col, {'type':     'text',
                                            'criteria': 'containing',
                                            'value':    '->',
                                            'format':   cell_format})
    if data_rows > 0 and origCol < data_cols:
        #Columns the first file did not have: every data row, columns origCol onwards
        worksheet.conditional_format(1, origCol, last_row, last_col, {'type':     'no_blanks',
                                                'format':   cell_format_new})
    if origRow < data_rows:
        #Rows the first file did not have: worksheet rows after origRow, every column
        worksheet.conditional_format(origRow + 1, 0, last_row, last_col, {'type':     'no_blanks',
                                                'format':   cell_format_new})

#close() writes the workbook to disk; ExcelWriter.save() was removed in pandas 2.0
writer.close()

print("\nexcel3.xlsx has been written to this directory with the discrepancies.")
#PAUSE is a Windows shell command; running it elsewhere only prints a shell error
if os.name == "nt":
    os.system("PAUSE")
...