Моя проблема в том, что я пытаюсь сравнить два файла Excel и выделить различия между ними, а затем объединить их в третий файл Excel.
Само добавление данных у меня уже работает, но теперь я пытаюсь раскрасить ячейки, которые есть в одном файле Excel, но отсутствуют в другом. Например:
df1
1 10 50 90
2 20 60 100
3 30 70 110
4 40 80 120
df2
1 15
2 20
3 35
4 40
finished product
1 10 --> 15 50 90
2 20 60 100
3 30 --> 35 70 110
4 40 80 120
Последние 2 столбца должны быть раскрашены. Мне трудно понять, как отформатировать ячейки, которые есть ТОЛЬКО в одном из файлов Excel. Вот моя попытка решения:
# NOTE(review): indentation was lost when this snippet was pasted; presumably the
# second `for` nests inside the first and the two statements after it form the
# inner loop body — confirm against the author's real file.
# Start from an empty list (the "test" placeholder is removed straight away).
values = ["test"]
values.clear()
#if df1.shape < df2.shape:
# Row positions run from df1's row count up to (not including) df2's row count ...
for row in range(df1.shape[0],df2.shape[0]):
# ... and column positions from df1's column count up to df2's column count.
for col in range (df1.shape[1],df2.shape[1]):
# Read the df2 cell at this position and store its value (not its index).
new_value = df2.iloc[row, col]
values.append(new_value)
# Prints the most recently read value rather than the collected list.
print("Here are the new values that will be added to excel3: ", new_value)
Я знаю, что это неправильно, поскольку new_value получает сами данные из df2.iloc[row, col], но я не понимаю, как получить индексы всех ячеек, которых нет в «меньшем» файле Excel. Буду благодарен за любые предложения.
Также: вот полная программа, если это кому-нибудь поможет ... (я знаю, это много)
#Author: Adam Espinosa
#This program will compare two excel files and export the differences into a third excel
#Needed packages
import pandas as pd
import numpy as np
import os
import copy
#Changes the col number into its corresponding excel col letter
def col_num(n):
    """Return the Excel column label for a zero-based column index.

    Excel labels are a bijective base-26 numbering: 0 -> "A", 25 -> "Z",
    26 -> "AA", 701 -> "ZZ", 702 -> "AAA".

    Args:
        n: Zero-based column position (a plain or numpy integer).

    Returns:
        The upper-case column label as a string.

    Raises:
        ValueError: If ``n`` is negative; there is no column left of "A".
    """
    if n < 0:
        raise ValueError("column index must be non-negative, got {}".format(n))
    #Switch to the 1-based numbering that Excel's letters encode
    n = n + 1
    letters = ""
    while n > 0:
        #Subtract 1 before dividing because the scheme has no zero digit
        #(after "Z" comes "AA", not "A0")
        n, remainder = divmod(n - 1, 26)
        letters = chr(65 + remainder) + letters
    return letters
#Joins the Excel letters of the given column indexes into one space-separated string
def char_array(cols):
    """Return the Excel column labels for ``cols`` separated by single spaces.

    Args:
        cols: Iterable of zero-based column indexes (for example the column
            array returned by ``np.where``).

    Returns:
        A string such as "A C AB"; the empty string when ``cols`` is empty.
    """
    #str.join places the separator only between items, so the last label
    #needs no special case
    return " ".join(col_num(index) for index in cols)
print("\nExcel Comparer v1.3\n")


#Keeps asking for a workbook name until that file can be opened, then returns its contents
def _prompt_for_workbook(prompt):
    while True:
        workbook_name = input(prompt)
        try:
            #The user types the name without its extension
            return pd.read_excel(workbook_name + ".xlsx")
        except FileNotFoundError:
            print("File not Found, please make sure this program is in the same directory as both excel files.")


#Retrieve excel files for comparison
df1 = _prompt_for_workbook("Please enter the SMALLER (rows,cols) Excel file for comparison: ")
df2 = _prompt_for_workbook("Please enter the LARGER (rows,cols) Excel file for comparison: ")
print("\n\nFiles compared succesfully!\n\n")
#Empty cells are read as NaN, and NaN never equals NaN; replace them with ''
#so that two blank cells compare as equal in values_compared.
#The pd.np alias was removed from pandas, so use the numpy module this file imports.
df1 = df1.replace(np.nan, '')
df2 = df2.replace(np.nan, '')
#Report the dimensions of both inputs
for _label, _count in (
    ("Rows of first Excel: ", df1.shape[0]),
    ("Rows of second Excel: ", df2.shape[0]),
    ("Columns of first Excel: ", df1.shape[1]),
    ("Columns of second Excel: ", df2.shape[1]),
):
    print(_label, _count)
#original row and column for formatting the cells with color
#(captured now, before df1 is padded further down)
origRow, origCol = df1.shape
#The index of which column/row that the dataframe is currently on
#(1 and 2 are the row counts of df1 and df2, 3 and 4 their column counts)
index1, index3 = df1.shape
index2, index4 = df2.shape
#A mutable running count of rows and cols, starting from the same sizes
equal1, equal2, equal3, equal4 = index1, index2, index3, index4
#Difference in size between the two excels; each pair is the same gap with opposite sign
offset1 = index1 - index2
offset2 = -offset1
offset3 = index3 - index4
offset4 = -offset3
#Collect every df2 cell that lies outside df1's shape, i.e. each cell that only
#exists in the larger excel: all columns of the extra rows plus the extra
#columns of the rows both files share.
#values holds the cell contents, new_cells the matching (row, col) positions.
values = []
new_cells = []
small_rows, small_cols = df1.shape
for row in range(df2.shape[0]):
    #Shared rows only gain the extra columns; rows past df1's end are entirely new
    first_new_col = small_cols if row < small_rows else 0
    for col in range(first_new_col, df2.shape[1]):
        new_cells.append((row, col))
        values.append(df2.iloc[row, col])
#Print the whole list rather than the last loop variable, which does not
#exist when there is nothing new to add
print("Here are the new values that will be added to excel3: ", values)
#Column counts of both files before any padding; needed for the column step
#below because copying rows across can itself add columns
cols_in_df1 = df1.shape[1]
cols_in_df2 = df2.shape[1]
#depending on which file has more rows, append the missing rows to the smaller df
#(DataFrame.append was removed in pandas 2.0, so the rows are copied with pd.concat)
if df1.shape[0] > df2.shape[0]:
    df2 = pd.concat([df2, df1.iloc[df2.shape[0]:]], ignore_index = True)
elif df1.shape[0] < df2.shape[0]:
    df1 = pd.concat([df1, df2.iloc[df1.shape[0]:]], ignore_index = True)
#values of the columns that will be appended
columns1 = list(df1.columns.values)
columns2 = list(df2.columns.values)
#depending on which file has more columns, append the missing columns to the smaller df
#(at most one of these two loops runs; the other range is empty)
for position in range(cols_in_df2, cols_in_df1):
    df2[columns1[position]] = df1.iloc[:, position]
for position in range(cols_in_df1, cols_in_df2):
    df1[columns2[position]] = df2.iloc[:, position]
#Rows copied between files whose column names differ leave NaN gaps, and NaN
#never equals NaN; blank them so they are not reported as differences
df1 = df1.replace(np.nan, '')
df2 = df2.replace(np.nan, '')
print("\ndf1 values\n", df1)
print("\ndf2 values\n", df2)
print("Rows of first Excel: ", origRow)
print("Rows of second Excel: ", df2.shape[0])
print("Columns of first Excel: ", origCol)
print("Columns of second Excel: ", df2.shape[1])
#determines whether the files are exactly equal
print("\nAre the Documents exactly the same:", df1.equals(df2))
#Cell-by-cell comparison; False marks a cell whose values differ
values_compared = df1.values == df2.values
print("\nEach cell on whether or not they're equivalent:\n", values_compared)
rows, cols = np.where(np.logical_not(values_compared))
#Prints out the rows and cols of non-equal values to console
print("\nThe indexes of each non-equal value:")
print("Col: [", char_array(cols), "]")
#+2 turns a zero-based data row into its Excel row number (1-based, below the header)
print("Row: ", (rows + 2))
#The 'old --> new' markers are strings; newer pandas versions warn about or
#reject writing a string into a numeric column, so hold every cell as a
#generic object first
df1 = df1.astype(object)
#Df1 will now show the differences between the two files
for row, col in zip(rows, cols):
    df1.iloc[row, col] = '{} --> {}'.format(df1.iloc[row, col], df2.iloc[row, col])
#Size of the region that both input files cover. origRow/origCol are df1's
#original size and offset2/offset4 are (df2 - df1) rows/cols, so adding them
#gives df2's original size; the smaller of the two is the shared part.
common_rows = min(origRow, origRow + offset2)
common_cols = min(origCol, origCol + offset4)
total_rows, total_cols = df1.shape
#Create a pandas excelwriter with the xlsxwriter engine; leaving the with-block
#saves the file (ExcelWriter.save() was removed in pandas 2.0)
with pd.ExcelWriter('excel3.xlsx', engine = 'xlsxwriter') as writer:
    #Creates a new excel file and writes the differences shown
    df1.to_excel(writer, sheet_name='Sheet1', index = False, header = True)
    #Working with multiple sheets in an excel file (IN PROGRESS)
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    #Yellow marks changed values, green marks cells that exist in only one file
    cell_format = workbook.add_format()
    cell_format.set_bg_color('yellow')
    cell_format_new = workbook.add_format()
    cell_format_new.set_bg_color('green')
    if total_cols > 0:
        #Sheet row 0 is the header, so data row i sits on sheet row i + 1 and
        #the last data row is sheet row total_rows
        last_row = total_rows
        last_col = total_cols - 1
        worksheet.conditional_format(0, 0, last_row, last_col, {'type': 'text',
                                                                'criteria': 'containing',
                                                                'value': '->',
                                                                'format': cell_format})
        #Columns that only one file has: every data row of those columns
        if total_rows > 0 and total_cols > common_cols:
            worksheet.conditional_format(1, common_cols, last_row, last_col, {'type': 'no_blanks',
                                                                              'format': cell_format_new})
        #Rows that only one file has: every column of those rows
        if total_rows > common_rows:
            worksheet.conditional_format(common_rows + 1, 0, last_row, last_col, {'type': 'no_blanks',
                                                                                  'format': cell_format_new})
print("\nexcel3.xlsx has been written to this directory with the discrepancies.")
os.system("PAUSE")