Следующий код должен показать вам необходимую логику. Мне пришлось сначала написать его на Python, а затем перевести на VBA (код на Python написан не в «питоническом» (pythonic) стиле, а так, чтобы его было легко перевести на VBA).
Option Explicit
Public Sub ReshapeData()
    ' Pivots a three-column "long" table (key, name, value) on the active sheet
    ' into a "wide" table: one output row per distinct key (first-seen order) and
    ' one output column per name. When the same (key, name) pair occurs several
    ' times, the name is repeated as a column header as many times as the most
    ' frequent pair needs, and the values fill those columns left to right.
    '
    ' Input : SOURCE_ADDRESS on the active sheet (no header row).
    ' Output: written starting at cell E1 of the active sheet; row 1 holds headers.
    '
    ' Uses the early-bound Scripting.Dictionary type and a late-bound
    ' System.Collections.ArrayList, exactly as the module did before.
    Const SOURCE_ADDRESS As String = "A2:C11"
    Const KEY_HEADER As String = "Header1"

    Dim ws As Worksheet
    Set ws = ActiveSheet

    Dim df1()
    df1 = ws.Range(SOURCE_ADDRESS).Value   ' 2-D array indexed from 1

    Dim rowIndex As Scripting.Dictionary   ' key  -> output row number
    Dim pairCount As Scripting.Dictionary  ' key+name -> occurrences seen so far
    Dim nameCols As Scripting.Dictionary   ' name -> columns allocated to that name
    Dim names As Object                    ' value-column headers, in output order
    Set rowIndex = New Scripting.Dictionary
    Set pairCount = New Scripting.Dictionary
    Set nameCols = New Scripting.Dictionary
    Set names = CreateObject("System.Collections.ArrayList")

    Dim r As Long
    Dim rowKey As String, itemName As String, pairKey As String
    Dim seenCount As Long, allocated As Long

    ' Pass 1: assign output rows and work out which value columns are needed.
    For r = LBound(df1, 1) To UBound(df1, 1)
        rowKey = df1(r, 1)
        itemName = df1(r, 2)
        ' vbNullChar as the separator so that keys or names containing ":" cannot
        ' produce the same composite id for two different pairs.
        pairKey = rowKey & vbNullChar & itemName

        If Not rowIndex.Exists(rowKey) Then
            rowIndex.Add rowKey, rowIndex.Count + 2   ' row 1 is the header row
        End If

        seenCount = CLng(pairCount(pairKey)) + 1      ' a missing key reads as Empty -> 0
        pairCount(pairKey) = seenCount

        ' Add a column only when this pair needs more columns than the name
        ' already has. This covers repeated pairs with identical values and
        ' avoids allocating surplus columns when several keys repeat a name.
        allocated = CLng(nameCols(itemName))
        If seenCount > allocated Then
            names.Add itemName
            nameCols(itemName) = allocated + 1
        End If
    Next r

    Dim namesArr()
    namesArr = names.ToArray   ' 0-based array

    ' Build the output array: header row + one row per key, key column + value columns.
    Dim df()
    ReDim df(1 To rowIndex.Count + 1, 1 To names.Count + 1)

    df(1, 1) = KEY_HEADER
    Dim c As Long
    For c = LBound(namesArr) To UBound(namesArr)
        df(1, c + 2) = namesArr(c)
    Next c

    Dim k As Variant
    For Each k In rowIndex.Keys
        df(rowIndex(k), 1) = k
    Next k

    ' Pass 2: place each value in the n-th column carrying its name, where n is
    ' how many times this (key, name) pair has been seen so far.
    pairCount.RemoveAll
    Dim col As Long
    For r = LBound(df1, 1) To UBound(df1, 1)
        rowKey = df1(r, 1)
        itemName = df1(r, 2)
        pairKey = rowKey & vbNullChar & itemName

        seenCount = CLng(pairCount(pairKey)) + 1
        pairCount(pairKey) = seenCount

        ' The lookup runs over the value headers only, so a name that happens to
        ' equal KEY_HEADER cannot resolve to the key column; +1 skips that column.
        col = get_col_number(namesArr, itemName, seenCount) + 1
        df(rowIndex(rowKey), col) = df1(r, 3)
    Next r

    ws.Cells(1, 5).Resize(UBound(df, 1), UBound(df, 2)).Value = df
End Sub
Public Function get_col_number(ByRef headersArr(), ByVal name As String, ByVal required_match As Long) As Long
    ' Finds the required_match-th element of headersArr that equals name and
    ' returns that element's array index plus one. Returns 0 when the array
    ' holds fewer than required_match matching elements.
    Dim pos As Long
    Dim hits As Long

    get_col_number = 0
    pos = LBound(headersArr)

    Do While pos <= UBound(headersArr)
        If headersArr(pos) = name Then
            hits = hits + 1
            If hits = required_match Then
                get_col_number = pos + 1
                Exit Do
            End If
        End If
        pos = pos + 1
    Loop
End Function
Тестовый скрипт на Python (написан не в стиле pythonic — по причине, указанной выше):
import pandas as pd
def get_col_number(headers:list, name:str, required_match:int)->int:
    """Return the position in ``headers`` of one particular occurrence of ``name``.

    ``name`` may appear several times in ``headers``; ``required_match`` is the
    index (as used for ordinary list indexing, so 0 is the first occurrence)
    into the list of positions at which ``name`` occurs.

    Raises ``IndexError`` when ``required_match`` is outside that list.
    """
    positions = []
    for idx, header in enumerate(headers):
        if header == name:
            positions.append(idx)
    return positions[required_match]
# Sample input in "long" form, one record per row: (Header1, Header2, Header3).
df1 = pd.DataFrame(
    [
        ('A100', 'Ahmed', 'Value1'),
        ('A100', 'Yasser', 'Value2'),
        ('A100', 'Ahmed', 'Value3'),
        ('B200', 'Yasser', 'Value5'),
        ('C300', 'Ahmed', 'Value6'),
        ('C300', 'Khalil', 'Value7'),
        ('D400', 'Yasser', 'Value9'),
        ('D400', 'Ahmed', 'Value10'),
        ('A100', 'Ahmed', 'Value4'),
        ('C300', 'Yasser', 'Value8'),
    ],
    columns=['Header1', 'Header2', 'Header3'],
)
def main():
    """Reshape the module-level ``df1`` from long to wide form and print both.

    The first three columns of ``df1`` are read as (key, name, value). The
    result has one row per distinct key (first-seen order) and one column per
    name. When a (key, name) pair occurs more than once, the name is repeated
    as a column header as many times as the most frequent pair requires, and
    that pair's values fill those columns from left to right.
    """
    print('input dataframe.....')
    print(100 * '==')
    print(df1)

    # Plain tuples taken by position, so the result does not depend on df1's
    # index labels being 0..n-1.
    records = list(df1.iloc[:, :3].itertuples(index=False, name=None))

    headers = ['Header1']   # in VBA: an ArrayList (.Add / .ToArray)
    rows = {}               # key -> output row number
    pair_seen = {}          # (key, name) -> occurrences seen so far
    name_cols = {}          # name -> number of columns allocated to that name

    for header1, name, _ in records:
        rows.setdefault(header1, len(rows))

        # A tuple key cannot collide the way a ':'-joined string can.
        pair = (header1, name)
        pair_seen[pair] = pair_seen.get(pair, 0) + 1

        # Add a column only when this pair needs more columns than the name
        # already has. This covers repeated pairs with identical values and
        # avoids surplus columns when several keys repeat the same name.
        if pair_seen[pair] > name_cols.get(name, 0):
            headers.append(name)
            name_cols[name] = name_cols.get(name, 0) + 1

    # generate array (vba) / df (python): blank cells, one row per key
    df = pd.DataFrame([[''] * len(headers) for _ in rows], columns=headers)

    # populate first column with the keys
    for key, row in rows.items():
        df.iloc[row, 0] = key

    # Look names up among the value columns only, so a name equal to
    # 'Header1' can never resolve to the key column.
    value_headers = headers[1:]

    id_count = {}
    for header1, name, val in records:
        pair = (header1, name)
        occurrence = id_count.get(pair, 0)   # 0-based, as get_col_number expects
        id_count[pair] = occurrence + 1
        col = get_col_number(value_headers, name, occurrence) + 1
        df.iloc[rows[header1], col] = val

    print()
    print('output dataframe.....')
    print(100 * '==')
    print(df)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()