我只是不知道该从哪里开始这趟 ETL 之旅。我有一份由过时的 ERP 软件导出的杂乱数据集,它的标题占了多行。我只需要把它清理到适合做基本的数据透视表/VLOOKUP,以及可能用于 Power BI 的程度。
你认为我应该走 VBA 路线,还是用 Power Query?
也非常感谢任何其他建议,因为我的脑子已经快烧坏了……
谢谢!
这是我用过的 VBA 代码,但它有问题:
Sub TransformDataToSingleRow()
    ' Flattens a multi-row report on the active sheet into one header row and
    ' one value row, written two rows below the last used cell in column A.
    '
    ' Two layouts are recognised:
    '   * A cell whose text contains ":" and whose right-hand neighbour is
    '     non-empty is treated as a label/value pair.
    '   * A row whose column A is exactly "Due Date" is treated as a table
    '     header row; the cell directly beneath each header is its value.
    '
    ' When the same header occurs more than once only the last value seen is
    ' kept, so the output is always a single record.
    Dim ws As Worksheet
    Dim destRow As Long
    Dim destCol As Long
    Dim lastRow As Long
    Dim lastCol As Long
    Dim currentRow As Long
    Dim col As Long
    Dim headers As Collection       ' unique headers, in first-seen order
    Dim valueDict As Object         ' header -> most recent value
    Dim header As Variant
    Dim firstCell As Variant
    Dim cellValue As Variant
    Dim nextValue As Variant
    Dim isTableRow As Boolean

    Set ws = ActiveSheet
    lastRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
    destRow = lastRow + 2           ' output starts 2 rows below the data

    Set headers = New Collection
    Set valueDict = CreateObject("Scripting.Dictionary")

    For currentRow = 1 To lastRow
        ' Only scan up to the last used cell of this row instead of every
        ' column on the sheet.
        lastCol = ws.Cells(currentRow, ws.Columns.Count).End(xlToLeft).Column

        ' Error values (#N/A, #REF!, ...) cannot be compared with strings,
        ' so they are checked for before every comparison below.
        firstCell = ws.Cells(currentRow, 1).Value
        isTableRow = False
        If Not IsError(firstCell) Then isTableRow = (firstCell = "Due Date")

        If isTableRow Then
            ' Table header row: handled once per row. Values come from the
            ' row below, so label/value scanning is skipped for this row.
            For col = 1 To lastCol
                cellValue = ws.Cells(currentRow, col).Value
                If Not IsError(cellValue) Then
                    If cellValue <> "" Then
                        ' Collection keys must be strings; a duplicate key
                        ' raises an error, which is how repeats are skipped.
                        On Error Resume Next
                        headers.Add cellValue, CStr(cellValue)
                        On Error GoTo 0
                        valueDict(cellValue) = ws.Cells(currentRow + 1, col).Value
                    End If
                End If
            Next col
        Else
            ' Label/value pairs. Stopping at lastCol - 1 guarantees the
            ' right-hand neighbour exists and is inside the used part of the row.
            For col = 1 To lastCol - 1
                cellValue = ws.Cells(currentRow, col).Value
                nextValue = ws.Cells(currentRow, col + 1).Value
                If Not (IsError(cellValue) Or IsError(nextValue)) Then
                    If cellValue <> "" And nextValue <> "" Then
                        If cellValue Like "*:*" Then
                            On Error Resume Next   ' duplicate key => already recorded
                            headers.Add cellValue, CStr(cellValue)
                            On Error GoTo 0
                            valueDict(cellValue) = nextValue
                        End If
                    End If
                End If
            Next col
        End If
    Next currentRow

    ' Write headers on the first output row and their values beneath them.
    destCol = 1
    For Each header In headers
        ws.Cells(destRow, destCol).Value = header
        ws.Cells(destRow + 1, destCol).Value = valueDict(header)
        destCol = destCol + 1
    Next header

    MsgBox "Transformation complete!", vbInformation
End Sub
在 Power Query 中这应该相当简单。我把整个区域通过“来自表格/区域”导入 Power Query,并且不勾选“[ ] 表包含标题”。
然后进入“主页 > 高级编辑器”,粘贴以下代码:
// Reshapes a multi-row ERP report (loaded without headers as Table1) into a
// flat table with one row per detail line and the item-level fields repeated.
// Assumed layout per item, as implied by the row indices used below (confirm
// against the real export): row 0 is the "Item: <code>, <description>" line,
// row 1 carries stock/unit/min/max in Column3/5/7/9, row 2 is the detail
// header row, and the remaining rows are detail lines.
let
    Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
    #"Added Index" = Table.AddIndexColumn(Source, "Index", 0, 1, Int64.Type),
    // Tag every "Item:" line with its own index; try/otherwise covers
    // non-text values in Column1.
    #"Added Custom" = Table.AddColumn(#"Added Index", "Custom", each try if Text.Contains([Column1],"Item:") then [Index] else null otherwise null),
    #"Filtered Rows" = Table.SelectRows(#"Added Custom", each ([Column1] <> null)),
    // Spread the tag down so all rows of one item share the same group key.
    #"Filled Down" = Table.FillDown(#"Filtered Rows",{"Custom"}),
    // Rows above the first "Item:" line have no key; they would form a group
    // without the expected detail columns and make the expand step fail.
    #"Removed Preamble" = Table.SelectRows(#"Filled Down", each [Custom] <> null),
    #"Grouped Rows" = Table.Group(#"Removed Preamble", {"Custom"}, {{"data", (itemRows) =>
        let
            // Read the item-level fields from THIS group's rows. Inside the
            // Table.AddColumn callbacks "_" is the current row, so the group
            // table has to be captured under its own name here.
            ItemLine = itemRows{0}[Column1],
            StockLine = itemRows{1},
            #"Removed Top Rows" = Table.Skip(itemRows, 2),
            #"Promoted Headers" = Table.PromoteHeaders(#"Removed Top Rows", [PromoteAllScalars=true]),
            #"Removed Other Columns" = Table.SelectColumns(#"Promoted Headers",{"Loc", "Prod on Hand", "MRP Order", "Net", "Received", "Required"}),
            #"Added Custom1" = Table.AddColumn(#"Removed Other Columns", "Item", each Text.BetweenDelimiters(ItemLine,": ",",")),
            #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Description", each Text.AfterDelimiter(ItemLine,",")),
            #"Added Custom3" = Table.AddColumn(#"Added Custom2", "Current Stock", each StockLine[Column3]),
            #"Added Custom4" = Table.AddColumn(#"Added Custom3", "Unit", each StockLine[Column5]),
            #"Added Custom5" = Table.AddColumn(#"Added Custom4", "Minimum", each StockLine[Column7]),
            #"Added Custom6" = Table.AddColumn(#"Added Custom5", "Maximum", each StockLine[Column9])
        in
            #"Added Custom6", type table }}),
    #"Removed Columns" = Table.RemoveColumns(#"Grouped Rows",{"Custom"}),
    #"Expanded data" = Table.ExpandTableColumn(#"Removed Columns", "data", {"Item", "Description", "Current Stock", "Unit", "Minimum", "Maximum","Loc", "Prod on Hand", "MRP Order", "Net", "Received", "Required" }, {"Item", "Description", "Current Stock", "Unit", "Minimum", "Maximum","Loc", "Prod on Hand", "MRP Order", "Net", "Received", "Required" })
in
    #"Expanded data"