初学者被EXCEL ETL困住了

问题描述 投票:0回答:1

我只是不知道从哪里开始 ETL 冒险。我有一个由过时的 ERP 软件提供的多行标题杂乱数据集。我只需要清理它即可适合基本的数据透视/vlookups 以及可能的 Power BI。

你认为我应该走 VBA 路线还是 Power Query 路线?

任何其他建议都非常感谢,因为我的大脑正在沸腾..

谢谢!

之前 garbage data

之后 good data

这是我用过的VBA,但它是有问题的VBA

' Flattens a report-style sheet into one header row plus one value row.
'
' Scans the active sheet from row 1 to the last used row in column A and
' collects two kinds of data:
'   * "Label:" cells that are immediately followed by a non-empty cell
'     (the label becomes a header, the neighbour becomes its value);
'   * rows whose column A equals "Due Date": every non-blank cell of that
'     row is a header and the cell directly below it is the value.
' If a header occurs more than once, the last value seen wins.
' The result is written two rows below the last used row of column A.
Sub TransformDataToSingleRow()
    Const TABLE_MARKER As String = "Due Date"
    Const TEXT_COMPARE As Long = 1          ' Same value as vbTextCompare

    Dim ws As Worksheet
    Dim lastRow As Long
    Dim destRow As Long
    Dim rowIdx As Long
    Dim colIdx As Long
    Dim lastCol As Long
    Dim destCol As Long
    Dim headerText As String
    Dim headers As Collection               ' Header names in first-seen order
    Dim valueDict As Object                 ' Header name -> latest value
    Dim header As Variant

    Set ws = ActiveSheet
    lastRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
    destRow = lastRow + 2                   ' Output starts 2 rows below data

    Set headers = New Collection
    Set valueDict = CreateObject("Scripting.Dictionary")
    ' Case-insensitive keys, so duplicates are detected with Exists instead
    ' of relying on a swallowed Collection.Add error.
    valueDict.CompareMode = TEXT_COMPARE

    For rowIdx = 1 To lastRow
        ' Only walk the used part of the row instead of every sheet column.
        lastCol = ws.Cells(rowIdx, ws.Columns.Count).End(xlToLeft).Column

        If CellTextOrEmpty(ws.Cells(rowIdx, 1).Value) = TABLE_MARKER Then
            ' Tabular section: headers on this row, values on the next row.
            ' Handled once per row, not once per non-empty cell.
            For colIdx = 1 To lastCol
                headerText = CellTextOrEmpty(ws.Cells(rowIdx, colIdx).Value)
                If Len(headerText) > 0 Then
                    If Not valueDict.Exists(headerText) Then headers.Add headerText
                    valueDict(headerText) = ws.Cells(rowIdx + 1, colIdx).Value
                End If
            Next colIdx
        Else
            ' Key/value section: "Label:" followed by a value in the next cell.
            ' The last used cell has an empty right neighbour, so stop one short;
            ' this also avoids stepping past the final worksheet column.
            For colIdx = 1 To lastCol - 1
                headerText = CellTextOrEmpty(ws.Cells(rowIdx, colIdx).Value)
                If headerText Like "*:*" Then
                    If CellTextOrEmpty(ws.Cells(rowIdx, colIdx + 1).Value) <> "" Then
                        If Not valueDict.Exists(headerText) Then headers.Add headerText
                        valueDict(headerText) = ws.Cells(rowIdx, colIdx + 1).Value
                    End If
                End If
            Next colIdx
        End If
    Next rowIdx

    ' Write headers on destRow and the matching values on the row below.
    destCol = 1
    For Each header In headers
        ws.Cells(destRow, destCol).Value = header
        ws.Cells(destRow + 1, destCol).Value = valueDict(header)
        destCol = destCol + 1
    Next header

    MsgBox "Transformation complete!", vbInformation
End Sub

' Returns the cell value as text; error values (#N/A, #REF!, ...) count as empty
' so that string comparisons on them cannot raise a type-mismatch error.
Private Function CellTextOrEmpty(ByVal cellValue As Variant) As String
    If IsError(cellValue) Then
        CellTextOrEmpty = ""
    Else
        CellTextOrEmpty = CStr(cellValue)
    End If
End Function
excel vba powerquery etl
1个回答
0
投票

在 Power Query 中这应该相当简单。我通过“数据 > 来自表格/区域”把整个区域加载到 Power Query 中,并且不要勾选“[ ] 表包含标题”复选框。

进入“主页”>“高级编辑器”,并粘贴以下代码:

// Reshapes a multi-row-header ERP export (loaded as Table1 without promoted
// headers) into a flat table: one row per detail line, with the owning item's
// attributes repeated on every row.
let
    Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
    #"Added Index" = Table.AddIndexColumn(Source, "Index", 0, 1, Int64.Type),
    // Mark each row whose first column contains "Item:" with its own index;
    // the try guards against non-text values in Column1.
    #"Added Custom" = Table.AddColumn(#"Added Index", "Custom", each try if Text.Contains([Column1],"Item:") then [Index] else null otherwise null),
    #"Filtered Rows" = Table.SelectRows(#"Added Custom", each ([Column1] <> null)),
    // Propagate the marker down so every row carries the index of its item row.
    #"Filled Down" = Table.FillDown(#"Filtered Rows",{"Custom"}),
    // One group per item. The group table is bound to a named parameter so the
    // item-level fields are read from THIS group's first two rows rather than
    // from the first two rows of the whole table.
    #"Grouped Rows" = Table.Group(#"Filled Down", {"Custom"}, {{"data", (itemRows as table) =>
        let
            ItemLine = itemRows{0}[Column1],
            StockRow = itemRows{1},
            // Rows 0 and 1 hold the item line and the stock line; the row after
            // them is the header row of the detail table.
            #"Removed Top Rows" = Table.Skip(itemRows, 2),
            #"Promoted Headers" = Table.PromoteHeaders(#"Removed Top Rows", [PromoteAllScalars=true]),
            #"Removed Other Columns" = Table.SelectColumns(#"Promoted Headers",{"Loc", "Prod on Hand", "MRP Order", "Net", "Received", "Required"}),
            #"Added Custom1" = Table.AddColumn(#"Removed Other Columns", "Item", each Text.BetweenDelimiters(ItemLine, ": ", ",")),
            #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Description", each Text.AfterDelimiter(ItemLine, ",")),
            #"Added Custom3" = Table.AddColumn(#"Added Custom2", "Current Stock", each StockRow[Column3]),
            #"Added Custom4" = Table.AddColumn(#"Added Custom3", "Unit", each StockRow[Column5]),
            #"Added Custom5" = Table.AddColumn(#"Added Custom4", "Minimum", each StockRow[Column7]),
            #"Added Custom6" = Table.AddColumn(#"Added Custom5", "Maximum", each StockRow[Column9])
        in
            #"Added Custom6", type table }}),
    #"Removed Columns" = Table.RemoveColumns(#"Grouped Rows",{"Custom"}),
    #"Expanded data" = Table.ExpandTableColumn(#"Removed Columns", "data", {"Item", "Description", "Current Stock", "Unit", "Minimum", "Maximum","Loc", "Prod on Hand", "MRP Order", "Net", "Received", "Required" }, {"Item", "Description", "Current Stock", "Unit", "Minimum", "Maximum","Loc", "Prod on Hand", "MRP Order", "Net", "Received", "Required" })
in
    #"Expanded data"
© www.soinside.com 2019 - 2024. All rights reserved.