下图显示了一个示例数据表(A 到 B 列),右侧的表(E 到 F 列)显示了我想要的输出。
同一ID可以有任意多个。同一ID中的数据可以多次复制,也可以包含不同的数据。
我需要组合每个 ID 的所有不同数据项。
如果有多个数据项,数据项将以逗号分隔,并且可以是多种长度的数字和字母的混合(即使我的示例显示单个字符)。所需数据始终位于每个逗号之间,其中存在逗号(单个数据项除外)。
ID 是数字。
Option Explicit
Sub CombineData()
' Source
Const sName As String = "Sheet1"
Const sDelimiter As String = ", "
' Destination
Const dName As String = "Sheet1"
Const dFirstCellAddress As String = "E2"
Const dDelimiter As String = ", "
' Source range to an array.
Dim Data As Variant
Dim rCount As Long
With ThisWorkbook.Worksheets(sName).Range("A1").CurrentRegion
rCount = .Rows.Count - 1
If rCount < 1 Then Exit Sub ' no data or only headers
Data = .Resize(rCount, 2).Offset(1).Value
End With
' Array to a dictionary of dictionaries.
Dim dict As Object: Set dict = CreateObject("Scripting.Dictionary")
dict.CompareMode = vbTextCompare
Dim Key As Variant
Dim Item As Variant
Dim r As Long
Dim n As Long
For r = 1 To rCount
Item = CStr(Data(r, 2))
If Not IsError(Item) Then
If Len(Item) > 0 Then
Key = Data(r, 1)
If Not IsError(Key) Then
If Len(Key) > 0 Then
Item = Split(Item, sDelimiter)
If Not dict.Exists(Key) Then
Set dict(Key) = CreateObject("Scripting.Dictionary")
End If
For n = 0 To UBound(Item)
dict(Key)(Item(n)) = Empty
Next n
End If
End If
End If
End If
Next r
rCount = dict.Count
If rCount = 0 Then Exit Sub ' only error values or blanks
' Dictionary of dictionaries to the array.
ReDim Data(1 To rCount, 1 To 2)
r = 0
For Each Key In dict.Keys
r = r + 1
Data(r, 1) = Key
Data(r, 2) = Join(dict(Key).Keys, dDelimiter)
Next Key
' Array to the destination range.
With ThisWorkbook.Worksheets(dName).Range(dFirstCellAddress).Resize(, 2)
.Resize(rCount).Value = Data
.Resize(.Worksheet.Rows.Count - .Row - rCount + 1).Offset(rCount).Clear
End With
MsgBox "Data combined.", vbInformation
End Sub
请使用下一个方法。正如我在评论中所说,它使用字典来提取唯一键和其他四个数组来保留中间值并构建最终值。以下代码能够处理两种(可能的)分隔符:逗号“,”和逗号后跟一个或多个空格“,”,“,”,“,”。它仅在内存中工作,并且应该非常快,即使对于大范围也是如此:
Sub extractUniqueIDsUniqueData()
Dim sh As Worksheet, lastR As Long, arr, arrItem, arrIt, arrFin
Dim i As Long, mtch, El, dict As Object
Set sh = ActiveSheet
lastR = sh.Range("A" & sh.rows.count).End(xlUp).row
arr = sh.Range("A2:B" & lastR).value 'place the range in an array for faster iteration
Set dict = CreateObject("Scripting.Dictionary")
'fill the dictionary:
For i = 1 To UBound(arr)
If Not dict.Exists(arr(i, 1)) Then
arrItem = Split(Replace(arr(i, 2), " ", ""), ",") 'replace spaces before splitting
dict.Add arr(i, 1), arrItem
Else
If arr(i, 2) <> "" Then 'skip the empty strings in B:B
arrIt = Split(Replace(arr(i, 2), " ", ""), ",")
arrItem = dict(arr(i, 1))
If UBound(arrItem) = -1 Then 'if no any element in the item array
arrItem = arrIt 'use the existing processed B:B value instead
Else
For Each El In arrIt
mtch = Application.match(El, arrItem, 0)
If IsError(mtch) Then 'not existing in the item array
ReDim Preserve arrItem(UBound(arrItem) + 1)
arrItem(UBound(arrItem)) = El 'add the new element in the item array
End If
Next El
End If
dict(arr(i, 1)) = arrItem 'place the array back as dictionary item
End If
End If
Next i
'Process the dictionary content:
ReDim arrFin(1 To dict.count + 1, 1 To 2) 'redim the array to keep all dictionary elements
'fill the header in the final array:
arrFin(1, 1) = "FinalList": arrFin(1, 2) = "Combined DATA"
'fill the rest of the final array rows
For i = 0 To dict.count - 1
arrFin(i + 2, 1) = dict.Keys()(i)
arrFin(i + 2, 2) = Join(dict.items()(i), ", ")
Next i
'drop the final array content at once:
With sh.Range("E1").Resize(UBound(arrFin), UBound(arrFin, 2))
.value = arrFin
.EntireColumn.AutoFit
End With
MsgBox "Ready..."
End Sub
这会根据提供的输入数据生成组合输出。 该代码使用
Dictionaries
来帮助获取唯一的值集。
Option Explicit
Public Sub Test()
Dim sourceWksht As Worksheet
Set sourceWksht = Application.ActiveWorkbook.Worksheets.("Sheet1")
Dim rawData As Variant
rawData = sourceWksht.Range("A2:B12").Value2
Dim rawInputDictionary As Dictionary
Set rawInputDictionary = New Dictionary
Dim csvValue As String
Dim rawIndex As Long
For rawIndex = LBound(rawData, 1) To UBound(rawData, 1)
csvValue = Trim$(rawData(rawIndex, 2))
If Not rawInputDictionary.Exists(rawData(rawIndex, 1)) And Len(csvValue) > 0 Then
rawInputDictionary.Add rawData(rawIndex, 1), csvValue
ElseIf Len(csvValue) > 0 Then
rawInputDictionary.Item(rawData(rawIndex, 1)) _
= rawInputDictionary.Item(rawData(rawIndex, 1)) & "," & csvValue
End If
Next
GenerateOutput rawInputDictionary, sourceWksht
End Sub
Private Sub GenerateOutput(ByVal rawInputDictionary As Dictionary, ByVal wksht As Worksheet)
Dim outputArray As Variant
ReDim outputArray(1 To rawInputDictionary.Count, 1 To 2)
Dim outputArrayIndex As Long
outputArrayIndex = 1
Dim idKey As Variant
For Each idKey In rawInputDictionary.Keys
outputArray(outputArrayIndex, 1) = idKey
outputArray(outputArrayIndex, 2) = GenerateCombinedData(rawInputDictionary.Item(idKey))
outputArrayIndex = outputArrayIndex + 1
Next
Dim outputRange As Range
Set outputRange = wksht.Range("E2:F" & CStr(rawInputDictionary.Count + 1))
outputRange.Value = outputArray
End Sub
Private Function GenerateCombinedData(ByVal idValues As String) As String
Dim combinedData As String
combinedData = vbNullString
Dim outputDictionary As Dictionary
Set outputDictionary = New Dictionary
Dim valuesArrayIndex As Long
Dim valuesArray As Variant
valuesArray = Split(idValues, ",")
For valuesArrayIndex = LBound(valuesArray) To UBound(valuesArray)
If Not outputDictionary.Exists(valuesArray(valuesArrayIndex)) Then
combinedData = combinedData & valuesArray(valuesArrayIndex) & ","
'Use the outputDictionary 'Keys' to ignore duplicate values
outputDictionary.Add valuesArray(valuesArrayIndex), ""
End If
Next
'Trim the trailing comma
combinedData = Left$(combinedData, Len(combinedData) - 1)
GenerateCombinedData = combinedData
End Function