在Excel中使用正则表达式和格式读取文本文件

问题描述 投票:0回答:1

我有一个txt文件,包括一些注释行和很多数据行,如下所示

XYZ3-CCAV::[2] mcb XYZ3 hpy diag ce56 dsc
[UT000029118.494] XYZ3:mcb >> LN (CDRxN  , UC_CFG,XTP_RST,STP) SD LCK XRMPP CLK90 CLKP1 PF(M,L) VGA DCO P1kII M1kII  EPD(1,2,3,4,5,6)       XTMPP  AMAP(n1,m,p1,2,3,rpara)   Head(L,R,U,D)  LINK_TIME
[UT000029118.495] XYZ3:mcb >>  0 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   0    44     2     0,1   17   4 205    0  30,  2,  2, -2,  1,  1      0   22, 90, 0, 0, 0, 0     296,464,153,155    57.6
[UT000029118.495] XYZ3:mcb >>  1 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   0    44     0     0,1   17   2 202    0  31,  2, -1,  5, -1,  1      0   22, 90, 0, 0, 0, 0     296,464,155,155    58.5
[UT000029118.496] XYZ3:mcb >>  2 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   0    43     0     0,1   17   0 209    0  33,  1,  0,  1,  3, -3      0   22, 90, 0, 0, 0, 0     312,449,159,159    60.1
[UT000029118.497] XYZ3:mcb >>  3 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   1    45     0     0,1   17   6 202    0  33,  2,  0, -1,  3,  0      0   22, 90, 0, 0, 0, 0     328,449,153,159    60.3
[UT000029118.497] XYZ3:mcb >> 

XYZ3-CCAV::[2] Headscan 51 0 0xf 0
Headscan: min_dwell_bits 100000
Headscan: max_dwell_bits 100000000

我可以使用Excel内置正则表达式(VBS)来提取数据行

[UT000029118.495] XYZ3:mcb >>  0 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   0    44     2     0,1   17   4 205    0  30,  2,  2, -2,  1,  1      0   22, 90, 0, 0, 0, 0     296,464,153,155    57.6
[UT000029118.495] XYZ3:mcb >>  1 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   0    44     0     0,1   17   2 202    0  31,  2, -1,  5, -1,  1      0   22, 90, 0, 0, 0, 0     296,464,155,155    58.5
[UT000029118.496] XYZ3:mcb >>  2 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   0    43     0     0,1   17   0 209    0  33,  1,  0,  1,  3, -3      0   22, 90, 0, 0, 0, 0     312,449,159,159    60.1
[UT000029118.497] XYZ3:mcb >>  3 (OSx1:x1, 0x0c,     0,0,    0)  1*  1*   1    45     0     0,1   17   6 202    0  33,  2,  0, -1,  3,  0      0   22, 90, 0, 0, 0, 0     328,449,153,159    60.3

我尝试使用下面的代码将数据行写入Excel文件(在Excel文件中创建了名为“EyeInfo”的工作表):

' Lets the user pick a diagnostic log file, extracts the port header lines and
' the numbered data lines with two regular expressions, and writes the matches
' to the "EyeInfo" worksheet (header text in column A, data lines in column B).
' Each matched data line is written as one string into a single cell.
Sub open_log_file()
    Dim Full_Name As String, text As String, textline As String
    Dim ws As Worksheet 'Worksheet that receives the file name and the extracted lines

    'Set up worksheet: the target sheet must already exist; its old content is wiped
    Set ws = Worksheets("EyeInfo")
    ws.UsedRange.Clear

    'Show the file-open dialog and keep the chosen path
    'NOTE: if the dialog is cancelled GetOpenFilename returns False, which is
    'stored in this String as "False"; that case is not handled here
    Full_Name = Application.GetOpenFilename("Diag Log File(*.log;*.txt;*.*),*.log;*.txt;*.*")

    'Read the file line by line and concatenate everything into text
    'NOTE: Line Input returns each line WITHOUT its CR-LF terminator and nothing
    'is added back, so text ends up as one long line with no line breaks
    Open Full_Name For Input As #1
    Do Until EOF(1)
        Line Input #1, textline
        text = text & textline
    Loop
    Close #1

    ' Regular expression for the port header line,
    ' e.g. "XYZ3-CCAV::[2] mcb XYZ3 hpy diag ce56 dsc" (the port name is captured)
    Dim regEx_CE As Object
    Set regEx_CE = CreateObject("VBScript.RegExp")

    With regEx_CE
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "\w*\[\d+\]\s+mcb\s+XYZ3\s+hpy\s+diag\s+(ce\d+)\s+dsc"
    End With

    ' Regular expression for a data line: "[<timestamp>] <name>:<name> >> <digits>..."
    ' The trailing .* runs to the next line break or to the end of the string
    Dim regEx_LN As Object
    Set regEx_LN = CreateObject("VBScript.RegExp")

    With regEx_LN
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "\[\w*\.\w*\]\s*\w*:\w*\s*>>\s*\d+.*"
    End With

    ' Run both patterns over the whole text and write the matches to the sheet
    ' (CE_match, LN_match and i are not declared, so they are implicit Variants)
    Set CE_match = regEx_CE.Execute(text)
    Set LN_match = regEx_LN.Execute(text)
    ws.Cells(1, 1) = Full_Name
    ws.Cells(2, 1) = "Number of Ports to Be Extracted"
    ws.Cells(2, 2) = CE_match.Count
    ' Each header match is paired with four consecutive data-line matches and
    ' occupies four sheet rows starting at row 3; the indexing assumes LN_match
    ' holds at least 4 items per header match
    For i = 0 To CE_match.Count - 1
        ws.Cells(i * 4 + 3, 1) = CE_match(i).Value
        ws.Cells(i * 4 + 3, 2) = LN_match(i * 4 + 0).Value
        ws.Cells(i * 4 + 4, 2) = LN_match(i * 4 + 1).Value
        ws.Cells(i * 4 + 5, 2) = LN_match(i * 4 + 2).Value
        ws.Cells(i * 4 + 6, 2) = LN_match(i * 4 + 3).Value
    Next
End Sub

我想要做的是按空格或逗号拆分每个数据行,使行中的每个数据项分别放入该行的一个单元格中。但是这段代码把整个数据行放进了Excel的单个单元格中。

excel excel-vba vba
1个回答
1
投票

要解决这个问题,确实需要看到您的代码和数据。虽然还有其他地方可以改进,但根本问题出在读取文本文件的那段代码上:它丢弃了所有的行结束符(EOL)。

使用Line Input语句时,回车换行(CR-LF)序列会被跳过,不会附加到读入的字符串中。

因此,在这种情况下,您的regEx_LN模式只会得到一个匹配:模式末尾的 .* 会一直匹配到行结束符或字符串末尾,而text中只有一行,于是从匹配起点开始的整个文件内容都被读进了这一个匹配。

通过以下更改,您的例程可以处理您的数据:

'read the file
Open Full_Name For Input As #1
Do Until EOF(1)
    Line Input #1, textline
    'Line Input drops the CR-LF terminator, so re-insert one between lines
    'to keep each record on its own line for the MultiLine regex
    text = text & vbCrLf & textline
Loop
Close #1

'remove the leading CR-LF added on the first iteration
'(vbCrLf is two characters, so skip Len(vbCrLf) characters, not one)
text = Mid(text, Len(vbCrLf) + 1)

以下是进行修改并运行代码后的样子:

enter image description here

在您的原始问题中,您表示您还希望根据分隔符(空格或逗号)将数据行拆分为列。

另外,正如@AnsgarWiechers在下面的评论中指出的那样,最好一步读取整个文件,而不是逐行读取再拼接。

在他的评论中,他展示了一行使用Line Input方法的代码。

我更喜欢使用FileSystemObject来读取文本文件。在某些情况下,数据格式和读取要求可能会导致Line Input方法出现问题。

下面是代码

  • 使用FSO一步读取整个文件
  • 还将数据行解析为单个单元格

=======================================

' Lets the user pick a diagnostic log file, reads it in one step with
' FileSystemObject, extracts the port header lines and the numbered data lines
' with two regular expressions, and writes them to the "EyeInfo" worksheet:
'   row 1      : full path of the log file
'   row 2      : number of port headers found
'   row 3...   : one group of LANES_PER_PORT rows per port; the header text goes
'                in column A, the data lines are split on spaces/commas from
'                column B onwards.
' The worksheet "EyeInfo" must already exist. Nothing is changed if the user
' cancels the file dialog.
Sub open_log_file()

    'IOMode value for FileSystemObject.OpenTextFile. The Scripting constant
    'ForReading is not available when FSO is late-bound via CreateObject,
    'so the literal value is defined here.
    Const FOR_READING As Long = 1
    'Number of data lines that follow each port header in the log
    Const LANES_PER_PORT As Long = 4

    Dim Full_Name As String, text As String
    Dim ws As Worksheet          'worksheet that receives the extracted data
    Dim picked As Variant        'raw result of the file dialog (False when cancelled)

    'Ask for the file first so that cancelling leaves the worksheet untouched
    picked = Application.GetOpenFilename("Diag Log File(*.log;*.txt;*.*),*.log;*.txt;*.*")
    If VarType(picked) = vbBoolean Then Exit Sub
    Full_Name = CStr(picked)

    'Set up worksheet
    Set ws = Worksheets("EyeInfo")
    ws.UsedRange.Clear

    'Using FSO to read the whole file in one step
    Dim FSO As Object
    Dim TS As Object

    Set FSO = CreateObject("Scripting.FileSystemObject")
    Set TS = FSO.OpenTextFile(Full_Name, FOR_READING)
    'ReadAll raises "Input past end of file" on an empty file, so guard it
    If Not TS.AtEndOfStream Then text = TS.ReadAll
    TS.Close

    ' Regular expression for the port header line,
    ' e.g. "XYZ3-CCAV::[2] mcb XYZ3 hpy diag ce56 dsc"
    Dim regEx_CE As Object
    Set regEx_CE = CreateObject("VBScript.RegExp")

    With regEx_CE
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "\w*\[\d+\]\s+mcb\s+XYZ3\s+hpy\s+diag\s+(ce\d+)\s+dsc"
    End With

    ' Regular expression for a data line: "[<timestamp>] <name>:<name> >> <digits>..."
    Dim regEx_LN As Object
    Set regEx_LN = CreateObject("VBScript.RegExp")

    With regEx_LN
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "\[\w*\.\w*\]\s*\w*:\w*\s*>>\s*\d+.*"
    End With

    ' Run both patterns over the whole text and put the data in Excel/EyeInfo
    Dim CE_match As Object, LN_match As Object
    Dim i As Long, k As Long
    Dim firstRow As Long         'first sheet row of the current port group
    Dim lnIdx As Long            'index into LN_match for the current data line
    Dim written As Long          'data lines actually written for the current port

    Set CE_match = regEx_CE.Execute(text)
    Set LN_match = regEx_LN.Execute(text)
    ws.Cells(1, 1) = Full_Name
    ws.Cells(2, 1) = "Number of Ports to Be Extracted"
    ws.Cells(2, 2) = CE_match.Count

    For i = 0 To CE_match.Count - 1
        firstRow = i * LANES_PER_PORT + 3
        ws.Cells(firstRow, 1) = CE_match(i).Value

        written = 0
        For k = 0 To LANES_PER_PORT - 1
            lnIdx = i * LANES_PER_PORT + k
            'A truncated log may hold fewer data lines than expected
            If lnIdx >= LN_match.Count Then Exit For
            '"." matches CR in VBScript.RegExp, so a CR-LF terminated line
            'leaves a trailing CR in the match; drop it before writing
            ws.Cells(firstRow + k, 2) = Replace(LN_match(lnIdx).Value, vbCr, "")
            written = written + 1
        Next k

        'Split the data lines of this port into one cell per value.
        'TextToColumns fails on a range without data, hence the guard.
        If written > 0 Then
            ws.Range(ws.Cells(firstRow, 2), ws.Cells(firstRow + written - 1, 2)).TextToColumns _
                DataType:=xlDelimited, _
                textqualifier:=xlTextQualifierNone, _
                consecutivedelimiter:=True, _
                Tab:=False, _
                semicolon:=False, _
                comma:=True, _
                Space:=True, _
                other:=False
        End If
    Next i

End Sub

=======================================

以下是您的数据结果:

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.