P&ID 111222
DWG111-5456DOC512BC-1234
the On Export:
Sub wordScrape()
Dim wrdDoc As Object
Dim objFiles As Object
Dim fso As Object
Dim wordApp As Object
Dim sh1 As Worksheet
Dim x As Integer
' Change this to the folder containing your word documents
FolderName = "Y:\120\TEST"
Set sh1 = ThisWorkbook.Sheets(1)
Set fso = CreateObject("Scripting.FileSystemObject")
Set wordApp = CreateObject("Word.application")
Set objFiles = fso.GetFolder(FolderName).Files
x = 1
For Each wd In objFiles
If InStr(wd, ".docx") And InStr(wd, "~") = 0 Then
Set wrdDoc = wordApp.Documents.Open(wd.Path, ReadOnly = True)
'word document file name
sh1.Cells(x, 1) = wd.Name
'document number - Table 1, Row 2, Column 1
sh1.Cells(x, 2) = Application.WorksheetFunction.Clean(wrdDoc.Tables(1).Cell(Row:=2, Column:=1).Range)
'document title - Table 1, Row 3, Column 1
sh1.Cells(x, 3) = Application.WorksheetFunction.Clean(wrdDoc.Tables(1).Cell(Row:=3, Column:=1).Range)
'cell for tags for document - Table 1, Row 9, Column 2
' note - if more than 1 line, and automatic numbering in WORD doc, when exported, will remove numbering and line breaks - runs everything together
sh1.Cells(x, 4) = Application.WorksheetFunction.Clean(wrdDoc.Tables(1).Cell(Row:=9, Column:=2).Range)
'cell that notes frequency for doc - Table 1, Row 16, Column 2
sh1.Cells(x, 5) = Application.WorksheetFunction.Clean(wrdDoc.Tables(1).Cell(Row:=16, Column:=2).Range)
'sh1.Cells(x, 3) = ....more extracted data....
x = x + 1
wrdDoc.Close
End If
Next wd
wordApp.Quit
End Sub
这将从单词表单元格中提取常规或弹头文本,格式用于在Excel单元中使用。 如果将单词中的文本格式化为列表,它将添加“子弹”或数字(但是请注意,如果单元格的格式混合,则数字将关闭)
'get the text from a table cell
Function CellContent(wdCell) As String
Dim s As String, i As Long, pc As Long, p As Object
pc = wdCell.Range.Paragraphs.Count
'loop over paragraphs in cell (could just be 1)
For i = 1 To pc
s = s & IIf(i > 1, Chr(10), "") 'line break if not first para
Set p = wdCell.Range.Paragraphs(i)
'any list format applied ?
Select Case p.Range.listformat.listtype
Case 2: s = s & "* " 'bullet
Case 3: s = s & i & ". " 'numbered
End Select
s = s & p.Range.Text
Next i
CellContent = Left(s, Len(s) - 1) 'trim off end-of-cell mark from Word
End Function
您从当前的子中称呼它的方式:
sh1.Cells(x, 4) = CellContent( wrdDoc.Tables(1).Cell(9, 2) )
thank你蒂姆。该代码效果很好!!!