我正在尝试使用 VBA 查找特定网页上的所有网站超链接,包括用于超链接的文本。
该网站是 https://cdl.demosphere.com/,我想找到嵌套在页面 HTML 深处的 div.tg(请参见下图)。例如,我想从其中的元素中获取链接 https://elements.demosphere-secure.com/scripts/runisa.dll?M2.65878:gp:229365.6993:74908+Elements/Display+E+46241++111127392 ,以及该链接对应的文字“U13-U14 Girls”。
我希望不使用 IE(Internet Explorer)。下面是我已经尝试过的代码:
' Downloads the page at Url over XMLHTTP (no Internet Explorer automation) and
' writes the href of every <a> element located inside a <div class="tg"> into
' column 1 of the worksheet, one link per row starting at row 1.
' Requires a project reference to "Microsoft HTML Object Library" because
' HTMLDocument is early-bound.
' NOTE(review): if the div.tg markup is served by a different document
' (e.g. loaded into a frame), it will not be present in this response - verify.
Sub test()
    Const Url = "https://cdl.demosphere.com/"
    Dim Html As HTMLDocument, Links As Object, I&
    Set Html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", Url, False
        .send
        ' Do not parse an error page as if it were the requested document.
        If .Status <> 200 Then
            MsgBox "Page not loaded. HTTP status " & .Status
            Exit Sub
        End If
        Html.body.innerHTML = .responseText
    End With
    ' ".tg" is a CSS class selector, not a tag name, so getElementsByTagName
    ' can never match it; querySelectorAll accepts CSS selectors and can also
    ' descend to the anchors nested inside the div.
    Set Links = Html.querySelectorAll("div.tg a")
    For I = 0 To Links.Length - 1
        ' The node list is 0-based but worksheet rows are 1-based:
        ' Cells(0, 1) raises run-time error 1004.
        Cells(I + 1, 1) = Links.Item(I).href
    Next I
End Sub
我也尝试过改用 Html.getElementsByTagName("div"),这样可以得到结果,但取到的“div”在 HTML 层级中不够深。
要获取所有链接,您不需要借助任何 div 标签,可以直接获取所有 a 标签。
' Requests the schedule page synchronously, parses the response with a
' late-bound "htmlFile" document and lists every <a> element on the active
' sheet: column 1 receives the href, column 2 the link text, one anchor per
' row starting at row 1. A non-200 response is reported in a message box and
' nothing is written.
Sub ExampleToGetURLs()
    Const url As String = "http://elements.demosphere.com/74908/schedules/Fall2023/111127392.html?1704485956"
    Dim htmlDoc As Object
    Dim request As Object
    Dim anchors As Object
    Dim anchor As Object
    Dim target As Worksheet
    Dim idx As Long

    Set htmlDoc = CreateObject("htmlFile")
    Set target = ActiveWorkbook.ActiveSheet

    ' Synchronous GET: send returns only after the response has arrived.
    Set request = CreateObject("MSXML2.XMLHTTP.6.0")
    request.Open "GET", url, False
    request.send

    ' Guard clause: report the failure and stop before touching the sheet.
    If request.Status <> 200 Then
        MsgBox "Page not loaded. HTTP status " & request.Status
        Exit Sub
    End If

    htmlDoc.body.innerHTML = request.responseText
    Set anchors = htmlDoc.getElementsByTagName("a")

    ' The collection is 0-based; output rows start at 1.
    For idx = 0 To anchors.Length - 1
        Set anchor = anchors.Item(idx)
        target.Cells(idx + 1, 1).Value = anchor.href
        target.Cells(idx + 1, 2).Value = anchor.innerText
    Next idx
End Sub