我正在尝试从以下URL抓取数据:http://iswdataclient.azurewebsites.net/webSearchID.aspx?dbkey=parkercad。我可以插入和查询属性ID,但是在加载搜索结果后,我无法成功单击结果表中的“查看属性”链接。
我的最初调试提示该表单尚未实际提交,这意味着该链接不存在于网页上。但是,后续结果页面中的HTML显示了搜索结果的其他元素。我没有成功尝试以下操作以等待网页加载,但我认为这不是时间问题:
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Do While oIE.ReadyState = 4: WScript.Sleep 100: Loop
Do While oIE.ReadyState <> 4: WScript.Sleep 100: Loop
Do While IE.ReadyState = 4: DoEvents: Loop
Do Until IE.ReadyState = 4: DoEvents: Loop
Application.Wait (Now + TimeValue("00:00:03"))
我已经以多种方式解析了HTML,还考虑了事件处理问题,首先是在表单级别进行深入研究:
Set ie = CreateObject("internetexplorer.application")
With ie
.navigate "http://iswdataclient.azurewebsites.net/webProperty.aspx?dbkey=parkercad
.Visible = True
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With
For Each propid In Range(Cells(2, 8), Cells(2, 8)) 'Cells(Range("H" & Rows.Count).End(xlUp).Row, 8)) 'propid = R000001972
If propid <> "N/A" Then
On Error Resume Next
With ie.document.body
For iFRM = 0 To .getElementsByTagName("form").Length - 1
If .getElementsByTagName("form")(iFRM).ID = "searchForm" Then
With .getElementsByTagName("form")(iFRM)
For iNPT = 0 To .getElementsByTagName("input").Length - 1
Select Case .getElementsByTagName("input")(iNPT).Name
Case "ucSearchID$searchid"
.getElementsByTagName("input")(iNPT).Value = propid
Case "ucSearchID$ButtonSearch"
.getElementsByTagName("input")(iNPT).Click
End Select
Next iNPT
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Application.Wait (Now + TimeValue("00:00:02"))
Exit For
End With
Exit For
End If
Next iFRM
End With
以及对所需元素的简单解析:
Set ie = CreateObject("internetexplorer.application")
With ie
.navigate "http://iswdataclient.azurewebsites.net/webProperty.aspx?dbkey=parkercad
.Visible = True
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With
Set intags = ie.document.getElementsByTagName("input")
For Each intag In intags
If intag.classname = "searchid" Then
intag.Value = propid
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
intag.dispatchEvent evt
End If
Next intag
ie.document.getelementbyid("ucSearchID_ButtonSearch").Click
While ie.readyState <> 4 Or ie.Busy: DoEvents: Wend
以及对表格单元格的深入研究,我为其删除了代码。尽管我认为可能存在事件处理问题,但网页已更新,但我无法从结果表中解析更新的HTML。
Debug.Print ie.document.getelementbyid("lblResults").innerText
Debug.Print返回“您对''的搜索返回了0个结果”,而该网页显示了成功的搜索结果,并显示了“您对'R000001972'的搜索返回了1个结果。因此,我的代码成功提交了表单,但不执行结果页“查看属性”链接单击,因为它无法解析更新的HTML:
For at = 0 To ie.document.getElementsByTagName("a").Length - 1
Select Case ie.document.getElementsByTagName("a")(at).ID
Case "ucResultsGrid_" & propid
ie.document.getElementsByTagName("a")(at).Click
End Select
Next at
这似乎不是时间安排或事件处理问题。不确定如何进行。任何帮助将不胜感激。
这是一个aspx页面。您可以以简化形式执行相同的GET和POST请求。我用剪贴板写出样本表。您可以根据需要进行修改。
Option Explicit
Public Sub GetPropertyInfo()
Dim html As MSHTML.HTMLDocument, xhr As Object
Application.ScreenUpdating = False
Set html = New MSHTML.HTMLDocument
Set xhr = CreateObject("MSXML2.ServerXMLHTTP")
Dim body As String, propertyId As String
propertyId = "R000001972"
With xhr
.Open "GET", "http://iswdataclient.azurewebsites.net/webSearchID.aspx?dbkey=parkercad&stype=id&sdata=" & propertyId, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
html.body.innerHTML = .responseText
If html.querySelectorAll("#dvPrimary table tr").Length <= 1 Then Exit Sub
body = GetPostBody(html, propertyId)
.Open "POST", "http://iswdataclient.azurewebsites.net/webProperty.aspx?dbkey=parkercad&stype=id&sdata=" _
& propertyId & "&id=" & propertyId, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send body
html.body.innerHTML = .responseText
End With
Dim ws As Worksheet, clipboard As Object, i As Long
Set ws = ThisWorkbook.Worksheets(1)
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
With ws.Cells
.ClearContents
.ClearFormats
End With
With html.querySelectorAll("table")
For i = 8 To .Length - 1
clipboard.SetText .Item(i).outerHTML
clipboard.PutInClipboard
ws.Range("A" & GetLastRow(ws) + 2).PasteSpecial
Next
End With
Application.ScreenUpdating = True
End Sub
Public Function GetPostBody(ByVal html As MSHTML.HTMLDocument, ByVal propertyId As String) As String
Dim i As Long, result As String
With html.querySelectorAll("input[type=hidden]")
For i = 0 To .Length - 1
result = result & .Item(i).ID & "=" & .Item(i).Value & "&"
Next
End With
result = result & "__EVENTTARGET=ucResultsGrid$" & propertyId
GetPostBody = result
End Function
Public Function GetLastRow(ByVal sh As Worksheet) As Long
On Error Resume Next
GetLastRow = sh.Cells.Find(What:="*", _
After:=sh.Range("A1"), _
Lookat:=xlPart, _
LookIn:=xlFormulas, _
SearchOrder:=xlByRows, _
SearchDirection:=xlPrevious, _
MatchCase:=False).Row
On Error GoTo 0
End Function
参考(VBE>工具>参考):