使用 Excel VBA 屏幕抓取互联网 - 无法引用第二页

问题描述 投票:0回答:1

我的问题相对简单,而且回避得令人抓狂。这与我在互联网和 StackOverflow 上发现的许多问题没有什么不同,但没有任何建议可以帮助我解决这个小难题......

使用 Excel2010,我不会将数据输入到单个字段中,提交它(第 1 部分)并捕获几行数据(第 2 部分),以列表/表格格式粘贴到 Excel 中(第 30 部分 - 并执行 999,999 次......第 1 部分和第 3 部分正在工作 - 第 2 部分拒绝承认新的互联网窗口,所有 gettagnames 和 SelectTable 解决方法都只使用原始 URL - 附件是使用 Sendkeys 的绝望尝试 - 更糟糕的是 - 对于第一个循环 - 然后绝对没有任何东西! !

无论如何,代码应该相当简单 - 对编码顺序中的一些混乱表示歉意 - 这取决于我开始用手术刀切掉一些位,但经过几个小时的混乱后,我不得不使用斧头......

Dim HTMLdoc As HTMLDocument
Dim ie As InternetExplorer

Sub EPF_FSA()

'Application.DisplayAlerts = False
Application.EnableEvents = False

Dim iHTML_Element As IHTMLElement
Dim sURL As String
Dim miss1 As Integer
Dim FrmNo As Long
Dim FrmName As String
Dim Address1 As String
Dim Address2 As String
Dim Address3 As String
Dim Address4 As String
Dim Address5 As String
Dim Address6 As String
Dim Address7 As String
Dim Address8 As String
Dim AnyLuck As String
Dim RowNum As Integer
Dim ColNum As Integer

RowNum = 1
ColNum = 1


FrmNo = 100111

While FrmNo <= 100112
'Do While FrmNo <= 100112
On Error GoTo Err_Clear
sURL = "http://www.fsa.gov.uk/register/epfSearchForm.do"


Set ie = CreateObject("internetexplorer.application")
'Set Ex = CreateObject("MicrosoftExcel.application")
ie.navigate sURL
ie.Visible = True

Do
' Wait till the Browser is loaded
Loop Until ie.readyState = READYSTATE_COMPLETE

Set HTMLdoc = ie.document

HTMLdoc.all.epfref.Value = FrmNo

For Each iHTML_Element In HTMLdoc.getElementsByTagName("input")

If iHTML_Element.Type = "submit" Then miss1 = miss1 + 1
If miss1 = 2 Then iHTML_Element.Click: Exit For
Next

Err_Clear:
If Err <> 0 Then Err.Clear
Resume Next
'PART 2 ********************************************************************
Do
' Wait till the Browser is loaded
Loop Until ie.readyState = READYSTATE_COMPLETE

Call SendKeys("^a")
DoEvents
Call SendKeys("^c")
DoEvents

      ActiveSheet.PasteSpecial Format:="Text", link:=False, DisplayAsIcon:=False
        range("A2").Select

'Copy and select relevant text to sheet 2

  Worksheets("Sheet1").Activate
 FrmName = Cells(39, "A").Value

 Address1 = Cells(59, "A").Value
 Address2 = Cells(60, "A").Value
 Address3 = Cells(61, "A").Value
 Address4 = Cells(62, "A").Value
 Address5 = Cells(63, "A").Value
 Address6 = Cells(64, "A").Value
 Address7 = Cells(65, "A").Value
 Address8 = Cells(66, "A").Value
 AnyLuck = Cells(47, "A").Value


  Worksheets("Sheet2").Activate
 Cells(RowNum, "A").Value = FrmNo
 Cells(RowNum, "B").Value = FrmName
 Cells(RowNum, "C").Value = Address1
 Cells(RowNum, "D").Value = Address2
 Cells(RowNum, "E").Value = Address3
 Cells(RowNum, "F").Value = Address4
 Cells(RowNum, "G").Value = Address5
 Cells(RowNum, "H").Value = Address6
 Cells(RowNum, "I").Value = Address7
 Cells(RowNum, "J").Value = Address8
 Cells(RowNum, "K").Value = AnyLuck
   RowNum = RowNum + 1
'ActiveCell.Offset(1, 0).Select
  Worksheets("Sheet1").Activate
    Cells.Select
    Selection.Delete Shift:=xlUp
    range("A2").Select
'MsgBox (FrmNo & Chr(10) & FrmName)
'Part 3
FrmNo = FrmNo + 1
ie.Quit
ie.Quit
Wend
'Loop

Application.EnableEvents = True

End Sub
html excel vba web-scraping
1个回答
0
投票

看起来可以直接进入结果页面。尝试:

sUrl = "http://www.fsa.gov.uk/register/epfRefSearch.do?epfRef="
sUrl = sUrl & frmNo

然后导航到该页面。实际详细信息位于 ID 为“box”的 div 中

© www.soinside.com 2019 - 2024. All rights reserved.