我想从 www.investing.com 抓取历史数据。我以 Erjon 的原始代码(Web scraping in Investing.com with Excel vba)为模板,但运行时遇到了以下错误:
403:访问被禁止。
我认为脚本中需要进行身份验证,但我没有找到可行的实现方法。我已经用 Google 帐户注册并登录了 www.investing.com。是否可以通过 VBA 脚本实现 Google 登录?
'Add references (Tools/References) to Microsoft HTML Object Library and Microsoft XML v6.0.
Option Explicit
Sub Export_Table()
    ' Requests historical data from investing.com's HistoricalDataAjax endpoint
    ' and copies the rows of the returned "curr_table" HTML table into the
    ' "Data" worksheet of this workbook, starting at row 2, columns 1-7.
    '
    ' Request parameters are read from the dashboard sheet (code name Tabelle1):
    '   C6  -> curr_id        C7  -> smlID
    '   C10 -> st_date        C11 -> end_date
    '   C17 -> interval_sec
    '
    ' Requires references to "Microsoft HTML Object Library" and
    ' "Microsoft XML, v6.0", plus a CreateHTMLDoc function that is defined
    ' outside this procedure.
    '
    ' Any HTTP status other than 200 (e.g. 403) is reported in a message box
    ' and nothing is written to the sheet.

    Const COLUMN_COUNT As Long = 7   ' number of cells copied per table row
    Const FIRST_OUTPUT_ROW As Long = 2

    'Html objects ------------------------------------------------------------'
    Dim htmlDoc As MSHTML.HTMLDocument
    Dim priceTable As MSHTML.HTMLTable
    ' Each item yielded by getElementsByTagName("tr") is a single element, not
    ' a collection, so the loop variable is typed as an element.
    Dim tableRow As MSHTML.IHTMLElement

    'Worksheets and counters -------------------------------------------------'
    Dim dashboardSheet As Worksheet
    Dim outputSheet As Worksheet
    Dim outputRow As Long
    Dim col As Long

    'Request parameters ------------------------------------------------------'
    Dim curr_id As String
    Dim smlID As String
    Dim startDate As String
    Dim endDate As String
    Dim interval As String
    Dim postBody As String
    Dim responseHtml As String

    Dim request As MSXML2.XMLHTTP60

    Set dashboardSheet = Tabelle1
    Set outputSheet = ThisWorkbook.Worksheets("Data")

    curr_id = dashboardSheet.Range("C6").Value
    smlID = dashboardSheet.Range("C7").Value
    interval = dashboardSheet.Range("C17").Value
    ' Dates are formatted with "." first and the dots are then swapped for
    ' "%2F" (URL-encoded "/") when the POST body is built.
    startDate = Format(dashboardSheet.Range("C10").Value, "mm.dd.yyyy")
    endDate = Format(dashboardSheet.Range("C11").Value, "mm.dd.yyyy")

    postBody = "curr_id=" & curr_id & "&smlID=" & smlID & _
               "&header=Lumber+Futures+Historical+Data&st_date=" & Replace(startDate, ".", "%2F") & _
               "&end_date=" & Replace(endDate, ".", "%2F") & _
               "&interval_sec=" & interval & _
               "&sort_col=date&sort_ord=DESC&action=historical_data"

    'Web request -------------------------------------------------------------'
    Set request = New MSXML2.XMLHTTP60
    With request
        .Open "POST", "https://www.investing.com/instruments/HistoricalDataAjax", False
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .setRequestHeader "X-Requested-With", "XMLHttpRequest"
        .send postBody

        If .Status <> 200 Then
            MsgBox "Anforderung fehlgeschlagen. Status: " & .Status
            ' Local object variables are released automatically on exit.
            Exit Sub
        End If

        responseHtml = .responseText
    End With

    'Parse response ----------------------------------------------------------'
    Set htmlDoc = CreateHTMLDoc
    htmlDoc.body.innerHTML = responseHtml

    Set priceTable = htmlDoc.getElementById("curr_table")
    If priceTable Is Nothing Then
        ' Without this guard the loop below would fail with runtime error 91
        ' whenever the response does not contain the expected table.
        MsgBox "Tabelle ""curr_table"" wurde in der Antwort nicht gefunden."
        Exit Sub
    End If

    'Write rows --------------------------------------------------------------'
    outputRow = FIRST_OUTPUT_ROW
    For Each tableRow In priceTable.getElementsByTagName("tr")
        ' Skip rows that do not carry all expected cells instead of failing
        ' on an out-of-range child index.
        If tableRow.Children.Length >= COLUMN_COUNT Then
            For col = 1 To COLUMN_COUNT
                outputSheet.Cells(outputRow, col).Value = tableRow.Children(col - 1).innerText
            Next col
            outputRow = outputRow + 1
        End If
        DoEvents   ' keep Excel responsive while copying many rows
    Next tableRow
End Sub
我从未通过代码登录过谷歌,也没有以这种方式访问过 investing.com。不过通常情况下,你可能需要先从谷歌获取 JWT,再将其传递给 investing.com。
请参阅这篇文章 - https://developers.google.com/identity/protocols/oauth2#webserver。