使用 Excel VBA 抓取 Investing.com

问题描述 投票:0回答:1

我想从 www.investing.com 抓取历史数据。我以 Erjon 的原始代码(Web scraping in Investing.com with Excel vba)为模板,但运行时遇到了以下错误:

403:访问被禁止。

我认为脚本中需要进行身份验证,但我找不到实现的方法。我已使用 Google 帐户注册并登录了 www.investing.com。是否可以通过 VBA 脚本实现 Google 登录?


'Add references (Tools/References) to Microsoft HTML Object Library and Microsoft XML v6.0.
Option Explicit
Sub Export_Table()
'Requests historical data from Investing.com's HistoricalDataAjax endpoint and
'copies the rows of the returned "curr_table" HTML table to the "Data"
'worksheet, starting at row 2 (columns A-G).
'
'Request parameters are read from the dashboard sheet (Tabelle1):
'   C6 = curr_id, C7 = smlID, C10 = start date, C11 = end date, C17 = interval.
'
'Needs references to Microsoft HTML Object Library and Microsoft XML v6.0, plus
'the CreateHTMLDoc function, which is defined outside this procedure.

    Const REQUEST_URL As String = "https://www.investing.com/instruments/HistoricalDataAjax"
    Const COLUMN_COUNT As Long = 7          'cells copied per table row

    'Html objects ---------------------------------------------------------'
    Dim HtmlDoc As MSHTML.HTMLDocument
    Dim ieTable As MSHTML.HTMLTable
    Dim tableRow As MSHTML.IHTMLElement     'one <tr>; a row is an element, not a collection
    Dim rowCells As Object                  'child cells of the current row

    'Worksheets and counters ----------------------------------------------'
    Dim dashboardSheet As Worksheet
    Dim Table As Worksheet
    Dim I As Long                           'next worksheet row to write
    Dim col As Long

    'Request parameters ---------------------------------------------------'
    Dim curr_id As String
    Dim smlID As String
    Dim startDate As String
    Dim endDate As String
    Dim interval As String
    Dim requestBody As String

    Dim xmlHttpRequest As MSXML2.XMLHTTP60

    Set dashboardSheet = Tabelle1
    Set Table = ThisWorkbook.Worksheets("Data")

    curr_id = dashboardSheet.Range("C6").Value
    smlID = dashboardSheet.Range("C7").Value
    interval = dashboardSheet.Range("C17").Value

    'Dates are formatted with "." first because "/" in a Format pattern is
    'replaced by the locale's date separator; the dots are then swapped for
    'the URL-encoded slash the form body needs (mm%2Fdd%2Fyyyy).
    startDate = Replace(Format(dashboardSheet.Range("C10").Value, "mm.dd.yyyy"), ".", "%2F")
    endDate = Replace(Format(dashboardSheet.Range("C11").Value, "mm.dd.yyyy"), ".", "%2F")

    requestBody = "curr_id=" & curr_id & _
                  "&smlID=" & smlID & _
                  "&header=Lumber+Futures+Historical+Data" & _
                  "&st_date=" & startDate & _
                  "&end_date=" & endDate & _
                  "&interval_sec=" & interval & _
                  "&sort_col=date&sort_ord=DESC&action=historical_data"

    'Web request ----------------------------------------------------------'
    Set xmlHttpRequest = New MSXML2.XMLHTTP60
    With xmlHttpRequest
        .Open "POST", REQUEST_URL, False
        'NOTE(review): sent because servers often answer 403 to requests without
        'a browser-like User-Agent. This may not be sufficient if the endpoint is
        'protected by additional bot detection - confirm against the live site.
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .setRequestHeader "X-Requested-With", "XMLHttpRequest"
        .send requestBody

        If .Status <> 200 Then
            MsgBox "Anforderung fehlgeschlagen. Status: " & .Status
            Exit Sub
        End If

        Set HtmlDoc = CreateHTMLDoc
        HtmlDoc.body.innerHTML = .responseText
    End With

    'getElementById returns Nothing when the response does not contain the
    'table (for example an error page), so check before using it.
    Set ieTable = HtmlDoc.getElementById("curr_table")
    If ieTable Is Nothing Then
        MsgBox "Tabelle 'curr_table' wurde in der Antwort nicht gefunden."
        Exit Sub
    End If

    'Copy rows ------------------------------------------------------------'
    I = 2
    For Each tableRow In ieTable.getElementsByTagName("tr")
        Set rowCells = tableRow.Children

        'Skip rows that do not carry all expected cells (e.g. a single-cell
        '"no results" row) instead of failing on a missing child.
        If rowCells.Length >= COLUMN_COUNT Then
            For col = 1 To COLUMN_COUNT
                Table.Cells(I, col) = rowCells.Item(col - 1).innerText
            Next col
            I = I + 1
        End If

        DoEvents    'keep Excel responsive while copying
    Next tableRow

End Sub


excel vba google-signin screen-scraping
1个回答
0
投票

我从未通过代码登录过 Google,也没有以这种方式访问过 investing.com,但通常你可能需要先从 Google 获取 JWT(JSON Web Token),再将其传递给 investing.com。

请参阅这篇文章 - https://developers.google.com/identity/protocols/oauth2#webserver

© www.soinside.com 2019 - 2024. All rights reserved.