使用Excel vba在Investing.com上进行Web抓取

问题描述 投票:-2回答:3

我不知道vba。仅使用宏录制器。我需要将数据从网页下载到Excel电子表格,并且凭借我对vba的了解,我无法做到。

特别是,我想做一个宏来下载到Excel页面的数据表:https://www.investing.com/equities/cellnex-telecom-historical-data

必须根据时间,日期范围和订购配置此下载。

步骤如下:1.-目标是将数据从“CLNX历史数据”表复制到Excel电子表格。 2.-下载应该通过调用“Term”在下拉菜单中选择“Monthly”来完成。 3.-通过预先选择过去2年的日期范围进行下载。 4.-最后,按“最大”列的顺序按降序排列表格。 5.-选择术语,日期范围和订单后,将数据从“CLNX历史数据”表复制到Excel电子表格。

我已尝试使用宏录制器,但我无法配置术语,日期范围或顺序。

有人能帮助我吗?

谢谢你的帮助。

代码:

Sub DataInvesting()

Dim IE As Object

Set IE = CreateObject("InternetExplorer.Application")

IE.navigate "https://www.investing.com/equities/cellnex-telecom-historical-data"

Do Until IE.readyState = 4

DoEvents

Loop

IE.Document.getElementsByClassName("newInput selectBox float_lang_base_1")(0).Value = "Monthly"

IE.Visible = True

Set IE = Nothing

Set appIE = Nothing

End Sub
excel vba web-scraping
3个回答
1
投票

我无法测试这一点,尽管设置了一个免费帐户,它一直说密码错误。厌倦了5个密码重置和同样的问题,并怀疑它想要我的社交媒体细节。

下面概括地概述了我会考虑的步骤,尽管很可能需要一些定时等待。

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub Info()
    Dim ie As New InternetExplorer  
    Const URL As String  = ""https://www.investing.com/equities/cellnex-telecom-historical-data""
    With ie
        .Visible = True
        .Navigate2 URL

        While .Busy Or .readyState < 4: DoEvents: Wend

        .document.querySelector(".login").Click

        While .Busy Or .readyState < 4: DoEvents: Wend

        .Navigate2 URL

        While .Busy Or .readyState < 4: DoEvents: Wend


        With .document.querySelector("#loginFormUser_email")
            .Focus
            .Value = "[email protected]"
        End With
        With .document.querySelector("#loginForm_password")
            .Focus
            .Value = "systemSucksDoesn'tAcceptMyPassword"
        End With

        Application.Wait Now + TimeSerial(0, 0, 2)

        .document.querySelector("[onclick*=submitLogin]").Click

        While .Busy Or .readyState < 4: DoEvents: Wend

        .document.querySelector("#data_interval").Click
        .document.querySelector("[value=Monthly]").Click
        With .document.querySelector("#picker")
            .Focus
            .Value = "03/08/2017 - 03/08/2019"
            .FireEvent "onchange"
        End With

        'TODO Sorting column when clarified which column
        .document.querySelector("[title='Download Data']").Click

        Application.Wait Now + TimeSerial(0, 0, 10)

        Stop
        .Quit
    End With
End Sub

1
投票

我刚刚测试了以下代码并且它有效,而不是每次我们需要运行此宏时创建Internet Explorer的实例,我们将使用xmlhttp请求。只需复制整个代码并将其粘贴到vba中的模块中即可。不要忘记向Microsoft HTML Object Library和Microsoft XML v6.0添加引用(工具/引用)。

Option Explicit
Sub Export_Table()

'Html Objects---------------------------------------'
 Dim htmlDoc As MSHTML.HTMLDocument
 Dim htmlBody As MSHTML.htmlBody
 Dim ieTable As MSHTML.HTMLTable
 Dim Element As MSHTML.HTMLElementCollection


'Workbooks, Worksheets, Ranges, LastRow, Incrementers ----------------'
 Dim wb As Workbook
 Dim Table As Worksheet
 Dim i As Long

 Set wb = ThisWorkbook
 Set Table = wb.Worksheets("Sheet1")

 '-------------------------------------------'
 Dim xmlHttpRequest As New MSXML2.XMLHTTP60  '
 '-------------------------------------------'


 i = 2

'Web Request --------------------------------------------------------------------------'
 With xmlHttpRequest
 .Open "POST", "https://www.investing.com/instruments/HistoricalDataAjax", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.send "curr_id=951681&smlID=1695217&header=CLNX+Historical+Data&st_date=01%2F01%2F2017&end_date=03%2F01%2F2019&interval_sec=Monthly&sort_col=date&sort_ord=DESC&action=historical_data"


 If .Status = 200 Then

        Set htmlDoc = CreateHTMLDoc
        Set htmlBody = htmlDoc.body

        htmlBody.innerHTML = xmlHttpRequest.responseText

        Set ieTable = htmlDoc.getElementById("curr_table")

        For Each Element In ieTable.getElementsByTagName("tr")
            Table.Cells(i, 1) = Element.Children(0).innerText
            Table.Cells(i, 2) = Element.Children(1).innerText
            Table.Cells(i, 3) = Element.Children(2).innerText
            Table.Cells(i, 4) = Element.Children(3).innerText
            Table.Cells(i, 5) = Element.Children(4).innerText
            Table.Cells(i, 6) = Element.Children(5).innerText
            Table.Cells(i, 7) = Element.Children(6).innerText

            i = i + 1
        DoEvents: Next Element
 End If
End With


Set xmlHttpRequest = Nothing
Set htmlDoc = Nothing
Set htmlBody = Nothing
Set ieTable = Nothing
Set Element = Nothing

End Sub

Public Function CreateHTMLDoc() As MSHTML.HTMLDocument
    Set CreateHTMLDoc = CreateObject("htmlfile")
End Function

0
投票

试试这个。

Sub Web_Table_Option()
    Dim HTMLDoc As New HTMLDocument
    Dim objTable As Object
    Dim lRow As Long
    Dim lngTable As Long
    Dim lngRow As Long
    Dim lngCol As Long
    Dim ActRw As Long
    Dim objIE As InternetExplorer
    Set objIE = New InternetExplorer
    objIE.Navigate "https://www.investing.com/equities/cellnex-telecom-historical-data"

    Do Until objIE.ReadyState = 4 And Not objIE.Busy
        DoEvents
    Loop
    Application.Wait (Now + TimeValue("0:00:03")) 'wait for java script to load
    HTMLDoc.body.innerHTML = objIE.Document.body.innerHTML
    With HTMLDoc.body
        Set objTable = .getElementsByTagName("curr_table")
        For lngTable = 0 To objTable.Length - 1
            For lngRow = 0 To objTable(lngTable).Rows.Length - 1
                For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
                    ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
                Next lngCol
            Next lngRow
            ActRw = ActRw + objTable(lngTable).Rows.Length + 1
        Next lngTable
    End With
    objIE.Quit
End Sub
© www.soinside.com 2019 - 2024. All rights reserved.