网页刮痧excel VBA

问题描述 投票:1回答:2

我正试图从网上刮一张桌子,但由于某种原因,我没有得到整张桌子。它只获取1列而不是全部列。任何帮助将不胜感激!谢谢!

这是我的代码:

Sub HistoricalData()

    Dim xmlHttp As Object
    Dim TR_col As Object, TR As Object
    Dim TD_col As Object, TD As Object
    Dim row As Long, col As Long

    Set xmlHttp = CreateObject("MSXML2.XMLHTTP.6.0")
    xmlHttp.Open "GET", "http://www.cnbc.com/bonds-canada-treasurys", False
    xmlHttp.setRequestHeader "Content-Type", "text/xml"
    xmlHttp.send

    Dim html As Object
    Set html = CreateObject("htmlfile")
    html.body.innerHTML = xmlHttp.responseText

    Dim tbl As Object
    Set tbl = html.getElementById("curr_table")

    row = 1
    col = 1

    Set TR_col = html.getElementsByTagName("TR")
    For Each TR In TR_col
        Set TD_col = TR.getElementsByTagName("TD")
        For Each TD In TD_col
            Cells(row, col) = TD.innerText
            col = col + 1
        Next
        col = 1
        row = row + 1
    Next
End Sub
vba excel-vba web web-scraping excel
2个回答
3
投票

问题是你在页面加载完成之前回到了HTTP.responseText

在返回MSXML2.XMLHTTP.6.0之前,我无法让HTTP.responseText等待页面完成加载,所以我切换到IE

enter image description here

Sub HistoricalData()
    Const URL As String = "http://www.cnbc.com/bonds-canada-treasurys"
    Const READYSTATE_COMPLETE As Integer = 4
    Dim IE As Object
    Dim TR_col As Object, TR As Object
    Dim TD_col As Object, TD As Object
    Dim row As Long, col As Long

    Set IE = CreateObject("InternetExplorer.Application")

    IE.Navigate URL

    Do While (IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE)
        DoEvents
    Loop

    Set TR_col = IE.Document.getElementsByTagName("TR")

    For Each TR In TR_col
        Set TD_col = TR.getElementsByTagName("TD")

        For Each TD In TD_col
            Cells(row, col) = TD.innerText
            col = col + 1
        Next
        col = 1
        row = row + 1
    Next
End Sub

0
投票

迟了几年,我知道,但这是一个更优雅的解决方案恕我直言,它让你更多地控制数据,希望有人会发现它有用。

问题是您要求整个页面,而不仅仅是数据。

对于此解决方案,您需要导入VBA-JSON并添加对Microsoft Scripting Runtime的引用

Sub cnbc()
Dim req As New WinHttpRequest
Dim reqURL As String
Dim respString As String
Dim respJSON As Object
Dim item As Object
Dim i As Long
Dim key As String
i = 1
reqURL = "https://quote.cnbc.com/quote-html-webservice/quote.htm?partnerId=2&requestMethod=quick&exthrs=1&noform=1&fund=1&output=jsonp&symbols=CA1M-CA|CA3M-CA|CA1Y-CA|CA3Y-CA|CA4Y-CA|CA5Y-CA|CA20Y-CA|CA30Y-CA&callback=quoteHandler1"
With req
    .Open "GET", reqURL, False
    .send
    respString = .responseText
End With
key = "quoteHandler1("
respString = Mid(respString, InStr(respString, key) + Len(key), Len(respString) - Len(key) - 1) 'extract the JSON string
Set respJSON = JsonConverter.ParseJson(respString) 'parse JSON string into something usable
For Each item In respJSON("QuickQuoteResult")("QuickQuote")
    ThisWorkbook.Worksheets(1).Cells(i, "A") = item("shortName")
    ThisWorkbook.Worksheets(1).Cells(i, "B") = item("last")
    ThisWorkbook.Worksheets(1).Cells(i, "C") = item("change")
    i = i + 1
Next item
End Sub

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.