使用元素循环标题数组

问题描述 投票:0回答:1

我正在寻找可以使用类名循环标头数组的代码,但它不能包含标签名称或标识。这只是为了确保如果任何类不存在,那么相应的单元格应该留空,并且应该复制下一个元素。

我试着添加标题数组

  headers = Array("size", "features", "promo", "in store", "web")

但它需要循环使用我不想要的标签名称。

也想要促销(班级名称是“promo_offers”)'第1个月免费!'在第2行,问题是这个促销仅针对特定细胞 - 因此数据具有误导性,我在前4个细胞中获得促销,然后出现错误。

但是,我想只为那些提供促销信息的单位复制促销,否则单元格应为空白或需要设置任何其他值。以下是代码......

请建议如何构建代码。

Sub GetClassNames()

Dim html As HTMLDocument

Dim objIE As Object
Dim element As IHTMLElement
Dim ie As InternetExplorer
Dim elements As IHTMLElementCollection
Dim result As String 'string variable that will hold our result link

Dim count As Long
Dim erow As Long

'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer

'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True

'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
count = 0

Set html = objIE.document
Set elements = html.getElementsByClassName("unit_size medium")

For Each element In elements
    If element.className = "unit_size medium" Then
        erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
        Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText

        Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
        count = count + 1      
    End If
Next element
End Sub

对于任何东西,即promo为null,则相应的单元格应留空,应复制下一个元素

html excel vba web-scraping
1个回答
1
投票

您可以使用xmlhttp获取所有信息。

我抓住所有li元素的盒子和循环那些把每个li的html到一个新的HTMLDocument。我使用该对象的querySelector方法使用css选择器获取每行中的所有其他项。我在On Error Resume Next On Error GoTo 0中包装选择以掩盖在尝试访问不存在的元素时的错误,例如有些行没有促销。然后这些条目按要求保留为空白。

Option Explicit
Public Sub GetInfo()
    Dim ws As Worksheet, html As HTMLDocument, s As String
    Const URL As String = "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423"

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        s = .responseText
        html.body.innerHTML = s

        Dim headers(), results(), listings As Object

        headers = Array("Size", "Features", "Promo", "In store", "Web")
        Set listings = html.querySelectorAll(".li_unit_listing")

        Dim rowCount As Long, numColumns As Long, r As Long, c As Long, item As Long

        rowCount = listings.Length
        numColumns = UBound(headers) + 1

        ReDim results(1 To rowCount, 1 To numColumns)
        Dim html2 As HTMLDocument
        Set html2 = New HTMLDocument
        For item = 0 To listings.Length - 1
            r = r + 1
            html2.body.innerHTML = listings.item(item).innerHTML
            On Error Resume Next
            results(r, 1) = Trim$(html2.querySelector(".unit_size").innerText)
            results(r, 2) = Trim$(html2.querySelector(".features").innerText)
            results(r, 3) = Trim$(html2.querySelector(".promo_offers").innerText)
            results(r, 4) = html2.querySelector(".board_rate").innerText
            results(r, 5) = html2.querySelector("[itemprop=price]").getAttribute("content")
            On Error GoTo 0
        Next

        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

输出:

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.