我进行了大量搜索,想找到使用XMLHTTP获取元数据(meta标签)的方法。我认为使用“早期绑定”方法无法做到这一点;唯一有效的方法是通过 CreateObject("HTMLFile") 进行后期绑定,并处理这个后期绑定的HTML文档。这种方法的缺点是它不支持使用 QuerySelector 或 QuerySelectorAll。
现在,我需要在不使用 QuerySelector 的情况下实现下面这行代码的效果:
Set post = .querySelector("table div span[itemprop='lowPrice']")
这行代码会产生错误……我找不到更简单的方法来查找该元素。以下是HTML内容:
<table class="p">
<tbody><tr>
<td class="foto">
<div class="foto">
<a href="https://krmivo-psy.heureka.cz/brit-premium-by-nature-adult-l-15-kg/#gallery-open" target="_blank" class="gallery-link product-detail__gallery-link" onclick="dataLayer.push({'event':'sendEvent','event_category':'Product Detail - Desktop','event_action':'Gallery','event_label':'Otev\u0159en\u00ed galerie','event_value':0});">
<img src="https://im9.cz/iR/importprodukt-orig/4c2/4c2b1733c8b233edd5052d3063ac46d9--mmf250x250.jpg" alt="Brit Premium by Nature Adult L 15 kg" width="250" height="250" id="picture-main">
<span class="image-hover">
<span class="image-overlay"></span>
<span class="js-test-image-count-info image-count-info">Galerie <span class="picture-count">(2)</span></span>
</span>
<span class="product-detail__gallery-link__image__count-info">Galerie
<span class="product-detail__gallery-link__image__count-info__count">(2)</span>
</span>
</a>
<a href="https://krmivo-psy.heureka.cz/top-produkty/" class="top-ico gtm-header-link" data-gtm-link-description="Pořadí v TOP produktech"><span>Top</span><strong>1.</strong></a>
<div class="poty-ico">
<a href="http://www.produktroku.cz/" target="_blank"><img src="https://im9.cz/iR/recenze-externi/107.png" alt="Produkt Roku 2019" class="product-of-year-badge"></a></div>
</div>
</td>
<td>
<div class="main-info">
<div class="text-cover">
<div id="n649054946" data-id="649054946" class="item js-public-product-id">
<h2 itemprop="name">Brit Premium by Nature Adult L 15 kg</h2>
</div>
<div class="rating-box" itemprop="aggregateRating" itemscope="" itemtype="http://schema.org/AggregateRating">
<p class="eval">
<strong itemprop="ratingValue">95%</strong>
<a href="https://krmivo-psy.heureka.cz/brit-premium-by-nature-adult-l-15-kg/pridat-uzivatelskou-recenzi/#section">
<span class="rating"><span class="hidden">Hodnocení produktu: 95%</span><span class="over" title="Hodnocení produktu: 95%"><span style="width: 75px;"></span></span></span>
</a>
</p>
<span class="hidden-microdata" itemprop="ratingCount">
456
</span>
<p class="review-count delimiter-blank">
<a href="https://krmivo-psy.heureka.cz/brit-premium-by-nature-adult-l-15-kg/recenze/#section" class="gtm-header-link" data-gtm-link-description="Počet recenzí">
<span itemprop="reviewCount">344</span>
recenzí
</a>
</p>
<div class="cleaner"></div>
<p class="rating-box__item rating-box__favourite">
<a href="https://ucet.heureka.cz/prihlaseni?callbackUrl=https%3A%2F%2Fkrmivo-psy.heureka.cz%2Fbrit-premium-by-nature-adult-l-15-kg%2F" title="Chci to" class="gtm-header-link" data-gtm-link-description="Akce - oblíbené">Přidat do oblíbených</a>
</p>
<p id="cli649054946" class="rating-box__item rating-box__compare delimiter-blank cl-add">
<a class="checkbox gtm-header-link" data-gtm-link-description="Akce - porovnání" href="#" title="Porovnat">Přidat do porovnání</a>
</p>
<p class="delimiter-blank rating-box__item rating-box__price-watch js-price-watch-button">
<a href="#" title="Hlídat cenu" class="gtm-header-link" data-gtm-link-description="Akce - hlídat cenu">
Hlídat cenu
</a>
</p>
<p class="add-review rating-box__item rating-box__add-review delimiter-blank">
<a href="https://krmivo-psy.heureka.cz/brit-premium-by-nature-adult-l-15-kg/pridat-uzivatelskou-recenzi/#section" class="gtm-header-link" data-gtm-link-description="Akce - přidat recenzi">
Přidat recenzi
</a>
</p>
</div>
<div id="top-shop-info" class="top-shop-info">
<div class="inner">
<div class="guar">
<div>
<img class="guar-badge" src="https://im9.cz/css-v2/images/guaranty-seal.png?1" alt="Garance nákupu - SpokojenyPes.cz" width="27" height="34">
</div>
</div>
<div class="shop-claim bold">
<strong>Produkt vám dodá:</strong>
</div>
<div class="shop-logo">
<a href="https://www.heureka.cz/exit/spokojenypes-cz/3180319922/?z=41" target="_blank" rel="nofollow noopener" class="gtm-header-link" data-gtm-link-description="Exit - produkt vám dodá">
<img src="https://im9.cz/iR/importobchod-orig/1983_logo--mmf130x40.png" alt="SpokojenyPes.cz" width="130" height="40">
</a>
</div>
<div class="recommendation">
<a href="https://obchody.heureka.cz/spokojenypes-cz/recenze/" class="gtm-header-link" data-gtm-link-description="Hodnocení - Produkt vám dodá">
99% zákazníků doporučuje obchod
</a>
</div>
<div class="delivery-info bold price-delivery-free">
Doprava zdarma
</div>
<div class="availability-info bold in-stock">
skladem
</div>
</div>
<a data-gtm-link-description="Další nabídky" id="top-shop-count-info" href="https://krmivo-psy.heureka.cz/brit-premium-by-nature-adult-l-15-kg/porovnat-ceny/#section" class="top-shop-count-info box-active gtm-header-link">Dalších 134 nabídek od 728 Kč</a>
</div>
<p class="desc">
<span id="product-short-description">
Kompletní krmivo Brit Premium pro dospělé psy. Kuřecí receptura pro dospělé psy velkých plemen (25 - 45 kg).
<a id="product-short-description-button" href="https://krmivo-psy.heureka.cz/brit-premium-by-nature-adult-l-15-kg/specifikace/#section" title="celá specifikace Brit Premium by Nature Adult L 15 kg">celá specifikace</a>
</span>
</p>
</div>
<div itemprop="offers" itemscope="" itemtype="http://schema.org/AggregateOffer" style="display:none">
<span itemprop="lowPrice">728.00</span>
<span itemprop="highPrice">1579.00</span>
<span itemprop="offerCount">135</span>
<link itemprop="availability" href="http://schema.org/InStock">
</div>
<div itemprop="offers" itemscope="" itemtype="http://schema.org/Offer" class="price-from shopping-cart">
<link itemprop="itemCondition" href="http://schema.org/OfferItemCondition" content="http://schema.org/NewCondition">
<link itemprop="availability" href="http://schema.org/InStock">
<link itemprop="category" href="http://schema.org/category" content="Hobby / Chovatelství / Pro psy / Krmivo pro psy">
<link itemprop="image" href="http://schema.org/image" content="https://im9.cz/iR/importprodukt-orig/4c2/4c2b1733c8b233edd5052d3063ac46d9.jpg">
<div class="top-left">
<div id="top-button" class="buy-click-observed">
<p class="buy">
<a href="#" class="flat-button flat-button--top-position flat-button--orange buy-btn hb hb-3180319922 js-top-pos-btn" data-cart-position="0">
<i class="ico basket"></i>
<i class="ico check"></i>
<span class="in">Koupit na Heurece</span>
<span class="in replace">Přidáno do košíku</span>
</a>
</p>
</div>
<div class="n" id="top-offer-price">
<p class="buy-price">
<span itemprop="price" class="js-top-price" content="839.00">839 Kč</span>
<span class="price-vat-title small">s DPH</span>
<span itemprop="priceCurrency" content="CZK"></span>
</p>
</div>
<div class="clear"></div>
<div class="js-top-gifts-info top-shop-gifts-info-box">
</div>
</div>
<div class="clear"></div>
<div class="clear"></div>
</div>
<span id="new-pd"></span>
<script>
(function() {
loadScript("https:\/\/im9.cz\/js\/cache\/7e39f733-1-42bd9e7837b830d87e1af94da6d0e4a82055c56f.hash.js", function () {
var productHeadObserver = new ProductHeadObserver({ 'topShortDescElm': $('product-short-description'), 'topShopBox': $('top-shop-info'), 'maxOfferNameLength': 90 });
productHeadObserver.oneOfferInit();
});
H.Awards._reviewClick($$('#awards-list span.pa'));
var notSelectedCallback = function() {
if ('undefined' != typeof H.ShoppingCartHelper.BuyMoreOptions &&
typeof H.ShoppingCartHelper.BuyMoreOptions.buyClickNotSelectedCallback == 'function') {
H.ShoppingCartHelper.BuyMoreOptions.buyClickNotSelectedCallback();
}
};
H.ShoppingCartHelper.observeBuyClick($('top-button'), new H.ShoppingCart(), notSelectedCallback, 'js-top-pos-btn');
})();
</script>
<div class="clear"></div>
</div>
</td>
</tr>
</tbody></table>
完整的HTML在这里:https://pastebin.com/Dgu1wk2b
这是到目前为止的代码:
Sub MyTest()
    ' Downloads the product page, parses it with a late-bound HTMLFile
    ' document and copies selected metadata to row 2 of the active sheet:
    '   A2 = meta "gtm:product_id"     B2 = <link rel="canonical"> href
    '   C2 = meta "gtm:product_name"   D2 = meta "gtm:product_brand"
    ' The schema.org lowPrice value is printed to the Immediate window.
    '
    ' querySelector is not used because the late-bound HTMLFile document
    ' rejected it for the original author; elements are located with
    ' getElementsByTagName plus attribute checks instead.
    Const PAGE_URL As String = "https://krmivo-psy.heureka.cz/brit-premium-by-nature-adult-l-15-kg/specifikace/#section"

    Dim resp As String
    Dim httpStatus As Long
    Dim post As Object
    Dim node As Object
    Dim attrName As Variant      ' Variant: getAttribute yields Null when the attribute is absent
    Dim attrContent As Variant
    Dim i As Long

    ' Synchronous GET (third argument False), so the response is ready after .send.
    With CreateObject("MSXML2.xmlHttp")
        .Open "GET", PAGE_URL, False
        .send
        httpStatus = .Status
        resp = .responseText
    End With

    ' Do not parse an error page as if it were the product page.
    If httpStatus <> 200 Then
        Debug.Print "Request failed with HTTP status " & httpStatus
        Exit Sub
    End If

    With CreateObject("HTMLFile")
        ' Open/Close bracket the write so the parser finishes the document.
        .Open
        .write resp
        .Close

        ' --- <meta name="gtm:..."> values -----------------------------------
        Set post = .getElementsByTagName("meta")
        For i = 0 To post.Length - 1
            Set node = post.item(i)
            attrName = node.getAttribute("name")
            Debug.Print attrName

            If Not IsNull(attrName) Then
                ' A meta element carries its value in "content"; it has no
                ' Value member, which is why the cells previously stayed empty.
                attrContent = node.getAttribute("content")
                If Not IsNull(attrContent) Then
                    Select Case attrName
                        Case "gtm:product_id"
                            Cells(2, 1).Value = attrContent
                        Case "gtm:product_name"
                            Cells(2, 3).Value = attrContent
                        Case "gtm:product_brand"
                            Cells(2, 4).Value = attrContent
                    End Select
                End If
            End If
        Next i

        ' --- <link rel="canonical"> -----------------------------------------
        Set post = .getElementsByTagName("link")
        For i = 0 To post.Length - 1
            Set node = post.item(i)
            ' A Null comparison result is treated as False by If, so links
            ' without a rel attribute are simply skipped.
            If node.getAttribute("rel") = "canonical" Then
                Cells(2, 2).Value = node.href
            End If
        Next i

        ' --- <span itemprop="lowPrice"> -------------------------------------
        ' Stand-in for querySelector("table div span[itemprop='lowPrice']"):
        ' scan the spans and stop at the first one carrying the attribute.
        Set post = .getElementsByTagName("span")
        For i = 0 To post.Length - 1
            Set node = post.item(i)
            If node.getAttribute("itemprop") = "lowPrice" Then
                Debug.Print node.innerText
                Exit For
            End If
        Next i
    End With
End Sub
尝试一下:
With CreateObject("HTMLFile")
    ' Feed the downloaded markup (resp) into a late-bound HTML document.
    .Open
    .write resp
    .Close

    ' Manual equivalent of the selector
    '   table div[itemprop='offers'] span[itemprop='lowPrice']
    ' built from getElementsByTagName and attribute comparisons only.
    For Each tableNode In .getElementsByTagName("table")
        For Each divNode In tableNode.getElementsByTagName("div")
            If divNode.getattribute("itemprop") = "offers" Then
                For Each spanNode In divNode.getElementsByTagName("span")
                    ' getattribute gives Null for spans without itemprop; the
                    ' comparison then evaluates to Null, which If treats as False.
                    propName = spanNode.getattribute("itemprop")
                    If propName = "lowPrice" Then
                        Debug.Print spanNode.outerhtml
                        Debug.Print spanNode.innerText
                    End If
                Next spanNode
            End If
        Next divNode
    Next tableNode
End With