vba使用单元格数据在站点上检查匹配的单词然后下载它们

问题描述 投票:0回答:1

excel单元格中有单词,它会检查给定网站的单词,屏幕打印网站并使用所用单词的名称保存屏幕打印,然后将webaddress插入到使用过的单词旁边的单元格中并按下单页列表直到空。我想要vba做的是使用开源的ocr tesseract,我认为这个工作非常需要。

到目前为止,我已设法将单词或汽车注册表插入到网站然后提取数据,我不知道从上面的段落项目开始我已经设置了自己。

Private Sub CommandButton1_Click()

    'Sub TAXandMOTcheck()

    'dimension (declare or set aside memory for) our variables
    Dim objIE As InternetExplorer                'special object variable representing the IE browser

    'Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
    'Dim liEle As HTMLLinkElement 'special object variable for an <li> (link) element
    'Dim pEle As HTMLLinkElement 'special object variable for an <a> (link) element

    Dim y As Integer                             'integer variable we'll use as a counter
    'Dim result As String 'string variable that will hold our result link

    '''''''''''''''''''''''''''''''''''''''''''
    'open internet

    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = False

    '''''''''''''''''''''''''''''''''''''''''''
    'open tax/mot page

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://vehicleenquiry.service.gov.uk/"

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    '''''''''''''''''''''''''''''''''''''''''''

    'enter details in to page

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    'in the search box put cell "b2" value, the word "in" and cell "C" value
    objIE.document.getElementById("Vrm").Value = _
                                               Sheets("INPUT DATA").Range("X3").Value

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    '''''''''''''''''''''''''''''''''''''''''''''''''''''

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    'click the 'Continue' button
    objIE.document.getElementsByClassName("button")(0).Click

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    '''''''''''''''''''''''''''''''''''''''''''''''''''''

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    'click the 'Yes' button
    objIE.document.getElementById("Correct_True").Click

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    '''''''''''''''''''''''''''''''''''''''''''''''''''''

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    'click the 'Continue' button
    objIE.document.getElementsByClassName("button")(0).Click

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    '''''''''''''''''''''''''''''''''''''''''''''''''''''

    'TAX EXPIRY DATE:
    'TaxExpiryDate = objIE.Document.getElementsByClassName("status-bar")(0).getElementsByTagName("strong")(0).innerText
    'add tax date to sheet
    'Range("G3").Value = TaxExpiryDate

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    'IN TWO LINES FOR BETTER CODE READIBILITY:
    TaxExpiryDate = objIE.document.getElementsByClassName("status-bar")(0).getElementsByTagName("strong")(0).innerText
    TaxExpiryDate = Split(TaxExpiryDate, vbNewLine)(1)

    'add tax date to sheet
    Range("Y3").Value = TaxExpiryDate

    'IN ONE LINE FOR SHORTER CODE:
    'TaxExpiryDate = Split(objIE.Document.getElementsByClassName("status-bar")(0).getElementsByTagName("strong")(0).innerText, vbNewLine)(1)
    'add tax date to sheet
    'Range("G3").Value = TaxExpiryDate

    '''''''''''''''''''''''''''''''''''''''''''''''''''''

    'wait 5 seconds
    Application.Wait Now + TimeValue("00:00:05")

    'MOT EXPIRY DATE:
    MotExpiryDate = objIE.document.getElementsByClassName("status-bar")(0).getElementsByTagName("strong")(1).innerText
    MotExpiryDate = Split(MotExpiryDate, vbNewLine)(1)

    'add mot date to sheet
    Range("Z3").Value = MotExpiryDate

    '''''''''''''''''''''''''''''''''''''''''''''''''''''

    'take tax and mot dates and insert in to cells next to each other
    'the first search result will go in row 2
    y = 2

    'TAKE TAX EXPIRY DATE AND PUT IN CELL
    'I have tried reading up on extracting data from li elements, parent and child elements but struggling
    'For Each itemEle In objIE.Document.getElementsByClassName("top-section-list")
    'data = itemEle.getElementsByTagName("li")(0).innerText

    'TAKE MOT EXPIRY DATE AND PUT IN CELL
    'I have tried reading up on extracting data from li elements, parent and child elements but struggling
    'For Each itemEle In objIE.Document.getElementsByClassName("top-section-list")
    'data = itemEle.getElementsByTagName("li")(0).innerText

    '''''''''''''''''''''''''''''''''''''''''''''''''''''''''

    'increment our row counter, so the next result goes below
    y = y + 1

    'take next car reg and do the same as above until there are no cells in rows with a car reg
    'Next y

    'Range("A3").Value = data

    '''''''''''''''''''''''''''''''''''''''''''

    'close the browser
    objIE.Quit

    '''''''''''''''''''''''''''''''''''''''''''

    'exit our SearchBot subroutine and start new row for new website data
End Sub
excel vba web-scraping ocr tesseract
1个回答
0
投票

据我所知,除了代码中已经实现的内容之外,您可能只需要保存站点的屏幕截图并循环遍历汽车注册号码(已在excel表中列出)。也无法理解,发现在特定网站上注释掉了Class Id ("top-section-list")

由于我没有英国有效的车辆登记号码,我只使用了一些虚数(幸运地在试错中找到)并且在循环中重复尝试了代码。路径,表格和范围等可以根据您的要求进行修改。应用程序等待时间可以进一步减少。

修改代码:

    Private Declare Sub keybd_event Lib "user32" (ByVal bVk As Byte, ByVal bScan As Byte, ByVal dwFlags As Long, ByVal dwExtraInfo As Long)
    Private Declare Function ShowWindow Lib "user32" (ByVal hwnd As Long, ByVal nCmdShow As Long) As Long
    Private Const VK_SNAPSHOT As Byte = 44
    Private Const SW_SHOWMAXIMIZED = 3
    Sub TAXandMOT()
    Dim objIE As InternetExplorer
    Dim y As Integer
    Dim CarReg As String
    Dim hwnd As Long, IECaption As String
    Dim TaxExpiryDate, MotExpiryDate
    Dim Shp As Shape, Cht As Chart, Ws As Worksheet
    Dim Path As String

    Path = "C:\users\user\Desktop\"
    Set Ws = ThisWorkbook.Sheets("Input Data")

    Set objIE = CreateObject("InternetExplorer.Application")
    objIE.Visible = True
    y = 1
    CarReg = Ws.Range("A" & y).Value

        Do While CarReg <> ""
        objIE.navigate "https://vehicleenquiry.service.gov.uk/"
        Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
        Application.Wait Now + TimeValue("00:00:05")
        objIE.document.getElementById("Vrm").Value = CarReg
        objIE.document.getElementsByClassName("button")(0).Click
        Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
        Application.Wait Now + TimeValue("00:00:05")
        objIE.document.getElementById("Correct_True").Click
        objIE.document.getElementsByClassName("button")(0).Click
        Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
        Application.Wait Now + TimeValue("00:00:05")
        TaxExpiryDate = objIE.document.getElementsByClassName("status-bar")(0).getElementsByTagName("strong")(0).innerText
        TaxExpiryDate = Split(TaxExpiryDate, vbNewLine)(1)
        Ws.Range("B" & y).Value = TaxExpiryDate
        MotExpiryDate = objIE.document.getElementsByClassName("status-bar")(0).getElementsByTagName("strong")(1).innerText
        MotExpiryDate = Split(MotExpiryDate, vbNewLine)(1)
        Ws.Range("C" & y).Value = MotExpiryDate


        'SendKeys "(%{1068})"                  'another option to take screen Shot 
         ShowWindow objIE.hwnd, SW_SHOWMAXIMIZED
         Delay 3
         Call keybd_event(VK_SNAPSHOT, 0, 0, 0)
         Delay 1
        Set Cht = Charts.Add
        Cht.Paste
        Cht.Export FileName:=Path & CarReg & y & ".jpg", FilterName:="JPG" ' may not use y (i have to use Y as I have only one car no to repeat for trial)
        Application.DisplayAlerts = False
        Cht.Delete
        Application.DisplayAlerts = True

        y = y + 1
        CarReg = Ws.Range("A" & y).Value
        Loop
    objIE.Quit
    End Sub

Sub Delay(Sec As Integer)
tm = Timer
    Do While Timer < tm + Sec
    DoEvents
    Loop
End Sub
© www.soinside.com 2019 - 2024. All rights reserved.