Python 的 Google 地图网页抓取

问题描述 投票:0回答:1

“我正在使用 Selenium 自动获取 Google 地图上两个地点之间的距离和行程时间。我的脚本以前运行正常,但现在出了问题:它会抛出“no such element”(找不到元素)错误,提示找不到类名为“tactile-searchbox-input”(或类似名称)的元素。如果能提供解决此问题的指导或方案,我将不胜感激!”

from selenium import webdriver
import time
from time import sleep
from selenium.webdriver.common.by import By
from datetime import datetime as dt
import openpyxl
import sys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Headless Chrome: the browser runs without opening a visible window.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Launch the browser with the options configured above.
driver = webdriver.Chrome(options=chrome_options)

# Open the Google Maps start view, then wait a fixed 5 seconds
# (presumably to give the page time to load).
driver.get(
    "https://www.google.com/maps/@33.9650923,71.5293553,9942m/data=!3m1!1e3"
    "?entry=ttu&g_ep=EgoyMDI0MTIxMS4wIKXMDSoJLDEwMjExMjMzSAFQAw%3D%3D"
)
time.sleep(5)
  
# search locations
def searchplace():
    """Type the fixed coordinates into the search box and click the search button.

    Uses the module-level ``driver``. Both lookups raise if the element is
    not present on the current page.
    """
    search_box = driver.find_element(By.CLASS_NAME, "tactile-searchbox-input")
    search_box.send_keys("33.983515, 71.447372")

    # The search button is located by an absolute XPath into the page layout.
    driver.find_element(
        By.XPATH,
        "/html/body/div[1]/div[3]/div[8]/div[3]/div[1]/div[1]/div/div[2]/div[1]/button",
    ).click()


# get directions
def directions():
    """Click the button found at the hard-coded XPath (the "directions" button).

    Uses the module-level ``driver``; raises if the element is not found.
    """
    directions_button = driver.find_element(
        By.XPATH,
        "/html/body/div[1]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[4]/div[1]/button",
    )
    directions_button.click()
  

# Start Point
def Start_Point():
    """Replace the starting-location input with fixed coordinates and click search.

    Uses the module-level ``driver``. Both elements are located by absolute
    XPaths; a failed lookup raises.
    """
    origin_input = driver.find_element(
        By.XPATH,
        "/html/body/div[3]/div[9]/div[3]/div[1]/div[2]/div/div[3]/div[1]/div[1]/div[2]/div[1]/div/input",
    )
    origin_input.clear()  # drop whatever text the field already holds
    origin_input.send_keys("33.99891877029553, 71.40166320892853")  # Starting Location Input

    driver.find_element(
        By.XPATH,
        "/html/body/div[3]/div[9]/div[3]/div[1]/div[2]/div/div[3]/div[1]/div[1]/div[2]/button[1]",
    ).click()



# Printing Data
def Print_info():
    """Return the text of the distance and travel-time elements as a 2-tuple.

    Both elements are looked up on the module-level ``driver`` by absolute
    XPaths that differ only in the final ``div`` index.

    Returns:
        ``(distance_text, travel_time_text)``.
    """
    distance_element = driver.find_element(
        By.XPATH,
        "/html/body/div[3]/div[9]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[4]/div[1]/div[1]/div/div[1]/div[1]",
    )
    travel_time_element = driver.find_element(
        By.XPATH,
        "/html/body/div[3]/div[9]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[4]/div[1]/div[1]/div/div[1]/div[2]",
    )
    return distance_element.text, travel_time_element.text
# Daily sampling window, compared against the local wall-clock time.
start_time_str = '11:05 AM'  # Change to your desired start time
start_time = dt.strptime(start_time_str, '%I:%M %p').time()

end_time_str = '6:50 PM'  # Change to your desired end time
end_time = dt.strptime(end_time_str, '%I:%M %p').time()

# Minutes past the hour at which a sample is taken.
SAMPLE_MINUTES = (5, 10, 38, 40, 47, 49, 53, 59)

# Raw string: the backslashes in this Windows path must stay literal. The
# non-raw form relied on invalid escape sequences (\T, \D, \G), which newer
# Python versions warn about.
WORKBOOK_PATH = r'D:\To be Managed\Dr.Rashid Work\Google Map\Google Map Data.xlsx'

i = 0  # samples written so far in this run; sample i is stored in row i + 2

try:
    while True:
        # Get current time
        now = dt.now().time()

        if now > end_time:
            # Window is over: stop the script. The finally clause below
            # shuts the browser down on the way out.
            sys.exit()

        if now < start_time or now.minute not in SAMPLE_MINUTES:
            # Nothing to sample right now; pause instead of spinning the CPU.
            sleep(1)
            continue

        Time = time.strftime("%H:%M %p")
        Date = time.strftime("%d-%b-%Y")

        # Drive the page step by step, with fixed pauses between steps.
        sleep(3)
        searchplace()
        sleep(20)
        directions()
        sleep(20)
        Start_Point()
        sleep(20)
        distance, travel_time = Print_info()
        sleep(25)

        # Append the sample to the active sheet of the workbook and save it.
        workbook = openpyxl.load_workbook(WORKBOOK_PATH)
        sheet = workbook.active
        row = i + 2
        for column, value in enumerate((Date, Time, distance, travel_time), start=1):
            sheet.cell(row=row, column=column).value = value
        i += 1
        workbook.save(WORKBOOK_PATH)

        # Start the next sample from a fresh browser. quit() (not close())
        # ends the whole session, so the old browser and its driver process
        # do not pile up across samples.
        driver.quit()
        driver = webdriver.Chrome(options=chrome_options)
        driver.get("https://www.google.com/maps/@33.9886201,71.4011512,12.53z")
        sleep(60)
finally:
    # Runs on normal exit (sys.exit) and on errors such as a failed element
    # lookup, so a headless browser is never left running in the background.
    driver.quit()
python selenium-webdriver web-scraping
1个回答
0
投票

就像已经说过的那样,抓取谷歌并不是一个好主意。但这是可能的。

首先,脚本需要接受 cookie。例如,按“接受 Cookie”按钮。

def accept_cookies():
    """Click the Google consent form, then pause for two seconds.

    The element is the ``<form>`` whose action is
    ``https://consent.google.com/save``; the lookup on the module-level
    ``driver`` raises if no such form is on the page.
    """
    consent_selector = "form[action='https://consent.google.com/save']"
    driver.find_element(By.CSS_SELECTOR, consent_selector).click()
    time.sleep(2)

其次,使用 XPATH 进行网页抓取通常不是一个好主意,特别是对于像 Google 这样的网站。如果站点发生哪怕是最轻微的变化,XPATH 就会中断。请改用类名称或 CSS 选择器。

您无需点击搜索按钮,只需按 Enter 键即可进行搜索。这样就少了一个将来可能失效的按钮定位。注意:下面代码中用到的 `Keys` 来自 `selenium.webdriver.common.keys`。

def searchplace():
    """Type the fixed coordinates into the search box and press Enter.

    Uses the module-level ``driver``; the lookup raises if no element with
    id ``searchboxinput`` is on the page.
    """
    # Imported here because the script's import block does not bring in
    # Keys; without this the function fails with a NameError.
    from selenium.webdriver.common.keys import Keys

    place = driver.find_element(By.ID, "searchboxinput")
    place.send_keys("33.983515, 71.447372")
    place.send_keys(Keys.ENTER)  # submit without needing the search button

def Start_Point():
    """Replace the starting-location input with fixed coordinates and press Enter.

    Uses the module-level ``driver``; the lookup raises if no element with
    class ``tactile-searchbox-input`` is on the page.
    """
    # Imported here because the script's import block does not bring in
    # Keys; without this the function fails with a NameError.
    from selenium.webdriver.common.keys import Keys

    starting_location = driver.find_element(By.CLASS_NAME, "tactile-searchbox-input")
    starting_location.clear()  # drop whatever text the field already holds
    starting_location.send_keys("33.99891877029553, 71.40166320892853")  # Starting Location Input
    starting_location.send_keys(Keys.ENTER)  # submit without needing a button

获取距离信息的函数可以这样修复:先定位行程元素,再在该元素内查找距离信息。这样就不需要 XPATH 了。

def Print_info():
    """Return two texts read from the ``section-directions-trip-0`` element.

    The element with id ``section-directions-trip-0`` is located first; the
    two values are then looked up inside it by class name, so no absolute
    XPath is needed.

    Returns:
        ``(distance_text, travel_time_text)``: the text of the first
        ``fontHeadlineSmall`` element and of the first ``fontBodyMedium``
        element inside the trip element, in that order.
    """
    trip = driver.find_element(By.CSS_SELECTOR, "div#section-directions-trip-0")

    distance_element = trip.find_element(By.CLASS_NAME, "fontHeadlineSmall")
    travel_time_element = trip.find_element(By.CLASS_NAME, "fontBodyMedium")
    return distance_element.text, travel_time_element.text

请记住,由于谷歌网站的页面会因地区而异,此代码将来很可能失效,也可能现在就已经失效。

© www.soinside.com 2019 - 2024. All rights reserved.