Errorno 61使用Selenium Python进行Web Scraping时

问题描述 投票:1回答:1

所以当我从一个我正在创建的类中运行以下代码来抓取Craigslist.org时,我一直得到一个socket.error 61。我尝试过各种版本的Chromedriver和PhantomJS,但似乎无法让它消失。起初我以为是我的IP被标记,所以我通过代理轮换,但这没有帮助。我确信它很简单,但我似乎无法弄清楚它是什么。任何帮助将非常感激!

 def __init__(self):

    self.user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
    self.options = webdriver.ChromeOptions()
    self.options.add_argument('headless')
    self.options.add_argument('--proxy-server=http://12.221.240.25:8080')
    self.options.add_argument('user-agent={self.user_agent}')
    self.current_region = ''
    self.driver = webdriver.Chrome()
    self.driver.get('https://craigslist.org')
    self.proxy_list = ['208.95.62.81:3128', '208.95.62.80:3128', '159.203.181.50:3128', '35.196.26.166:3128']

 def scrape_test(self):
    self.scraper_wait(self.driver, '//*[@id="rightbar"]')
    rightbar = self.driver.find_element_by_xpath('//*[@id="rightbar"]')
    nearby_cl = rightbar.find_element_by_xpath('//*[@id="rightbar"]/ul/li[1]')
    while True:
        child_items = nearby_cl.find_elements_by_class_name('s')
        random = randint(1, len(child_items))
        try:
            time.sleep(10)
            print("Clicking {}".format(child_items[random].text))
            child_items[random].click()
            housing = self.driver.find_element_by_xpath('//*[@id="hhh"]/h4/a')
            housing.click()
            self.driver.back()
            time.sleep(5)
        except WebDriverException:
            continue
        except Exception as e:
            print(e.message)
            return
        finally:
            self.driver.quit()

堆栈跟踪也如下:

    File "scraper.py", line 131, in <module>
    cl.scrape_test()
    File "scraper.py", line 81, in scrape_test
    child_items = nearby_cl.find_elements_by_class_name('s')
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 299, in find_elements_by_class_name
return self.find_elements(by=By.CLASS_NAME, value=name)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 527, in find_elements
{"using": by, "value": value})['value']
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 493, in _execute
return self._parent.execute(command, params)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 250, in execute
response = self.command_executor.execute(driver_command, params)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 464, in execute
return self._request(command_info[0], url, body=data)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 487, in _request
self._conn.request(method, parsed_url.path, body, headers)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1057, in request
self._send_request(method, url, body, headers)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1097, in _send_request
self.endheaders(body)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1053, in endheaders
self._send_output(message_body)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 897, in _send_output
self.send(msg)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 859, in send
self.connect()
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 836, in connect
self.timeout, self.source_address)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 575, in create_connection
raise err
  socket.error: [Errno 61] Connection refused
python selenium selenium-chromedriver
1个回答
0
投票

在你完成使用之前,你将在第一次通过while循环结束时拆除驱动程序。

相反,将调用driver.quit()移动到您确定使用驱动程序完成的某个地方,例如:

def scrape_test(self):
    try:
        # ...
        while True:
            # ...
    finally:
        self.driver.quit()
© www.soinside.com 2019 - 2024. All rights reserved.