如何修改驱动程序(driver)的当前 URL,把链接从:http://centrebet.com/Sports/12313443
改成:http://centrebet.com/#Sports/12313443
其中 http://centrebet.com/
和 /Sports/
这两部分是固定不变的。
我找到了许多处理静态链接的例子,但不知道如何对抓取到的一组当前 URL 做同样的处理。
代码:
# Scrape every sport page linked from the accordion menu and append the
# extracted text, together with the page's URL, to a CSV file.
driver = webdriver.Chrome()
url = "http://centrebet.com/"
driver.get(url)


def page_counter():
    """Yield the integers 0 through 999 in order."""
    for x in range(1000):
        yield x


count = page_counter()
driver.get(url)
sports = driver.find_element_by_id("accordionMenu1_ulSports")
# Each matching anchor has an onclick of the form menulink('<path>');
# strip the wrapper and prefix the base URL to get a link to visit.
links = [url + link.get_attribute("onclick").replace("menulink('", "").replace("')", "") for link in sports.find_elements_by_xpath('//ul[@id="accordionMenu1_ulSports"]//li//ul//li//ul//li//a[starts-with(@onclick, "menulink")]')]
# Number the links starting at 1 so they can be visited in descending order.
links = dict((next(count) + 1, e) for e in links)
desc_links = collections.OrderedDict(sorted(links.items(), reverse=True))
for key, value in desc_links.items():
    try:
        driver.get(value)
        ...
        langs4 = driver.find_elements_by_css_selector("tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody > tr > td:nth-child(2) > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > table > tbody > tr > td:nth-child(1) > div > div")
        langs4_text = []
        for lang in langs4:
            # print(lang.text)
            langs4_text.append(lang.text)
        url1 = driver.current_url
        # Python 2 / Python 3 compatible imports. Only a missing module
        # should trigger the fallback, so catch ImportError specifically
        # rather than using a bare ``except``.
        try:
            import urlparse
            from urllib import urlencode
        except ImportError:
            import urllib.parse as urlparse
            from urllib.parse import urlencode
        # Attempt to build the "#Sports" URL from the base URL.
        # NOTE: urlencode() produces a query string and percent-encodes
        # "#", so the printed URL is not the desired
        # http://centrebet.com/#Sports/... form.
        url = "http://centrebet.com/"
        params = {'#':'#','Sports':'Sports'}
        url_parts = list(urlparse.urlparse(url))
        query = dict(urlparse.parse_qsl(url_parts[4]))
        query.update(params)
        url_parts[4] = urlencode(query)
        print(urlparse.urlunparse(url_parts))
        # Append one CSV row per scraped text, with the page URL as the
        # last column.
        with open('C:\\O131.csv', 'a', newline='', encoding="utf-8") as outfile:
            writer = csv.writer(outfile)
            for row in zip(langs4_text):
                writer.writerow(row + (url1,))
    except TimeoutException as ex:
        # Best effort: a page that times out is skipped silently.
        pass
我仍不确定是否完全理解了你的需求。但如果只是要在 URL 中加入 #,
那么可以直接使用下面这个方案:
url = "http://centrebet.com/"
current_url = driver.current_url # http://centrebet.com/Sports/12313443
# Split on the first occurrence of the base URL only, so that a later copy
# of it (e.g. inside a query string) does not get an extra "#" inserted.
new_url = url + "#".join(current_url.split(url, 1)) # http://centrebet.com/#Sports/12313443
或者:
url = "http://centrebet.com/"
current_url = driver.current_url # http://centrebet.com/Sports/12313443
# Replace only the first occurrence of the base URL; any later copy of it
# (e.g. inside a query string) is left untouched.
new_url = current_url.replace(url, url + "#", 1) # http://centrebet.com/#Sports/12313443