我在网络爬虫中编写了以下循环。
它运行几秒钟后就会超时（耗尽允许的运行时间）。我想不明白这是为什么。
def crawlweb(seed):
    """Crawl the pages reachable from ``seed``, oldest frontier entry first.

    Args:
        seed: The starting page; it is handed to ``get_page`` as-is.

    Returns:
        A ``(crawled, tocrawl)`` tuple. ``crawled`` lists each visited page
        once, in visit order. ``tocrawl`` is the remaining frontier, which is
        empty on return because the loop only ends when nothing is left.
    """
    crawled = []
    tocrawl = [seed]
    while tocrawl:
        # Take the next page on every iteration. Reading it a single time
        # before the loop left `page` stuck on the seed: after the first pass
        # it was already in `crawled`, the frontier never shrank, and the
        # loop could not terminate.
        page = tocrawl.pop(0)
        if page not in crawled:
            # get_page / get_links are defined elsewhere; get_links is
            # assumed to return the pages linked from the fetched content
            # -- TODO confirm against its definition.
            tocrawl.extend(get_links(get_page(page)))
            crawled.append(page)
    return crawled, tocrawl
def crawl_web(seed):
    """Crawl the pages reachable from ``seed`` and return those visited.

    The frontier is used as a stack: the most recently added page is
    crawled next.

    Args:
        seed: The starting page; it is handed to ``get_page`` as-is.

    Returns:
        The list of crawled pages, each appearing once, in visit order.
    """
    tocrawl = [seed]
    crawled = []
    while tocrawl:
        # Popping unconditionally shrinks the frontier even when the page
        # was already crawled, so the loop always makes progress.
        page = tocrawl.pop()
        if page not in crawled:
            # union's return value is discarded, so it is relied on to add
            # the new links to `tocrawl` in place. union, get_all_links and
            # get_page are defined elsewhere -- TODO confirm their contracts.
            union(tocrawl, get_all_links(get_page(page)))
            crawled.append(page)
    return crawled