注意事项:这是我第一次使用 asyncio,所以我可能做了一些非常愚蠢的事情。
场景如下:
我需要“http-ping”一个巨大的 url 列表来检查它们是否响应 200 或任何其他值。尽管使用 gobuster report 200,403 等工具,我的每个请求都会超时。
我的代码与此类似:
import asyncio,aiohttp
import datetime
#-------------------------------------------------------------------------------------
async def get_data_coroutine(session,url,follow_redirects,timeout_seconds,retries):
#print('#DEBUG '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+url)
try:
async with session.get(url,allow_redirects=False,timeout=timeout_seconds) as response:
status = response.status
#res = await response.text()
if( status==404):
pass
elif(300<=status and status<400):
location = str(response).split("Location': \'")[1].split("\'")[0]
print('#HIT '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+str(status)+' '+url+' ---> '+location)
if(follow_redirects==True):
return await get_data_coroutine(session,location,follow_redirects,timeout_seconds,retries)
else:
print('#HIT '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+str(status)+' '+url)
return None
except asyncio.exceptions.TimeoutError as e:
print('#ERROR '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+' '+' '+url+' TIMEOUT '+str(e))
return None
#---------------------------------------------------------------------------
async def main(loop):
base_url = 'http://192.168.59.37'
extensions = ['','.html','php']
fd = open('/usr/share/wordlists/dirb/common.txt','r')
words_without_suffix = [x.strip() for x in fd.readlines()]#[-5:] #DEBUG!
words_with_suffix = [base_url+'/'+x+y for x in words_without_suffix for y in extensions]
follow = True
total_timeout = aiohttp.ClientTimeout(total=60*60*24)
timeout_seconds = 10
retries = 1
async with aiohttp.ClientSession(loop=loop,timeout=total_timeout) as session:
tasks = [get_data_coroutine(session,url,follow,timeout_seconds,retries) for url in words_with_suffix]
await asyncio.gather(*tasks)
print('DONE')
#---------------------------------------------------------------------------
if(__name__=='__main__'):
loop = asyncio.get_event_loop()
result = loop.run_until_complete(main(loop))
我真的做错了什么吗?
有什么建议吗?
非常感谢!
实际上,我最终在 aio-libs/aiohttp 中发现了一个未解决的问题: https://github.com/aio-libs/aiohttp/issues/3203
这样,他们提出了一种可以满足我的需求的解决方法:
session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout_seconds,sock_read=timeout_seconds)
async with aiohttp.ClientSession(timeout=session_timeout) as session:
async with session.get(url,allow_redirects=False,timeout=1) as response:
...
回答你的问题 - 不,你没有做错任何事。在 http 请求/响应/超时处理方面,我看不出您的代码有任何问题。
如果您的所有请求确实都超时到主机(http://192.168.59.37),我怀疑您遇到的问题很可能取决于您的网络如何解析请求(或者您的代码如何构建网址)。
您可以使用curl等工具确认请求是否独立成功/失败,例如:
curl "http://192.168.59.37/abc.html"
我在本地测试了使用
python3 -m http.server 8080
并将空文件“abc”和“abc.html”放在同一目录中,更新base_url
base_url = "http://127.0.0.1:8080"
我的小更新(下面的代码)是输出。
http://127.0.0.1:8080/.bashrc.php
#404
http://127.0.0.1:8080/.bashrc
#404
http://127.0.0.1:8080/.bashrc.html
#404
http://127.0.0.1:8080/abc
#HIT 2020-11-03 12:57:33 200 http://127.0.0.1:8080/abc
http://127.0.0.1:8080/zt.php
#404
http://127.0.0.1:8080/zt.html
#404
http://127.0.0.1:8080/zt
#404
http://127.0.0.1:8080/abc.html
#HIT 2020-11-03 12:57:33 200 http://127.0.0.1:8080/abc.html
http://127.0.0.1:8080/abc.php
#404
DONE
我的更新大部分都很小,但可能有助于进一步调试。
abcphp
,而不是 abc.php
。response.ok
测试成功的 http 响应,您的代码没有处理 500 个错误(而是返回命中)。import asyncio
import aiohttp
import datetime
async def get_data_coroutine(session, url, follow_redirects, timeout_seconds, retries):
try:
async with session.get(
url, allow_redirects=False, timeout=timeout_seconds
) as response:
print(url)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
if response.ok:
print(f"#HIT {now} {response.status} {url}")
else:
status = response.status
if status == 404:
print("#404")
elif 300 <= status and status < 400:
location = str(response).split("Location': '")[1].split("'")[0]
print(f"#HIT {now} {status} {url} ---> {location}")
if follow_redirects is True:
return await get_data_coroutine(
session, location, follow_redirects, timeout_seconds, retries
)
else:
print("#ERROR ", response.status)
return None
except asyncio.TimeoutError as e:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"#ERROR {now} {url} TIMEOUT ", e)
return None
async def main(loop):
base_url = "http://127.0.0.1:8080"
extensions = ["", ".html", ".php"]
fd = open("/usr/share/wordlists/dirb/common.txt", "r")
words_without_suffix = [x.strip() for x in fd.readlines()]
words_with_suffix = [
base_url + "/" + x + y for x in words_without_suffix for y in extensions
]
follow = True
total_timeout = aiohttp.ClientTimeout(total=60 * 60 * 24)
timeout_seconds = 10
retries = 1
async with aiohttp.ClientSession(loop=loop, timeout=total_timeout) as session:
tasks = [
get_data_coroutine(session, url, follow, timeout_seconds, retries)
for url in words_with_suffix
]
await asyncio.gather(*tasks)
print("DONE")
if __name__ == "__main__":
loop = asyncio.get_event_loop()
result = loop.run_until_complete(main(loop))
也基于 aiohttp 问题。这对我有用:
async def fetch_status_codes(urls):
connector = aiohttp.TCPConnector(limit=None)
async with aiohttp.ClientSession(connector=connector) as session:
tasks = (fetch_status_code(session, url) for url in urls)
responses = await asyncio.gather(*tasks, return_exceptions=True)
return responses
async def fetch_status_code(session, url):
try:
async with session.get(url, timeout=your_timeout) as response:
return response.status
except asyncio.TimeoutError:
return None
except Exception as e:
return None