我目前在使用 Joblib 运行多进程(并行)程序时遇到了问题。之前我曾经让它正常工作,总耗时约 1 分钟,但后来我做了很多改动,把一些东西弄乱了。我在下面贴出了精简后的代码,因为它会出现同样的问题。我想遍历全部 150 个股票代码,并通过 Yahoo Finance 获取每只股票的期权链,这就是我正在尝试实现的目标。我也尝试过 asyncio 之类的其他库,但都没有成功。任何建议都将不胜感激。
import asyncio
import functools
import logging
import time
from datetime import datetime

import pandas as pd
import yfinance as yf
def background(f):
    """Decorate *f* so that calling it schedules the call on an executor.

    The decorated function does not run *f* in the caller's thread.  It hands
    the call to ``run_in_executor`` of the current event loop, using the
    loop's default executor (``None``), and returns the resulting future
    straight away.

    Args:
        f: The blocking callable to run in the background.

    Returns:
        A wrapper with the same name and docstring as *f* that returns a
        future instead of *f*'s result.
    """
    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        # run_in_executor only forwards positional arguments; binding the
        # call with functools.partial keeps keyword arguments working.
        call = functools.partial(f, *args, **kwargs)
        return asyncio.get_event_loop().run_in_executor(None, call)
    return wrapped
# Symbols whose download attempt has finished, whether or not it succeeded.
done = []


@background
def downloadChain(ticker):
    """Fetch the option chains of *ticker* that expire within 100 days.

    The function is wrapped by ``@background``, so a call returns the future
    created by that decorator; the value described under "Returns" is the
    result of that future.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.

    Returns:
        A ``pandas.DataFrame`` holding the calls and puts of every qualifying
        expiration stacked together, or an empty frame when nothing could be
        fetched.  Failures are logged instead of raised so that one bad
        symbol does not stop the others.
    """
    print(ticker)
    daysOut = 100
    frames = []
    try:
        yf_ticker = yf.Ticker(ticker)
        now = datetime.now()
        for expiration_date in yf_ticker.options:
            if (datetime.fromisoformat(expiration_date) - now).days > daysOut:
                continue
            try:
                chain = yf_ticker.option_chain(expiration_date)
            except Exception:
                logging.exception(
                    "Option chain %s for %s could not be downloaded",
                    expiration_date, ticker,
                )
                continue
            # option_chain returns separate calls/puts frames, and
            # DataFrame.append no longer exists in pandas 2.x, so the pieces
            # are collected here and concatenated once at the end.
            frames.extend([chain.calls, chain.puts])
    except Exception:
        logging.exception("Expirations for %s could not be retrieved", ticker)
    # Recorded even after a failure so progress counters still reach the total.
    done.append(ticker)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
主程序:
# NOTE(review): "QCOMM" may be a typo for "QCOM" - confirm before relying on it.
symbols = ["WATT","TSLA","UVXY","VXX","KEYS","EGO","GLD","WORK","BYND","BLK","PINS","LYFT","SPCE","PAYC","WDAY","UBER","CHGG","SHAK","CMG","CTL","ACB","TLRY","CGC","MJ","ORCL","GRUB","RNG","JWN","TTWO","ADI","ATVI","EA","SNE","GAMR","TXN","TMUS","MCHP","TSM","XBI","ETFC","MS","IWM","EXPD","RCL","CCL","MOMO","BABA","VMW","CRM","ULTA","SKYY","SPLK","FLWS","AVGO","TWTR","PANW","RJF","SABR","LOW","RS","ON","VEEV","DOCU","FB","SNAP","HPQ","RACE","F","AMAT","MRO","STM","AAL","DAL","VICR","XLC","CRON","DELL","T","VZ","S","MELI","CVM","REGN","NVAX","APT","CODX","LAKE","MRNA","EBS","INO", "SPY","SH","QQQ","XLF","KRE","XLV","HYG","LQD","NET","NFLX","ROKU","SHOP","AMZN","AAPL","MSFT","GOOGL","GOOG","NVDA","MU","AMD","INTC","MRVL","QCOMM","SQ","PYPL","TTD","TSLA","ZM","TDOC","LVGO","MDB","HD","VNQ","ARI","ACC","IIPR","EQR","EPR","SPG","PLD","ACB","WHR","NVAX","APT","MDT","CLRX","COST","SDC","LK","PVH","KSS","M","LULU","NKE","KO","BAC","JPM","CS","WFC","ARKW","ARKK","MGM","AMAT","WYNN","TGT","ITT","FXI"]
# The literal repeats a few tickers (e.g. "TSLA", "NVAX"); drop the repeats,
# keeping first-seen order, so no symbol is downloaded twice.
symbols = list(dict.fromkeys(symbols))

# Each call only schedules the download and returns immediately; the returned
# objects are kept so results and errors stay reachable afterwards.
futures = [downloadChain(ticker) for ticker in symbols]
我添加了一个单独的循环来查看 `done` 列表的大小,该列表包含所有已处理完的股票代码。我不确定自己到底改动了什么,但原本只需约 1 分钟的任务,现在这个循环大约要 10–15 分钟才能完成。
# Show how many symbols have finished.  The loop stops once every symbol is
# recorded in ``done`` and sleeps between refreshes; a tight ``while True``
# never ends and keeps the main thread spinning against the download threads.
while len(done) < len(symbols):
    clear_output(wait=True)
    print(len(done))
    time.sleep(0.5)
# Final refresh so the last printed count is the completed total.
clear_output(wait=True)
print(len(done))
“修复”有两个版本。我把它们作为答案贴出来,而不是在评论区里来回讨论 :)
import asyncio
import pandas as pd
import yfinance as yf
from concurrent.futures import ThreadPoolExecutor
def background(f):
    """Decorate *f* so that calling it submits the call to ``executor``.

    ``executor`` is looked up in the module globals when the wrapper is
    called, so it must be bound by then (this script binds it with
    ``with ThreadPoolExecutor() as executor``).  The wrapper returns the
    future produced by ``run_in_executor`` instead of *f*'s result.

    Args:
        f: The blocking callable to run in the background.

    Returns:
        A wrapper with the same name and docstring as *f*.
    """
    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        # run_in_executor only forwards positional arguments; binding the
        # call with functools.partial keeps keyword arguments working.
        call = functools.partial(f, *args, **kwargs)
        return asyncio.get_event_loop().run_in_executor(executor, call)
    return wrapped
# Symbols whose download attempt has finished, whether or not it succeeded.
done = []


@background
def downloadChain(ticker):
    """Fetch the option chains of *ticker* that expire within 100 days.

    The function is wrapped by ``@background``, so a call returns the future
    created by that decorator; the value described under "Returns" is the
    result of that future.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.

    Returns:
        A ``pandas.DataFrame`` holding the calls and puts of every qualifying
        expiration stacked together, or an empty frame when nothing could be
        fetched.  Failures are logged instead of raised so that one bad
        symbol does not stop the others.
    """
    print(ticker)
    daysOut = 100
    frames = []
    try:
        yf_ticker = yf.Ticker(ticker)
        now = datetime.now()
        for expiration_date in yf_ticker.options:
            if (datetime.fromisoformat(expiration_date) - now).days > daysOut:
                continue
            try:
                chain = yf_ticker.option_chain(expiration_date)
            except Exception:
                logging.exception(
                    "Option chain %s for %s could not be downloaded",
                    expiration_date, ticker,
                )
                continue
            # option_chain returns separate calls/puts frames, and
            # DataFrame.append no longer exists in pandas 2.x, so the pieces
            # are collected here and concatenated once at the end.
            frames.extend([chain.calls, chain.puts])
    except Exception:
        logging.exception("Expirations for %s could not be retrieved", ticker)
    # Recorded even after a failure so progress counters still reach the total.
    done.append(ticker)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
# NOTE(review): "QCOMM" may be a typo for "QCOM" - confirm before relying on it.
symbols = ["WATT","TSLA","UVXY","VXX","KEYS","EGO","GLD","WORK","BYND","BLK","PINS","LYFT","SPCE","PAYC","WDAY","UBER","CHGG","SHAK","CMG","CTL","ACB","TLRY","CGC","MJ","ORCL","GRUB","RNG","JWN","TTWO","ADI","ATVI","EA","SNE","GAMR","TXN","TMUS","MCHP","TSM","XBI","ETFC","MS","IWM","EXPD","RCL","CCL","MOMO","BABA","VMW","CRM","ULTA","SKYY","SPLK","FLWS","AVGO","TWTR","PANW","RJF","SABR","LOW","RS","ON","VEEV","DOCU","FB","SNAP","HPQ","RACE","F","AMAT","MRO","STM","AAL","DAL","VICR","XLC","CRON","DELL","T","VZ","S","MELI","CVM","REGN","NVAX","APT","CODX","LAKE","MRNA","EBS","INO", "SPY","SH","QQQ","XLF","KRE","XLV","HYG","LQD","NET","NFLX","ROKU","SHOP","AMZN","AAPL","MSFT","GOOGL","GOOG","NVDA","MU","AMD","INTC","MRVL","QCOMM","SQ","PYPL","TTD","TSLA","ZM","TDOC","LVGO","MDB","HD","VNQ","ARI","ACC","IIPR","EQR","EPR","SPG","PLD","ACB","WHR","NVAX","APT","MDT","CLRX","COST","SDC","LK","PVH","KSS","M","LULU","NKE","KO","BAC","JPM","CS","WFC","ARKW","ARKK","MGM","AMAT","WYNN","TGT","ITT","FXI"]
# The literal repeats a few tickers (e.g. "TSLA", "NVAX"); drop the repeats,
# keeping first-seen order, so no symbol is downloaded twice.
symbols = list(dict.fromkeys(symbols))

# ``executor`` is the module-level name the ``background`` decorator reads.
# Leaving the ``with`` block shuts the pool down and waits for every
# submitted download, so the script only continues once all are finished.
with ThreadPoolExecutor() as executor:
    # Keep the returned futures so results and errors stay reachable.
    futures = [downloadChain(ticker) for ticker in symbols]
第二个版本更为标准:在其中定义一个 async 的 `main` 函数,并让 asyncio 把它作为主入口点来运行。
import asyncio
import pandas as pd
import yfinance as yf
from concurrent.futures import ProcessPoolExecutor
# NOTE(review): "QCOMM" may be a typo for "QCOM" - confirm before relying on it.
symbols = ["WATT","TSLA","UVXY","VXX","KEYS","EGO","GLD","WORK","BYND","BLK","PINS","LYFT","SPCE","PAYC","WDAY","UBER","CHGG","SHAK","CMG","CTL","ACB","TLRY","CGC","MJ","ORCL","GRUB","RNG","JWN","TTWO","ADI","ATVI","EA","SNE","GAMR","TXN","TMUS","MCHP","TSM","XBI","ETFC","MS","IWM","EXPD","RCL","CCL","MOMO","BABA","VMW","CRM","ULTA","SKYY","SPLK","FLWS","AVGO","TWTR","PANW","RJF","SABR","LOW","RS","ON","VEEV","DOCU","FB","SNAP","HPQ","RACE","F","AMAT","MRO","STM","AAL","DAL","VICR","XLC","CRON","DELL","T","VZ","S","MELI","CVM","REGN","NVAX","APT","CODX","LAKE","MRNA","EBS","INO", "SPY","SH","QQQ","XLF","KRE","XLV","HYG","LQD","NET","NFLX","ROKU","SHOP","AMZN","AAPL","MSFT","GOOGL","GOOG","NVDA","MU","AMD","INTC","MRVL","QCOMM","SQ","PYPL","TTD","TSLA","ZM","TDOC","LVGO","MDB","HD","VNQ","ARI","ACC","IIPR","EQR","EPR","SPG","PLD","ACB","WHR","NVAX","APT","MDT","CLRX","COST","SDC","LK","PVH","KSS","M","LULU","NKE","KO","BAC","JPM","CS","WFC","ARKW","ARKK","MGM","AMAT","WYNN","TGT","ITT","FXI"]
# The literal repeats a few tickers (e.g. "TSLA", "NVAX"); drop the repeats,
# keeping first-seen order, so no symbol is downloaded twice.
symbols = list(dict.fromkeys(symbols))
# Symbols whose download attempt has finished, whether or not it succeeded.
done = []
def downloadChain(ticker):
    """Fetch the option chains of *ticker* that expire within 100 days.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.

    Returns:
        A ``pandas.DataFrame`` holding the calls and puts of every qualifying
        expiration stacked together, or an empty frame when nothing could be
        fetched.  Failures are logged instead of raised so that one bad
        symbol does not stop the others.

    Note:
        ``done`` is appended to in whichever process executes this function.
        When it runs in a worker process, the ``done`` list of the parent
        process is not changed.
    """
    print(ticker)
    daysOut = 100
    frames = []
    try:
        yf_ticker = yf.Ticker(ticker)
        now = datetime.now()
        for expiration_date in yf_ticker.options:
            if (datetime.fromisoformat(expiration_date) - now).days > daysOut:
                continue
            try:
                chain = yf_ticker.option_chain(expiration_date)
            except Exception:
                logging.exception(
                    "Option chain %s for %s could not be downloaded",
                    expiration_date, ticker,
                )
                continue
            # option_chain returns separate calls/puts frames, and
            # DataFrame.append no longer exists in pandas 2.x, so the pieces
            # are collected here and concatenated once at the end.
            frames.extend([chain.calls, chain.puts])
    except Exception:
        logging.exception("Expirations for %s could not be retrieved", ticker)
    # Recorded even after a failure so progress counters still reach the total.
    done.append(ticker)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
async def main():
    """Run ``downloadChain`` for every symbol in a process pool and wait.

    Returns:
        A list with one entry per symbol, in ``symbols`` order: the value
        returned by ``downloadChain``, or the exception object if that job
        failed (``return_exceptions=True`` keeps one failure from cancelling
        the rest).
    """
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor() as executor:
        futures = [
            loop.run_in_executor(executor, downloadChain, ticker)
            for ticker in symbols
        ]
        # Await inside the ``with`` block: otherwise the pool shutdown on
        # exit would block the event loop while the jobs finish, and their
        # results and errors would never be collected.
        return await asyncio.gather(*futures, return_exceptions=True)


# The guard also keeps worker processes that re-import this module from
# starting the event loop themselves.
if __name__ == '__main__':
    asyncio.run(main())
在这里,您还可以更精确地控制要使用的执行器。基本上,我们显式地写明了任务在哪个事件循环下处理,以及把工作提交给哪个执行器。本地测试并未显示 `ProcessPoolExecutor` 和 `ThreadPoolExecutor` 之间有明显差异。
您可以尝试一个名为 yahooquery 的软件包。它可以获取期权链数据,并且支持异步获取。您可以一次传入全部 150 个股票代码,也可以分批循环处理:
from yahooquery import Ticker
# NOTE(review): "QCOMM" may be a typo for "QCOM" - confirm before relying on it.
symbols = ["WATT","TSLA","UVXY","VXX","KEYS","EGO","GLD","WORK","BYND","BLK","PINS","LYFT","SPCE","PAYC","WDAY","UBER","CHGG","SHAK","CMG","CTL","ACB","TLRY","CGC","MJ","ORCL","GRUB","RNG","JWN","TTWO","ADI","ATVI","EA","SNE","GAMR","TXN","TMUS","MCHP","TSM","XBI","ETFC","MS","IWM","EXPD","RCL","CCL","MOMO","BABA","VMW","CRM","ULTA","SKYY","SPLK","FLWS","AVGO","TWTR","PANW","RJF","SABR","LOW","RS","ON","VEEV","DOCU","FB","SNAP","HPQ","RACE","F","AMAT","MRO","STM","AAL","DAL","VICR","XLC","CRON","DELL","T","VZ","S","MELI","CVM","REGN","NVAX","APT","CODX","LAKE","MRNA","EBS","INO", "SPY","SH","QQQ","XLF","KRE","XLV","HYG","LQD","NET","NFLX","ROKU","SHOP","AMZN","AAPL","MSFT","GOOGL","GOOG","NVDA","MU","AMD","INTC","MRVL","QCOMM","SQ","PYPL","TTD","TSLA","ZM","TDOC","LVGO","MDB","HD","VNQ","ARI","ACC","IIPR","EQR","EPR","SPG","PLD","ACB","WHR","NVAX","APT","MDT","CLRX","COST","SDC","LK","PVH","KSS","M","LULU","NKE","KO","BAC","JPM","CS","WFC","ARKW","ARKK","MGM","AMAT","WYNN","TGT","ITT","FXI"]
# The literal repeats a few tickers (e.g. "TSLA", "NVAX"); drop the repeats,
# keeping first-seen order, so no symbol is requested twice.
symbols = list(dict.fromkeys(symbols))

# Option 1: pass every symbol at once (probably want to use a proxy).
ticker = Ticker(symbols, asynchronous=True)
df = ticker.option_chain

# Option 2: reuse one Ticker and work through the list in batches of ``n``.
# Options 1 and 2 are alternatives; running both requests the data twice.
ticker = Ticker('aapl', asynchronous=True)  # placeholder symbol, replaced inside the loop
n = 10
dataframes = []
for i in range(0, len(symbols), n):
    ticker.symbols = symbols[i:i+n]
    dataframes.append(ticker.option_chain)
# Stack the per-batch results into one frame (this rebinds ``df`` from option 1).
df = pd.concat(dataframes)