Python 回测数据框对齐问题

Question

我正在尝试开发代码来回测我的策略，但遇到了 ChatGPT 无法解决的错误。请尝试在 Google Colab 上自行运行代码，看看是否可以运行。

错误是：ValueError: Data must be 1-dimensional, got ndarray of shape (1006, 1) instead（数据必须是一维的，但实际得到的是形状为 (1006, 1) 的 ndarray）

我已经尝试了 ChatGPT 建议的所有方法，但代码仍然无法正常运行。

这是代码：

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange

# Data Retrieval
def get_data(ticker, start, end):
    """Download price history for ``ticker`` and return it with a ``Date`` column.

    Args:
        ticker: Symbol forwarded to ``yf.download``.
        start: Start of the requested period, forwarded as ``start=``.
        end: End of the requested period, forwarded as ``end=``.

    Returns:
        The downloaded frame after ``reset_index()``, with the ``Date``
        column converted through ``.dt.date``.
    """
    prices = yf.download(ticker, start=start, end=end)

    # Turn the index into an ordinary column so later code can use
    # positional row labels.
    prices = prices.reset_index()
    prices['Date'] = prices['Date'].dt.date

    # Debug output: a preview of the rows and the overall dimensions.
    print("Initial DataFrame head:\n", prices.head())
    print("Initial DataFrame shape:", prices.shape)
    return prices

# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic and ATR columns to ``df`` and drop NaN rows.

    Columns written: ``EMA8``, ``EMA14``, ``EMA50``, ``RSI14``,
    ``StochRSI_K``, ``StochRSI_D`` and ``ATR14``. The shape of each new
    column is printed as a debugging aid.

    Args:
        df: Frame providing ``Close``, ``High`` and ``Low`` columns.

    Returns:
        The same ``df`` object, modified in place.
    """
    close = df['Close']

    # Exponential moving averages: compute all three, then report shapes.
    for column, span in (('EMA8', 8), ('EMA14', 14), ('EMA50', 50)):
        df[column] = EMAIndicator(close, window=span).ema_indicator()
    print("EMA8 shape:", df['EMA8'].shape)
    print("EMA14 shape:", df['EMA14'].shape)
    print("EMA50 shape:", df['EMA50'].shape)

    # Relative Strength Index.
    df['RSI14'] = RSIIndicator(close, window=14).rsi()
    print("RSI14 shape:", df['RSI14'].shape)

    # Stochastic oscillator: %K line and its signal (%D) line.
    # NOTE(review): only the close series is supplied here; confirm that
    # StochasticOscillator does not also require high and low series.
    oscillator = StochasticOscillator(close, window=14, smooth_window=3)
    df['StochRSI_K'] = oscillator.stoch()
    df['StochRSI_D'] = oscillator.stoch_signal()
    print("StochRSI_K shape:", df['StochRSI_K'].shape)
    print("StochRSI_D shape:", df['StochRSI_D'].shape)

    # Average True Range.
    true_range = AverageTrueRange(df['High'], df['Low'], close, window=14)
    df['ATR14'] = true_range.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)

    # Remove every row that has a NaN in any column.
    df.dropna(inplace=True)

    return df

# Entry Conditions
def signal_generator(df):
    """Add boolean ``Long`` and ``Short`` entry-signal columns to ``df``.

    ``Long`` is True where Close > EMA8 > EMA14 > EMA50 and the %K line is
    above the %D line; ``Short`` is the mirror image with every comparison
    reversed.

    Args:
        df: Frame with ``Close``, ``EMA8``, ``EMA14``, ``EMA50``,
            ``StochRSI_K`` and ``StochRSI_D`` columns.

    Returns:
        The same ``df`` object with the two signal columns added.
    """
    close, fast, mid, slow = df['Close'], df['EMA8'], df['EMA14'], df['EMA50']
    k_line, d_line = df['StochRSI_K'], df['StochRSI_D']

    # Price and the three EMAs stacked in strictly descending / ascending order.
    uptrend = (close > fast) & (fast > mid) & (mid > slow)
    downtrend = (close < fast) & (fast < mid) & (mid < slow)

    df['Long'] = uptrend & (k_line > d_line)
    df['Short'] = downtrend & (k_line < d_line)

    return df

# Position Management
def backtest_strategy(df):
    """Walk through ``df`` row by row, holding at most one position at a time.

    A position is opened at the row's close when its ``Long`` (checked
    first) or ``Short`` flag is set. The stop is placed 3 x ATR14 against
    the position and the target 2 x ATR14 in its favour. The position is
    closed on the first later row whose close reaches either level.

    Args:
        df: Frame with ``Close``, ``ATR14``, ``Long`` and ``Short`` columns.

    Returns:
        A list with one price-difference P&L per closed trade, in the order
        the trades were closed. A position still open after the last row is
        not included.
    """
    side = None
    open_price = 0
    trades = []

    for _, bar in df.iterrows():
        price = bar['Close']

        # Flat: look for an entry on this bar, then move on to the next one.
        if side is None:
            band = bar['ATR14']
            if bar['Long']:
                side, open_price = 'long', price
                stop = open_price - (3 * band)
                target = open_price + (2 * band)
            elif bar['Short']:
                side, open_price = 'short', price
                stop = open_price + (3 * band)
                target = open_price - (2 * band)
            continue

        # In a trade: check whether this close hits the target or the stop.
        if side == 'long':
            exit_hit = price >= target or price <= stop
            pnl = price - open_price
        else:
            exit_hit = price <= target or price >= stop
            pnl = open_price - price

        if exit_hit:
            trades.append(pnl)
            side = None

    return trades

# Performance Metrics
def calculate_performance(results, df):
    """Add daily-return and cumulative-return columns to ``df``.

    Columns written, in this order: ``Daily_Returns``, ``Strategy_Returns``,
    ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold``.

    NOTE(review): ``results`` becomes a Series with a default 0..n-1 index,
    and pandas matches it to ``df`` by index label, not by date. Rows of
    ``df`` whose label is outside that range receive NaN.
    NOTE(review): backtest_strategy in this script returns price
    differences, while this function compounds them as fractional returns;
    confirm which unit is intended.

    Args:
        results: Sequence of per-trade values.
        df: Frame with a ``Close`` column.

    Returns:
        The same ``df`` object with the four columns added.
    """
    trade_values = pd.Series(results)

    df['Daily_Returns'] = df['Close'].pct_change()
    # Shift by one position and treat the vacated first slot as zero.
    df['Strategy_Returns'] = trade_values.shift(1).fillna(0)

    # Compound each return column into a growth-of-one curve.
    for source, target in (
        ('Strategy_Returns', 'Cumulative_Strategy'),
        ('Daily_Returns', 'Cumulative_Buy_Hold'),
    ):
        df[target] = (1 + df[source]).cumprod()

    return df

# Visualization
def plot_performance(df):
    """Plot the strategy and buy-and-hold cumulative-return curves.

    Args:
        df: Frame with ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold``
            columns.
    """
    curves = (
        ('Cumulative_Strategy', 'Strategy Returns', 'blue'),
        ('Cumulative_Buy_Hold', 'Buy and Hold Returns', 'orange'),
    )

    plt.figure(figsize=(12, 6))
    for column, caption, shade in curves:
        plt.plot(df[column], label=caption, color=shade)

    # Titles, axis labels, legend and grid, then show the figure.
    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()

# Main Execution
ticker, start_date, end_date = 'IWV', '2020-01-01', '2024-01-01'

data = get_data(ticker, start_date, end_date)

# Inspect the raw download before any indicator is computed.
print("Data after retrieval:\n", data.head())
print("Data shape after retrieval:", data.shape)

# Pipeline: indicators -> entry signals -> simulated trades -> return curves -> chart.
data = signal_generator(calculate_indicators(data))
results = backtest_strategy(data)
data = calculate_performance(results, data)
plot_performance(data)

Answer 1

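这是因为 yfinance 返回的 DataFrame 的列是多级索引（MultiIndex）：第一层是价格字段（如 Close），第二层是股票代码（如 IWV）。因此 df['Close'] 得到的是一个形状为 (1006, 1) 的单列 DataFrame，而不是指标函数所需要的一维 Series，于是出现了“数据必须是一维的”这个错误。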

你可以像下面这样修改：在计算指标和信号时，先从多级列中取出该股票代码对应的一维 Series：

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange

# Data Retrieval
def get_data(ticker, start, end):
    """Fetch price history for ``ticker`` and expose the dates as a column.

    NOTE(review): in the output printed with this answer the returned
    columns are two-level (Price, Ticker), so downstream code has to select
    the ticker level to obtain 1-D series.

    Args:
        ticker: Symbol forwarded to ``yf.download``.
        start: Start of the requested period, forwarded as ``start=``.
        end: End of the requested period, forwarded as ``end=``.

    Returns:
        The downloaded frame with its index reset and the ``Date`` column
        converted through ``.dt.date``.
    """
    df = yf.download(ticker, start=start, end=end).reset_index()
    df['Date'] = df['Date'].dt.date

    # Debug output: first rows, then dimensions.
    report = (
        ("Initial DataFrame head:\n", df.head()),
        ("Initial DataFrame shape:", df.shape),
    )
    for caption, payload in report:
        print(caption, payload)

    return df

# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic and ATR columns to ``df`` and drop NaN rows.

    All inputs are taken from the ``df`` argument itself; no module-level
    ``data`` or ``ticker`` variable is needed. ``df`` may have plain columns
    or two-level (Price, Ticker) columns holding a single ticker.

    Columns written: ``EMA8``, ``EMA14``, ``EMA50``, ``RSI14``,
    ``StochRSI_K``, ``StochRSI_D`` and ``ATR14``. The shape of each new
    column is printed as a debugging aid.

    NOTE(review): despite the ``StochRSI_*`` column names, the values come
    from ``StochasticOscillator``, not from a stochastic-RSI indicator;
    confirm which one was intended.

    Args:
        df: Frame providing ``Close``, ``High`` and ``Low`` prices.

    Returns:
        The same ``df`` object, modified in place.

    Raises:
        ValueError: If a price field holds more than one ticker column, so
            the series to use would be ambiguous.
    """
    def price_series(field):
        # With two-level columns df[field] is a one-column DataFrame, but
        # the indicator classes need a 1-D Series.
        column = df[field]
        if isinstance(column, pd.DataFrame):
            if column.shape[1] != 1:
                raise ValueError(
                    f"expected exactly one '{field}' column, "
                    f"got {column.shape[1]}"
                )
            column = column.iloc[:, 0]
        return column

    close = price_series('Close')
    high = price_series('High')
    low = price_series('Low')

    # Exponential Moving Averages (EMAs)
    df['EMA8'] = EMAIndicator(close, window=8).ema_indicator()
    df['EMA14'] = EMAIndicator(close, window=14).ema_indicator()
    df['EMA50'] = EMAIndicator(close, window=50).ema_indicator()

    # Print shapes to debug
    print("EMA8 shape:", df['EMA8'].shape)
    print("EMA14 shape:", df['EMA14'].shape)
    print("EMA50 shape:", df['EMA50'].shape)

    # Relative Strength Index (RSI)
    df['RSI14'] = RSIIndicator(close, window=14).rsi()
    print("RSI14 shape:", df['RSI14'].shape)

    # Stochastic oscillator: %K line and its signal (%D) line
    stoch = StochasticOscillator(
        close=close, low=low, high=high, window=14, smooth_window=3
    )
    df['StochRSI_K'] = stoch.stoch()
    df['StochRSI_D'] = stoch.stoch_signal()

    # Print shapes
    print("StochRSI_K shape:", df['StochRSI_K'].shape)
    print("StochRSI_D shape:", df['StochRSI_D'].shape)

    # Average True Range (ATR)
    atr = AverageTrueRange(high, low, close, window=14)
    df['ATR14'] = atr.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)

    # Drop every row that has a NaN in any column (presumably the indicator
    # warm-up rows at the start of the series).
    df.dropna(inplace=True)

    return df

# Entry Conditions
def signal_generator(df):
    """Add boolean ``Long`` and ``Short`` entry-signal columns to ``df``.

    ``Long`` is True where Close > EMA8 > EMA14 > EMA50 and the %K line is
    above the %D line; ``Short`` is the mirror image with every comparison
    reversed.

    The close price is read from ``df`` itself, so the function does not
    depend on a module-level ``ticker`` variable. ``df`` may have plain
    columns or two-level (Price, Ticker) columns holding a single ticker.

    Args:
        df: Frame with ``Close``, ``EMA8``, ``EMA14``, ``EMA50``,
            ``StochRSI_K`` and ``StochRSI_D`` columns.

    Returns:
        The same ``df`` object with the two signal columns added.

    Raises:
        ValueError: If ``Close`` holds more than one ticker column.
    """
    close = df['Close']
    if isinstance(close, pd.DataFrame):
        # Two-level columns: reduce the one-column frame to a 1-D Series so
        # the comparisons below align row by row.
        if close.shape[1] != 1:
            raise ValueError(
                f"expected exactly one 'Close' column, got {close.shape[1]}"
            )
        close = close.iloc[:, 0]

    df['Long'] = (
        (close > df['EMA8']) &
        (df['EMA8'] > df['EMA14']) &
        (df['EMA14'] > df['EMA50']) &
        (df['StochRSI_K'] > df['StochRSI_D'])
    )

    df['Short'] = (
        (close < df['EMA8']) &
        (df['EMA8'] < df['EMA14']) &
        (df['EMA14'] < df['EMA50']) &
        (df['StochRSI_K'] < df['StochRSI_D'])
    )

    return df

# Position Management
def backtest_strategy(df):
    """Simulate the strategy bar by bar, holding at most one position.

    A position is opened at the bar's close when its ``Long`` (checked
    first) or ``Short`` flag is set. The stop is placed 3 x ATR14 against
    the position and the target 2 x ATR14 in its favour. The position is
    closed on the first later bar whose close reaches either level; only
    closing prices are examined.

    Each column is extracted once as a 1-D list. This replaces per-row
    ``row[...][0]`` lookups, which relied on positional integer indexing
    of a label-indexed Series. It also lets the function work with plain
    columns as well as two-level (Price, Ticker) columns holding a single
    ticker.

    Args:
        df: Frame with ``Close``, ``ATR14``, ``Long`` and ``Short`` columns.

    Returns:
        A list with one price-difference P&L per closed trade, in the order
        the trades were closed. A position still open after the last bar is
        not included.

    Raises:
        ValueError: If one of the columns holds more than one sub-column.
    """
    def values(name):
        # Reduce a one-column DataFrame (two-level columns) to plain values.
        column = df[name]
        if isinstance(column, pd.DataFrame):
            if column.shape[1] != 1:
                raise ValueError(
                    f"expected exactly one '{name}' column, "
                    f"got {column.shape[1]}"
                )
            column = column.iloc[:, 0]
        return column.tolist()

    position = None
    entry_price = 0
    stop_loss = take_profit = None
    results = []

    bars = zip(values('Close'), values('ATR14'), values('Long'), values('Short'))
    for close, atr, go_long, go_short in bars:
        if position is None:  # No open position
            if go_long:
                position = 'long'
                entry_price = close
                stop_loss = entry_price - (3 * atr)
                take_profit = entry_price + (2 * atr)

            elif go_short:
                position = 'short'
                entry_price = close
                stop_loss = entry_price + (3 * atr)
                take_profit = entry_price - (2 * atr)

        elif position == 'long':
            if close >= take_profit or close <= stop_loss:
                results.append(close - entry_price)  # Profit or loss
                position = None  # Close position

        elif position == 'short':
            if close <= take_profit or close >= stop_loss:
                results.append(entry_price - close)  # Profit or loss
                position = None  # Close position

    return results

# Performance Metrics
def calculate_performance(results, df):
    """Attach per-day and cumulative return columns to ``df``.

    Columns written, in this order: ``Daily_Returns``, ``Strategy_Returns``,
    ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold``.

    NOTE(review): the Series built from ``results`` has a default 0..n-1
    index, and pandas matches it to ``df`` by index label rather than by
    trade date. Rows of ``df`` whose label is outside that range end up NaN.
    NOTE(review): backtest_strategy in this script returns price
    differences, while this function compounds them as fractional returns;
    confirm which unit is intended.

    Args:
        results: Sequence of per-trade values.
        df: Frame with a ``Close`` column.

    Returns:
        The same ``df`` object with the four columns added.
    """
    buy_hold_returns = df['Close'].pct_change()
    # Shift by one position; the vacated first slot becomes zero.
    strategy_returns = pd.Series(results).shift(1).fillna(0)

    df['Daily_Returns'] = buy_hold_returns
    df['Strategy_Returns'] = strategy_returns

    # Growth-of-one curves obtained by compounding each return column.
    df['Cumulative_Strategy'] = (1 + df['Strategy_Returns']).cumprod()
    df['Cumulative_Buy_Hold'] = (1 + df['Daily_Returns']).cumprod()

    return df

# Visualization
def plot_performance(df):
    """Draw the strategy curve against the buy-and-hold curve and show it.

    Args:
        df: Frame with ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold``
            columns.
    """
    # Column name -> keyword arguments for its line on the chart.
    line_styles = {
        'Cumulative_Strategy': {'label': 'Strategy Returns', 'color': 'blue'},
        'Cumulative_Buy_Hold': {'label': 'Buy and Hold Returns', 'color': 'orange'},
    }

    plt.figure(figsize=(12, 6))
    for column, style in line_styles.items():
        plt.plot(df[column], **style)

    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()

# Main Execution
ticker = 'IWV'
start_date, end_date = '2020-01-01', '2024-01-01'

data = get_data(ticker, start_date, end_date)

# Show what was downloaded before any indicator is computed.
print("Data after retrieval:\n", data.head())
print("Data shape after retrieval:", data.shape)

# Enrich the frame step by step: indicators first, then entry signals.
for stage in (calculate_indicators, signal_generator):
    data = stage(data)

# Simulate the trades, derive the return curves and chart them.
results = backtest_strategy(data)
data = calculate_performance(results, data)
plot_performance(data)

输出：

Initial DataFrame head:
 Price         Date   Adj Close       Close        High         Low  \
Ticker                     IWV         IWV         IWV         IWV   
0       2020-01-02  178.028717  189.990005  190.000000  188.740005   
1       2020-01-03  176.848022  188.729996  189.380005  187.850006   
2       2020-01-06  177.541443  189.470001  189.470001  187.630005   
3       2020-01-07  177.044800  188.940002  189.350006  188.649994   
4       2020-01-08  177.944336  189.899994  190.649994  189.009995   

Price         Open  Volume  
Ticker         IWV     IWV  
0       189.490005  206500  
1       187.850006  286500  
2       187.630005  205300  
3       189.179993  185000  
4       189.080002  174700  
Initial DataFrame shape: (1006, 7)
Data after retrieval:
 Price         Date   Adj Close       Close        High         Low  \
Ticker                     IWV         IWV         IWV         IWV   
0       2020-01-02  178.028717  189.990005  190.000000  188.740005   
1       2020-01-03  176.848022  188.729996  189.380005  187.850006   
2       2020-01-06  177.541443  189.470001  189.470001  187.630005   
3       2020-01-07  177.044800  188.940002  189.350006  188.649994   
4       2020-01-08  177.944336  189.899994  190.649994  189.009995   

Price         Open  Volume  
Ticker         IWV     IWV  
0       189.490005  206500  
1       187.850006  286500  
2       187.630005  205300  
3       189.179993  185000  
4       189.080002  174700  
Data shape after retrieval: (1006, 7)
EMA8 shape: (1006,)
EMA14 shape: (1006,)
EMA50 shape: (1006,)
RSI14 shape: (1006,)
StochRSI_K shape: (1006,)
StochRSI_D shape: (1006,)
ATR14 shape: (1006,)

图：

Python 回测数据框对齐问题

问题描述投票：0回答：1

1个回答

最新问题

Python 回测数据框对齐问题

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1