Python 回测数据框对齐问题

问题描述 投票:0回答:1

我正在尝试开发代码来回测我的策略,但遇到了 ChatGPT 无法解决的错误。请尝试在 Google Colab 上自行运行代码,看看是否可以运行。

错误是: ValueError: Data must be 1-dimensional, got ndarray of shape (1006, 1) instead(数据必须是一维的,但实际得到的是形状为 (1006, 1) 的 ndarray)

我已经尝试了 ChatGPT 所要求的一切,但仍然无法正常工作。

这是代码:

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange

# Data Retrieval
def get_data(ticker, start, end):
    """Fetch price data for ``ticker`` via yfinance and expose the dates as a column.

    Args:
        ticker: Symbol passed straight to ``yf.download``.
        start: Start of the requested range, forwarded as ``start=``.
        end: End of the requested range, forwarded as ``end=``.

    Returns:
        The downloaded DataFrame with its index reset and the ``Date``
        column converted to ``datetime.date`` values.
    """
    prices = yf.download(ticker, start=start, end=end)

    # Turn the index into an ordinary column, then strip the time component.
    prices.reset_index(inplace=True)
    prices['Date'] = prices['Date'].dt.date

    # Debug output: first rows and overall shape of the retrieved frame.
    debug_lines = (
        ("Initial DataFrame head:\n", prices.head()),
        ("Initial DataFrame shape:", prices.shape),
    )
    for caption, payload in debug_lines:
        print(caption, payload)

    return prices

# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic and ATR columns to ``df``, then drop NaN rows.

    The frame is modified in place (new columns are assigned and
    ``dropna(inplace=True)`` is applied) and the same object is returned.
    The shape of every new column is printed as a debugging aid.

    Args:
        df: Price frame providing ``Close``, ``High`` and ``Low``.

    Returns:
        ``df`` with the indicator columns ``EMA8``, ``EMA14``, ``EMA50``,
        ``RSI14``, ``StochRSI_K``, ``StochRSI_D`` and ``ATR14`` added.
    """
    close = df['Close']

    # Exponential moving averages: compute all three first, then report them.
    ema_windows = (8, 14, 50)
    for span in ema_windows:
        df[f'EMA{span}'] = EMAIndicator(close, window=span).ema_indicator()
    for span in ema_windows:
        print(f"EMA{span} shape:", df[f'EMA{span}'].shape)

    # Relative strength index.
    df['RSI14'] = RSIIndicator(close, window=14).rsi()
    print("RSI14 shape:", df['RSI14'].shape)

    # Stochastic oscillator (%K and its signal line).
    # NOTE(review): ta's StochasticOscillator appears to expect high, low and
    # close series; only Close is passed here -- confirm against the ta docs.
    stochastic = StochasticOscillator(close, window=14, smooth_window=3)
    df['StochRSI_K'] = stochastic.stoch()
    df['StochRSI_D'] = stochastic.stoch_signal()
    for column in ('StochRSI_K', 'StochRSI_D'):
        print(f"{column} shape:", df[column].shape)

    # Average true range.
    true_range = AverageTrueRange(df['High'], df['Low'], close, window=14)
    df['ATR14'] = true_range.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)

    # Indicator warm-up periods leave NaNs; remove those rows.
    df.dropna(inplace=True)
    return df

# Entry Conditions
def signal_generator(df):
    """Flag long and short entry conditions as boolean ``Long``/``Short`` columns.

    A bar is ``Long`` when Close > EMA8 > EMA14 > EMA50 and %K is above %D;
    it is ``Short`` when every one of those inequalities is reversed.

    Args:
        df: Frame holding ``Close``, ``EMA8``, ``EMA14``, ``EMA50``,
            ``StochRSI_K`` and ``StochRSI_D``.

    Returns:
        The same ``df`` object with ``Long`` and ``Short`` added.
    """
    close = df['Close']
    fast, mid, slow = df['EMA8'], df['EMA14'], df['EMA50']
    stoch_k, stoch_d = df['StochRSI_K'], df['StochRSI_D']

    # EMAs stacked upwards with price on top and momentum rising.
    bullish = (close > fast) & (fast > mid) & (mid > slow) & (stoch_k > stoch_d)
    df['Long'] = bullish

    # Mirror image: EMAs stacked downwards with momentum falling.
    bearish = (close < fast) & (fast < mid) & (mid < slow) & (stoch_k < stoch_d)
    df['Short'] = bearish

    return df

# Position Management
def backtest_strategy(df):
    """Walk the bars in order and record the price difference of each closed trade.

    At most one position is open at a time. A position is opened on a bar
    flagged ``Long`` (checked first) or ``Short``, with a stop 3 * ATR14
    away and a target 2 * ATR14 away from the entry close. On later bars
    the position is closed once the close reaches either level. A position
    still open after the last bar contributes nothing to the result.

    Args:
        df: Frame with ``Close``, ``ATR14``, ``Long`` and ``Short`` columns.

    Returns:
        List of per-trade results in price units (exit minus entry for
        longs, entry minus exit for shorts), in closing order.
    """
    side = None
    entry = 0
    closed_trades = []

    for _, bar in df.iterrows():
        if side is None:
            # Flat: look for an entry signal, long taking precedence.
            if bar['Long']:
                side, sign = 'long', 1
            elif bar['Short']:
                side, sign = 'short', -1
            else:
                continue
            entry = bar['Close']
            stop = entry - sign * (3 * bar['ATR14'])
            target = entry + sign * (2 * bar['ATR14'])
            continue

        # In a position: test the close against target and stop.
        price = bar['Close']
        if side == 'long':
            exit_hit = price >= target or price <= stop
            outcome = price - entry
        else:
            exit_hit = price <= target or price >= stop
            outcome = entry - price

        if exit_hit:
            closed_trades.append(outcome)
            side = None

    return closed_trades

# Performance Metrics
def calculate_performance(results, df):
    """Attach daily, strategy and cumulative return columns to ``df``.

    Args:
        results: Sequence of per-trade results as produced by
            ``backtest_strategy``.
        df: Frame with a ``Close`` column; modified in place.

    Returns:
        The same ``df`` with ``Daily_Returns``, ``Strategy_Returns``,
        ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold`` added.
    """
    # NOTE(review): the Series built from ``results`` has a default 0..n-1
    # index, so the assignment below aligns on those labels rather than on the
    # bars where trades closed; the values are also price differences, not
    # fractional returns -- confirm this is the intended behaviour.
    trade_series = pd.Series(results)

    df['Daily_Returns'] = df['Close'].pct_change()
    df['Strategy_Returns'] = trade_series.shift(1).fillna(0)

    # Compound each return stream into a cumulative growth curve.
    compounding = (
        ('Strategy_Returns', 'Cumulative_Strategy'),
        ('Daily_Returns', 'Cumulative_Buy_Hold'),
    )
    for source, target in compounding:
        df[target] = (1 + df[source]).cumprod()

    return df

# Visualization
def plot_performance(df):
    """Plot the cumulative strategy curve against cumulative buy-and-hold.

    Args:
        df: Frame containing ``Cumulative_Strategy`` and
            ``Cumulative_Buy_Hold`` columns.
    """
    # (column, legend label, line colour) for each curve, in drawing order.
    curves = (
        ('Cumulative_Strategy', 'Strategy Returns', 'blue'),
        ('Cumulative_Buy_Hold', 'Buy and Hold Returns', 'orange'),
    )

    plt.figure(figsize=(12, 6))
    for column, legend_label, line_color in curves:
        plt.plot(df[column], label=legend_label, color=line_color)

    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()

# Main Execution
# Script entry: run the whole pipeline for one symbol over a fixed date range.
ticker = 'IWV'
start_date = '2020-01-01'
end_date = '2024-01-01'

# Step 1: download the raw price data.
data = get_data(ticker, start_date, end_date)

# Check the DataFrame contents before calculating indicators
print("Data after retrieval:\n", data.head())
print("Data shape after retrieval:", data.shape)

# Now attempt to calculate indicators
# Steps 2-5: indicators -> entry signals -> trade simulation -> return curves.
# Each stage receives the frame returned by the previous one.
data = calculate_indicators(data)
data = signal_generator(data)
results = backtest_strategy(data)
data = calculate_performance(results, data)
# Step 6: chart strategy vs. buy-and-hold.
plot_performance(data)
python dataframe quantitative-finance back-testing
1个回答
0
投票

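这是因为 yfinance 返回的 DataFrame 的列是多级索引(MultiIndex):第一层是价格字段(如 Close),第二层是股票代码(如 IWV)。因此 df['Close'] 得到的是形状为 (1006, 1) 的 DataFrame,而不是指标函数所需要的一维 Series。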

你可以像下面这样修改代码,在取列时显式指定股票代码:

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ta.trend import EMAIndicator
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.volatility import AverageTrueRange

# Data Retrieval
def get_data(ticker, start, end):
    """Download price data with yfinance and return it with a ``Date`` column.

    Args:
        ticker: Symbol handed to ``yf.download``.
        start: Range start, forwarded as ``start=``.
        end: Range end, forwarded as ``end=``.

    Returns:
        The downloaded DataFrame after its index has been reset and the
        ``Date`` column reduced to ``datetime.date`` values.
    """
    frame = yf.download(ticker, start=start, end=end)

    # Promote the index to a column and keep only the calendar date.
    frame.reset_index(inplace=True)
    frame['Date'] = frame['Date'].dt.date

    # Show what was retrieved before any further processing.
    preview = frame.head()
    print("Initial DataFrame head:\n", preview)
    dimensions = frame.shape
    print("Initial DataFrame shape:", dimensions)

    return frame

# Indicator Calculation
def calculate_indicators(df):
    """Add EMA, RSI, stochastic and ATR columns to ``df``, then drop NaN rows.

    The price columns of ``df`` are two-level (field, ticker), so each
    field is narrowed to a one-dimensional Series with the module-level
    ``ticker`` before being passed to the indicator classes. All inputs
    are read from the ``df`` argument (not from the global ``data``), and
    the ticker is never hard-coded.

    The frame is modified in place and the same object is returned. The
    shape of every new column is printed as a debugging aid.

    Args:
        df: Price frame whose ``Close``, ``High`` and ``Low`` entries can
            be indexed by ``ticker``.

    Returns:
        ``df`` with ``EMA8``, ``EMA14``, ``EMA50``, ``RSI14``,
        ``StochRSI_K``, ``StochRSI_D`` and ``ATR14`` added and rows
        containing NaN removed.
    """
    # Select the 1-D series once; the indicator classes reject 2-D input.
    close = df['Close'][ticker]
    high = df['High'][ticker]
    low = df['Low'][ticker]

    # Exponential Moving Averages (EMAs)
    df['EMA8'] = EMAIndicator(close, window=8).ema_indicator()
    df['EMA14'] = EMAIndicator(close, window=14).ema_indicator()
    df['EMA50'] = EMAIndicator(close, window=50).ema_indicator()

    # Print shapes to debug
    print("EMA8 shape:", df['EMA8'].shape)
    print("EMA14 shape:", df['EMA14'].shape)
    print("EMA50 shape:", df['EMA50'].shape)

    # Relative Strength Index (RSI)
    df['RSI14'] = RSIIndicator(close, window=14).rsi()
    print("RSI14 shape:", df['RSI14'].shape)

    # Stochastic oscillator (%K and its signal line)
    stoch_rsi = StochasticOscillator(
        close=close, low=low, high=high, window=14, smooth_window=3
    )
    df['StochRSI_K'] = stoch_rsi.stoch()
    df['StochRSI_D'] = stoch_rsi.stoch_signal()

    # Print shapes
    print("StochRSI_K shape:", df['StochRSI_K'].shape)
    print("StochRSI_D shape:", df['StochRSI_D'].shape)

    # Average True Range (ATR)
    atr = AverageTrueRange(high, low, close, window=14)
    df['ATR14'] = atr.average_true_range()
    print("ATR14 shape:", df['ATR14'].shape)

    # Drop rows with NaN values left by the indicator warm-up periods
    df.dropna(inplace=True)

    return df

# Entry Conditions
def signal_generator(df):
    """Flag long and short entry conditions as ``Long``/``Short`` columns.

    The close series is taken from ``df['Close'][ticker]``, where
    ``ticker`` is a module-level name. A bar is ``Long`` when
    Close > EMA8 > EMA14 > EMA50 and %K is above %D; ``Short`` when every
    inequality is reversed.

    Args:
        df: Frame holding ``Close`` (indexable by ``ticker``), ``EMA8``,
            ``EMA14``, ``EMA50``, ``StochRSI_K`` and ``StochRSI_D``.

    Returns:
        The same ``df`` object with ``Long`` and ``Short`` added.
    """
    close = df['Close'][ticker]
    fast, mid, slow = df['EMA8'], df['EMA14'], df['EMA50']
    stoch_k, stoch_d = df['StochRSI_K'], df['StochRSI_D']

    # Uptrend stack with rising momentum.
    long_mask = (close > fast) & (fast > mid) & (mid > slow) & (stoch_k > stoch_d)
    df['Long'] = long_mask

    # Downtrend stack with falling momentum.
    short_mask = (close < fast) & (fast < mid) & (mid < slow) & (stoch_k < stoch_d)
    df['Short'] = short_mask

    return df

# Position Management
def _first_value(cell):
    """Return the scalar in ``cell``: its first element if it is a Series, else itself."""
    positional = getattr(cell, 'iloc', None)
    return cell if positional is None else positional[0]


def backtest_strategy(df):
    """Walk the bars in order and record the price difference of each closed trade.

    At most one position is open at a time. A position is opened on a bar
    flagged ``Long`` (checked first) or ``Short``, with a stop 3 * ATR14
    away and a target 2 * ATR14 away from the entry close. On later bars
    the position is closed once the close reaches either level. A position
    still open after the last bar contributes nothing to the result.

    With two-level columns, ``row['Close']`` is a one-element Series rather
    than a scalar. Values are therefore unwrapped with ``.iloc[0]`` instead
    of ``[0]``: integer keys on a label-indexed Series are a deprecated
    positional fallback in pandas. Plain scalar cells (single-level
    columns) are accepted as well.

    Args:
        df: Frame with ``Close``, ``ATR14``, ``Long`` and ``Short`` columns,
            either single-level or (field, ticker) two-level.

    Returns:
        List of per-trade results in price units (exit minus entry for
        longs, entry minus exit for shorts), in closing order.
    """
    position = None
    entry_price = 0
    stop_loss = take_profit = None
    results = []

    for _, row in df.iterrows():
        close = _first_value(row['Close'])

        if position is None:  # No open position: look for an entry
            if _first_value(row['Long']):
                atr = _first_value(row['ATR14'])
                position = 'long'
                entry_price = close
                stop_loss = entry_price - (3 * atr)
                take_profit = entry_price + (2 * atr)
            elif _first_value(row['Short']):
                atr = _first_value(row['ATR14'])
                position = 'short'
                entry_price = close
                stop_loss = entry_price + (3 * atr)
                take_profit = entry_price - (2 * atr)

        elif position == 'long':
            if close >= take_profit or close <= stop_loss:
                results.append(close - entry_price)  # Profit or loss
                position = None  # Close position

        elif position == 'short':
            if close <= take_profit or close >= stop_loss:
                results.append(entry_price - close)  # Profit or loss
                position = None  # Close position

    return results

# Performance Metrics
def calculate_performance(results, df):
    """Attach daily, strategy and cumulative return columns to ``df``.

    Args:
        results: Sequence of per-trade results as produced by
            ``backtest_strategy``.
        df: Frame with a ``Close`` entry; modified in place.

    Returns:
        The same ``df`` with ``Daily_Returns``, ``Strategy_Returns``,
        ``Cumulative_Strategy`` and ``Cumulative_Buy_Hold`` added.
    """
    # NOTE(review): the Series built from ``results`` has a default 0..n-1
    # index, so the assignment aligns on those labels rather than on the bars
    # where trades closed; the values are also price differences, not
    # fractional returns -- confirm this is the intended behaviour.
    per_trade = pd.Series(results)
    lagged = per_trade.shift(1)

    df['Daily_Returns'] = df['Close'].pct_change()
    df['Strategy_Returns'] = lagged.fillna(0)

    # Compound both return streams into growth curves.
    strategy_growth = 1 + df['Strategy_Returns']
    df['Cumulative_Strategy'] = strategy_growth.cumprod()
    market_growth = 1 + df['Daily_Returns']
    df['Cumulative_Buy_Hold'] = market_growth.cumprod()

    return df

# Visualization
def plot_performance(df):
    """Draw cumulative strategy returns and cumulative buy-and-hold returns on one chart.

    Args:
        df: Frame containing ``Cumulative_Strategy`` and
            ``Cumulative_Buy_Hold``.
    """
    plt.figure(figsize=(12, 6))

    # One line per curve: column to plot plus its legend label and colour.
    line_specs = {
        'Cumulative_Strategy': dict(label='Strategy Returns', color='blue'),
        'Cumulative_Buy_Hold': dict(label='Buy and Hold Returns', color='orange'),
    }
    for column, line_kwargs in line_specs.items():
        plt.plot(df[column], **line_kwargs)

    # Titles, axis labels and decorations.
    plt.title('Cumulative Returns: Strategy vs. Buy and Hold')
    plt.xlabel('Date')
    plt.ylabel('Cumulative Returns')
    plt.legend()
    plt.grid()
    plt.show()

# Main Execution
# Script entry: run the whole pipeline for one symbol over a fixed date range.
# ``ticker`` must stay a module-level name: calculate_indicators and
# signal_generator read it as a global to select the per-ticker price columns.
ticker = 'IWV'
start_date = '2020-01-01'
end_date = '2024-01-01'

# Step 1: download the raw price data.
data = get_data(ticker, start_date, end_date)

# Check the DataFrame contents before calculating indicators
print("Data after retrieval:\n", data.head())
print("Data shape after retrieval:", data.shape)

# Now attempt to calculate indicators
# Steps 2-5: indicators -> entry signals -> trade simulation -> return curves.
# Each stage receives the frame returned by the previous one.
data = calculate_indicators(data)
data = signal_generator(data)
results = backtest_strategy(data)
data = calculate_performance(results, data)
# Step 6: chart strategy vs. buy-and-hold.
plot_performance(data)

输出:

Initial DataFrame head:
 Price         Date   Adj Close       Close        High         Low  \
Ticker                     IWV         IWV         IWV         IWV   
0       2020-01-02  178.028717  189.990005  190.000000  188.740005   
1       2020-01-03  176.848022  188.729996  189.380005  187.850006   
2       2020-01-06  177.541443  189.470001  189.470001  187.630005   
3       2020-01-07  177.044800  188.940002  189.350006  188.649994   
4       2020-01-08  177.944336  189.899994  190.649994  189.009995   

Price         Open  Volume  
Ticker         IWV     IWV  
0       189.490005  206500  
1       187.850006  286500  
2       187.630005  205300  
3       189.179993  185000  
4       189.080002  174700  
Initial DataFrame shape: (1006, 7)
Data after retrieval:
 Price         Date   Adj Close       Close        High         Low  \
Ticker                     IWV         IWV         IWV         IWV   
0       2020-01-02  178.028717  189.990005  190.000000  188.740005   
1       2020-01-03  176.848022  188.729996  189.380005  187.850006   
2       2020-01-06  177.541443  189.470001  189.470001  187.630005   
3       2020-01-07  177.044800  188.940002  189.350006  188.649994   
4       2020-01-08  177.944336  189.899994  190.649994  189.009995   

Price         Open  Volume  
Ticker         IWV     IWV  
0       189.490005  206500  
1       187.850006  286500  
2       187.630005  205300  
3       189.179993  185000  
4       189.080002  174700  
Data shape after retrieval: (1006, 7)
EMA8 shape: (1006,)
EMA14 shape: (1006,)
EMA50 shape: (1006,)
RSI14 shape: (1006,)
StochRSI_K shape: (1006,)
StochRSI_D shape: (1006,)
ATR14 shape: (1006,)

图: enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.