ファイナンス機械学習:バックテストの統計値 練習問題E-mini S&P500先物 ドルバー

E-mini S&P 500 の先物ドルバーを用いて、統計値をとる。

import numpy as np
import pandas as pd
from datetime import datetime,timedelta, date
import Bars as bars
from scipy.stats import norm, skew, kurtosis
import matplotlib.pylab as plt
def getBetsTiming(tPos):
    '''
    Calculates the timestamps of flattening or flipping trades from target positions series.

    in: tPos (pd.Series) target positions

    Returns: (pd.Index): bets timing
    '''
    # Flattening: position is zero now but was non-zero on the previous bar.
    zero_now = tPos[tPos == 0].index
    prev_pos = tPos.shift(1)
    nonzero_before = prev_pos[prev_pos != 0].index
    bets = zero_now.intersection(nonzero_before)
    # Flips: consecutive positions of opposite sign (their product is negative).
    sign_prod = tPos.iloc[1:] * tPos.iloc[:-1].values
    bets = bets.union(sign_prod[sign_prod < 0].index).sort_values()
    # The final timestamp always closes the last open bet.
    if tPos.index[-1] not in bets:
        bets = bets.append(tPos.index[-1:])
    return bets

def getHoldingPeriod(tPos):
    '''
    Derives average holding period (in days) using average entry time pairing algo.
    
    in:tPos (pd.Series) target positions
        
    Returns: (float): holding period
    '''
    # hp collects (duration, weight) rows for each full or partial position close;
    # tEntry tracks the size-weighted average entry time (days since series start)
    # of the currently open position.
    hp, tEntry = pd.DataFrame(columns=['dT', 'w']), .0
    # pDiff: bar-to-bar change in position; tDiff: elapsed days since first timestamp.
    pDiff, tDiff = tPos.diff(), (tPos.index - tPos.index[0]) / np.timedelta64(1, 'D')
    for i in range(1, tPos.shape[0]):
        if pDiff.iloc[i] * tPos.iloc[i - 1] >= 0:    # increased or unchanged
            if tPos.iloc[i] != 0:
                # Blend the new lot's entry time into the weighted-average entry time.
                tEntry = (tEntry * tPos.iloc[i - 1] + tDiff[i] * pDiff.iloc[i]) / tPos.iloc[i]
        else:    # decreased
            if tPos.iloc[i] * tPos.iloc[i-1] < 0:    # flip
                # Entire previous position closed: record its holding time,
                # weighted by the full previous size.
                hp.loc[tPos.index[i], ['dT', 'w']] = (tDiff[i] - tEntry, abs(tPos.iloc[i - 1]))
                tEntry = tDiff[i]    # reset entry time
            else:
                # Partial close: holding time weighted by the size that was reduced.
                hp.loc[tPos.index[i], ['dT', 'w']] = (tDiff[i] - tEntry, abs(pDiff.iloc[i]))
    # Size-weighted average of recorded holding periods; NaN if nothing was ever closed.
    if hp['w'].sum() > 0:
        hp = (hp['dT'] * hp['w']).sum() / hp['w'].sum()
    else:
        hp = np.nan
    return hp

def getHHI(betRet):
    '''
    Normalized Herfindahl-Hirschman concentration index of bet returns.

    in: betRet (pd.Series) returns attributed to individual bets

    Returns: (float) concentration in [0, 1]; NaN with fewer than 3 observations
    '''
    n = betRet.shape[0]
    if n <= 2:
        return np.nan
    # Each bet's share of total return, squared and summed.
    shares = betRet / betRet.sum()
    raw_hhi = (shares ** 2).sum()
    # Rescale so a uniform distribution maps to 0 and full concentration to 1.
    return (raw_hhi - 1.0 / n) / (1.0 - 1.0 / n)

def computeDD_TuW(series, dollars=False):
    '''
    Computes series of drawdowns and the time under water associated with them.

    in: series : series with either returns (dollars=False) or dollar performance (dollars=True)
        dollars : indicator characterizing series

    Returns: dd : drawdown series, indexed by the time of each high-water mark
             tuw : time under water series, in days
    '''
    df0 = series.to_frame('pnl')
    df0['hwm'] = series.expanding().max()    # running high-water mark
    # Worst level reached under each high-water mark.
    df1 = df0.groupby('hwm').min().reset_index()
    df1.columns = ['hwm', 'min']
    df1.index = df0['hwm'].drop_duplicates(keep='first').index    # time of hwm
    df1 = df1[df1['hwm'] > df1['min']]    # hwm followed by a drawdown
    if dollars:
        dd = df1['hwm'] - df1['min']    # absolute drawdown
    else:
        dd = 1 - df1['min'] / df1['hwm']    # fractional drawdown
    # Time under water: days between consecutive high-water marks.
    tuw = ((df1.index[1:] - df1.index[:-1]) / np.timedelta64(1, 'D')).values    # in days
    tuw = pd.Series(tuw, index=df1.index[:-1])
    return dd, tuw

def getExpectedMaxSR(nTrials, meanSR, stdSR):
    '''
    Expected maximum Sharpe ratio across nTrials trials, controlling for
    selection bias under multiple testing (SBuMT).

    in: nTrials : number of independent trials performed
        meanSR : mean Sharpe ratio across trials
        stdSR : standard deviation of the trials' Sharpe ratios

    Returns: (float) expected maximum Sharpe ratio
    '''
    # Euler-Mascheroni constant. Fixed: original had 0.4772..., a typo for 0.5772...
    emc = 0.577215664901532860606512090082402431042159336
    sr0 = (1 - emc) * norm.ppf(1 - 1. / nTrials) + emc * norm.ppf(1 - (nTrials * np.e) ** -1)
    sr0 = meanSR + stdSR * sr0
    return sr0
def getProbSR(sr, sr_, t, skew=0, kurt=3):
    '''
    Probabilistic Sharpe Ratio: probability that the true SR exceeds sr_.

    in: sr : observed Sharpe ratio (non-annualized)
        sr_ : benchmark Sharpe ratio to beat
        t : number of return observations
        skew : skewness of returns (0 for normal)
        kurt : kurtosis of returns (3 for normal)

    Returns: (float) probability estimate in [0, 1]
    '''
    z = (sr - sr_) * ((t - 1) ** .5)
    # Adjust for non-normality: higher moments widen the SR sampling distribution.
    z /= (1 - skew * sr + (kurt - 1) / 4. * sr ** 2) ** .5
    # Fixed: original called ss.norm.cdf but `ss` was never defined (NameError);
    # the file imports `from scipy.stats import norm`.
    return norm.cdf(z)
def getDeflatedSR(sr, t, Emaxsr, skew=0, kurt=3):
    '''
    Deflated Sharpe Ratio: the PSR evaluated against the expected
    maximum SR (Emaxsr) as the benchmark.
    '''
    return getProbSR(sr, Emaxsr, t, skew, kurt)

def EXE14_2(ret):
    '''
    Exercise 14.2: print backtest statistics for a returns series.

    in: ret (pd.Series) bar returns with a DatetimeIndex

    Side effects: prints HHI concentrations, drawdown/TuW 95th percentiles,
    annualized average return, SR, annualized SR, PSR, expected max SR and DSR.
    '''
    skewness = skew(ret)
    kurt = kurtosis(ret)
    sr = ret.mean() / ret.std()
    # var: assumed variance of trial SRs; K: number of trials for SBuMT control.
    # Fixed: original `var,K,gamma = 0.5, 100` unpacked 2 values into 3 names (ValueError).
    var, K = 0.5, 100
    HHIPos = getHHI(ret[ret >= 0])    # concentration of positive returns
    HHINeg = getHHI(ret[ret < 0])     # concentration of negative returns
    HHI_H = getHHI(ret.groupby(pd.Grouper(freq='H')).count())    # concentration across hours
    dd, tuw = computeDD_TuW(ret.dropna(), False)
    dd95 = np.quantile(dd, 0.95)
    tuw95 = np.percentile(tuw, 95)
    # Years in sample, from the returns' own index (original read the global Dbar).
    yi = (ret.index[-1] - ret.index[0]).days / 365.
    # Fixed: annualization exponent was -yi; geometric annualization uses 1/yi.
    AnnAvrRet = (ret + 1).prod() ** (1. / yi) - 1
    AnnSR = sr * np.sqrt(250)
    ExpMaxSR = getExpectedMaxSR(K, sr, var ** .5)    # fixed: `V` was undefined
    PSR = getProbSR(sr, 0, len(ret), skewness, kurt)
    DSR = getDeflatedSR(sr, len(ret), ExpMaxSR, skewness, kurt)
    print(f' HHIPos:{np.round(HHIPos,5)}')
    print(f' HHINeg:{np.round(HHINeg,5)}')
    print(f' HHI/hour:{np.round(HHI_H,5)}' )
    print(f' DD 95th:{np.round(dd95,3)}')
    print(f' TuW 95th:{np.round(tuw95,1)} days')
    print(f' Annualized Average Return:{np.round(AnnAvrRet,3)}')
    print(f' SR:{np.round(sr,3)}')
    print(f' Annualized SR:{np.round(AnnSR,3)}')
    print(f' PSR:{np.round(PSR,3)}')
    print(f' ExpMaxSR:{np.round(ExpMaxSR,3)}')    # fixed: `EmaxSR` was undefined
    print(f' DSR:{np.round(DSR,3)} ')
# --- Script: load E-mini S&P 500 tick data, build dollar bars, print statistics ---
# Reads SP.csv; usage below shows it has at least 'date', 'time' and 'volume' columns.
SP_data = pd.read_csv('SP.csv')
SP_data = SP_data[SP_data['volume'] > 0]    # drop zero-volume rows
# Join date and time into one parseable string, e.g. "01/02/2009/09:30:00.000".
SP_data['datetime'] = SP_data['date'] + "/" + SP_data['time']
SP_data['datetime'] = SP_data['datetime'].apply(lambda dt: datetime.strptime(dt, '%m/%d/%Y/%H:%M:%S.%f'))
SP_data=SP_data[SP_data['datetime'] >= datetime(2009, 1, 1)]    # restrict sample to 2009 onward

SP_data.index=SP_data['datetime']
SP_data.drop(['date','time'],axis=1, inplace=True)
SP_data.drop(['datetime'],axis=1, inplace=True)
# Sum the volume of trades sharing a timestamp, then keep one row per timestamp.
SP_data.volume=SP_data.groupby(SP_data.index).volume.sum()
SP_data = SP_data[~SP_data.index.duplicated(keep='first')]
dv=100_000    # dollar value threshold per bar
Dbar=bars.getDollarBars(SP_data,dv)    # project helper (Bars module) — builds dollar bars
ret=Dbar.Close.pct_change().dropna()    # bar-to-bar close returns
EXE14_2(ret)
S&P500

いいなと思ったら応援しよう!