# ファイナンス機械学習: バックテストの統計値 練習問題 — E-mini S&P 500 先物ドルバー
# (Financial Machine Learning: backtest-statistics exercise. Compute the
#  statistics on dollar bars built from E-mini S&P 500 futures.)
import numpy as np
import pandas as pd
from datetime import datetime,timedelta, date
import Bars as bars
from scipy.stats import norm, skew, kurtosis
import matplotlib.pylab as plt
def getBetsTiming(tPos):
    """Return the timestamps at which bets end (position flattened or flipped).

    in: tPos (pd.Series) target positions indexed by timestamp
    Returns: (pd.Index) bet-ending timestamps
    """
    # Flattening: position is zero now but was non-zero one step earlier.
    zero_now = tPos[tPos == 0].index
    prev = tPos.shift(1)
    nonzero_before = prev[prev != 0].index
    bets = zero_now.intersection(nonzero_before)
    # Flips: consecutive positions with opposite signs give a negative product.
    sign_prod = tPos.iloc[1:] * tPos.iloc[:-1].values
    bets = bets.union(sign_prod[sign_prod < 0].index).sort_values()
    # The final observation always closes the last open bet.
    if tPos.index[-1] not in bets:
        bets = bets.append(tPos.index[-1:])
    return bets
def getHoldingPeriod(tPos):
    """Average holding period (in days) via the average-entry-time pairing algorithm.

    in: tPos (pd.Series) target positions indexed by timestamp
    Returns: (float) weight-averaged holding period, or NaN if nothing was ever unwound
    """
    out, avgEntry = pd.DataFrame(columns=['dT', 'w']), .0
    dPos = tPos.diff()
    elapsed = (tPos.index - tPos.index[0]) / np.timedelta64(1, 'D')
    for i in range(1, tPos.shape[0]):
        if dPos.iloc[i] * tPos.iloc[i - 1] >= 0:
            # Position increased or unchanged: update the weighted-average entry time.
            if tPos.iloc[i] != 0:
                avgEntry = (avgEntry * tPos.iloc[i - 1] + elapsed[i] * dPos.iloc[i]) / tPos.iloc[i]
        elif tPos.iloc[i] * tPos.iloc[i - 1] < 0:
            # Position flipped sign: whole previous position was unwound.
            out.loc[tPos.index[i], ['dT', 'w']] = (elapsed[i] - avgEntry, abs(tPos.iloc[i - 1]))
            avgEntry = elapsed[i]  # reset entry time
        else:
            # Position partially reduced: weight by the size of the reduction.
            out.loc[tPos.index[i], ['dT', 'w']] = (elapsed[i] - avgEntry, abs(dPos.iloc[i]))
    if out['w'].sum() > 0:
        return (out['dT'] * out['w']).sum() / out['w'].sum()
    return np.nan
def getHHI(betRet):
    """Normalized Herfindahl-Hirschman concentration index of bet returns.

    in: betRet (pd.Series) returns attributed to individual bets
    Returns: (float) concentration in [0, 1]; NaN when there are 2 or fewer bets
    """
    n = betRet.shape[0]
    if n <= 2:
        return np.nan
    weights = betRet / betRet.sum()
    raw = (weights ** 2).sum()
    # Rescale so a uniform distribution maps to 0 and full concentration to 1.
    return (raw - 1.0 / n) / (1.0 - 1.0 / n)
def computeDD_TuW(series, dollars=False):
    """Compute the drawdown series and the time under water associated with it.

    in: series  : performance series — returns-based (dollars=False) or dollar PnL (dollars=True)
        dollars : indicator characterizing the series
    Returns: dd  : drawdown per high-water mark that was followed by a dip
             tuw : time under water (days) between consecutive high-water marks
    """
    perf = series.to_frame('pnl')
    perf['hwm'] = series.expanding().max()  # running high-water mark
    worst = perf.groupby('hwm').min().reset_index()
    worst.columns = ['hwm', 'min']
    # Index each hwm by the time it was first reached.
    worst.index = perf['hwm'].drop_duplicates(keep='first').index
    worst = worst[worst['hwm'] > worst['min']]  # keep hwms followed by a drawdown
    if dollars:
        dd = worst['hwm'] - worst['min']
    else:
        dd = 1 - worst['min'] / worst['hwm']
    gaps = (worst.index[1:] - worst.index[:-1]) / np.timedelta64(1, 'D')  # in days
    tuw = pd.Series(gaps.values, index=worst.index[:-1])
    return dd, tuw
def getExpectedMaxSR(nTrials, meanSR, stdSR):
#Expected max SR, controlling for SBuMT
emc = 0.477215664901532860606512090082402431042159336 #Euler-Mascheronis Constant
sr0 = (1-emc)*norm.ppf(1-1./nTrials)+emc*norm.ppf(1-(nTrials*np.e)**-1)
sr0 = meanSR + stdSR*sr0
return sr0
def getProbSR(sr, sr_, t, skew=0, kurt=3):
    """Probabilistic Sharpe ratio: P[true SR > sr_] given an observed SR.

    in: sr   : observed (non-annualized) Sharpe ratio
        sr_  : benchmark Sharpe ratio to beat
        t    : number of observations
        skew : skewness of the returns (default 0)
        kurt : kurtosis of the returns (default 3 = Gaussian)
    Returns: (float) probability in [0, 1]
    """
    z = (sr - sr_) * ((t - 1) ** .5)
    z /= (1 - skew * sr + (kurt - 1) / 4. * sr ** 2) ** .5
    # Bug fix: original called ss.norm.cdf, but the file imports
    # `from scipy.stats import norm` and never defines `ss` -> NameError.
    return norm.cdf(z)
def getDeflatedSR(sr, t, Emaxsr, skew=0, kurt=3):
    """Deflated Sharpe ratio: the PSR computed against the expected maximum SR
    (Emaxsr) reached under multiple trials, instead of a zero benchmark."""
    return getProbSR(sr, Emaxsr, t, skew, kurt)
def EXE14_2(ret):
    """Exercise 14.2: print backtest statistics for a bar-return series.

    in: ret (pd.Series) bar-to-bar returns indexed by timestamp
    Side effects: prints HHI concentrations, drawdown/TuW 95th percentiles,
    annualized return, SR, annualized SR, PSR, expected max SR and DSR.
    """
    # (bug fix: dropped the unused `import PSRDSR` project import)
    skewness = skew(ret)
    # NOTE(review): scipy's kurtosis is *excess* kurtosis by default, while the
    # PSR formula's default benchmark is raw kurtosis 3 — confirm intent.
    kurt = kurtosis(ret)
    sr = ret.mean() / ret.std()  # per-bar Sharpe ratio
    # var: assumed variance of trial SRs; K: number of trials for E[max SR].
    # Bug fix: original `var,K,gamma = 0.5, 100` unpacked 3 names from 2 values.
    var, K = 0.5, 100
    HHIPos = getHHI(ret[ret >= 0])  # concentration of positive returns
    HHINeg = getHHI(ret[ret < 0])   # concentration of negative returns
    HHI_H = getHHI(ret.groupby(pd.Grouper(freq='H')).count())  # concentration across hours
    dd, tuw = computeDD_TuW(ret.dropna(), False)
    dd95 = np.quantile(dd, 0.95)
    tuw95 = np.percentile(tuw, 95)
    # Years spanned by the series (was read from the global Dbar; using ret's
    # own index makes the function self-contained).
    yi = (ret.index[-1] - ret.index[0]).days / 365.
    # Bug fix: annualization exponent was `** (-yi)`; compounding over 1/yi years.
    AnnAvrRet = (ret + 1).prod() ** (1. / yi) - 1
    AnnSR = sr * np.sqrt(250)  # assumes ~250 trading periods per year
    # Bug fix: original referenced undefined `V`; the tuple above defines `var`.
    ExpMaxSR = getExpectedMaxSR(K, sr, var ** .5)
    PSR = getProbSR(sr, 0, len(ret), skewness, kurt)
    DSR = getDeflatedSR(sr, len(ret), ExpMaxSR, skewness, kurt)
    print(f' HHIPos:{np.round(HHIPos,5)}')
    print(f' HHINeg:{np.round(HHINeg,5)}')
    print(f' HHI/hour:{np.round(HHI_H,5)}')
    print(f' DD 95th:{np.round(dd95,3)}')
    print(f' TuW 95th:{np.round(tuw95,1)} days')
    print(f' Annualized Average Return:{np.round(AnnAvrRet,3)}')
    print(f' SR:{np.round(sr,3)}')
    print(f' Annualized SR:{np.round(AnnSR,3)}')
    print(f' PSR:{np.round(PSR,3)}')
    # Bug fix: original printed undefined `EmaxSR`.
    print(f' ExpMaxSR:{np.round(ExpMaxSR,3)}')
    print(f' DSR:{np.round(DSR,3)} ')
# ---- Script: load E-mini S&P 500 futures ticks and build dollar bars ----
SP_data = pd.read_csv('SP.csv')
# Keep only rows with traded volume.
SP_data = SP_data[SP_data['volume'] > 0]
# Merge the separate date and time columns into one parseable timestamp string.
SP_data['datetime'] = SP_data['date'] + "/" + SP_data['time']
SP_data['datetime'] = SP_data['datetime'].apply(lambda dt: datetime.strptime(dt, '%m/%d/%Y/%H:%M:%S.%f'))
# Restrict the sample to 2009 onwards.
SP_data=SP_data[SP_data['datetime'] >= datetime(2009, 1, 1)]
SP_data.index=SP_data['datetime']
# Drop the now-redundant raw columns.
SP_data.drop(['date','time'],axis=1, inplace=True)
SP_data.drop(['datetime'],axis=1, inplace=True)
# Sum the volume of ticks sharing an identical timestamp, then keep a single
# row per timestamp (first occurrence).
SP_data.volume=SP_data.groupby(SP_data.index).volume.sum()
SP_data = SP_data[~SP_data.index.duplicated(keep='first')]
# Dollar-bar threshold; NOTE(review): getDollarBars comes from the project-local
# Bars module — assumed to emit one bar per `dv` dollars traded; confirm.
dv=100_000
Dbar=bars.getDollarBars(SP_data,dv)
# Bar-to-bar close returns.  NOTE: EXE14_2 also reads the global Dbar (L101)
# for its year count, so the name `Dbar` must not be changed.
ret=Dbar.Close.pct_change().dropna()
EXE14_2(ret)