#################################################
# Nikkei SVM Trading Strategy
#################################################
# clear environment
rm(list = ls())
# require libraries
library(xts)
library(ggplot2)
library(grid)
library(gridExtra)
library(dplyr)
library(lazyeval)
library(zoo)
library(tidyquant)
library(rdrop2)
library(httpuv)
library(caret)
library(kernlab)
# load data from Bloomberg consisting of daily prices, 30D RSI and Volume saved in RData file
load("daily_data.RData")
# feature engineering is conducted here
input <- xts()
input$cur_ret <- ROC(prices$PX_LAST)
input$ret1 <- lag(ROC(prices$PX_LAST),1)
input$ret2 <- lag(ROC(prices$PX_LAST),2)
input$ret3 <- lag(ROC(prices$PX_LAST),3)
input$ret4 <- lag(ROC(prices$PX_LAST),4)
input$ret5 <- lag(ROC(prices$PX_LAST),5)
input$rsi <- prices$RSI_30D
input$volume <- prices$PX_VOLUME
input$vol3d <- TTR::volatility(prices$PX_LAST, n=3, N=252)
input$vol5d <- TTR::volatility(prices$PX_LAST, n=5, N=252)
input$vol10d <- TTR::volatility(prices$PX_LAST, n=10, N=252)
input$vol15d <- TTR::volatility(prices$PX_LAST, n=15, N=252)
input$vol20d <- TTR::volatility(prices$PX_LAST, n=20, N=252)
input$vnp1 <- lag(prices$PX_LAST,1) / input$vol20d
input$vnp2 <- lag(prices$PX_LAST,2) / input$vol20d
input$vnp3 <- lag(prices$PX_LAST,3) / input$vol20d
input$vnp4 <- lag(prices$PX_LAST,4) / input$vol20d
input$vnp5 <- lag(prices$PX_LAST,5) / input$vol20d
input$sma10 <- SMA(prices$PX_LAST, n=10)
input$sma50 <- SMA(prices$PX_LAST, n=50)
input$sma100 <- SMA(prices$PX_LAST, n=100)
# clean data and slice timeframe needed
input <- na.omit(input)
input <- input["2013-09-15/"] # select exactly fours years of data
input_original <- input
regime <- input$cur_ret
# select training and testing data
input$cur_ret <- ifelse(input$cur_ret>0,1,-1)
training <- input[1:as.integer(nrow(input)*0.75),]
testing <- input[(nrow(training)+1):nrow(input),]
training_df <- as.data.frame(training)
testing_df <- as.data.frame(testing)
training_predictor <- training_df[, -1]
testing_predictor <- testing_df[, -1]
training_target <- as.factor(training_df$cur_ret)
testing_target <- as.factor(testing_df$cur_ret)
# plot pca variance
pca <- prcomp(training_predictor, scale. = T)
prop_var <- data.frame(num=0:20,var=c(0,summary(pca)$importance[3,]))
p <- ggplot(data = prop_var) +
geom_line(aes(x=num, y=var), color = "blue", size = 1) +
geom_vline(xintercept = 6, linetype = "dashed", size = 1) +
scale_x_continuous(breaks = seq(0,20,2), expand = c(0,0)) +
scale_y_continuous(breaks = c(1:10)/10, limits = c(0,1), labels = scales::percent, expand = c(0,0)) +
scale_color_manual(values = c("Change"="black")) +
theme_tq() +
labs(x="Number of PCA Axes", y = "Cumulative Variance (%)", title = "Proportion of Variance Explained By Varying Number of PCA Factors") +
theme(legend.title = element_blank(), plot.title = element_text(hjust = 0.5, face = "bold"), legend.position = "none")
# insert footnote
grid.newpage()
footnote <- paste("Computations: Golden Compass Quant", sep = "")
g <- arrangeGrob(p, bottom = textGrob(footnote, x = 0, hjust = -0.1, vjust = 0.2, gp = gpar(fontface = "italic", fontsize = 9)))
grid.draw(g)
ggsave(paste("proportion of variance.png", sep = ""),plot = g, width = 8, height = 6, dpi = 300)
# fit SVM
preProc <- preProcess(training_predictor, method="pca", pcaComp = 6)
trainPC <- predict(preProc, training_predictor)
modelFit <- train(x=trainPC, y=training_target, method = "svmPoly")
testPC <- predict(preProc, testing_predictor)
prediction <- predict(modelFit, testPC)
confusionMatrix(testing_target, predict(modelFit, testPC))
# visualizing svm using the same method
test <- trainPC
test$ret <- training_target
m <- ksvm(ret~., data = test, kernel = "polydot", kpar = list(degree = 3, scale = 0.1, offset = 1 ))
plot(m, data=test, grid = 50, slice=c("PC3"=0,"PC4"=0,"PC5"=0,"PC6"=0)) # visualize PC1 and PC2
# backtest
bt <- regime[(nrow(training)+1):nrow(input),]
bt$pred <- as.numeric(levels(prediction))[prediction]
colnames(bt) <- c("actual", "prediction")
bt$cumret <- cumprod((bt$actual * bt$prediction) + 1)
bt$ret <- bt$actual * bt$prediction
# compute drawdowns
bt$dd <- cummax(bt$cumret)
bt$dd <- (bt$cumret - bt$dd) / bt$dd
# add price series
bt$price <- prices["2016-09-20/"]$PX_LAST
# add entry and exit markers
bt$delta <- diff(bt$prediction)
bt$delta[1] <- 0
bt$ent <- ifelse(bt$delta==2,bt$price,NA)
bt$ext <- ifelse(bt$delta==-2,bt$price,NA)
# plot equity curve and drawdowns
p1 <- ggplot(data = bt) +
geom_line(aes(x= index(bt), y=price), color = "black", size = 1) +
geom_point(aes(x= index(bt), y=ent), colour = "green", shape = 2) +
geom_point(aes(x= index(bt), y=ext), colour = "red", shape = 6) +
scale_x_datetime(expand = c(0,0)) +
scale_y_continuous(limits = c(min(bt$price)*.9, max(bt$price)*1.1), expand = c(0,0)) +
theme_tq() +
labs(x="", y="", title = paste("Price Series and Entry/Exit Points", sep = "")) +
theme(legend.title = element_blank(), plot.title = element_text(hjust = 0.5, face = "bold"), legend.position = "right")
p2 <- ggplot(data = bt) +
geom_line(aes(x= index(bt), y=prediction), color = "blue") +
scale_x_datetime(expand = c(0,0)) +
scale_y_continuous(limits = c(-1.2,1.2), breaks = c(-1,0,1), expand = c(0,0)) +
theme_tq() +
labs(x="", y="", title = paste("Positions", sep = "")) +
theme(legend.title = element_blank(), plot.title = element_text(hjust = 0.5, face = "bold"), legend.position = "right")
p3 <- ggplot(data = bt) +
geom_line(aes(x= index(bt), y=cumret), color = "darkgreen", size = 1) +
scale_x_datetime(expand = c(0,0)) +
scale_y_continuous(limits = c(0.9,1.4), expand = c(0,0)) +
geom_hline(yintercept = 1, linetype = "dashed", size=1) +
theme_tq() +
labs(x="", y="", title = paste("Cumulative Equity Holdings", sep = "")) +
theme(legend.title = element_blank(), plot.title = element_text(hjust = 0.5, face = "bold"), legend.position = "right")
p4 <- ggplot(data = bt) +
geom_ribbon(aes(ymin=dd, ymax=0, x=index(bt)), fill="red", alpha = 0.6) +
geom_line(aes(x=index(bt), y=dd), color = "red", size = 1) +
scale_x_datetime(expand = c(0,0)) +
scale_y_continuous(limits = c(-0.1,0), labels = scales::percent, expand = c(0,0)) +
theme_tq() +
labs(x="", y="", title = paste("Drawdowns (%)", sep = "")) +
theme(legend.title = element_blank(), plot.title = element_text(hjust = 0.5, face = "bold"), legend.position = "right")
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)
g3 <- ggplotGrob(p3)
g4 <- ggplotGrob(p4)
grid.newpage()
footnote <- "Computations: Golden Compass Quant"
g <- grid.arrange(arrangeGrob(p1,p2,p3,p4, heights = c(2,1,2,1)), top = textGrob("SVM Strategy - JPX Nikkei 225 Mini Futures", gp = gpar(fontface = "bold", fontsize = 15)),
bottom = textGrob(footnote, x = 0, hjust = -0.1, vjust = 0.2, gp = gpar(fontface = "italic", fontsize = 10)))
grid.draw(g)
ggsave(paste("bt_output_svm.png", sep = ""),plot = g, width = 8, height = 12, dpi = 300)
# one-year return
last(bt$cumret) - first(bt$cumret)
# total number of trades
nrow(bt$delta[bt$delta!=0])
# expectancy
mean(bt$ret)
# extreme returns
max(bt$ret)
min(bt$ret)
# average holding time
mean(diff(index(bt$delta[bt$delta!=0])))
# win rate
success <- bt[,c("prediction","delta","cumret")]
success <- success[success$delta!=0,]
success$trade <- lag(success$prediction)
success$ret <- ROC(success$cumret)
success <- na.omit(success)
success <- success[success$prediction!=0,]
success <- success[success$ret!=0,]
length(success$ret[success$ret>0])/nrow(success)
# expectancy
mean(success$ret)
table.Drawdowns(bt$ret)
# charts.PerformanceSummary(bt$ret)
SharpeRatio(bt$ret)
좋은 글 감사드립니다.
혹시 첫번째 논문 “잘못된 라벨링” 내용중 삼중베리어로 라벨링하는 부분이 있는데 그림믈 봐도 이해가 잘 되지 않는데 설명을 부탁드려도 될는지요?상하 두영역은 알겠는데 수직영역은 어디를 의미하며,이미지에 샘플로 라벨링된 영역이 왜 1,1,1 인지 정말 궁금합니다.감사합니다.