Исследование полного лога заявок
Full Order Book – «Стакан» заявок
Исторические данные, содержащие информацию о «жизни» каждой заявки и позволяющие воссоздавать «стакан» заявок на любой момент времени.
Все изменения в данных записаны с точностью до миллисекунд.
Cтакан заявок позволяет:
• Проводить исследования с высокой точностью и анализировать глубину рынка
• Тестировать и налаживать работу HFT алгоритмов
Код
library(ggplot2)
library(data.table)
library(bit64)
options(digits.secs=3)
fname<-"~/repos/DATA/"
setwd(fname)
#Header
# Received;ExchTime;OrderId;Price;Amount;AmountRest;DealId;DealPrice;OI;Flags
fname<-"G:/QSH/RTS/6.20.2020/RTS-6.20.2020-04-07.OrdLog.{1-OrdLog}.txt"
orderlog<-fread(fname,skip=3, sep=";",stringsAsFactors=FALSE, header=FALSE)# nrows=1000000)
header<-c("Received",
"ExchTime",
"OrderId",
"Price",
"Amount",
"AmountRest",
"DealId",
"DealPrice",
"OI",
"Flags")
setnames(orderlog, header)
flags<-c("NonZeroReplAct",
"SessIdChanged",
"Add",
"Fill",
"Buy",
"Sell",
"Quote",
"Counter",
"NonSystem",
"EndOfTransaction",
"FillOrKill",
"Moved",
"Canceled",
"CanceledGroup",
"CrossTrade")
orderlog[,c(flags):= lapply(c(flags), function(x) grepl(x,Flags))]
orderlog[,"Fill" := grepl("Fill,",Flags)]
dtFormat<-"%d.%m.%Y %H:%M:%OS"
orderlog[,"datetime":=as.POSIXct(strptime(ExchTime,dtFormat))]
orderlog<-orderlog[datetime>=as.POSIXct(paste(format(orderlog[.N,datetime], "%Y-%m-%d"),
"10:00:00.000"))]
olCancelled<-orderlog[,oCanceled:=sum(Canceled)>=1 |
sum(CanceledGroup)>=1 |
sum(Moved)>=1,
by=OrderId][oCanceled==TRUE]
olCancelledGr<-olCancelled[,.(.SD[Add==TRUE,datetime],
.SD[Add==TRUE,Buy],.SD[Add==TRUE,Sell],
.SD[Canceled==TRUE | CanceledGroup==TRUE | Moved==TRUE,datetime]-
.SD[Add==TRUE,datetime],.SD[Add==TRUE,Price],
.SD[Add==TRUE,Amount]),by=OrderId]
setnames(olCancelledGr,c("Id","datetime","buy","sell","lifetime", "price","volume"))
olCancelledGr[,lifetime:=as.numeric(lifetime)]
olCancelledGr[,buysell:=ifelse(buy==TRUE,"buy","sell")]
olCancelledGr[lifetime<0.01]
#' Вопросы
#' 1. В какой срок снимается большинство заявок?
olCancelledGr[,.(.N,Vol=sum(volume)),by=.(buysell,lifetime)][order(-N)][N>100000]
olCancelledGr[,.(.N,Vol=sum(volume)),by=.(buysell,lifetime)][order(-Vol)][N>100000]
olCancelledGr[,.(.N),by=.(buysell,lifetime, volume)][order(-N)][N>100000]
ggplot(olCancelledGr[,.N,
by=.(buysell,lifetime)][order(-N)][N>100000])+
geom_bar(aes(round(lifetime,3),weight=N, fill=buysell),position="dodge",width=.001)
#' 2. Как распределена активность снятия завок во времени?
olCancelledGr[lifetime<0.005,.(.N,sum(volume)),by=.(buysell,tid=format(datetime, "%H%M%S"))][order(-N)]
ggplot(data=olCancelledGr[lifetime<0.005,.(.N,Vol=sum(volume)),by=.(buysell,tid=format(datetime, "%H%M%S"))],
aes(x=tid,y=Vol,group=buysell,colour=buysell))+
geom_line()+geom_point()+scale_x_discrete(breaks = seq(100000,230000,10000))
#' 3. Какая привязка к ценам (исполнения, стакана)?
tick<-orderlog[][DealId>0 & EndOfTransaction,.(datetime, DealPrice, Amount), by=DealId]
write.table(tick, file = "G:/QSH/RTS/6.20.2020/Tick/RTS-6.20.2020-04-07.OrdLog.{Deals}.txt",col.names=T,sep = ",",quote = FALSE,row.names = FALSE)
#'
##########NEW################################3
getBA<-function(orderlogDT){
orderlogDT[, Active:=sum(Fill)==0 &
sum(Canceled)==0 &
sum(CrossTrade)==0 &
sum(AmountRest==0)==0, by=OrderId][Active==TRUE,as.list(c(.SD[Buy==TRUE][,sum(AmountRest), by=Price][order(-Price)][1:3,c(Price,V1)],
.SD[Sell==TRUE][,sum(AmountRest), by=Price][order(Price)][1:3,c(Price,V1)]))]
}
startTime<-Sys.time()
#baDT<-orderlog[][,getBA(orderlog[datetime<.BY[[1]]]), by=datetime]
setkey(orderlog, datetime)
baDT<-unique(orderlog, by="datetime",fromLast=TRUE)[,pid:=id][,getBA(orderlog[1:pid,]),by=datetime]
tickDT<-orderlog[][DealId>0 & EndOfTransaction,.(datetime, DealPrice, Amount), by=DealId]
banames<-c("datetime", "bidprice0","bidprice1", "bidprice2",
"bidvolume0","bidvolume1","bidvolume2","askprice0","askprice1","askprice2",
"askvolume0","askvolume1","askvolume2")
setnames(baDT, banames)
Sys.time()-startTime
setkey(tickDT, datetime)
setkey(baDT, datetime)
tbaDT<-baDT[tickDT,roll=T]
library(ggplot2)
ggplot(data=tbaDT)+
geom_line(aes(datetime,DealPrice), colour="darkgrey")+
geom_line(aes(datetime,askprice0), coloordur="lightcoral", alpha=I(0.5))+
geom_line(aes(datetime,bidprice0), colour="mediumaquamarine",alpha=I(0.5))
# makeBidAsk<-function(orderlogrow, depth=3, bytick=TRUE){
# orderbook<<-rbindlist(list(orderbook, orderlogrow))
# if(orderlogrow[,Fill]==bytick){
# orderbook<<-orderbook[, Active:=sum(Fill)==0 &
# sum(Canceled)==0 &
# sum(CrossTrade)==0 &
# sum(AmountRest==0)==0, by=OrderId][Active==TRUE]
#
# cat("\r",paste(100*orderlogrow[,pid]/nrow(orderlog),"%"))
#
# bidaskrow<-c(orderbook[Buy==TRUE][,sum(AmountRest), by=Price][order(-Price)][1:3][,c(t(Price),t(V1))],
# orderbook[Sell==TRUE][,sum(AmountRest),by=Price][order(Price)][1:3][,c(t(Price),t(V1))])
# as.list(bidaskrow)
# #tickbidaskdt<-rbindlist(list(tickbidaskdt, as.list(bidaskrow)))
# }
# }
# orderbook<-data.table()
# tickbidaskdt<-orderlog[,makeBidAsk(.SD, bytick=FALSE), by=id]
# ticks<-orderlog[Fill==TRUE]
banames<-c("id", "bidprice0","bidprice1", "bidprice2",
"bidvolume0","bidvolume1","bidvolume2","askprice0","askprice1","askprice2",
"askvolume0","askvolume1","askvolume2")
setnames(tickbidaskdt, banames)
tickbidaskdt<-cbind(tickbidaskdt,ticks)
tickbidaskdt<-tickbidaskdt[NonSystem!=TRUE]
dtFormat<-"%d.%m.%Y %H:%M:%OS"
tickbidaskdt[,"datetime":=as.POSIXct(strptime(ExchTime,dtFormat))]
tickbidaskdt[,buysell:=ifelse(Buy==TRUE, "Buy", "Sell")]
tbanames<-c("datetime", "DealPrice","Amount","buysell", "bidprice0","bidprice1", "bidprice2",
"bidvolume0","bidvolume1","bidvolume2","askprice0","askprice1","askprice2",
"askvolume0","askvolume1","askvolume2")
dfplaza<-tickbidaskdt[,.SD,.SDcols=tbanames]
dfnames<-c("datetime", "price","volume","buysell", "bidprice0","bidprice1", "bidprice2",
"bidvolume0","bidvolume1","bidvolume2","askprice0","askprice1","askprice2",
"askvolume0","askvolume1","askvolume2")
setnames(dfplaza, dfnames)
rm(tickbidaskdt,ticks)
gc()
dfdate<-format(dfplaza[.N,datetime], "%Y-%m-%d")
downlimit<-as.POSIXct(paste(dfdate,"10:00:00.000"))
uplimit<-as.POSIXct(paste(dfdate,"18:00:00.000"))
dfplaza<-dfplaza[datetime>downlimit & datetime<uplimit]
save(dfplaza, file="dfplaza.RData")