Исследование полного лога заявок
R
Full Order Book – «Стакан» заявок
Исторические данные, содержащие информацию о «жизни» каждой заявки и позволяющие воссоздавать «стакан» заявок на любой момент времени.
Все изменения в данных записаны с точностью до миллисекунд.
Стакан заявок позволяет:
• Проводить исследования с высокой точностью и анализировать глубину рынка
• Тестировать и налаживать работу HFT алгоритмов
Код
library(ggplot2)
library(data.table)
library(bit64)
# Keep millisecond precision when timestamps are parsed and printed.
options(digits.secs = 3)

# Working directory; relative output paths later in the script resolve here.
fname <- "~/repos/DATA/"
setwd(fname)

# Column layout of the order-log file (semicolon separated):
# Received;ExchTime;OrderId;Price;Amount;AmountRest;DealId;DealPrice;OI;Flags
fname <- "G:/QSH/RTS/6.20.2020/RTS-6.20.2020-04-07.OrdLog.{1-OrdLog}.txt"

# The first three lines of the file are skipped and no header row is read.
# Add `nrows = 1000000` to the call to load only a sample.
orderlog <- fread(fname, skip = 3, sep = ";", stringsAsFactors = FALSE,
                  header = FALSE)
# Column names for the order log, in file order (the file is read without a
# header row, so the names are assigned here).
header <- c(
  "Received",
  "ExchTime",
  "OrderId",
  "Price",
  "Amount",
  "AmountRest",
  "DealId",
  "DealPrice",
  "OI",
  "Flags"
)
setnames(orderlog, header)
# Flag names that may appear inside the textual `Flags` column.
flags <- c(
  "NonZeroReplAct",
  "SessIdChanged",
  "Add",
  "Fill",
  "Buy",
  "Sell",
  "Quote",
  "Counter",
  "NonSystem",
  "EndOfTransaction",
  "FillOrKill",
  "Moved",
  "Canceled",
  "CanceledGroup",
  "CrossTrade"
)

# Add one logical column per flag: TRUE when the flag name occurs in `Flags`.
orderlog[, c(flags) := lapply(c(flags), function(x) grepl(x, Flags))]

# The plain pattern "Fill" also matches "FillOrKill", so `Fill` is recomputed
# with the trailing comma to tell the two apart.
# NOTE(review): "Canceled" likewise matches "CanceledGroup", and "Fill," misses
# a `Fill` flag that is last in the list - confirm against the file format.
orderlog[, "Fill" := grepl("Fill,", Flags)]
# Parse the exchange timestamp (day.month.year, fractional seconds) to POSIXct.
dtFormat <- "%d.%m.%Y %H:%M:%OS"
orderlog[, "datetime" := as.POSIXct(strptime(ExchTime, dtFormat))]

# Keep only rows from 10:00:00 onwards on the calendar date of the last row.
orderlog <- orderlog[
  datetime >= as.POSIXct(paste(format(orderlog[.N, datetime], "%Y-%m-%d"),
                               "10:00:00.000"))
]
# Mark every order (all rows sharing an OrderId) that has at least one
# Canceled, CanceledGroup or Moved row, then keep only the rows of such orders.
# `:=` also adds the `oCanceled` column to `orderlog` itself by reference.
olCancelled <- orderlog[
  ,
  oCanceled := sum(Canceled) >= 1 |
    sum(CanceledGroup) >= 1 |
    sum(Moved) >= 1,
  by = OrderId
][oCanceled == TRUE]
# One summary per cancelled order: when it was added, its side, how long it
# lived (time of the Canceled/CanceledGroup/Moved row minus time of the Add
# row), and its price and size at the Add row.
# The lifetime is computed with explicit units: plain POSIXct subtraction picks
# seconds/minutes/hours automatically per group, which would mix units once
# the groups are combined and converted to numeric.
olCancelledGr <- olCancelled[
  ,
  .(
    .SD[Add == TRUE, datetime],
    .SD[Add == TRUE, Buy],
    .SD[Add == TRUE, Sell],
    difftime(
      .SD[Canceled == TRUE | CanceledGroup == TRUE | Moved == TRUE, datetime],
      .SD[Add == TRUE, datetime],
      units = "secs"
    ),
    .SD[Add == TRUE, Price],
    .SD[Add == TRUE, Amount]
  ),
  by = OrderId
]

setnames(
  olCancelledGr,
  c("Id", "datetime", "buy", "sell", "lifetime", "price", "volume")
)

# Lifetime as a plain number of seconds.
olCancelledGr[, lifetime := as.numeric(lifetime)]
olCancelledGr[, buysell := ifelse(buy == TRUE, "buy", "sell")]

# Show orders that lived less than 10 ms.
olCancelledGr[lifetime < 0.01]
#' Questions
#' 1. How quickly are most orders cancelled?

# Order count and total volume per (side, lifetime), most frequent first.
olCancelledGr[
  , .(.N, Vol = sum(volume)), by = .(buysell, lifetime)
][order(-N)][N > 100000]

# Same aggregation, largest total volume first.
olCancelledGr[
  , .(.N, Vol = sum(volume)), by = .(buysell, lifetime)
][order(-Vol)][N > 100000]

# Order count per (side, lifetime, order size).
olCancelledGr[
  , .(.N), by = .(buysell, lifetime, volume)
][order(-N)][N > 100000]

# Bar chart of the most frequent lifetimes, split by side.
ggplot(
  olCancelledGr[, .N, by = .(buysell, lifetime)][order(-N)][N > 100000]
) +
  geom_bar(
    aes(round(lifetime, 3), weight = N, fill = buysell),
    position = "dodge",
    width = .001
  )
#' 2. How is order-cancellation activity distributed over time?

# Orders cancelled within 5 ms, counted per side and per second (HHMMSS).
olCancelledGr[
  lifetime < 0.005,
  .(.N, sum(volume)),
  by = .(buysell, tid = format(datetime, "%H%M%S"))
][order(-N)]

# `tid` is a character column, so the axis breaks are passed as strings too;
# numeric breaks such as 1e+05 would not match the "100000" label.
ggplot(
  data = olCancelledGr[
    lifetime < 0.005,
    .(.N, Vol = sum(volume)),
    by = .(buysell, tid = format(datetime, "%H%M%S"))
  ],
  aes(x = tid, y = Vol, group = buysell, colour = buysell)
) +
  geom_line() +
  geom_point() +
  scale_x_discrete(
    breaks = sprintf("%d", as.integer(seq(100000, 230000, 10000)))
  )
#' 3. How do cancellations relate to prices (trade prices, order-book prices)?

# Trades: rows that carry a deal id and close a transaction.
tick <- orderlog[
  DealId > 0 & EndOfTransaction,
  .(datetime, DealPrice, Amount),
  by = DealId
]

# Export the trades as a comma-separated file with a header row.
write.table(
  tick,
  file = "G:/QSH/RTS/6.20.2020/Tick/RTS-6.20.2020-04-07.OrdLog.{Deals}.txt",
  col.names = TRUE,
  sep = ",",
  quote = FALSE,
  row.names = FALSE
)
#'
##########NEW################################3
#' Top three bid and ask levels of the order book built from an order log
#'
#' An order (all rows sharing an `OrderId`) counts as active when none of its
#' rows is a Fill, Canceled or CrossTrade row and none has `AmountRest == 0`.
#' The remaining amounts of active orders are summed per price.
#'
#' @param orderlogDT A data.table with columns `OrderId`, `Price`,
#'   `AmountRest` and the logical columns `Fill`, `Canceled`, `CrossTrade`,
#'   `Buy`, `Sell`. An `Active` column is added to it by reference.
#' @return A one-row data.table with 12 unnamed columns, in this order: the
#'   three highest bid prices, their volumes, the three lowest ask prices,
#'   their volumes. Rows `1:3` are taken unconditionally, so sides with fewer
#'   than three price levels are padded with `NA`.
getBA <- function(orderlogDT) {
  orderlogDT[
    ,
    Active := sum(Fill) == 0 &
      sum(Canceled) == 0 &
      sum(CrossTrade) == 0 &
      sum(AmountRest == 0) == 0,
    by = OrderId
  ][
    Active == TRUE,
    as.list(c(
      # Bids: best (highest) price first.
      .SD[Buy == TRUE][
        , sum(AmountRest), by = Price
      ][order(-Price)][1:3, c(Price, V1)],
      # Asks: best (lowest) price first.
      .SD[Sell == TRUE][
        , sum(AmountRest), by = Price
      ][order(Price)][1:3, c(Price, V1)]
    ))
  ]
}
startTime <- Sys.time()

# Earlier variant, kept for reference:
# baDT<-orderlog[][,getBA(orderlog[datetime<.BY[[1]]]), by=datetime]
setkey(orderlog, datetime)

# `pid` below is used as a row position in the keyed table. No `id` column is
# created anywhere else in this script, so create it here when it is missing.
if (!"id" %in% names(orderlog)) {
  orderlog[, id := .I]
}

# For the last row of every distinct timestamp, rebuild the book from all rows
# up to and including that row and take the top three levels of each side.
baDT <- unique(orderlog, by = "datetime", fromLast = TRUE)[
  , pid := id
][
  , getBA(orderlog[seq_len(pid), ]), by = datetime
]

# Trades: rows that carry a deal id and close a transaction.
tickDT <- orderlog[
  DealId > 0 & EndOfTransaction,
  .(datetime, DealPrice, Amount),
  by = DealId
]

banames <- c(
  "datetime",
  "bidprice0", "bidprice1", "bidprice2",
  "bidvolume0", "bidvolume1", "bidvolume2",
  "askprice0", "askprice1", "askprice2",
  "askvolume0", "askvolume1", "askvolume2"
)
setnames(baDT, banames)

# Elapsed time of the book reconstruction.
Sys.time() - startTime

setkey(tickDT, datetime)
setkey(baDT, datetime)

# Rolling join: attach to every trade the latest book snapshot at or before it.
tbaDT <- baDT[tickDT, roll = TRUE]
library(ggplot2)

# Trade price over time together with the best ask and best bid.
# The ask line previously used a misspelled `colour` argument, which ggplot2
# does not recognise, so the intended colour was not applied.
ggplot(data = tbaDT) +
  geom_line(aes(datetime, DealPrice), colour = "darkgrey") +
  geom_line(aes(datetime, askprice0), colour = "lightcoral",
            alpha = I(0.5)) +
  geom_line(aes(datetime, bidprice0), colour = "mediumaquamarine",
            alpha = I(0.5))
# makeBidAsk<-function(orderlogrow, depth=3, bytick=TRUE){
# orderbook<<-rbindlist(list(orderbook, orderlogrow))
# if(orderlogrow[,Fill]==bytick){
# orderbook<<-orderbook[, Active:=sum(Fill)==0 &
# sum(Canceled)==0 &
# sum(CrossTrade)==0 &
# sum(AmountRest==0)==0, by=OrderId][Active==TRUE]
#
# cat("\r",paste(100*orderlogrow[,pid]/nrow(orderlog),"%"))
#
# bidaskrow<-c(orderbook[Buy==TRUE][,sum(AmountRest), by=Price][order(-Price)][1:3][,c(t(Price),t(V1))],
# orderbook[Sell==TRUE][,sum(AmountRest),by=Price][order(Price)][1:3][,c(t(Price),t(V1))])
# as.list(bidaskrow)
# #tickbidaskdt<-rbindlist(list(tickbidaskdt, as.list(bidaskrow)))
# }
# }
# orderbook<-data.table()
# tickbidaskdt<-orderlog[,makeBidAsk(.SD, bytick=FALSE), by=id]
# ticks<-orderlog[Fill==TRUE]
# NOTE(review): `tickbidaskdt` and `ticks` are only assigned in commented-out
# code in this script; they must exist before this section is run.
banames <- c(
  "id",
  "bidprice0", "bidprice1", "bidprice2",
  "bidvolume0", "bidvolume1", "bidvolume2",
  "askprice0", "askprice1", "askprice2",
  "askvolume0", "askvolume1", "askvolume2"
)
setnames(tickbidaskdt, banames)

# Put the book snapshot columns next to the tick columns, row by row.
tickbidaskdt <- cbind(tickbidaskdt, ticks)

# Drop rows flagged NonSystem.
tickbidaskdt <- tickbidaskdt[NonSystem != TRUE]

# Parse the exchange timestamp (day.month.year, fractional seconds).
dtFormat <- "%d.%m.%Y %H:%M:%OS"
tickbidaskdt[, "datetime" := as.POSIXct(strptime(ExchTime, dtFormat))]

# Side as text.
tickbidaskdt[, buysell := ifelse(Buy == TRUE, "Buy", "Sell")]
# Columns to keep from `tickbidaskdt`, in output order.
tbanames <- c(
  "datetime", "DealPrice", "Amount", "buysell",
  "bidprice0", "bidprice1", "bidprice2",
  "bidvolume0", "bidvolume1", "bidvolume2",
  "askprice0", "askprice1", "askprice2",
  "askvolume0", "askvolume1", "askvolume2"
)
dfplaza <- tickbidaskdt[, .SD, .SDcols = tbanames]

# Same columns under their output names: DealPrice -> price, Amount -> volume.
dfnames <- c(
  "datetime", "price", "volume", "buysell",
  "bidprice0", "bidprice1", "bidprice2",
  "bidvolume0", "bidvolume1", "bidvolume2",
  "askprice0", "askprice1", "askprice2",
  "askvolume0", "askvolume1", "askvolume2"
)
setnames(dfplaza, dfnames)

# Release the intermediate tables.
rm(tickbidaskdt, ticks)
gc()
# Keep only rows strictly between 10:00 and 18:00 on the calendar date of the
# last row, then store the result in the working directory.
dfdate <- format(dfplaza[.N, datetime], "%Y-%m-%d")
downlimit <- as.POSIXct(paste(dfdate, "10:00:00.000"))
uplimit <- as.POSIXct(paste(dfdate, "18:00:00.000"))
dfplaza <- dfplaza[datetime > downlimit & datetime < uplimit]

save(dfplaza, file = "dfplaza.RData")