Skip to content

Commit 9cd41e5

Browse files
authored
Merge pull request #227 from Arthur-Null/high-freq-execution
High freq execution
2 parents e23022e + ebbbec2 commit 9cd41e5

10 files changed

Lines changed: 524 additions & 15 deletions

File tree

examples/trade/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Universal Trading for Order Execution with Oracle Policy Distillation
2+
This is the experiment code for our AAAI 2021 paper "[Universal Trading for Order Execution with Oracle Policy Distillation](https://seqml.github.io/opd/opd_aaai21.pdf)", including the implementations of all the compared methods in the paper and a general reinforcement learning framework for order execution in quantitative finance.
3+
4+
## Abstract
5+
As a fundamental problem in algorithmic trading, order execution aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument. Towards effective execution strategy, recent years have witnessed the shift from the analytical view with model-based market assumptions to model-free perspective, i.e., reinforcement learning, due to its nature of sequential decision optimization. However, the noisy and yet imperfect market information that can be leveraged by the policy has made it quite challenging to build up sample efficient reinforcement learning methods to achieve effective order execution. In this paper, we propose a novel universal trading policy optimization framework to bridge the gap between the noisy yet imperfect market states and the optimal action sequences for order execution. Particularly, this framework leverages a policy distillation method that can better guide the learning of the common policy towards practically optimal execution by an oracle teacher with perfect information to approximate the optimal trading strategy. The extensive experiments have shown significant improvements of our method over various strong baselines, with reasonable trading actions.
6+
7+
### Citation
8+
You are more than welcome to cite our paper:
9+
```
10+
@inproceedings{fang2021universal,
11+
title={Universal Trading for Order Execution with Oracle Policy Distillation},
12+
author={Fang, Yuchen and Ren, Kan and Liu, Weiqing and Zhou, Dong and Zhang, Weinan and Bian, Jiang and Yu, Yong and Liu, Tie-Yan},
13+
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
14+
year={2021}
15+
}
16+
```

examples/trade/agent/basic.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from joblib import Parallel, delayed
2-
from numba import njit, prange
31
from tianshou.policy import BasePolicy
42
from tianshou.data import Batch
53
import numpy as np

examples/trade/executor.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import json
66
import os
77
import agent
8-
import model
8+
import network
99
import policy
1010
import random
1111
import tianshou as ts
@@ -48,7 +48,15 @@ def setup_seed(seed):
4848

4949
class BaseExecutor(object):
5050
def __init__(
51-
self, log_dir, resources, env_conf, optim=None, policy_conf=None, network=None, policy_path=None, seed=None,
51+
self,
52+
log_dir,
53+
resources,
54+
env_conf,
55+
optim=None,
56+
policy_conf=None,
57+
network_conf=None,
58+
policy_path=None,
59+
seed=None,
5260
):
5361
"""A base class for executor
5462
@@ -62,8 +70,8 @@ def __init__(
6270
:type optim: dict, optional
6371
:param policy_conf: Configurations for the RL algorithm, defaults to None
6472
:type policy_conf: dict, optional
65-
:param network: Configurations for policy network, defaults to None
66-
:type network: dict, optional
73+
:param network_conf: Configurations for the policy network, defaults to None
74+
:type network_conf: dict, optional
6775
:param policy_path: If is not None, would load the policy from this path, defaults to None
6876
:type policy_path: string, optional
6977
:param seed: Random seed, defaults to None
@@ -90,17 +98,23 @@ def __init__(
9098
self.policy = getattr(agent, policy_conf["name"])(policy_conf["config"])
9199
# print(self.policy)
92100
else:
93-
assert not network is None
94-
if "extractor" in network.keys():
95-
net = getattr(model, network["extractor"]["name"] + "_Extractor")(
96-
device=self.device, **network["config"]
101+
assert not network_conf is None
102+
if "extractor" in network_conf.keys():
103+
net = getattr(network, network_conf["extractor"]["name"] + "_Extractor")(
104+
device=self.device, **network_conf["config"]
97105
)
98106
else:
99-
net = getattr(model, network["name"] + "_Extractor")(device=self.device, **network["config"])
107+
net = getattr(network, network_conf["name"] + "_Extractor")(
108+
device=self.device, **network_conf["config"]
109+
)
100110
net.to(self.device)
101-
actor = getattr(model, network["name"] + "_Actor")(extractor=net, device=self.device, **network["config"])
111+
actor = getattr(network, network_conf["name"] + "_Actor")(
112+
extractor=net, device=self.device, **network_conf["config"]
113+
)
102114
actor.to(self.device)
103-
critic = getattr(model, network["name"] + "_Critic")(extractor=net, device=self.device, **network["config"])
115+
critic = getattr(network, network_conf["name"] + "_Critic")(
116+
extractor=net, device=self.device, **network_conf["config"]
117+
)
104118
critic.to(self.device)
105119
self.optim = torch.optim.Adam(
106120
list(actor.parameters()) + list(critic.parameters()),
@@ -180,7 +194,7 @@ def __init__(
180194
io_conf,
181195
optim=None,
182196
policy_conf=None,
183-
network=None,
197+
network_conf=None,
184198
policy_path=None,
185199
seed=None,
186200
share_memory=False,
@@ -210,7 +224,7 @@ def __init__(
210224
:param buffer_size: The size of replay buffer, defaults to 200000
211225
:type buffer_size: int, optional
212226
"""
213-
super().__init__(log_dir, resources, env_conf, optim, policy_conf, network, policy_path, seed)
227+
super().__init__(log_dir, resources, env_conf, optim, policy_conf, network_conf, policy_path, seed)
214228
single_env = getattr(env, env_conf["name"])
215229
env_conf = merge_dicts(env_conf, train_paths)
216230
env_conf["log"] = True

examples/trade/network/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .ppo import *
2+
from .qmodel import *
3+
from .teacher import *
4+
from .util import *
5+
from .opd import *

examples/trade/network/opd.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import torch
2+
import numpy as np
3+
from torch import nn
4+
import torch.nn.functional as F
5+
from copy import deepcopy
6+
import sys
7+
8+
from tianshou.data import to_torch
9+
10+
11+
class OPD_Extractor(nn.Module):
    """Shared feature extractor for the OPD (Oracle Policy Distillation) student policy.

    The flat observation vector is interpreted as:
      * element 0 — the teacher (oracle) action, forwarded (halved) to the heads;
      * the next 6 * 240 elements — raw intraday market data, encoded by a
        1-D CNN over 30-step windows followed by a GRU;
      * the remaining elements except the last — per-step 2-value private-state
        pairs, encoded by an MLP followed by a second GRU;
      * the last element — the current step index, used to select each sample's
        GRU output.

    NOTE(review): the 240/30/6 reshape constants in ``forward`` implicitly
    assume ``cnn_shape == [30, 6]``; other shapes would break the reshape —
    confirm against the experiment configs.
    """

    def __init__(self, device="cpu", **kargs):
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        fc_size = kargs["fc_size"]  # required by the config schema, unused below
        self.cnn_shape = kargs["cnn_shape"]

        # Construction order kept identical to the original so that seeded
        # parameter initialisation reproduces the same weights.
        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU())
        self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
        )

    def forward(self, inp):
        """Return ``(feature, teacher_action / 2)`` for a batch of observations."""
        obs = to_torch(inp, dtype=torch.float32, device=self.device)
        oracle_action = obs[:, 0]
        obs = obs[:, 1:]
        step = obs[:, -1].to(torch.long)
        batch = obs.shape[0]

        # Market branch: left-pad one empty 30-step window, then run
        # CNN -> FC -> GRU over the resulting 9 windows.
        market = torch.cat((torch.zeros_like(obs[:, : 6 * 30]), obs[:, : 6 * 240]), dim=-1)
        market = market.reshape(-1, 30, 6).transpose(1, 2)
        market = self.rnn(self.raw_fc(self.cnn(market).view(batch, 9, -1)))[0]

        # Private-state branch: MLP -> GRU over the per-step pairs.
        private = self.rnn2(self.dnn(obs[:, 6 * 240 : -1].reshape(batch, -1, 2)))[0]

        # Select each sample's hidden state at its current step, then fuse.
        rows = torch.arange(batch)
        feature = self.fc(torch.cat((market[rows, step], private[rows, step]), dim=-1))
        return feature, oracle_action / 2
50+
51+
52+
class OPD_Actor(nn.Module):
    """Actor head for OPD.

    Maps the 32-dim extractor feature to a softmax distribution over
    ``out_shape`` actions.  The teacher action returned by the extractor is
    cached on ``self.teacher_action`` so the distillation loss can read it
    after each forward pass.
    """

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
        self.device = device

    def forward(self, obs, state=None, info={}):
        # Side effect: remember the oracle action for the distillation loss.
        feature, self.teacher_action = self.extractor(obs)
        return self.layer_out(feature), state
63+
64+
65+
class OPD_Critic(nn.Module):
    """Critic head for OPD.

    Maps the 32-dim extractor feature to a scalar state value.  The teacher
    action from the extractor is cached on ``self.teacher_action`` as a side
    effect, mirroring the actor.
    """

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.value_out = nn.Linear(32, 1)
        self.device = device

    def forward(self, obs, state=None, info={}):
        feature, self.teacher_action = self.extractor(obs)
        # Squeeze the trailing unit dim so the value is shaped (batch,).
        return self.value_out(feature).squeeze(dim=-1)

examples/trade/network/ppo.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import torch
2+
import numpy as np
3+
from torch import nn
4+
import torch.nn.functional as F
5+
from copy import deepcopy
6+
import sys
7+
8+
from tianshou.data import to_torch
9+
10+
11+
class PPO_Extractor(nn.Module):
    """Feature extractor for the plain PPO policy.

    Same two-branch architecture as the OPD extractor but without a teacher
    action: a CNN + GRU over the raw market data and an MLP + GRU over the
    last nine private-state pairs, fused by a two-layer MLP into a 32-dim
    feature.  The latest feature is also kept on ``self.feature``.

    NOTE(review): ``forward`` hard-codes 240/30/6 and the 18-element private
    slice (``-19:-1``); these assume ``cnn_shape == [30, 6]`` — confirm
    against the experiment configs.
    """

    def __init__(self, device="cpu", **kargs):
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        fc_size = kargs["fc_size"]  # required by the config schema, unused below
        self.cnn_shape = kargs["cnn_shape"]

        # Construction order kept identical to the original so that seeded
        # parameter initialisation reproduces the same weights.
        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU())
        self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
        )

    def forward(self, inp):
        """Return the fused 32-dim feature; NaN guards trace numeric blow-ups."""
        obs = to_torch(inp, dtype=torch.float32, device=self.device)
        step = obs[:, -1].to(torch.long)
        batch = obs.shape[0]

        # Market branch: left-pad one empty 30-step window before windowing.
        market = torch.cat((torch.zeros_like(obs[:, : 6 * 30]), obs[:, : 6 * 240]), dim=-1)
        market = market.reshape(-1, 30, 6).transpose(1, 2)
        conv = self.cnn(market).view(batch, 9, -1)
        assert not torch.isnan(conv).any()
        embedded = self.raw_fc(conv)
        assert not torch.isnan(embedded).any()

        # Private-state branch over the trailing 9 pairs of the observation.
        priv = self.dnn(obs[:, -19:-1].reshape(batch, -1, 2))
        assert not torch.isnan(priv).any()
        priv_seq = self.rnn2(priv)[0]
        assert not torch.isnan(priv_seq).any()
        market_seq = self.rnn(embedded)[0]
        assert not torch.isnan(market_seq).any()

        # Select each sample's hidden state at its current step, then fuse.
        rows = torch.arange(batch)
        self.feature = self.fc(torch.cat((market_seq[rows, step], priv_seq[rows, step]), dim=-1))
        return self.feature
54+
55+
56+
class PPO_Actor(nn.Module):
    """Actor head for PPO.

    Maps the 32-dim extractor feature to a softmax distribution over
    ``out_shape`` actions.  The latest feature is cached on ``self.feature``
    (mirroring the extractor) for inspection and debugging.

    :param extractor: feature-extractor module returning a (batch, 32) tensor
    :param out_shape: size of the discrete action space
    :param device: torch device the head lives on
    """

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
        self.device = device

    def forward(self, obs, state=None, info=None):
        # ``info`` is part of the tianshou policy-network calling convention
        # and is never read here; the mutable default ``{}`` was replaced with
        # ``None`` (mutable default argument anti-pattern).
        self.feature = self.extractor(obs)
        # Guard against numerical blow-ups upstream of the softmax.
        assert not (torch.isnan(self.feature).any() | torch.isinf(self.feature).any()), f"{self.feature}"
        return self.layer_out(self.feature), state
68+
69+
70+
class PPO_Critic(nn.Module):
    """Critic head for PPO.

    Maps the 32-dim extractor feature to a scalar state value, caching the
    latest feature on ``self.feature``.
    """

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.value_out = nn.Linear(32, 1)
        self.device = device

    def forward(self, obs, state=None, info={}):
        self.feature = self.extractor(obs)
        # Squeeze the trailing unit dim so the value is shaped (batch,).
        return self.value_out(self.feature).squeeze(dim=-1)

examples/trade/network/qmodel.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import torch
2+
import numpy as np
3+
from torch import nn
4+
import torch.nn.functional as F
5+
from copy import deepcopy
6+
import sys
7+
8+
from tianshou.data import to_torch
9+
10+
11+
class RNNQModel(nn.Module):
    """Recurrent Q-network for the DQN-style execution agents.

    Uses the same two-branch encoder as the PPO/OPD extractors (CNN + GRU
    over raw market data, MLP + GRU over per-step private-state pairs) but
    ends in a head that emits one Q-value per action.

    NOTE(review): ``forward`` drops the first 182 observation elements —
    presumably features meant for other networks; confirm against the
    environment's observation layout.  The 240/30/6 constants assume
    ``cnn_shape == [30, 6]``.
    """

    def __init__(self, device="cpu", out_shape=10, **kargs):
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        fc_size = kargs["fc_size"]  # required by the config schema, unused below
        self.cnn_shape = kargs["cnn_shape"]

        # Construction order kept identical to the original so that seeded
        # parameter initialisation reproduces the same weights.
        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU())
        self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
            nn.Linear(32, out_shape),
        )

    def forward(self, obs, state=None, info={}):
        """Return ``(q_values, state)`` with ``q_values`` shaped (batch, out_shape)."""
        feats = to_torch(obs, dtype=torch.float32, device=self.device)
        feats = feats[:, 182:]
        step = feats[:, -1].to(torch.long)
        batch = feats.shape[0]

        # Market branch: left-pad one empty 30-step window, then
        # CNN -> FC -> GRU over the resulting 9 windows.
        market = torch.cat((torch.zeros_like(feats[:, : 6 * 30]), feats[:, : 6 * 240]), dim=-1)
        market = market.reshape(-1, 30, 6).transpose(1, 2)
        market = self.rnn(self.raw_fc(self.cnn(market).view(batch, 9, -1)))[0]

        # Private-state branch: MLP -> GRU over the per-step pairs.
        private = self.rnn2(self.dnn(feats[:, 6 * 240 : -1].reshape(batch, -1, 2)))[0]

        # Select each sample's hidden state at its current step, then score.
        rows = torch.arange(batch)
        q_values = self.fc(torch.cat((market[rows, step], private[rows, step]), dim=-1))
        return q_values, state

examples/trade/network/teacher.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import torch
2+
import numpy as np
3+
from torch import nn
4+
import torch.nn.functional as F
5+
from copy import deepcopy
6+
import sys
7+
8+
from tianshou.data import to_torch
9+
10+
11+
class Teacher_Extractor(nn.Module):
    """Feature extractor for the oracle teacher policy.

    Unlike the student extractors, the market branch consumes the whole day at
    once: no zero-padding window is prepended (8 windows instead of 9) and the
    final GRU hidden state is used instead of indexing by the current step.
    The private-state branch is still indexed by the step counter.

    NOTE(review): ``feature_size`` is accepted but never used; ``forward``
    drops the first 182 observation elements and assumes
    ``cnn_shape == [30, 6]`` — confirm against the experiment configs.
    """

    def __init__(self, device="cpu", feature_size=180, **kargs):
        super().__init__()
        self.device = device
        hidden_size = kargs["hidden_size"]
        fc_size = kargs["fc_size"]  # required by the config schema, unused below
        self.cnn_shape = kargs["cnn_shape"]

        # Construction order kept identical to the original so that seeded
        # parameter initialisation reproduces the same weights.
        self.rnn = nn.GRU(64, hidden_size, batch_first=True)
        self.rnn2 = nn.GRU(64, hidden_size, batch_first=True)
        self.dnn = nn.Sequential(nn.Linear(2, 64), nn.ReLU())
        self.cnn = nn.Sequential(nn.Conv1d(self.cnn_shape[1], 3, 3), nn.ReLU())
        self.raw_fc = nn.Sequential(nn.Linear((self.cnn_shape[0] - 2) * 3, 64), nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 32),
            nn.ReLU(),
        )

    def forward(self, inp):
        """Return the fused 32-dim feature (also cached on ``self.feature``)."""
        obs = to_torch(inp, dtype=torch.float32, device=self.device)
        obs = obs[:, 182:]
        step = obs[:, -1].to(torch.long)
        batch = obs.shape[0]

        # Market branch: full-day view, summarised by the last GRU output.
        market = obs[:, : 6 * 240].reshape(-1, 30, 6).transpose(1, 2)
        market = self.rnn(self.raw_fc(self.cnn(market).view(batch, 8, -1)))[0][:, -1, :]

        # Private-state branch: indexed by the current step as usual.
        private = self.rnn2(self.dnn(obs[:, 6 * 240 : -1].reshape(batch, -1, 2)))[0]
        rows = torch.arange(batch)

        self.feature = self.fc(torch.cat((market, private[rows, step]), dim=-1))
        return self.feature
46+
47+
48+
class Teacher_Actor(nn.Module):
    """Actor head for the teacher policy.

    Maps the 32-dim extractor feature to a softmax distribution over
    ``out_shape`` actions, caching the latest feature on ``self.feature``.
    """

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.layer_out = nn.Sequential(nn.Linear(32, out_shape), nn.Softmax(dim=-1))
        self.device = device

    def forward(self, obs, state=None, info={}):
        self.feature = self.extractor(obs)
        return self.layer_out(self.feature), state
59+
60+
61+
class Teacher_Critic(nn.Module):
    """Critic head for the teacher policy.

    Maps the 32-dim extractor feature to a scalar state value, caching the
    latest feature on ``self.feature``.
    """

    def __init__(self, extractor, out_shape, device=torch.device("cpu"), **kargs):
        super().__init__()
        self.extractor = extractor
        self.value_out = nn.Linear(32, 1)
        self.device = device

    def forward(self, obs, state=None, info={}):
        self.feature = self.extractor(obs)
        # Squeeze the trailing unit dim so the value is shaped (batch,).
        return self.value_out(self.feature).squeeze(-1)

0 commit comments

Comments
 (0)