反复强化学习与股票交易

一、简介
本项目是对论文《Stock Trading with Recurrent Reinforcement Learning (RRL)》的代码重现。
论文URL:http://cs229.stanford.edu/proj2006/Molina-StockTradingWithRecurrentReinforcementLearning.pdf

二、结论
先看结论吧：

这种方法的主要困难在于：某些股票事件并不呈现出可供学习的结构。从上面的第二个示例中可以看出，强化学习模型无法预测股价的急剧下跌，在这种行情面前和人类交易者一样脆弱。如果与能够预测这类急跌的机制结合使用，效果可能会更好。该模型还可以在其他方面加以改进，例如把成交量也纳入进来，作为预测涨跌的特征。

另外，可以尝试使用固定交易成本并降低交易频率。例如，可以创建这样一个模型：它从长时间的数据中学习，但只能定期做出决策。这更接近散户交易者的实际情况——每笔交易支付固定成本、交易次数较少。对于散户而言，如果每个周期都进行交易，固定交易成本累积起来过于昂贵，所以采用定期交易策略的模型对此类用户更为可行。

三、小知识
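1. 夏普比率（Sharpe ratio）的计算方法

设 $R_t$ 为第 $t$ 期的收益，共 $T$ 期，则

$$A = \frac{1}{T}\sum_{t=1}^{T} R_t,\qquad B = \frac{1}{T}\sum_{t=1}^{T} R_t^2,\qquad S_T = \frac{A}{\sqrt{B - A^2}}$$

即平均收益除以收益的标准差；下面代码中的 `calc_dSdw` 就是按此式计算 `A`、`B`、`S` 的。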

四、代码

1# -*- coding: utf-8 -*-
2import time
3import pickle
4import numpy as np
5import pandas as pd
6from datetime import datetime as dt
7import matplotlib.pyplot as plt
8 
9 
def _plot_term(t, ini_agent, opt_agent, legend_loc, out_path):
    """Save a three-panel figure comparing two agents over one window.

    Panels, top to bottom: the price series of ``opt_agent``, the trader
    output ``F`` of both agents, and the cumulative reward ``sumR`` of both.

    Args:
        t: x-axis values, one per time step (length ``opt_agent.T``).
        ini_agent: agent evaluated with the initial weights (drawn in blue).
        opt_agent: agent evaluated with the optimized weights (drawn in red).
        legend_loc: matplotlib legend location for the two lower panels.
        out_path: file name handed to ``savefig``.
    """
    n_steps = opt_agent.T
    fig, ax = plt.subplots(nrows=3, figsize=(15, 10))

    ax[0].plot(t, opt_agent.p[:n_steps])
    ax[0].set_xlabel("time")
    ax[0].set_ylabel("USDJPY")
    ax[0].grid(True)

    ax[1].plot(t, ini_agent.F[:n_steps], color="blue", label="With initial weights")
    ax[1].plot(t, opt_agent.F[:n_steps], color="red", label="With optimized weights")
    ax[1].set_xlabel("time")
    ax[1].set_ylabel("F")
    ax[1].legend(loc=legend_loc)
    ax[1].grid(True)

    ax[2].plot(t, ini_agent.sumR, color="blue", label="With initial weights")
    ax[2].plot(t, opt_agent.sumR, color="red", label="With optimized weights")
    ax[2].set_xlabel("time")
    ax[2].set_ylabel("Sum of reward[yen]")
    ax[2].legend(loc=legend_loc)
    ax[2].grid(True)

    fig.savefig(out_path, dpi=300)
    # Close (not just clear) the figure so it is released by pyplot.
    plt.close(fig)


def main():
    """Train an RRL agent on one window of price data and plot the results.

    Writes three image files to the current directory:
    ``sharp's ratio optimization.png`` (ratio per epoch),
    ``rrl_train.png`` (training window) and ``rrl_prediction.png``
    (the window starting ``T`` samples earlier in the loaded array,
    evaluated with the trained weights).
    """
    fname = "../data/USDJPY30.csv"
    init_t = 6000

    T = 1000
    M = 200
    mu = 10000
    sigma = 0.04
    rho = 1.0
    n_epoch = 10000

    # RRL agent with initial weight.
    ini_rrl = TradingRRL(T, M, init_t, mu, sigma, rho, n_epoch)
    ini_rrl.load_csv(fname)
    ini_rrl.set_t_p_r()
    ini_rrl.calc_dSdw()
    # RRL agent for training; it reuses the series loaded above.
    rrl = TradingRRL(T, M, init_t, mu, sigma, rho, n_epoch)
    rrl.all_t = ini_rrl.all_t
    rrl.all_p = ini_rrl.all_p
    rrl.set_t_p_r()
    rrl.fit()

    # Plot results.
    # Training for initial term T.
    plt.plot(range(len(rrl.epoch_S)), rrl.epoch_S)
    plt.title("Sharp's ratio optimization")
    plt.xlabel("Epoch times")
    plt.ylabel("Sharp's ratio")
    plt.grid(True)
    plt.savefig("sharp's ratio optimization.png", dpi=300)
    # The original wrote `plt.close` without calling it, which did nothing.
    plt.close()

    t = np.linspace(1, rrl.T, rrl.T)[::-1]
    _plot_term(t, ini_rrl, rrl, "upper left", "rrl_train.png")

    # Prediction for next term T with optimized weight.
    # RRL agent with initial weight.
    ini_rrl_f = TradingRRL(T, M, init_t - T, mu, sigma, rho, n_epoch)
    ini_rrl_f.all_t = ini_rrl.all_t
    ini_rrl_f.all_p = ini_rrl.all_p
    ini_rrl_f.set_t_p_r()
    ini_rrl_f.calc_dSdw()
    # RRL agent with optimized weight.
    rrl_f = TradingRRL(T, M, init_t - T, mu, sigma, rho, n_epoch)
    rrl_f.all_t = ini_rrl.all_t
    rrl_f.all_p = ini_rrl.all_p
    rrl_f.set_t_p_r()
    rrl_f.w = rrl.w
    rrl_f.calc_dSdw()

    t_f = np.linspace(rrl.T + 1, rrl.T + rrl.T, rrl.T)[::-1]
    _plot_term(t_f, ini_rrl_f, rrl_f, "lower right", "rrl_prediction.png")
105 
class TradingRRL(object):
    """Recurrent-reinforcement-learning trader trained by gradient ascent.

    The trader output is ``F[i] = tanh(w . x[i])`` where ``x[i]`` holds a
    bias term, ``M`` consecutive price differences and the neighbouring
    output ``F[i+1]``. Weights ``w`` are moved along the gradient of
    ``S = A / sqrt(B - A**2)`` (``A`` = mean reward, ``B`` = mean squared
    reward) over a window of ``T`` steps.

    Args:
        T: number of time steps in the evaluation window.
        M: number of price differences fed to the trader at each step.
        init_t: start index of the window inside ``all_t`` / ``all_p``.
        mu: factor multiplying every reward in ``calc_R``.
        sigma: coefficient on ``|F[i] - F[i+1]|`` in the reward
            (presumably a transaction-cost rate -- confirm against the paper).
        rho: step size used by ``update_w``.
        n_epoch: number of gradient steps performed by ``fit``.
    """

    def __init__(self, T=1000, M=200, init_t=10000, mu=10000, sigma=0.04, rho=1.0, n_epoch=10000):
        self.T = T
        self.M = M
        self.init_t = init_t
        self.mu = mu
        self.sigma = sigma
        self.rho = rho
        # Full series; filled by load_csv() or assigned by the caller.
        self.all_t = None
        self.all_p = None
        # Window of the full series selected by set_t_p_r().
        self.t = None
        self.p = None
        self.r = None
        self.x = np.zeros([T, M+2])
        # F has one extra slot: F[T] is never written and stays 0.
        self.F = np.zeros(T+1)
        self.R = np.zeros(T)
        self.w = np.ones(M+2)
        self.w_opt = np.ones(M+2)
        self.epoch_S = np.empty(0)
        self.n_epoch = n_epoch
        self.progress_period = 100
        self.q_threshold = 0.7

    def load_csv(self, fname):
        """Read times and prices from a header-less CSV into all_t / all_p.

        Columns 0 and 1 are joined with a space and parsed with the format
        ``'%Y.%m.%d %H:%M'``; column 5 is used as the price. Both arrays are
        stored in reversed row order.
        """
        tmp = pd.read_csv(fname, header=None)
        stamps = tmp[0] + " " + tmp[1]
        times = [dt.strptime(stamp, '%Y.%m.%d %H:%M') for stamp in stamps]
        prices = list(tmp[5])
        self.all_t = np.array(times[::-1])
        self.all_p = np.array(prices[::-1])

    def quant(self, f):
        """Return sign(f) with entries of magnitude below q_threshold set to 0.

        The input array is not modified.
        """
        fc = f.copy()
        fc[np.abs(fc) < self.q_threshold] = 0
        return np.sign(fc)

    def set_t_p_r(self):
        """Slice the T+M+1 samples starting at init_t and derive r = -diff(p)."""
        stop = self.init_t + self.T + self.M + 1
        self.t = self.all_t[self.init_t:stop]
        self.p = self.all_p[self.init_t:stop]
        self.r = -np.diff(self.p)

    def set_x_F(self):
        """Fill the feature rows x[i] and trader outputs F[i] for the window.

        Iterates from i = T-1 down to 0 because x[i] contains F[i+1].
        Row layout: [1.0, r[i], ..., r[i+M-1], F[i+1]].
        """
        last = self.M + 1
        for i in range(self.T - 1, -1, -1):
            row = self.x[i]  # view: writes land in self.x
            row[0] = 1.0
            row[1:last] = self.r[i:i + self.M]
            row[last] = self.F[i + 1]
            self.F[i] = np.tanh(np.dot(self.w, row))

    def calc_R(self):
        """Compute the per-step reward R from F, r, mu and sigma."""
        position_change = np.abs(np.diff(self.F))
        self.R = self.mu * (self.F[1:] * self.r[:self.T] - self.sigma * position_change)

    def calc_sumR(self):
        """Compute reverse cumulative sums of R and R**2.

        sumR[k] is the sum of R[k:], so sumR[0] is the total over the window.
        """
        self.sumR = np.cumsum(self.R[::-1])[::-1]
        self.sumR2 = np.cumsum((self.R**2)[::-1])[::-1]

    def calc_dSdw(self):
        """Evaluate the ratio S for the current weights and its gradient dSdw.

        Refreshes x, F, R, sumR, sumR2, A, B and S as a side effect.
        """
        self.set_x_F()
        self.calc_R()
        self.calc_sumR()
        self.A      =  self.sumR[0] / self.T
        self.B      =  self.sumR2[0] / self.T
        self.S      =  self.A / np.sqrt(self.B - self.A**2)
        self.dSdA   =  self.S * (1 + self.S**2) / self.A
        self.dSdB   = -self.S**3 / 2 / self.A**2
        self.dAdR   =  1.0 / self.T
        self.dBdR   =  2.0 / self.T * self.R
        self.dRdF   = -self.mu * self.sigma * np.sign(-np.diff(self.F))
        self.dRdFp  =  self.mu * self.r[:self.T] + self.mu * self.sigma * np.sign(-np.diff(self.F))
        self.dFdw   = np.zeros(self.M+2)
        self.dFpdw  = np.zeros(self.M+2)
        self.dSdw   = np.zeros(self.M+2)
        for i in range(self.T-1, -1, -1):
            # dFpdw is dF/dw of the step handled just before (index i+1);
            # it stays zero for the very first step, i = T-1.
            if i != self.T-1:
                self.dFpdw = self.dFdw.copy()
            self.dFdw  = (1 - self.F[i]**2) * (self.x[i] + self.w[self.M+2-1] * self.dFpdw)
            self.dSdw += (self.dSdA * self.dAdR + self.dSdB * self.dBdR[i]) * (self.dRdF[i] * self.dFdw + self.dRdFp[i] * self.dFpdw)

    def update_w(self):
        """Take one gradient-ascent step: w += rho * dSdw (in place)."""
        self.w += self.rho * self.dSdw

    def _print_progress(self, done, total, elapsed):
        """Print one progress line for fit()."""
        print("Epoch: " + str(done) + "/" + str(total) +". Shape's ratio: " + str(self.S) + ". Elapsed time: " + str(elapsed) + " sec.")

    def fit(self):
        """Run n_epoch gradient steps and keep the best weights seen.

        Appends the ratio of every epoch to epoch_S, prints progress every
        progress_period epochs, and finally sets w to the weights that gave
        the highest ratio (the starting weights if nothing improved on them).
        """
        pre_epoch_times = len(self.epoch_S)
        total_epochs = self.n_epoch + pre_epoch_times

        self.calc_dSdw()
        print("Epoch loop start. Initial sharp's ratio is " + str(self.S) + ".")
        self.S_opt = self.S
        # Seed the best-so-far weights with the starting point; otherwise a
        # run that never improves would reset w to the all-ones default.
        self.w_opt = self.w.copy()

        # time.clock() no longer exists (removed in Python 3.8).
        tic = time.perf_counter()
        for e_index in range(self.n_epoch):
            self.calc_dSdw()
            if self.S > self.S_opt:
                self.S_opt = self.S
                self.w_opt = self.w.copy()
            self.epoch_S = np.append(self.epoch_S, self.S)
            self.update_w()
            if e_index % self.progress_period == self.progress_period-1:
                toc = time.perf_counter()
                self._print_progress(e_index + pre_epoch_times + 1, total_epochs, toc-tic)
        toc = time.perf_counter()
        # Uses the epoch total rather than the loop variable so that
        # n_epoch == 0 does not fail on an undefined e_index.
        self._print_progress(total_epochs, total_epochs, toc-tic)
        self.w = self.w_opt.copy()
        self.calc_dSdw()
        print("Epoch loop end. Optimized sharp's ratio is " + str(self.S_opt) + ".")

    def save_weight(self):
        """Write w to "w.csv" and epoch_S to "epoch_S.csv" in the working directory."""
        pd.DataFrame(self.w).to_csv("w.csv", header=False, index=False)
        pd.DataFrame(self.epoch_S).to_csv("epoch_S.csv", header=False, index=False)

    def load_weight(self):
        """Load w from the first column of "w.csv" in the working directory."""
        tmp = pd.read_csv("w.csv", header=None)
        self.w = tmp.T.values[0]
221 
def plot_hist(n_tick, R):
    """Show a bar chart counting the values of R in n_tick equal-width bins.

    Bin k is centred at ``min(R) + k * width`` with
    ``width = (max(R) - min(R)) / n_tick`` and covers the half-open interval
    ``(centre - width/2, centre + width/2]``.

    NOTE: the last bin ends at ``min(R) + (n_tick - 0.5) * width``, so values
    above that edge (including ``max(R)``) are not counted in any bin.

    Args:
        n_tick: number of bins.
        R: sequence of numeric values.
    """
    values = np.array(R)
    lowest = min(R)
    width = (max(R) - lowest) / n_tick

    centers = []
    counts = []
    for k in range(n_tick):
        lower = lowest - width * 0.5 + k * width
        upper = lowest + width * 0.5 + k * width
        inside = (lower < values) & (values <= upper)
        centers.append(lowest + k * width)
        counts.append(int(np.count_nonzero(inside)))

    plt.bar(centers, counts, width=width)
    plt.grid()
    plt.show()
234 
235 
# Run the training and plotting pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()

运行截图：

可参考：
https://zhuanlan.zhihu.com/p/36632686

代码地址：https://github.com/darden1/tradingrrl

蜗牛之路

最新

反复强化学习与股票交易

暧昧帖

发表评论点击这里取消回复。

最新

推荐

暧昧帖

发表评论 点击这里取消回复。

发表评论点击这里取消回复。