一、简介
本项目是对论文《Stock Trading with Recurrent Reinforcement Learning (RRL)》的代码重现。
论文URL:http://cs229.stanford.edu/proj2006/Molina-StockTradingWithRecurrentReinforcementLearning.pdf
二、结论
先看结论吧:
这种方法的主要困难在于,某些股票事件并不呈现出可供模型学习的结构(规律)。从原论文的第二个示例中可以看出,强化学习模型无法预测股价的急剧下跌,在这类行情面前和人类交易者一样脆弱。如果与能够预测这种急剧下跌的机制结合使用,该方法可能会更有效。该模型还可以在其他方面作出修改,比如将成交量也纳入进来,作为预测涨跌的特征。
另外,可以尝试使用固定交易成本以及降低交易频率。例如,可以创建一个模型,该模型从长时间的数据中学习,但只能定期做出决策。这将反映一个散户交易者以固定交易成本参与较小数量交易的情况。因为对于散户而言,以固定的交易成本在每个期间进行交易都太昂贵了,所以具有定期交易策略的模型对于此类用户更为可行。
三、小知识
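1. 夏普比率(Sharpe ratio)的计算方法
设 $R_t$ 为第 $t$ 期的收益,$T$ 为总期数,令 $A = \frac{1}{T}\sum_{t=1}^{T} R_t$(收益的均值),$B = \frac{1}{T}\sum_{t=1}^{T} R_t^2$(收益平方的均值),则下文代码中使用的夏普比率为 $S = \dfrac{A}{\sqrt{B - A^2}}$,即平均收益除以收益的标准差。
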
四、代码
6 | from datetime import datetime as dt |
7 | import matplotlib.pyplot as plt |
12 | fname = "../data/USDJPY30.csv" |
22 | # RRL agent with initial weight. |
23 | ini_rrl = TradingRRL(T, M, init_t, mu, sigma, rho, n_epoch) |
24 | ini_rrl.load_csv(fname) |
27 | # RRL agent for training |
28 | rrl = TradingRRL(T, M, init_t, mu, sigma, rho, n_epoch) |
29 | rrl.all_t = ini_rrl.all_t |
30 | rrl.all_p = ini_rrl.all_p |
35 | # Training for initial term T. |
36 | plt.plot(range(len(rrl.epoch_S)),rrl.epoch_S) |
37 | plt.title( "Sharp's ratio optimization" ) |
38 | plt.xlabel( "Epoch times" ) |
39 | plt.ylabel( "Sharp's ratio" ) |
41 | plt.savefig( "sharp's ratio optimization.png" , dpi=300) |
44 | fig, ax = plt.subplots(nrows=3, figsize=(15, 10)) |
45 | t = np.linspace(1, rrl.T, rrl.T)[::-1] |
46 | ax[0].plot(t, rrl.p[:rrl.T]) |
47 | ax[0].set_xlabel( "time" ) |
48 | ax[0].set_ylabel( "USDJPY" ) |
51 | ax[1].plot(t, ini_rrl.F[:rrl.T], color= "blue" , label= "With initial weights" ) |
52 | ax[1].plot(t, rrl.F[:rrl.T], color= "red" , label= "With optimized weights" ) |
53 | ax[1].set_xlabel( "time" ) |
55 | ax[1].legend(loc= "upper left" ) |
58 | ax[2].plot(t, ini_rrl.sumR, color= "blue" , label= "With initial weights" ) |
59 | ax[2].plot(t, rrl.sumR, color= "red" , label= "With optimized weights" ) |
60 | ax[2].set_xlabel( "time" ) |
61 | ax[2].set_ylabel( "Sum of reward[yen]" ) |
62 | ax[2].legend(loc= "upper left" ) |
64 | plt.savefig( "rrl_train.png" , dpi=300) |
68 | # Prediction for next term T with optimized weight. |
69 | # RRL agent with initial weight. |
70 | ini_rrl_f = TradingRRL(T, M, init_t-T, mu, sigma, rho, n_epoch) |
71 | ini_rrl_f.all_t = ini_rrl.all_t |
72 | ini_rrl_f.all_p = ini_rrl.all_p |
75 | # RRL agent with optimized weight. |
76 | rrl_f = TradingRRL(T, M, init_t-T, mu, sigma, rho, n_epoch) |
77 | rrl_f.all_t = ini_rrl.all_t |
78 | rrl_f.all_p = ini_rrl.all_p |
83 | fig, ax = plt.subplots(nrows=3, figsize=(15, 10)) |
84 | t_f = np.linspace(rrl.T+1, rrl.T+rrl.T, rrl.T)[::-1] |
85 | ax[0].plot(t_f, rrl_f.p[:rrl_f.T]) |
86 | ax[0].set_xlabel( "time" ) |
87 | ax[0].set_ylabel( "USDJPY" ) |
90 | ax[1].plot(t_f, ini_rrl_f.F[:rrl_f.T], color= "blue" , label= "With initial weights" ) |
91 | ax[1].plot(t_f, rrl_f.F[:rrl_f.T], color= "red" , label= "With optimized weights" ) |
92 | ax[1].set_xlabel( "time" ) |
94 | ax[1].legend(loc= "lower right" ) |
97 | ax[2].plot(t_f, ini_rrl_f.sumR, color= "blue" , label= "With initial weights" ) |
98 | ax[2].plot(t_f, rrl_f.sumR, color= "red" , label= "With optimized weights" ) |
99 | ax[2].set_xlabel( "time" ) |
100 | ax[2].set_ylabel( "Sum of reward[yen]" ) |
101 | ax[2].legend(loc= "lower right" ) |
103 | plt.savefig( "rrl_prediction.png" , dpi=300) |
106 | class TradingRRL(object): |
107 | def __init__(self, T=1000, M=200, init_t=10000, mu=10000, sigma=0.04, rho=1.0, n_epoch=10000): |
119 | self.x = np.zeros([T, M+2]) |
120 | self.F = np.zeros(T+1) |
122 | self.w = np.ones(M+2) |
123 | self.w_opt = np.ones(M+2) |
124 | self.epoch_S = np. empty (0) |
125 | self.n_epoch = n_epoch |
126 | self.progress_period = 100 |
127 | self.q_threshold = 0.7 |
129 | def load_csv(self, fname): |
130 | tmp = pd.read_csv(fname, header=None) |
131 | tmp_tstr = tmp[0] + " " + tmp[1] |
132 | tmp_t = [dt. strptime (tmp_tstr[i], '%Y.%m.%d %H:%M' ) for i in range(len(tmp_tstr))] |
134 | self.all_t = np. array (tmp_t[::-1]) |
135 | self.all_p = np. array (tmp_p[::-1]) |
139 | fc[np.where(np. abs (fc) < self.q_threshold)] = 0 |
143 | self.t = self.all_t[self.init_t:self.init_t+self.T+self.M+1] |
144 | self.p = self.all_p[self.init_t:self.init_t+self.T+self.M+1] |
145 | self.r = -np.diff(self.p) |
148 | for i in range(self.T-1, -1 ,-1): |
149 | self.x[i] = np.zeros(self.M+2) |
151 | self.x[i][self.M+2-1] = self.F[i+1] |
152 | for j in range(1, self.M+2-1, 1): |
153 | self.x[i][j] = self.r[i+j-1] |
154 | self.F[i] = np.tanh(np.dot(self.w, self.x[i])) |
157 | self.R = self.mu * (self.F[1:] * self.r[:self.T] - self.sigma * np. abs (-np.diff(self.F))) |
160 | self.sumR = np.cumsum(self.R[::-1])[::-1] |
161 | self.sumR2 = np.cumsum((self.R**2)[::-1])[::-1] |
167 | self.A = self.sumR[0] / self.T |
168 | self.B = self.sumR2[0] / self.T |
169 | self.S = self.A / np.sqrt(self.B - self.A**2) |
170 | self.dSdA = self.S * (1 + self.S**2) / self.A |
171 | self.dSdB = -self.S**3 / 2 / self.A**2 |
172 | self.dAdR = 1.0 / self.T |
173 | self.dBdR = 2.0 / self.T * self.R |
174 | self.dRdF = -self.mu * self.sigma * np.sign(-np.diff(self.F)) |
175 | self.dRdFp = self.mu * self.r[:self.T] + self.mu * self.sigma * np.sign(-np.diff(self.F)) |
176 | self.dFdw = np.zeros(self.M+2) |
177 | self.dFpdw = np.zeros(self.M+2) |
178 | self.dSdw = np.zeros(self.M+2) |
179 | for i in range(self.T-1, -1 ,-1): |
181 | self.dFpdw = self.dFdw. copy () |
182 | self.dFdw = (1 - self.F[i]**2) * (self.x[i] + self.w[self.M+2-1] * self.dFpdw) |
183 | self.dSdw += (self.dSdA * self.dAdR + self.dSdB * self.dBdR[i]) * (self.dRdF[i] * self.dFdw + self.dRdFp[i] * self.dFpdw) |
186 | self.w += self.rho * self.dSdw |
190 | pre_epoch_times = len(self.epoch_S) |
193 | print ( "Epoch loop start. Initial sharp's ratio is " + str(self.S) + "." ) |
197 | for e_index in range(self.n_epoch): |
199 | if self.S > self.S_opt: |
201 | self.w_opt = self.w. copy () |
202 | self.epoch_S = np.append(self.epoch_S, self.S) |
204 | if e_index % self.progress_period == self.progress_period-1: |
206 | print ( "Epoch: " + str(e_index + pre_epoch_times + 1) + "/" + str(self.n_epoch + pre_epoch_times) + ". Shape's ratio: " + str(self.S) + ". Elapsed time: " + str(toc-tic) + " sec." ) |
208 | print ( "Epoch: " + str(e_index + pre_epoch_times + 1) + "/" + str(self.n_epoch + pre_epoch_times) + ". Shape's ratio: " + str(self.S) + ". Elapsed time: " + str(toc-tic) + " sec." ) |
209 | self.w = self.w_opt. copy () |
211 | print ( "Epoch loop end. Optimized sharp's ratio is " + str(self.S_opt) + "." ) |
213 | def save_weight(self): |
214 | pd.DataFrame(self.w).to_csv( "w.csv" , header=False, index=False) |
215 | pd.DataFrame(self.epoch_S).to_csv( "epoch_S.csv" , header=False, index=False) |
217 | def load_weight(self): |
218 | tmp = pd.read_csv( "w.csv" , header=None) |
219 | self.w = tmp.T.values[0] |
222 | def plot_hist(n_tick, R): |
223 | rnge = max(R) - min(R) |
225 | tick_min = [min(R) - tick * 0.5 + i * tick for i in range(n_tick)] |
226 | tick_max = [min(R) + tick * 0.5 + i * tick for i in range(n_tick)] |
227 | tick_center = [min(R) + i * tick for i in range(n_tick)] |
228 | tick_val = [0.0] * n_tick |
229 | for i in range(n_tick ): |
230 | tick_val[i] = len(set(np.where(tick_min[i] < np. array (R))[0].tolist()).intersection(np.where(np. array (R) <= tick_max[i])[0])) |
231 | plt.bar(tick_center, tick_val, width=tick) |
236 | if __name__ == "__main__" : |
运行截图:

可参考:
https://zhuanlan.zhihu.com/p/36632686
代码地址:https://github.com/darden1/tradingrrl