# 機械学習を使ってAMDの1ヶ月先の株価を予測する(GRU-Seq2Seq編)

このサイトを参考にしながら、bidirectional GRU seq2seqモデルを使って、1ヶ月先のAMDの株価の予測をしてみる。

スポンサーリンク

## 株価データの用意

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
from tqdm import tqdm
from pandas_datareader import data as pdr
import yfinance as yf
# Apply seaborn's default styling to subsequent matplotlib figures.
sns.set()
# Seed TensorFlow's random ops for the current default graph.
# NOTE(review): forecast() calls tf.reset_default_graph() before building each
# model, so this seed presumably does not carry over to those graphs — confirm.
tf.compat.v1.random.set_random_seed(1234)

# Ticker symbols handed to get(); only AMD is requested here.
tickers = ['AMD']

# Rebinds the name `datetime` to the module itself, so the class must be
# reached as datetime.datetime from this point on.
import datetime

# Date window requested from the price source.
stocks_start = datetime.datetime(year=2018, month=1, day=1)
stocks_end = datetime.datetime(year=2019, month=8, day=20)

def get(tickers, startdate, enddate):
    """Download price history for each ticker and stack it into one frame.

    Args:
        tickers: iterable of ticker symbols; also used as the outer index keys.
        startdate: first date of the requested window.
        enddate: last date of the requested window.

    Returns:
        The per-ticker frames concatenated with ``pd.concat``; the resulting
        index levels are named ``'Ticker'`` and ``'Date'``.
    """
    def data(ticker):
        # One request per symbol through pandas_datareader's Yahoo reader.
        return pdr.get_data_yahoo(ticker, start=startdate, end=enddate)

    # Build a concrete list so the downloads happen here, not lazily inside
    # pd.concat.
    datas = [data(ticker) for ticker in tickers]
    return pd.concat(datas, keys=tickers, names=['Ticker', 'Date'])

# Fetch the raw history for every configured ticker.
all_data = get(tickers, stocks_start, stocks_end)

# Keep the price/volume columns, drop the 'Ticker' index level, then turn the
# remaining index into an ordinary column with a fresh integer index.
df = (
    all_data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
    .reset_index(level='Ticker', drop=True)
    .reset_index()
)
df.tail()

Date Open High Low Close Adj Close Volume
405 2019-08-13 32.360001 33.139999 31.719999 32.110001 32.110001 102009700
406 2019-08-14 31.000000 31.049999 29.510000 30.240000 30.240000 127521500
407 2019-08-15 30.629999 30.730000 29.209999 29.670000 29.670000 71674400
408 2019-08-16 30.309999 31.480000 30.209999 31.180000 31.180000 70469800
409 2019-08-19 32.000000 32.189999 31.420000 31.480000 31.480000 67596900
# Column position 4 of df (the 'Close' column) is scaled with a min-max
# scaler; the 4:5 slice keeps it as a one-column frame.
# NOTE(review): the scaler is fitted on every row of df, including the rows
# that are later held out as df_test — confirm this is intended.
minmax = MinMaxScaler()
minmax.fit(df.iloc[:, 4:5].astype('float32'))
df_log = pd.DataFrame(minmax.transform(df.iloc[:, 4:5].astype('float32')))

0
0 0.058327
1 0.081255
2 0.104183
3 0.094529
4 0.110619
スポンサーリンク

## データを学習用とテスト用に分割

# Number of independent train-and-forecast runs.
simulation_size = 10
# Number of trailing rows held out for evaluation.
test_size = 30

# Everything except the last test_size rows trains the model; the tail is
# the held-out span.
df_train, df_test = df_log.iloc[:-test_size], df_log.iloc[-test_size:]
(df.shape, df_train.shape, df_test.shape)

((410, 7), (380, 1), (30, 1))
class Model:
    """Two chained bidirectional GRU networks with a dense regression head.

    Builds a TensorFlow 1.x graph. A first bidirectional RNN reads ``X``
    starting from the fed-in hidden states. A second bidirectional RNN
    (variable scope ``decoder``) reads the same ``X`` starting from the first
    network's final states. Its concatenated outputs go through a dense layer,
    and the mean squared error against ``Y`` is minimized with Adam.

    Args:
        learning_rate: step size given to the optimizer.
        num_layers: number of GRU cells stacked per direction.
        size: feature dimension of the input placeholder ``X``.
        size_layer: number of units in each GRU cell.
        output_size: width of the dense output and of placeholder ``Y``.
        forget_bias: despite its name, used as ``output_keep_prob`` for every
            ``DropoutWrapper`` (probability of keeping an output unit).

    Attributes set on the instance: ``X``, ``Y``, ``backward_hidden_layer``,
    ``forward_hidden_layer``, ``outputs``, ``last_state``, ``logits``,
    ``cost`` and ``optimizer``.
    """

    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias = 0.1,
    ):
        def gru_cell(size_layer):
            return tf.nn.rnn_cell.GRUCell(size_layer)

        # state_is_tuple=False concatenates the per-layer states, which is
        # why the state placeholders below are num_layers * size_layer wide.
        backward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [gru_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        forward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [gru_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        drop_backward = tf.contrib.rnn.DropoutWrapper(
            backward_rnn_cells, output_keep_prob = forget_bias
        )
        forward_backward = tf.contrib.rnn.DropoutWrapper(
            forward_rnn_cells, output_keep_prob = forget_bias
        )
        # Initial hidden states are fed by the caller so state can be carried
        # from one window to the next.
        self.backward_hidden_layer = tf.placeholder(
            tf.float32, shape = (None, num_layers * size_layer)
        )
        self.forward_hidden_layer = tf.placeholder(
            tf.float32, shape = (None, num_layers * size_layer)
        )
        _, last_state = tf.nn.bidirectional_dynamic_rnn(
            forward_backward,
            drop_backward,
            self.X,
            initial_state_fw = self.forward_hidden_layer,
            initial_state_bw = self.backward_hidden_layer,
            dtype = tf.float32,
        )

        with tf.variable_scope('decoder', reuse = False):
            backward_rnn_cells_decoder = tf.nn.rnn_cell.MultiRNNCell(
                [gru_cell(size_layer) for _ in range(num_layers)],
                state_is_tuple = False,
            )
            forward_rnn_cells_decoder = tf.nn.rnn_cell.MultiRNNCell(
                [gru_cell(size_layer) for _ in range(num_layers)],
                state_is_tuple = False,
            )
            drop_backward_decoder = tf.contrib.rnn.DropoutWrapper(
                backward_rnn_cells_decoder, output_keep_prob = forget_bias
            )
            forward_backward_decoder = tf.contrib.rnn.DropoutWrapper(
                forward_rnn_cells_decoder, output_keep_prob = forget_bias
            )
            # The second network is seeded with the first network's final
            # forward/backward states and reads the same input again.
            self.outputs, self.last_state = tf.nn.bidirectional_dynamic_rnn(
                forward_backward_decoder, drop_backward_decoder, self.X,
                initial_state_fw = last_state[0],
                initial_state_bw = last_state[1],
                dtype = tf.float32
            )
        # Join forward and backward outputs along the feature axis.
        self.outputs = tf.concat(self.outputs, 2)
        # Index -1 selects the last entry along the leading axis.
        # NOTE(review): assumes batch-major outputs with one sequence per
        # run, so this yields one row per time step — confirm.
        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        # Training op; callers run it together with `cost`.
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )

def calculate_accuracy(real, predict):
    """Return ``100 * (1 - RMS relative error)`` of *predict* against *real*.

    Both inputs are shifted by +1 before the relative error is taken, which
    keeps the denominator away from zero for values in the [0, 1] range.

    Args:
        real: sequence of reference values.
        predict: sequence of predicted values, same length as *real*.

    Returns:
        A float percentage; 100.0 means the two sequences are identical.
    """
    real = np.asarray(real, dtype=float) + 1
    predict = np.asarray(predict, dtype=float) + 1
    percentage = 1 - np.sqrt(np.mean(np.square((real - predict) / real)))
    return percentage * 100

def anchor(signal, weight):
    """Exponentially smooth *signal*, seeded with its first element.

    Each output is ``previous_output * weight + (1 - weight) * value``, so a
    larger *weight* follows the input more slowly.

    Args:
        signal: sequence of numbers (list or 1-D array).
        weight: share of the previous smoothed value kept at each step.

    Returns:
        A list with one smoothed value per input element; an empty list for
        empty input.
    """
    if len(signal) == 0:
        # Nothing to seed the recursion with.
        return []
    buffer = []
    last = signal[0]
    for value in signal:
        last = last * weight + (1 - weight) * value
        buffer.append(last)
    return buffer

# Hyperparameters read by forecast() as module-level globals.
# GRU cells stacked per direction.
num_layers = 1
# Units in each GRU cell.
size_layer = 128
# Number of consecutive rows fed to the network per run (window length).
timestamp = 5
# Passes over df_train in each forecast() call.
epoch = 300
# NOTE(review): forecast() passes this as Model's `forget_bias`, which Model
# uses as DropoutWrapper `output_keep_prob` — so it acts as a keep
# probability, not a drop rate.
dropout_rate = 0.8
# forecast() assigns its own local `future_day`, so it does not read this one.
future_day = test_size
# Passed to Model as `learning_rate`.
learning_rate = 0.01

def forecast():
    """Train a fresh model and return its smoothed forecast for the test span.

    Reads module-level configuration (``learning_rate``, ``num_layers``,
    ``size_layer``, ``dropout_rate``, ``epoch``, ``timestamp``, ``test_size``)
    and data (``df_log``, ``df_train``, ``minmax``).

    Steps:
      1. Reset the default graph, build a ``Model`` and train it for ``epoch``
         passes over ``df_train`` in windows of ``timestamp`` rows. Each
         window's target is the same window shifted one row ahead, and hidden
         state is carried from window to window within a pass.
      2. Replay ``df_train`` through the trained network to fill the first
         part of the prediction buffer.
      3. Roll forward one row at a time, feeding the last ``timestamp``
         predictions back in, until the buffer is full.
      4. Undo the min-max scaling and smooth with ``anchor(..., 0.3)``.

    Returns:
        list: the last ``test_size`` smoothed predictions in price units.
    """
    tf.reset_default_graph()
    modelnn = Model(
        learning_rate, num_layers, df_log.shape[1], size_layer, df_log.shape[1], dropout_rate
    )
    n_train = df_train.shape[0]

    def zero_state():
        # Fresh all-zero hidden state for one sequence.
        return np.zeros((1, num_layers * size_layer))

    sess = tf.InteractiveSession()
    try:
        sess.run(tf.global_variables_initializer())

        def predict(window, state_forward, state_backward):
            # Inference-only run: returns (logits, (fw_state, bw_state)).
            # NOTE(review): Model wires a constant keep probability into its
            # dropout wrappers, so dropout is presumably still active here.
            return sess.run(
                [modelnn.logits, modelnn.last_state],
                feed_dict = {
                    modelnn.X: np.expand_dims(window, axis = 0),
                    modelnn.backward_hidden_layer: state_backward,
                    modelnn.forward_hidden_layer: state_forward,
                },
            )

        pbar = tqdm(range(epoch), desc = 'train loop')
        for _ in pbar:
            init_value_forward = zero_state()
            init_value_backward = zero_state()
            total_loss, total_acc = [], []
            for k in range(0, n_train - 1, timestamp):
                # Stop one row early so the shifted target always exists.
                index = min(k + timestamp, n_train - 1)
                batch_x = np.expand_dims(
                    df_train.iloc[k : index, :].values, axis = 0
                )
                batch_y = df_train.iloc[k + 1 : index + 1, :].values
                logits, last_state, _train_op, loss = sess.run(
                    [modelnn.logits, modelnn.last_state, modelnn.optimizer, modelnn.cost],
                    feed_dict = {
                        modelnn.X: batch_x,
                        modelnn.Y: batch_y,
                        modelnn.backward_hidden_layer: init_value_backward,
                        modelnn.forward_hidden_layer: init_value_forward,
                    },
                )
                init_value_forward = last_state[0]
                init_value_backward = last_state[1]
                total_loss.append(loss)
                total_acc.append(calculate_accuracy(batch_y[:, 0], logits[:, 0]))
            pbar.set_postfix(cost = np.mean(total_loss), acc = np.mean(total_acc))

        future_day = test_size

        # Row 0 has no prediction, so it is seeded with the first observation.
        output_predict = np.zeros((n_train + future_day, df_train.shape[1]))
        output_predict[0] = df_train.iloc[0]
        upper_b = (n_train // timestamp) * timestamp
        init_value_forward = zero_state()
        init_value_backward = zero_state()

        # Replay the training rows in full windows; predictions land one row
        # ahead of their inputs.
        for k in range(0, upper_b, timestamp):
            out_logits, last_state = predict(
                df_train.iloc[k : k + timestamp],
                init_value_forward,
                init_value_backward,
            )
            init_value_forward = last_state[0]
            init_value_backward = last_state[1]
            output_predict[k + 1 : k + timestamp + 1] = out_logits

        # Left-over rows that do not fill a whole window.
        if upper_b != n_train:
            out_logits, last_state = predict(
                df_train.iloc[upper_b:], init_value_forward, init_value_backward
            )
            output_predict[upper_b + 1 : n_train + 1] = out_logits
            # The partial window already produced the first future row.
            future_day -= 1

        init_value_forward = last_state[0]
        init_value_backward = last_state[1]

        # Autoregressive roll-out: feed the latest `timestamp` predictions
        # back in and keep only the newest output row.
        for i in range(future_day):
            o = output_predict[-future_day - timestamp + i:-future_day + i]
            out_logits, last_state = predict(
                o, init_value_forward, init_value_backward
            )
            init_value_forward = last_state[0]
            init_value_backward = last_state[1]
            output_predict[-future_day + i] = out_logits[-1]
    finally:
        # Release the session even if training fails, so repeated calls do
        # not pile up open interactive sessions.
        sess.close()

    output_predict = minmax.inverse_transform(output_predict)
    deep_future = anchor(output_predict[:, 0], 0.3)

    return deep_future[-test_size:]

スポンサーリンク

## 予測シミュレーション

# Run simulation_size independent train-and-forecast rounds; each call to
# forecast() builds and trains a new model, so results differ between rounds.
results = []
for i in range(simulation_size):
    print('simulation %d'%(i + 1))
    results.append(forecast())

WARNING: Logging before flag parsing goes to stderr.
W0820 14:26:53.048468 139902564276032 deprecation.py:323] From <ipython-input-11-2500790da2db>:12: GRUCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
W0820 14:26:53.051277 139902564276032 deprecation.py:323] From <ipython-input-11-2500790da2db>:16: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.

simulation 1

W0820 14:26:58.087230 139902564276032 lazy_loader.py:50]
The TensorFlow contrib module will not be included in TensorFlow 2.0.
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

W0820 14:26:58.104156 139902564276032 deprecation.py:323] From <ipython-input-11-2500790da2db>:42: bidirectional_dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use keras.layers.Bidirectional(keras.layers.RNN(cell)), which is equivalent to this API
W0820 14:26:58.105021 139902564276032 deprecation.py:323] From /root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py:464: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use keras.layers.RNN(cell), which is equivalent to this API
W0820 14:26:58.631040 139902564276032 deprecation.py:506] From /root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0820 14:26:58.640178 139902564276032 deprecation.py:506] From /root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py:564: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0820 14:26:58.652508 139902564276032 deprecation.py:506] From /root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py:574: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0820 14:26:59.334117 139902564276032 deprecation.py:323] From <ipython-input-11-2500790da2db>:67: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
train loop: 100%|██████████| 300/300 [01:45<00:00,  2.83it/s, acc=98, cost=0.000971]

simulation 2

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:47<00:00,  2.79it/s, acc=97.5, cost=0.00157]

simulation 3

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:51<00:00,  2.68it/s, acc=98.2, cost=0.000775]

simulation 4

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:47<00:00,  2.79it/s, acc=28.6, cost=1.15]

simulation 5

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:49<00:00,  2.74it/s, acc=98.2, cost=0.000843]

simulation 6

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:50<00:00,  2.70it/s, acc=31.9, cost=1.07]

simulation 7

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:50<00:00,  2.72it/s, acc=98, cost=0.000969]

simulation 8

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:48<00:00,  2.77it/s, acc=97.7, cost=0.00128]

simulation 9

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:47<00:00,  2.79it/s, acc=42.1, cost=0.765]

simulation 10

/root/.pyenv/versions/miniconda3-latest/envs/py368/lib/python3.6/site-packages/tensorflow/python/client/session.py:1735: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call InteractiveSession.close() to release resources held by the other session(s).
warnings.warn('An interactive session is already active. This can '
train loop: 100%|██████████| 300/300 [01:49<00:00,  2.75it/s, acc=98.3, cost=0.000701]


# Sanity filter: keep a forecast only if none of its values falls below the
# lowest observed close or above twice the highest observed close.
close_floor = np.min(df['Close'])
close_ceiling = np.max(df['Close']) * 2

accepted_results = []
for r in results:
    forecast_tail = np.array(r[-test_size:])
    if (forecast_tail < close_floor).sum() == 0 and \
            (forecast_tail > close_ceiling).sum() == 0:
        accepted_results.append(r)
len(accepted_results)

5

# Second filter: keep forecasts scoring above 88 against the true closes of
# the last test_size rows.
# NOTE(review): this selects runs using the held-out values themselves, so
# any accuracy reported on the survivors is optimistic.
true_close = df['Close'].iloc[-test_size:].values

accepted_results2 = []
for r in accepted_results:
    if calculate_accuracy(true_close, r) > 88:
        accepted_results2.append(r)
len(accepted_results2)

3
# Score every surviving forecast against the true closes of the test span.
accuracies = [calculate_accuracy(df['Close'].iloc[-test_size:].values, r) for r in accepted_results2]

# One line per surviving forecast, with the true closes drawn once in black.
plt.figure(figsize = (25, 15))
for no, r in enumerate(accepted_results2):
    plt.plot(r, label = 'forecast %d'%(no + 1))
plt.plot(df['Close'].iloc[-test_size:].values, label = 'true trend', c = 'black',lw=2)
plt.legend(prop={'size': 25})
plt.rc('xtick', labelsize=30)
plt.rc('ytick', labelsize=25)
plt.title('average accuracy: %.4f'%(np.mean(accuracies)),fontsize=25)
plt.show()




スポンサーリンク

フォローする