Multi layer RNN with LSTM in Tensorflow
I have coded a single-layer RNN with LSTM in TensorFlow (ver 1.5) using Python (ver 3.6). I would like to add 3 hidden layers to this RNN (i.e., one input layer, one output layer, and three hidden layers). I have read about the cell's state, stack, unstack, etc., but I am still confused about how to put these things together and upgrade my code. Below is my code for the single-layer RNN. Could you please help me upgrade it? (Note: I am very new to TensorFlow and Python :) ). `
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import sklearn.metrics as sm
# --- Model / training hyperparameters ---------------------------------------
# Column layout of each CSV row: `inputs` feature columns followed by the
# target column.
inputs = 12            # number of input (feature) columns
outputs = 1            # number of output (target) columns
num_hidden = 800       # number of neurons in the recurrent layer
# Optimisation settings.
num_epochs = 100       # passes over the training batches
batch_size = 80        # rows grouped into one sequence fed to the RNN
learning_rate = 1e-5   # step size handed to the Adam optimizer
def _load_xy(csv_path, n_inputs):
    """Read a header-less numeric CSV into ``(features, targets)`` arrays.

    Each row is expected to hold ``n_inputs`` feature columns followed by
    the target in column ``n_inputs``.  All values are parsed as floats.

    Args:
        csv_path: Path of the CSV file to read.
        n_inputs: Number of leading columns that are features.

    Returns:
        A tuple ``(features, targets)`` of NumPy arrays; one entry per
        non-blank row of the file.

    Raises:
        ValueError: If a cell cannot be parsed as a float.
        IndexError: If a non-blank row has fewer than ``n_inputs + 1`` columns.
    """
    features, targets = [], []
    # The `with` block closes the file; no explicit close() is needed.
    with open(csv_path, 'r', newline='') as csv_f:
        for row in csv.reader(csv_f):
            if not row:
                continue  # tolerate blank lines (e.g. a trailing newline)
            features.append([float(value) for value in row[:n_inputs]])
            targets.append(float(row[n_inputs]))
    return np.array(features), np.array(targets)


# Training data
input2, output2 = _load_xy('train1_leading.csv', inputs)
# Drop the trailing rows that do not fill a complete batch so the data can be
# reshaped into whole batches.
usable_rows = len(input2) - (len(input2) % batch_size)
x_data = input2[:usable_rows]
y_data = output2[:usable_rows]
# -1 lets NumPy infer how many batches there are.
x_batches = x_data.reshape(-1, batch_size, inputs)
y_batches = y_data.reshape(-1, batch_size, outputs)

# Testing data
inputt1, output1 = _load_xy('valid1_leading.csv', inputs)
# Evaluate on the first `batch_size` rows; features and targets must be cut
# from the same rows so predictions line up with the actual values.
X_test = inputt1[:batch_size].reshape(1, batch_size, inputs)
Y_test = output1[:batch_size].reshape(1, batch_size, outputs)
# Configure RNN
tf.reset_default_graph()  # reset graphs

# Placeholders are fed arrays shaped [num_batches, batch_size, columns].
# NOTE(review): with dynamic_rnn's default time_major=False, axis 1
# (batch_size here) is the axis the RNN unrolls over -- confirm that is the
# intended sequence axis.
X = tf.placeholder(tf.float32, [None, batch_size, inputs])
Y = tf.placeholder(tf.float32, [None, batch_size, outputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=num_hidden, activation=tf.nn.softsign)  # create RNN object
rnn_output, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

# Flatten every per-step RNN output into one row of `num_hidden` values so a
# single dense projection can be applied; -1 lets TensorFlow infer the row
# count (a literal 1 only fits a tensor holding exactly num_hidden values).
stacked_rnn_output = tf.reshape(rnn_output, [-1, num_hidden])
weight = tf.Variable(tf.random_normal([num_hidden, outputs]))
bias = tf.Variable(tf.random_normal([outputs]))
stacked_outputs = tf.matmul(stacked_rnn_output, weight) + bias
# Restore the [num_batches, batch_size, outputs] layout so it matches Y.
outputRNN = tf.reshape(stacked_outputs, [-1, batch_size, outputs])  # results

# Keyword arguments make the label/prediction roles explicit.
loss = tf.losses.mean_squared_error(labels=Y, predictions=outputRNN)  # cost function
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
# Train on the prepared batches, then predict on the held-out test batch.
train_feed = {X: x_batches, Y: y_batches}
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # initialize all the variables
    for ep in range(num_epochs):
        sess.run(training_op, feed_dict=train_feed)
        # Re-evaluate the loss after the update so the printed value reflects
        # the weights produced by this epoch.
        mse = sess.run(loss, feed_dict=train_feed)
        print(ep, "\tMSE:", mse)
    # Predictions must be fetched while the session (and its trained
    # variables) is still open.
    y_pred = sess.run(outputRNN, feed_dict={X: X_test})

# Plotting and scoring only need the NumPy results, so they run after the
# session has been closed.
plt.title("Forecast vs Actual", fontsize=14)
plt.plot(pd.Series(np.ravel(Y_test)), "b", markersize=10, label="Actual")
plt.plot(pd.Series(np.ravel(y_pred)), "r", markersize=10, label="Forecast")
plt.legend(loc="upper left")
plt.xlabel("Time Periods")
plt.show()

tt = sm.mean_squared_error(np.ravel(Y_test), np.ravel(y_pred))
print('MSE of Test data', tt)
2 answers

I've used a three-layer LSTM with `dynamic_rnn` below; I'm sure you can adapt this to whatever your use case requires.

import tensorflow as tf

num_layers = 3
state_size = 100
init_state = tf.placeholder(tf.float32, [num_layers, 2, None, state_size])  # None is for batch_size
state_per_layer_list = tf.unstack(init_state, axis=0)
rnn_tuple_state = tuple(
    [tf.contrib.rnn.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1])
     for idx in range(num_layers)]
)
cell = tf.contrib.rnn.LSTMCell(state_size, state_is_tuple=True)
cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
x = tf.placeholder(tf.float32, [None, None, state_size])
current_pred, new_states = tf.nn.dynamic_rnn(cell, x, initial_state=rnn_tuple_state)
new_states = tf.stack(new_states)
`new_states` is `Tensor("stack:0", shape=(3, 2, ?, 100), dtype=float32)`, which can now be fed into the `init_state` placeholder for the next run.
Here is code that works for me. Check out https://www.tensorflow.org/tutorials/recurrent for more info on this topic. The dynamic_rnn handles the passing of states and inputs.
# NOTE(review): this is a fragment -- `self`, `num_hidden` and `num_layers`
# are provided by surrounding code that is not shown here.
def rnn_cell():
    """Build one new BasicRNNCell; called once per layer so each layer gets its own cell object."""
    return tf.contrib.rnn.BasicRNNCell(num_units=num_hidden, activation=tf.nn.softsign)

# Stack `num_layers` separately constructed cells into a single multi-layer cell.
self.stacked_rnn = tf.contrib.rnn.MultiRNNCell([rnn_cell() for _ in range(num_layers)])
# dynamic_rnn unrolls the stacked cell over the input and returns the
# outputs together with the final state.
final_outputs, final_state = tf.nn.dynamic_rnn(
    cell=self.stacked_rnn, inputs=self.input_x, dtype=tf.float32)
If you want more visibility you can follow the example from the link above
# NOTE(review): this is a fragment -- `lstm_size`, `number_of_layers`,
# `batch_size`, `num_steps` and `words` are provided by surrounding code that
# is not shown here.
def lstm_cell():
    """Build one new BasicLSTMCell of size `lstm_size`."""
    return tf.contrib.rnn.BasicLSTMCell(lstm_size)

stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [lstm_cell() for _ in range(number_of_layers)])

# Start from the all-zero state and step the stacked cell manually, one
# time step per iteration, carrying the state forward.
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
for i in range(num_steps):
    # The value of state is updated after processing each batch of words.
    output, state = stacked_lstm(words[:, i], state)

    # The rest of the code.
    # ...

final_state = state