227 changes: 227 additions & 0 deletions test/cpp/api/rnn.cpp
@@ -5,6 +5,7 @@
#include <torch/optim/adam.h>
#include <torch/types.h>
#include <torch/utils.h>
#include <ATen/core/grad_mode.h>

#include <test/cpp/api/support.h>

@@ -247,3 +248,229 @@ TEST_F(RNNTest, BidirectionalFlattenParameters) {
GRU gru(GRUOptions(100, 256).layers(2).bidirectional(true));
gru->flatten_parameters();
}

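// Copies the weights and biases at index `s_i` of `source` into index `t_i`
// of `target`, without tracking gradients.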
template <typename Impl>
void copyParameters(torch::nn::ModuleHolder<Impl>& target, size_t t_i,
const torch::nn::ModuleHolder<Impl>& source, size_t s_i) {
at::NoGradGuard guard;
target->w_ih[t_i].copy_(source->w_ih[s_i]);
target->w_hh[t_i].copy_(source->w_hh[s_i]);
target->b_ih[t_i].copy_(source->b_ih[s_i]);
target->b_hh[t_i].copy_(source->b_hh[s_i]);
}

// This test is a port of python code introduced here:
// https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
// The reverse direction of a bidirectional GRU should behave like a
// regular forward pass of a unidirectional GRU over the reversed input.
void BidirectionalGRUReverseForward(bool cuda) {
auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
.device(cuda ? torch::kCUDA : torch::kCPU);
auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});

auto gru_options = GRUOptions(1, 1).layers(1).batch_first(false);
GRU bi_grus {gru_options.bidirectional(true)};
GRU reverse_gru {gru_options.bidirectional(false)};

if (cuda) {
bi_grus->to(torch::kCUDA);
reverse_gru->to(torch::kCUDA);
}

// Make sure the weights of the unidirectional GRU match those of the
// reverse direction (index 1) of the bidirectional GRU:
copyParameters(reverse_gru, 0, bi_grus, 1);

auto bi_output = bi_grus->forward(input);
auto reverse_output = reverse_gru->forward(input_reversed);

if (cuda) {
bi_output.output = bi_output.output.to(torch::kCPU);
bi_output.state = bi_output.state.to(torch::kCPU);
reverse_output.output = reverse_output.output.to(torch::kCPU);
reverse_output.state = reverse_output.state.to(torch::kCPU);
}

ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
auto size = bi_output.output.size(0);
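// Feature index 1 of the bidirectional output holds the reverse direction;
// read back to front, it should match the unidirectional output.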
for (int i = 0; i < size; i++) {
ASSERT_EQ(bi_output.output[i][0][1].item<float>(),
reverse_output.output[size - 1 - i][0][0].item<float>());
}
// The hidden state of the reverse direction sits at the odd indices
// of the first dimension of the state tensor.
ASSERT_EQ(bi_output.state[1][0][0].item<float>(),
reverse_output.state[0][0][0].item<float>());
}

TEST_F(RNNTest, BidirectionalGRUReverseForward) {
BidirectionalGRUReverseForward(false);
}

TEST_F(RNNTest, BidirectionalGRUReverseForward_CUDA) {
BidirectionalGRUReverseForward(true);
}

// The reverse direction of a bidirectional LSTM should behave like a
// regular forward pass of a unidirectional LSTM over the reversed input.
void BidirectionalLSTMReverseForwardTest(bool cuda) {
auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
.device(cuda ? torch::kCUDA : torch::kCPU);
auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});

auto lstm_opt = LSTMOptions(1, 1).layers(1).batch_first(false);

LSTM bi_lstm {lstm_opt.bidirectional(true)};
LSTM reverse_lstm {lstm_opt.bidirectional(false)};

if (cuda) {
bi_lstm->to(torch::kCUDA);
reverse_lstm->to(torch::kCUDA);
}

// Make sure the weights of the unidirectional LSTM match those of the
// reverse direction (index 1) of the bidirectional LSTM:
copyParameters(reverse_lstm, 0, bi_lstm, 1);

auto bi_output = bi_lstm->forward(input);
auto reverse_output = reverse_lstm->forward(input_reversed);

if (cuda) {
bi_output.output = bi_output.output.to(torch::kCPU);
bi_output.state = bi_output.state.to(torch::kCPU);
reverse_output.output = reverse_output.output.to(torch::kCPU);
reverse_output.state = reverse_output.state.to(torch::kCPU);
}

ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
auto size = bi_output.output.size(0);
for (int i = 0; i < size; i++) {
ASSERT_EQ(bi_output.output[i][0][1].item<float>(),
reverse_output.output[size - 1 - i][0][0].item<float>());
}
// The hidden and cell states of the reverse direction sit at the odd
// indices of the second dimension of the state tensor.
ASSERT_EQ(bi_output.state[0][1][0][0].item<float>(),
reverse_output.state[0][0][0][0].item<float>());
ASSERT_EQ(bi_output.state[1][1][0][0].item<float>(),
reverse_output.state[1][0][0][0].item<float>());
}

TEST_F(RNNTest, BidirectionalLSTMReverseForward) {
BidirectionalLSTMReverseForwardTest(false);
}

TEST_F(RNNTest, BidirectionalLSTMReverseForward_CUDA) {
BidirectionalLSTMReverseForwardTest(true);
}

TEST_F(RNNTest, BidirectionalMultilayerGRU_CPU_vs_CUDA) {
// Create two GRUs with the same options
auto opt = GRUOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
GRU gru_cpu {opt};
GRU gru_cuda {opt};

// Copy weights and biases from CPU GRU to CUDA GRU
{
at::NoGradGuard guard;
const auto num_directions = gru_cpu->options.bidirectional_ ? 2 : 1;
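// The flat parameter vectors are indexed as layer * num_directions + direction.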
for (int64_t layer = 0; layer < gru_cpu->options.layers_; layer++) {
for (auto direction = 0; direction < num_directions; direction++) {
const auto layer_idx = (layer * num_directions) + direction;
copyParameters(gru_cuda, layer_idx, gru_cpu, layer_idx);
}
}
}

gru_cpu->flatten_parameters();
gru_cuda->flatten_parameters();

// Move GRU to CUDA
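// (this relies on RNNImplBase::to(Device) also moving the cached per-layer
// weight and bias tensors; see the change to rnn.cpp below)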
gru_cuda->to(torch::kCUDA);

// Create the same inputs
auto input_opt = torch::TensorOptions()
.dtype(torch::kFloat32).requires_grad(false);
auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
.reshape({3, 1, 2});
auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
.reshape({3, 1, 2}).to(torch::kCUDA);

// Call forward on both GRUs
auto output_cpu = gru_cpu->forward(input_cpu);
auto output_cuda = gru_cuda->forward(input_cuda);

// Move the CUDA results to the CPU for comparison
output_cuda.output = output_cuda.output.to(torch::kCPU);
output_cuda.state = output_cuda.state.to(torch::kCPU);

// Assert that the output has the same shape and values on CPU and CUDA
ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
for (int i = 0; i < output_cpu.output.dim(); i++) {
ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
}
for (int i = 0; i < output_cpu.output.size(0); i++) {
for (int j = 0; j < output_cpu.output.size(1); j++) {
for (int k = 0; k < output_cpu.output.size(2); k++) {
ASSERT_NEAR(
output_cpu.output[i][j][k].item<float>(),
output_cuda.output[i][j][k].item<float>(), 1e-5);
}
}
}
}

TEST_F(RNNTest, BidirectionalMultilayerLSTM_CPU_vs_CUDA) {
// Create two LSTMs with the same options
auto opt = LSTMOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
LSTM lstm_cpu {opt};
LSTM lstm_cuda {opt};

// Copy weights and biases from CPU LSTM to CUDA LSTM
{
at::NoGradGuard guard;
const auto num_directions = lstm_cpu->options.bidirectional_ ? 2 : 1;
for (int64_t layer = 0; layer < lstm_cpu->options.layers_; layer++) {
for (auto direction = 0; direction < num_directions; direction++) {
const auto layer_idx = (layer * num_directions) + direction;
copyParameters(lstm_cuda, layer_idx, lstm_cpu, layer_idx);
}
}
}

lstm_cpu->flatten_parameters();
lstm_cuda->flatten_parameters();

// Move LSTM to CUDA
lstm_cuda->to(torch::kCUDA);

auto options = torch::TensorOptions()
.dtype(torch::kFloat32).requires_grad(false);
auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, options)
.reshape({3, 1, 2});
auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, options)
.reshape({3, 1, 2}).to(torch::kCUDA);

// Call forward on both LSTMs
auto output_cpu = lstm_cpu->forward(input_cpu);
auto output_cuda = lstm_cuda->forward(input_cuda);

// Move the CUDA results to the CPU for comparison
output_cuda.output = output_cuda.output.to(torch::kCPU);
output_cuda.state = output_cuda.state.to(torch::kCPU);

// Assert that the output has the same shape and values on CPU and CUDA
ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
for (int i = 0; i < output_cpu.output.dim(); i++) {
ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
}
for (int i = 0; i < output_cpu.output.size(0); i++) {
for (int j = 0; j < output_cpu.output.size(1); j++) {
for (int k = 0; k < output_cpu.output.size(2); k++) {
ASSERT_NEAR(
output_cpu.output[i][j][k].item<float>(),
output_cuda.output[i][j][k].item<float>(), 1e-5);
}
}
}
}
19 changes: 17 additions & 2 deletions torch/csrc/api/src/nn/modules/rnn.cpp
@@ -102,6 +102,18 @@ void RNNImplBase<Derived>::to(torch::Dtype dtype, bool non_blocking) {
template <typename Derived>
void RNNImplBase<Derived>::to(torch::Device device, bool non_blocking) {
nn::Module::to(device, non_blocking);
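// nn::Module::to() moves the registered parameters; also move the cached
// per-layer, per-direction weight and bias tensors so that the
// flatten_parameters() call below works with tensors on the target device.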
const auto num_directions = options.bidirectional_ ? 2 : 1;
for (int64_t layer = 0; layer < options.layers_; layer++) {
for (auto direction = 0; direction < num_directions; direction++) {
const auto layer_idx = (layer * num_directions) + direction;
w_ih[layer_idx] = w_ih[layer_idx].to(device, non_blocking);
w_hh[layer_idx] = w_hh[layer_idx].to(device, non_blocking);
if (options.with_bias_) {
b_ih[layer_idx] = b_ih[layer_idx].to(device, non_blocking);
b_hh[layer_idx] = b_hh[layer_idx].to(device, non_blocking);
}
}
}
flatten_parameters();
}

@@ -144,8 +156,10 @@ RNNOutput RNNImplBase<Derived>::generic_forward(
if (!state.defined()) {
// #layers * #directions, batch size, state size
const auto batch_size = input.size(options.batch_first_ ? 0 : 1);
const auto num_directions = options.bidirectional_ ? 2 : 1;
state = torch::zeros(
{options.layers_, batch_size, options.hidden_size_}, input.options());
{options.layers_ * num_directions, batch_size, options.hidden_size_},
input.options());
}
Tensor output, new_state;
std::tie(output, new_state) = function(
@@ -269,8 +283,9 @@ RNNOutput LSTMImpl::forward(const Tensor& input, Tensor state) {
if (!state.defined()) {
// 2 for hidden state and cell state, then #layers * #directions, batch size, state size
const auto batch_size = input.size(options.batch_first_ ? 0 : 1);
const auto num_directions = options.bidirectional_ ? 2 : 1;
state = torch::zeros(
{2, options.layers_, batch_size, options.hidden_size_},
{2, options.layers_ * num_directions, batch_size, options.hidden_size_},
input.options());
}
Tensor output, hidden_state, cell_state;