#include <torch/optim/adam.h>
#include <torch/types.h>
#include <torch/utils.h>
+#include <ATen/core/grad_mode.h>

#include <test/cpp/api/support.h>

@@ -247,3 +248,229 @@ TEST_F(RNNTest, BidirectionalFlattenParameters) {
  GRU gru(GRUOptions(100, 256).layers(2).bidirectional(true));
  gru->flatten_parameters();
}
+
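+// Copies the weights and biases of one layer/direction of an RNN-style
+// module into another. The flat parameter lists (w_ih, w_hh, b_ih, b_hh)
+// are indexed by layer * num_directions + direction, so index 1 of a
+// single-layer bidirectional module holds the reverse direction's
+// parameters.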
+template <typename Impl>
+void copyParameters(torch::nn::ModuleHolder<Impl>& target, size_t t_i,
+                    const torch::nn::ModuleHolder<Impl>& source, size_t s_i) {
+  at::NoGradGuard guard;
+  target->w_ih[t_i].copy_(source->w_ih[s_i]);
+  target->w_hh[t_i].copy_(source->w_hh[s_i]);
+  target->b_ih[t_i].copy_(source->b_ih[s_i]);
+  target->b_hh[t_i].copy_(source->b_hh[s_i]);
+}
+
+// This test is a port of Python code introduced here:
+// https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66
+// The reverse pass of a bidirectional GRU should behave like the
+// forward pass of a unidirectional GRU on the reversed input.
+void BidirectionalGRUReverseForward(bool cuda) {
+  auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
+      .device(cuda ? torch::kCUDA : torch::kCPU);
+  auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
+  auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});
+
+  auto gru_options = GRUOptions(1, 1).layers(1).batch_first(false);
+  GRU bi_grus {gru_options.bidirectional(true)};
+  GRU reverse_gru {gru_options.bidirectional(false)};
+
+  if (cuda) {
+    bi_grus->to(torch::kCUDA);
+    reverse_gru->to(torch::kCUDA);
+  }
+
+  // Now make sure the weights of the reverse GRU layer match
+  // those of the reverse direction of the bidirectional GRU:
+  copyParameters(reverse_gru, 0, bi_grus, 1);
+
+  auto bi_output = bi_grus->forward(input);
+  auto reverse_output = reverse_gru->forward(input_reversed);
+
+  if (cuda) {
+    bi_output.output = bi_output.output.to(torch::kCPU);
+    bi_output.state = bi_output.state.to(torch::kCPU);
+    reverse_output.output = reverse_output.output.to(torch::kCPU);
+    reverse_output.state = reverse_output.state.to(torch::kCPU);
+  }
+
+  ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
+  auto size = bi_output.output.size(0);
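+  // A bidirectional RNN concatenates the forward and reverse hidden
+  // states along the last output dimension; with hidden_size == 1,
+  // index 1 selects the reverse direction at each time step.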
+  for (int i = 0; i < size; i++) {
+    ASSERT_EQ(bi_output.output[i][0][1].item<float>(),
+              reverse_output.output[size - 1 - i][0][0].item<float>());
+  }
+  // The hidden states of the reversed GRU sit at the odd indices
+  // of the first dimension.
+  ASSERT_EQ(bi_output.state[1][0][0].item<float>(),
+            reverse_output.state[0][0][0].item<float>());
+}
+
+TEST_F(RNNTest, BidirectionalGRUReverseForward) {
+  BidirectionalGRUReverseForward(false);
+}
+
+TEST_F(RNNTest, BidirectionalGRUReverseForward_CUDA) {
+  BidirectionalGRUReverseForward(true);
+}
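+
+// Note (an editorial aside, not part of the original test): the
+// hand-reversed input above could presumably also be built with
+// torch::flip along the time dimension, e.g.
+//   auto input_reversed = torch::flip(input, {0});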
+
+// The reverse pass of a bidirectional LSTM should behave like the
+// forward pass of a unidirectional LSTM on the reversed input.
+void BidirectionalLSTMReverseForwardTest(bool cuda) {
+  auto opt = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false)
+      .device(cuda ? torch::kCUDA : torch::kCPU);
+  auto input = torch::tensor({1, 2, 3, 4, 5}, opt).reshape({5, 1, 1});
+  auto input_reversed = torch::tensor({5, 4, 3, 2, 1}, opt).reshape({5, 1, 1});
+
+  auto lstm_opt = LSTMOptions(1, 1).layers(1).batch_first(false);
+
+  LSTM bi_lstm {lstm_opt.bidirectional(true)};
+  LSTM reverse_lstm {lstm_opt.bidirectional(false)};
+
+  if (cuda) {
+    bi_lstm->to(torch::kCUDA);
+    reverse_lstm->to(torch::kCUDA);
+  }
+
+  // Now make sure the weights of the reverse LSTM layer match
+  // those of the reverse direction of the bidirectional LSTM:
+  copyParameters(reverse_lstm, 0, bi_lstm, 1);
+
+  auto bi_output = bi_lstm->forward(input);
+  auto reverse_output = reverse_lstm->forward(input_reversed);
+
+  if (cuda) {
+    bi_output.output = bi_output.output.to(torch::kCPU);
+    bi_output.state = bi_output.state.to(torch::kCPU);
+    reverse_output.output = reverse_output.output.to(torch::kCPU);
+    reverse_output.state = reverse_output.state.to(torch::kCPU);
+  }
+
+  ASSERT_EQ(bi_output.output.size(0), reverse_output.output.size(0));
+  auto size = bi_output.output.size(0);
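+  // As with the GRU test, index 1 of the last output dimension holds
+  // the reverse direction's hidden state.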
+  for (int i = 0; i < size; i++) {
+    ASSERT_EQ(bi_output.output[i][0][1].item<float>(),
+              reverse_output.output[size - 1 - i][0][0].item<float>());
+  }
+  // The hidden states of the reversed LSTM sit at the odd indices
+  // of the second dimension.
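+  // (The LSTM's returned state stacks the hidden state at index 0 and
+  // the cell state at index 1 along its first dimension.)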
+  ASSERT_EQ(bi_output.state[0][1][0][0].item<float>(),
+            reverse_output.state[0][0][0][0].item<float>());
+  ASSERT_EQ(bi_output.state[1][1][0][0].item<float>(),
+            reverse_output.state[1][0][0][0].item<float>());
+}
+
+TEST_F(RNNTest, BidirectionalLSTMReverseForward) {
+  BidirectionalLSTMReverseForwardTest(false);
+}
+
+TEST_F(RNNTest, BidirectionalLSTMReverseForward_CUDA) {
+  BidirectionalLSTMReverseForwardTest(true);
+}
+
+TEST_F(RNNTest, BidirectionalMultilayerGRU_CPU_vs_CUDA) {
+  // Create two GRUs with the same options
+  auto opt = GRUOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
+  GRU gru_cpu {opt};
+  GRU gru_cuda {opt};
+
+  // Copy weights and biases from CPU GRU to CUDA GRU
+  {
+    at::NoGradGuard guard;
+    const auto num_directions = gru_cpu->options.bidirectional_ ? 2 : 1;
+    for (int64_t layer = 0; layer < gru_cpu->options.layers_; layer++) {
+      for (auto direction = 0; direction < num_directions; direction++) {
+        const auto layer_idx = (layer * num_directions) + direction;
+        copyParameters(gru_cuda, layer_idx, gru_cpu, layer_idx);
+      }
+    }
+  }
+
+  gru_cpu->flatten_parameters();
+  gru_cuda->flatten_parameters();
+
+  // Move GRU to CUDA
+  gru_cuda->to(torch::kCUDA);
+
+  // Create the same inputs
+  auto input_opt = torch::TensorOptions()
+      .dtype(torch::kFloat32).requires_grad(false);
+  auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
+      .reshape({3, 1, 2});
+  auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, input_opt)
+      .reshape({3, 1, 2}).to(torch::kCUDA);
+
+
+  // Call forward on both GRUs
+  auto output_cpu = gru_cpu->forward(input_cpu);
+  auto output_cuda = gru_cuda->forward(input_cuda);
+
+  // Move the CUDA outputs back to the CPU for comparison
+  output_cuda.output = output_cuda.output.to(torch::kCPU);
+  output_cuda.state = output_cuda.state.to(torch::kCPU);
+
+  // Assert that the CPU and CUDA outputs have the same shape and values
+  ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
+  for (int i = 0; i < output_cpu.output.dim(); i++) {
+    ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
+  }
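+  // The CPU and cuDNN kernels may accumulate floating-point values in
+  // different orders, so compare with a small tolerance rather than
+  // exact equality.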
+  for (int i = 0; i < output_cpu.output.size(0); i++) {
+    for (int j = 0; j < output_cpu.output.size(1); j++) {
+      for (int k = 0; k < output_cpu.output.size(2); k++) {
+        ASSERT_NEAR(
+            output_cpu.output[i][j][k].item<float>(),
+            output_cuda.output[i][j][k].item<float>(), 1e-5);
+      }
+    }
+  }
+}
+
+TEST_F(RNNTest, BidirectionalMultilayerLSTM_CPU_vs_CUDA) {
+  // Create two LSTMs with the same options
+  auto opt = LSTMOptions(2, 4).layers(3).batch_first(false).bidirectional(true);
+  LSTM lstm_cpu {opt};
+  LSTM lstm_cuda {opt};
+
+  // Copy weights and biases from CPU LSTM to CUDA LSTM
+  {
+    at::NoGradGuard guard;
+    const auto num_directions = lstm_cpu->options.bidirectional_ ? 2 : 1;
+    for (int64_t layer = 0; layer < lstm_cpu->options.layers_; layer++) {
+      for (auto direction = 0; direction < num_directions; direction++) {
+        const auto layer_idx = (layer * num_directions) + direction;
+        copyParameters(lstm_cuda, layer_idx, lstm_cpu, layer_idx);
+      }
+    }
+  }
+
+  lstm_cpu->flatten_parameters();
+  lstm_cuda->flatten_parameters();
+
+  // Move LSTM to CUDA
+  lstm_cuda->to(torch::kCUDA);
+
+  auto options = torch::TensorOptions()
+      .dtype(torch::kFloat32).requires_grad(false);
+  auto input_cpu = torch::tensor({1, 2, 3, 4, 5, 6}, options)
+      .reshape({3, 1, 2});
+  auto input_cuda = torch::tensor({1, 2, 3, 4, 5, 6}, options)
+      .reshape({3, 1, 2}).to(torch::kCUDA);
+
+  // Call forward on both LSTMs
+  auto output_cpu = lstm_cpu->forward(input_cpu);
+  auto output_cuda = lstm_cuda->forward(input_cuda);
+
+  // Move the CUDA outputs back to the CPU for comparison
+  output_cuda.output = output_cuda.output.to(torch::kCPU);
+  output_cuda.state = output_cuda.state.to(torch::kCPU);
+
+  // Assert that the CPU and CUDA outputs have the same shape and values
+  ASSERT_EQ(output_cpu.output.dim(), output_cuda.output.dim());
+  for (int i = 0; i < output_cpu.output.dim(); i++) {
+    ASSERT_EQ(output_cpu.output.size(i), output_cuda.output.size(i));
+  }
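+  // As in the GRU test above, allow a small floating-point tolerance
+  // between the CPU and cuDNN results.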
+  for (int i = 0; i < output_cpu.output.size(0); i++) {
+    for (int j = 0; j < output_cpu.output.size(1); j++) {
+      for (int k = 0; k < output_cpu.output.size(2); k++) {
+        ASSERT_NEAR(
+            output_cpu.output[i][j][k].item<float>(),
+            output_cuda.output[i][j][k].item<float>(), 1e-5);
+      }
+    }
+  }
+}