Merged
9 changes: 9 additions & 0 deletions aten/src/ATen/TensorImpl.cpp
@@ -1,5 +1,7 @@
#include <ATen/TensorImpl.h>

#include <ATen/Tensor.h>
#include <ATen/optional.h>

namespace at {
Tensor& TensorImpl::grad() {
@@ -14,6 +16,13 @@ Tensor TensorImpl::detach() const {
AT_ERROR("detach is not implemented for Tensor");
}

void TensorImpl::backward(
at::optional<Tensor> gradient,
bool keep_graph,
bool create_graph) {
AT_ERROR("backward is not implemented for Tensor");
}

void TensorImpl::set_data(Tensor new_data) {
AT_ERROR("set_type is not implemented for Tensor");
}
8 changes: 8 additions & 0 deletions aten/src/ATen/TensorImpl.h
@@ -11,6 +11,9 @@ class Scalar;
struct Type;
struct Storage;
struct Tensor;

template<typename T>
class optional;
} // namespace at

namespace at {
@@ -71,6 +74,11 @@ struct TensorImpl : public Retainable {
AT_ERROR("detach_ is not implemented for Tensor");
}

AT_API virtual void backward(
at::optional<Tensor> gradient,
bool keep_graph,
bool create_graph);

AT_API virtual void set_data(Tensor new_data);

protected:
12 changes: 10 additions & 2 deletions aten/src/ATen/templates/Tensor.h
@@ -149,6 +149,14 @@ struct Tensor : public detail::TensorBase {
pImpl->detach_();
}

/// Computes the gradient of current tensor w.r.t. graph leaves.
void backward(
at::optional<Tensor> gradient = at::nullopt,
bool keep_graph = false,
bool create_graph = false) {
pImpl->backward(std::move(gradient), keep_graph, create_graph);
}

friend void detail::set_data(Tensor& tensor, Tensor new_data);

// STOP. Thinking of adding a method here, which only makes use
@@ -158,10 +166,10 @@ struct Tensor : public detail::TensorBase {
//Tensor * add(Tensor & b);
${tensor_method_declarations}

template <typename F, typename... Args>
template <typename F, typename... Args>
auto m(F func, Args&&... params) const -> decltype(func(*this, std::forward<Args>(params)...)) {
return func(*this, std::forward<Args>(params)...);
}
}
};

namespace detail {
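The method added above defaults to an implicit gradient, with the graph buffers freed after the call. A minimal usage sketch against that signature, assuming `loss` is a scalar Variable and `out`/`grad` are a non-scalar output and a matching gradient tensor (these names are illustrative, not part of this diff):

loss.backward();                      // implicit gradient, graph buffers freed afterwards
loss.backward(at::nullopt,
              /*keep_graph=*/true,    // keep buffers so backward can run again
              /*create_graph=*/false);
out.backward(grad);                   // explicit gradient for a non-scalar output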
24 changes: 12 additions & 12 deletions test/cpp/api/container.cpp
@@ -46,7 +46,7 @@ TEST_CASE("containers") {
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 3);
REQUIRE(s.ndimension() == 0);
for (auto i = 0; i < 3; i++) {
@@ -62,7 +62,7 @@
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 4);
REQUIRE(s.ndimension() == 0);
for (auto i = 0; i < 4; i++) {
@@ -79,7 +79,7 @@
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 4);
REQUIRE(s.ndimension() == 0);
for (auto i = 0; i < 4; i++) {
@@ -96,7 +96,7 @@
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 5);
REQUIRE(s.ndimension() == 0);
for (auto i = 0; i < 5; i++) {
@@ -114,7 +114,7 @@
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 2);
REQUIRE(s.ndimension() == 0);
REQUIRE(y.size(0) == 10);
@@ -135,7 +135,7 @@
x = x.clamp_min(0); // relu
}

backward(x);
x.backward();
REQUIRE(x.ndimension() == 2);
REQUIRE(x.size(0) == 1000);
REQUIRE(x.size(1) == 100);
@@ -153,7 +153,7 @@
x = l2->forward({x})[0].clamp_min(0);
x = l3->forward({x})[0].clamp_min(0);

backward(x);
x.backward();
REQUIRE(x.ndimension() == 2);
REQUIRE(x.size(0) == 1000);
REQUIRE(x.size(1) == 100);
@@ -171,7 +171,7 @@
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 2);
REQUIRE(s.ndimension() == 0);
REQUIRE(y.size(0) == 10);
@@ -186,7 +186,7 @@
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 3);
REQUIRE(y.size(0) == 2);
REQUIRE(y.size(1) == 3);
@@ -199,7 +199,7 @@
Variable x = Var(at::CPU(at::kFloat).ones(100));
Variable y = dropout->forward({x})[0];

backward(y);
y.backward();
REQUIRE(y.ndimension() == 1);
REQUIRE(y.size(0) == 100);
// TODO: These two tests are flaky
@@ -254,7 +254,7 @@ TEST_CASE("containers_cuda", "[cuda]") {
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 2);
REQUIRE(s.ndimension() == 0);
REQUIRE(y.size(0) == 10);
@@ -271,7 +271,7 @@ TEST_CASE("containers_cuda", "[cuda]") {
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(y.ndimension() == 2);
REQUIRE(s.ndimension() == 0);
REQUIRE(y.size(0) == 10);
4 changes: 2 additions & 2 deletions test/cpp/api/integration.cpp
@@ -203,7 +203,7 @@ bool test_mnist(
Variable loss = at::nll_loss(x, y);

optim->zero_grad();
backward(loss);
loss.backward();
optim->step();
}
}
@@ -278,7 +278,7 @@ TEST_CASE("integration") {
auto loss = at::stack(policy_loss).sum() + at::stack(value_loss).sum();

optim->zero_grad();
backward(loss);
loss.backward();
optim->step();

rewards.clear();
24 changes: 23 additions & 1 deletion test/cpp/api/misc.cpp
@@ -16,7 +16,7 @@ TEST_CASE("misc") {
auto y = model->forward({x})[0];
Variable s = y.sum();

backward(s);
s.backward();
REQUIRE(!model->parameters()["weight"].grad().defined());
}

@@ -45,6 +45,28 @@ TEST_CASE("misc_cuda", "[cuda]") {
}
}

TEST_CASE("autograd") {
auto x = autograd::make_variable(
at::randn(at::CPU(at::kFloat), {3, 3}), /*requires_grad=*/true);
auto y = autograd::make_variable(
at::randn(at::CPU(at::kFloat), {3, 3}), /*requires_grad=*/false);
auto z = x * y;
SECTION("derivatives of zero-dim tensors") {
z.sum().backward();
REQUIRE(x.grad().allclose(y));
}
SECTION("derivatives of tensors") {
z.backward();
REQUIRE(x.grad().allclose(y));
}
SECTION("custom gradient inputs") {
z.sum().backward(
autograd::make_variable(at::ones(at::CPU(at::kFloat), {1}) * 2));
REQUIRE(x.grad().allclose(y * 2));
}
// Assume everything else is safe from PyTorch tests.
}

TEST_CASE("expanding-array") {
SECTION("successful construction") {
SECTION("initializer_list") {
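The new test above covers implicit, non-scalar, and custom-gradient calls. One path it does not exercise is keep_graph; a hedged sketch of that case, reusing the fixtures above and relying on standard autograd semantics (gradients accumulate across calls):

auto x = autograd::make_variable(
    at::randn(at::CPU(at::kFloat), {3, 3}), /*requires_grad=*/true);
auto y = autograd::make_variable(
    at::randn(at::CPU(at::kFloat), {3, 3}), /*requires_grad=*/false);
auto z = x * y;
z.sum().backward(at::nullopt, /*keep_graph=*/true, /*create_graph=*/false);
z.sum().backward();  // a second pass would fail if the first call had freed the graph
// x.grad() now holds the accumulated gradient, i.e. 2 * y.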
2 changes: 1 addition & 1 deletion test/cpp/api/module.cpp
@@ -47,7 +47,7 @@ TEST_CASE("module/zero-grad") {
auto model = Linear(3, 4).build();
auto weight = Var(at::ones(at::CPU(at::kFloat), {8, 3}));
auto loss = model->forward({weight}).front().sum();
backward(loss);
loss.backward();
for (auto& parameter : model->parameters()) {
Variable grad = parameter.second.grad();
REQUIRE(grad.defined());
2 changes: 1 addition & 1 deletion test/cpp/api/optim.cpp
@@ -30,7 +30,7 @@ bool test_optimizer_xor(Optimizer optim, std::shared_ptr<ContainerList> model) {
for (auto& layer : *model)
x = layer->forward({x})[0].sigmoid_();
Variable loss = at::binary_cross_entropy(x, target);
backward(loss);
loss.backward();
return at::Scalar(loss.data());
};

6 changes: 3 additions & 3 deletions test/cpp/api/rnn.cpp
@@ -50,7 +50,7 @@ bool test_RNN_xor(Func&& model_maker, bool cuda = false) {
Variable loss = at::mse_loss(x, y);

optim->zero_grad();
backward(loss);
loss.backward();
optim->step();

running_loss = running_loss * 0.99 + loss.toCFloat() * 0.01;
@@ -92,7 +92,7 @@ TEST_CASE("rnn") {
auto tup = model->forward({x});
auto y = x.mean();

backward(y);
y.backward();
check_lstm_sizes(tup);

auto next = model->forward({x, tup[1]});
@@ -195,7 +195,7 @@ TEST_CASE("rnn_cuda", "[cuda]") {
auto tup = model->forward({x});
auto y = x.mean();

backward(y);
y.backward();
check_lstm_sizes(tup);

auto next = model->forward({x, tup[1]});
6 changes: 3 additions & 3 deletions test/cpp/api/serialization.cpp
@@ -189,7 +189,7 @@ TEST_CASE("serialization") {
while (running_loss > 0.1) {
Variable loss = getLoss(model, 4);
optim->zero_grad();
backward(loss);
loss.backward();
optim->step();

running_loss = running_loss * 0.99 + loss.data().sum().toCFloat() * 0.01;
@@ -229,7 +229,7 @@ TEST_CASE("serialization") {
auto step = [&](Optimizer optim, std::shared_ptr<Module> model) {
optim->zero_grad();
auto y = model->forward({x})[0].sum();
backward(y);
y.backward();
optim->step();
};

@@ -300,7 +300,7 @@ TEST_CASE("serialization_cuda", "[cuda]") {
while (running_loss > 0.1) {
Variable loss = getLoss(model, 4);
optim->zero_grad();
backward(loss);
loss.backward();
optim->step();

running_loss = running_loss * 0.99 + loss.data().sum().toCFloat() * 0.01;
14 changes: 4 additions & 10 deletions torch/csrc/api/include/torch/detail.h
@@ -14,10 +14,10 @@
#define TORCH_AUTOGRAD_OPTIMIZER_CLASS(Type) \
class Type : public torch::Optimizer_CRTP<Type>
#define TORCH_AUTOGRAD_KWARG(CLS, TYP, NAME, DEFAULT, OPTION) \
TYP NAME##_ = DEFAULT; \
CLS& NAME(TYP x = OPTION) { \
NAME##_ = x; \
return *this; \
TYP NAME##_ = DEFAULT; \
CLS& NAME(TYP x = OPTION) { \
NAME##_ = x; \
return *this; \
}

namespace {
@@ -26,10 +26,6 @@ using IntVec = decltype(std::declval<at::IntList>().vec());
} // namespace

namespace torch {
namespace detail {
extern tag::Engine engine;
}

namespace nn {
class Module;
} // namespace nn
@@ -40,8 +36,6 @@ using variable_list = tag::variable_list;
using Tensor = at::Tensor;
using Optimizer = std::shared_ptr<OptimizerImpl>;

void backward(Tensor loss, bool keep_graph = false);

inline Variable Var(at::Tensor data, bool requires_grad = true) {
return tag::make_variable(data, requires_grad);
}
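With the free `backward` helper and the extern engine removed from this header, call sites switch to the member function added in Tensor.h. A sketch of the migration under that assumption, with the removed form shown only as a comment and `loss` standing in for any scalar Variable:

// Before this change (free function declared here, now removed):
//   backward(loss, /*keep_graph=*/true);
// After this change (member function on Tensor/Variable):
loss.backward(at::nullopt, /*keep_graph=*/true, /*create_graph=*/false);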
18 changes: 0 additions & 18 deletions torch/csrc/api/src/detail.cpp
@@ -8,24 +8,6 @@
#include "torch/detail.h"

namespace torch {
namespace detail {
tag::Engine engine;
}

void backward(Variable loss, bool keep_graph) {
tag::edge_list edgelst;
tag::variable_list varlst;
edgelst.emplace_back(loss.grad_fn(), loss.output_nr());
varlst.emplace_back(Var(at::ones_like(loss.data()), false));
// create_graph should be set to true when we want to support double bwd
detail::engine.execute(edgelst, varlst, keep_graph, false);
}

void backward(Tensor loss, bool keep_graph) {
Variable tmp(loss);
backward(tmp, keep_graph);
}

void setSeed(uint64_t seed) {
// TODO: Move this to at::Context
at::globalContext().defaultGenerator(at::Backend::CPU).manualSeed(seed);
2 changes: 1 addition & 1 deletion torch/csrc/autograd/engine.cpp
@@ -478,7 +478,7 @@ auto Engine::execute(const edge_list& input_roots,
}

#ifdef NO_PYTHON
Engine& Engine::getDefaultEngine() {
Engine& Engine::get_default_engine() {
static Engine engine;
return engine;
}
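For reference, a sketch of driving the renamed accessor directly, mirroring the free-function body removed from detail.cpp above; the namespaces used here and whether Variable::backward is actually wired this way are assumptions, since that implementation is not part of this diff:

// Sketch only: seed the graph at `loss` with a ones gradient and run it
// through the process-wide engine returned by the renamed accessor.
torch::autograd::edge_list roots;
torch::autograd::variable_list grads;
roots.emplace_back(loss.grad_fn(), loss.output_nr());
grads.emplace_back(torch::autograd::make_variable(
    at::ones_like(loss.data()), /*requires_grad=*/false));
torch::autograd::Engine::get_default_engine().execute(
    roots, grads, /*keep_graph=*/false, /*create_graph=*/false);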