Skip to content

Commit 26d537d

Browse files
smessmer authored and facebook-github-bot committed
Remove unboxedAutogradKernel from c10 (#26130)
Summary: Pull Request resolved: #26130 Since we now just use TensorTypeId::VariableTensorId, there's no need to treat autograd kernels any differently. ghstack-source-id: 90130457 Test Plan: unit tests Differential Revision: D17353873 fbshipit-source-id: d4468506a5366bc5e7429144b090b3e78af9de62
1 parent 0e30e65 commit 26d537d

File tree

7 files changed

+31
-132
lines changed

7 files changed

+31
-132
lines changed

aten/src/ATen/core/dispatch/Dispatcher.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,6 @@ RegistrationHandleRAII Dispatcher::registerCatchallKernel(const OperatorHandle&
126126
return op.operatorIterator_->op.registerCatchallKernel(DispatchTableEntry{kernel_func, std::move(cache_creator_func), unboxed_kernel_func});
127127
}
128128

129-
RegistrationHandleRAII Dispatcher::registerUnboxedAutogradKernel(const OperatorHandle& op, void* unboxed_autograd_kernel) {
130-
// note: this doesn't need the mutex to protect the iterator because write operations on the list keep iterators intact.
131-
return op.operatorIterator_->op.registerUnboxedAutogradKernel(unboxed_autograd_kernel);
132-
}
133-
134129
void Dispatcher::addRegistrationListener(std::unique_ptr<OpRegistrationListener> listener) {
135130
std::lock_guard<std::mutex> lock(mutex_);
136131

aten/src/ATen/core/dispatch/Dispatcher.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ class CAFFE2_API Dispatcher final {
9090
*/
9191
RegistrationHandleRAII registerCatchallKernel(const OperatorHandle& op, KernelFunction* kernel_func, KernelCacheCreatorFunction cache_creator_func, void* unboxed_kernel_func);
9292

93-
RegistrationHandleRAII registerUnboxedAutogradKernel(const OperatorHandle& op, void* unboxed_autograd_kernel);
94-
9593
/**
9694
* Perform a dynamic dispatch and get the kernel for an operator.
9795
*/
@@ -104,11 +102,6 @@ class CAFFE2_API Dispatcher final {
104102
// the (unboxed?) arguments the operator is to be called with.
105103
OpKernel lookup(const OperatorHandle& op, TensorTypeId dispatchKey) const;
106104

107-
// TODO Remove callUnboxedAutogradKernel() and instead figure out in a generic
108-
// callKernel() wrapper if the autograd or the regular kernel need to be called.
109-
template<class Result, class... Args>
110-
Result callUnboxedAutogradKernel(const OperatorHandle& op, Args... args) const;
111-
112105
/**
113106
* Add a listener that gets called whenever a new op is registered or an existing
114107
* op is deregistered. Immediately after registering, this listener gets called
@@ -183,14 +176,4 @@ inline OpKernel Dispatcher::lookup(const OperatorHandle& op, TensorTypeId dispat
183176
return op.operatorIterator_->op.lookupKernel(dispatchKey);
184177
}
185178

186-
template<class Result, class... Args>
187-
inline Result Dispatcher::callUnboxedAutogradKernel(const OperatorHandle& op, Args... args) const {
188-
void* unboxed_autograd_kernel = op.operatorIterator_->op.lookupUnboxedAutogradKernel();
189-
TORCH_CHECK(nullptr != unboxed_autograd_kernel, "Tried to call Dispatcher::callUnboxedAutogradKernel() for operator ", toString(op.schema()), " that doesn't have an autograd kernel.");
190-
191-
using OpSignature = Result (Args...);
192-
OpSignature* kernel = reinterpret_cast<OpSignature*>(unboxed_autograd_kernel);
193-
return (*kernel)(std::forward<Args>(args)...);
194-
}
195-
196179
} // namespace c10

aten/src/ATen/core/dispatch/OperatorEntry.cpp

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -95,41 +95,6 @@ void OperatorEntry::deregisterCatchallKernel_(std::list<DispatchTableEntry>::ite
9595
updateCatchallDispatchTable_();
9696
}
9797

98-
RegistrationHandleRAII OperatorEntry::registerUnboxedAutogradKernel(void* kernel_func) {
99-
std::unique_lock<std::mutex> lock(unboxedAutogradKernelsMutex_);
100-
101-
TORCH_INTERNAL_ASSERT(kernel_func != nullptr);
102-
103-
unboxedAutogradKernels_.push_front(kernel_func);
104-
std::list<void*>::iterator inserted = unboxedAutogradKernels_.begin();
105-
106-
updateCurrentUnboxedAutogradKernel_();
107-
108-
return RegistrationHandleRAII([this, inserted] {
109-
// list iterators stay valid even if the list changes,
110-
// so we can use the iterator to deregister the kernel from the list
111-
deregisterUnboxedAutogradKernel_(inserted);
112-
});
113-
}
114-
115-
void OperatorEntry::deregisterUnboxedAutogradKernel_(std::list<void*>::iterator kernel) {
116-
std::unique_lock<std::mutex> lock(unboxedAutogradKernelsMutex_);
117-
118-
unboxedAutogradKernels_.erase(kernel);
119-
120-
updateCurrentUnboxedAutogradKernel_();
121-
}
122-
123-
void OperatorEntry::updateCurrentUnboxedAutogradKernel_() {
124-
// precondition: unboxedAutogradKernelsMutex_ is locked
125-
126-
if (unboxedAutogradKernels_.empty()) {
127-
currentUnboxedAutogradKernel_ = nullptr;
128-
} else {
129-
currentUnboxedAutogradKernel_ = unboxedAutogradKernels_.front();
130-
}
131-
}
132-
13398
void OperatorEntry::updateDispatchTable_(TensorTypeId dispatch_key) {
13499
// precondition: kernelsMutex_ is locked
135100

aten/src/ATen/core/dispatch/OperatorEntry.h

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -95,25 +95,18 @@ class OperatorEntry final {
9595
});
9696
}
9797

98-
void* lookupUnboxedAutogradKernel() const {
99-
return currentUnboxedAutogradKernel_;
100-
}
101-
10298
void prepareForDeregistration();
10399

104100
RegistrationHandleRAII registerKernel(TensorTypeId dispatch_key, DispatchTableEntry kernel);
105101
RegistrationHandleRAII registerCatchallKernel(DispatchTableEntry kernel);
106102

107-
RegistrationHandleRAII registerUnboxedAutogradKernel(void* kernel_func);
108-
109103
const OperatorOptions& options() {
110104
return options_;
111105
}
112106

113107
private:
114108
void deregisterKernel_(TensorTypeId dispatch_key, std::list<DispatchTableEntry>::iterator kernel);
115109
void deregisterCatchallKernel_(std::list<DispatchTableEntry>::iterator kernel);
116-
void deregisterUnboxedAutogradKernel_(std::list<void*>::iterator kernel);
117110

118111
FunctionSchema schema_;
119112

@@ -155,33 +148,15 @@ class OperatorEntry final {
155148
ska::flat_hash_map<TensorTypeId, std::list<DispatchTableEntry>> kernels_;
156149
std::list<DispatchTableEntry> catchAllKernels_;
157150

158-
// unboxedAutogradKernels_ stores all autograd kernels registered for this op.
159-
// An autograd kernel has the same signature as the main op kernel and
160-
// internally re-dispatches to call the actual kernel.
161-
// Autograd kernels are unboxed currently. We are planning to move this
162-
// towards a system where ops register autograd wrappers (i.e. functions that
163-
// do some wrapping code and get a pointer to the actual kernel) instead of
164-
// autograd functions.
165-
// This is a list because, similar to kernels_, multiple libraries could
166-
// be loaded that register autograd kernels for the same op. The list is
167-
// ordered by registration time descendingly, i.e. newer registrations are
168-
// before older registrations and the list head is the autograd kernel
169-
// which is currently used.
170-
// See the comment for kernels_ above for an explanation for why we do this.
171-
std::list<void*> unboxedAutogradKernels_;
172-
std::atomic<void*> currentUnboxedAutogradKernel_;
173-
174151
// Some metadata about the operator
175152
OperatorOptions options_;
176153

177154
std::mutex kernelsMutex_; // protects kernels_
178-
std::mutex unboxedAutogradKernelsMutex_; // protects unboxedAutogradKernels_
179155

180156
// This function re-establishes the invariant that dispatchTable
181157
// contains the front element from the kernels list for a given dispatch key.
182158
void updateDispatchTable_(TensorTypeId dispatch_key);
183159
void updateCatchallDispatchTable_();
184-
void updateCurrentUnboxedAutogradKernel_();
185160
};
186161

187162
}

aten/src/ATen/core/op_registration/op_registration.cpp

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ static_assert(std::is_nothrow_move_assignable<c10::optional<RegistrationHandleRA
1212
// table deregisters it in the destructor.
1313
class RegisterOperators::OperatorRegistrar final {
1414
public:
15-
explicit OperatorRegistrar(FunctionSchema&& schema, OperatorOptions&& operatorOptions, c10::optional<TensorTypeId> dispatch_key, KernelFunction* kernel, KernelCacheCreatorFunction&& cache_creator, void* unboxed_kernel, void* unboxed_autograd_kernel)
15+
explicit OperatorRegistrar(FunctionSchema&& schema, OperatorOptions&& operatorOptions, c10::optional<TensorTypeId> dispatch_key, KernelFunction* kernel, KernelCacheCreatorFunction&& cache_creator, void* unboxed_kernel)
1616
: op_(Dispatcher::singleton().registerSchema(std::move(schema), std::move(operatorOptions))), kernel_registration_handle_(c10::nullopt) {
1717
// cache creator can only be set if the kernel is also set
1818
TORCH_INTERNAL_ASSERT((kernel != nullptr || unboxed_kernel != nullptr) || !static_cast<bool>(cache_creator));
@@ -24,10 +24,6 @@ class RegisterOperators::OperatorRegistrar final {
2424
kernel_registration_handle_ = Dispatcher::singleton().registerCatchallKernel(op_.opHandle(), kernel, std::move(cache_creator), unboxed_kernel);
2525
}
2626
}
27-
28-
if (unboxed_autograd_kernel != nullptr) {
29-
unboxed_autograd_kernel_registration_handle_ = Dispatcher::singleton().registerUnboxedAutogradKernel(op_.opHandle(), unboxed_autograd_kernel);
30-
}
3127
}
3228

3329
OperatorRegistrar(OperatorRegistrar&& rhs) noexcept = default;
@@ -40,7 +36,6 @@ class RegisterOperators::OperatorRegistrar final {
4036
private:
4137
c10::SchemaRegistrationHandleRAII op_;
4238
c10::optional<RegistrationHandleRAII> kernel_registration_handle_;
43-
c10::optional<RegistrationHandleRAII> unboxed_autograd_kernel_registration_handle_;
4439
};
4540

4641
void RegisterOperators::checkSchemaAndRegisterOp_(Options&& options) {
@@ -150,10 +145,10 @@ void RegisterOperators::registerOp_(Options&& options) {
150145
auto operatorOptions = makeOperatorOptions_(options);
151146

152147
if (0 == options.kernels.size()) {
153-
registerSchemaOnly_(std::move(schema), std::move(operatorOptions), options.unboxedAutogradKernel_);
148+
registerSchemaOnly_(std::move(schema), std::move(operatorOptions));
154149
} else {
155150
for (auto& kernel : options.kernels) {
156-
registerSchemaAndKernel_(schema, std::move(kernel), std::move(operatorOptions), options.unboxedAutogradKernel_);
151+
registerSchemaAndKernel_(schema, std::move(kernel), std::move(operatorOptions));
157152
}
158153
}
159154

@@ -168,14 +163,14 @@ OperatorOptions RegisterOperators::makeOperatorOptions_(const RegisterOperators:
168163
return result;
169164
}
170165

171-
void RegisterOperators::registerSchemaAndKernel_(FunctionSchema schema, Options::KernelRegistrationConfig&& kernel, OperatorOptions&& operatorOptions, void* unboxedAutogradKernel) {
166+
void RegisterOperators::registerSchemaAndKernel_(FunctionSchema schema, Options::KernelRegistrationConfig&& kernel, OperatorOptions&& operatorOptions) {
172167
TORCH_INTERNAL_ASSERT((kernel.kernel_func != nullptr || kernel.unboxed_kernel_func != nullptr), "Kernel must be set");
173168

174-
registrars_.emplace_back(std::move(schema), std::move(operatorOptions), kernel.dispatch_key, kernel.kernel_func, std::move(kernel.cache_creator_func), kernel.unboxed_kernel_func, unboxedAutogradKernel);
169+
registrars_.emplace_back(std::move(schema), std::move(operatorOptions), kernel.dispatch_key, kernel.kernel_func, std::move(kernel.cache_creator_func), kernel.unboxed_kernel_func);
175170
}
176171

177-
void RegisterOperators::registerSchemaOnly_(FunctionSchema&& schema, OperatorOptions&& operatorOptions, void* unboxedAutogradKernel) {
178-
registrars_.emplace_back(std::move(schema), std::move(operatorOptions), c10::nullopt, nullptr, nullptr, nullptr, unboxedAutogradKernel);
172+
void RegisterOperators::registerSchemaOnly_(FunctionSchema&& schema, OperatorOptions&& operatorOptions) {
173+
registrars_.emplace_back(std::move(schema), std::move(operatorOptions), c10::nullopt, nullptr, nullptr, nullptr);
179174
}
180175

181176
RegisterOperators::RegisterOperators() = default;

aten/src/ATen/core/op_registration/op_registration.h

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ class CAFFE2_API RegisterOperators final {
9696
TORCH_CHECK(!legacyATenSchema_.has_value(), "Tried to register operator ", schemaOrName," but specified schema multiple times. You can only specify the schema once per operator registration.");
9797

9898
if (Options::op_is_still_on_aten_dispatcher_(schemaOrName.c_str())) {
99-
TORCH_CHECK(unboxedAutogradKernel_ == nullptr, "For legacy aten ops, the schema() call must happen before any kernel() calls. Operator was ", schemaOrName);
10099
TORCH_CHECK(kernels.size() == 0, "For legacy aten ops, the schema() call must happen before any kernel() calls. Operator was ", schemaOrName);
101100
legacyATenSchema_ = schemaOrName;
102101
} else {
@@ -353,24 +352,6 @@ class CAFFE2_API RegisterOperators final {
353352
return std::move(*this);
354353
}
355354

356-
template<class FuncType>
357-
Options&& impl_unboxedAutogradKernel(FuncType* kernel) && {
358-
static_assert(guts::is_function_type<FuncType>::value, "Wrong argument type for impl_unboxedAutogradKernel");
359-
360-
// TODO Infer and check schema
361-
TORCH_CHECK(kernel != nullptr, "Kernel function pointer cannot be nullptr");
362-
TORCH_CHECK(unboxedAutogradKernel_ == nullptr, "You can only call impl_unboxedAutogradKernel() once per operator registration.");
363-
if (legacyATenSchema_.has_value()) {
364-
// TODO Remove this once all ops are moved to c10.
365-
TORCH_INTERNAL_ASSERT(!schemaOrName_.has_value());
366-
at::globalATenDispatch().registerOp<FuncType>(TensorTypeId::VariableTensorId, legacyATenSchema_->c_str(), kernel);
367-
return std::move(*this);
368-
} else {
369-
unboxedAutogradKernel_ = reinterpret_cast<void*>(kernel);
370-
return std::move(*this);
371-
}
372-
}
373-
374355
private:
375356
static c10::OperatorName parse_operator_name_(const char* schema) {
376357
// TODO Remove this function once all aten ops are on c10
@@ -474,7 +455,6 @@ class CAFFE2_API RegisterOperators final {
474455

475456
std::vector<KernelRegistrationConfig> kernels;
476457
optional<AliasAnalysisKind> aliasAnalysisKind_;
477-
void* unboxedAutogradKernel_; // can be nullptr, not all kernels have this
478458
friend class RegisterOperators;
479459
};
480460

@@ -599,8 +579,8 @@ class CAFFE2_API RegisterOperators final {
599579
static c10::FunctionSchema inferSchemaFromKernels_(const OperatorName& opNameStr, const Options& options);
600580
void checkNoDuplicateKernels_(const Options& options);
601581
void registerOp_(Options&& options);
602-
void registerSchemaAndKernel_(FunctionSchema schema, Options::KernelRegistrationConfig&& config, OperatorOptions&& options, void* unboxedAutogradKernel);
603-
void registerSchemaOnly_(FunctionSchema&& schema, OperatorOptions&& options, void* unboxedAutogradKernel);
582+
void registerSchemaAndKernel_(FunctionSchema schema, Options::KernelRegistrationConfig&& config, OperatorOptions&& options);
583+
void registerSchemaOnly_(FunctionSchema&& schema, OperatorOptions&& options);
604584
static OperatorOptions makeOperatorOptions_(const Options& options);
605585

606586
class OperatorRegistrar;

aten/src/ATen/core/op_registration/op_registration_test.cpp

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ using c10::RegisterOperators;
1919
using c10::OperatorKernel;
2020
using c10::Dispatcher;
2121
using c10::IValue;
22+
using c10::TensorTypeId;
2223
using at::Tensor;
2324

2425
namespace {
@@ -619,37 +620,42 @@ TEST(OperatorRegistrationTest, whenRegisteringMismatchingKernelsInSameOpCall_the
619620
}, "Tried to register kernels for same operator that infer a different function schema");
620621
}
621622

622-
int64_t increment_kernel(int64_t a) {
623-
return a + 1;
623+
bool called_autograd = false;
624+
bool called_catchall = false;
625+
626+
void catchall_kernel(Tensor a) {
627+
called_catchall = true;
624628
}
625629

626-
int64_t decrement_kernel(int64_t a) {
627-
return a - 1;
630+
void autograd_kernel(Tensor a) {
631+
called_autograd = true;
628632
}
629633

630634
TEST(OperatorRegistrationTest, whenRegisteringAutogradKernel_thenCanCallAutogradKernel) {
631-
auto registrar = c10::RegisterOperators().op("_test::dummy(int dummy) -> int", c10::RegisterOperators::options()
632-
.impl_unboxedAutogradKernel(&increment_kernel));
635+
auto registrar = c10::RegisterOperators().op("_test::dummy(Tensor dummy) -> ()", c10::RegisterOperators::options()
636+
.impl_unboxedOnlyKernel<decltype(autograd_kernel), &autograd_kernel>(TensorTypeId::VariableTensorId));
633637

634638
auto op = Dispatcher::singleton().findSchema({"_test::dummy", ""});
635639
ASSERT_TRUE(op.has_value());
636-
int64_t result = c10::Dispatcher::singleton().callUnboxedAutogradKernel<int64_t, int64_t>(*op, 4);
637-
EXPECT_EQ(5, result);
640+
641+
called_autograd = false;
642+
c10::Dispatcher::singleton().lookup(*op, TensorTypeId::VariableTensorId).callUnboxed<void, Tensor>(dummyTensor(TensorTypeId::VariableTensorId));
643+
EXPECT_TRUE(called_autograd);
638644
}
639645

640646
TEST(OperatorRegistrationTest, whenRegisteringAutogradKernelWithRegularKernel_thenCanCallAutogradKernel) {
641-
auto registrar = c10::RegisterOperators().op("_test::dummy(int dummy) -> int", c10::RegisterOperators::options()
642-
.catchAllKernel<decltype(decrement_kernel), &decrement_kernel>()
643-
.impl_unboxedAutogradKernel(&increment_kernel));
647+
auto registrar = c10::RegisterOperators().op("_test::dummy(Tensor dummy) -> ()", c10::RegisterOperators::options()
648+
.impl_unboxedOnlyCatchAllKernel<decltype(catchall_kernel), &catchall_kernel>()
649+
.impl_unboxedOnlyKernel<decltype(autograd_kernel), &autograd_kernel>(TensorTypeId::VariableTensorId));
644650

645651
auto op = Dispatcher::singleton().findSchema({"_test::dummy", ""});
646652
ASSERT_TRUE(op.has_value());
647-
int64_t result = c10::Dispatcher::singleton().callUnboxedAutogradKernel<int64_t, int64_t>(*op, 4);
648-
EXPECT_EQ(5, result);
649-
}
650653

651-
// TODO Test cases that adding multiple autograd kernels, removing some, and so on works
652-
// (similar to test cases above for regular kernels "_whenNewerAndThenOlderKernelDeletedAndOpCalled")
654+
called_catchall = called_autograd = false;
655+
c10::Dispatcher::singleton().lookup(*op, TensorTypeId::VariableTensorId).callUnboxed<void, Tensor>(dummyTensor(TensorTypeId::VariableTensorId));
656+
EXPECT_FALSE(called_catchall);
657+
EXPECT_TRUE(called_autograd);
658+
}
653659

654660
/**
655661
* This is used to check that a given type works correctly when passed as input

0 commit comments

Comments (0)