Skip to content

Commit c25e337

Browse files
Dmytro Dzhulgakov authored and facebook-github-bot committed
Lightweight at-most-once logging for API usage (#20745)
Summary: Resubmit #20698 which got messed up. Idea is that when PyTorch is used in a custom build environment (e.g. Facebook), it's useful to track usage of various APIs centrally. This PR introduces a simple very lightweight mechanism to do so - only first invocation of a trigger point would be logged. This is significantly more lightweight than #18235 and thus we can allow to put logging in e.g. TensorImpl. Also adds an initial list of trigger points. Trigger points are added in such a way that no static initialization triggers them, i.e. just linking with libtorch.so will not cause any logging. Further suggestions of what to log are welcomed. Pull Request resolved: #20745 Differential Revision: D15429196 Pulled By: dzhulgakov fbshipit-source-id: a5e41a709a65b7ebccc6b95f93854e583cf20aca
1 parent 8cde4c4 commit c25e337

File tree

29 files changed

+247
-45
lines changed

29 files changed

+247
-45
lines changed

aten/src/ATen/cuda/detail/CUDAHooks.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ namespace detail {
3939
// compilation unit (alt is to have another method in hooks, but
4040
// let's not if we don't need to!)
4141
std::unique_ptr<THCState, void (*)(THCState*)> CUDAHooks::initCUDA() const {
42+
C10_LOG_API_USAGE_ONCE("aten.init.cuda");
4243
THCState* thc_state = THCState_alloc();
4344

4445
THCudaInit(thc_state);

binaries/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ caffe2_binary_target("split_db.cc")
1616
caffe2_binary_target("db_throughput.cc")
1717

1818

19+
if (BUILD_TEST AND NOT ANDROID)
20+
# Core overhead benchmark
21+
caffe2_binary_target("core_overhead_benchmark.cc")
22+
target_link_libraries(core_overhead_benchmark benchmark)
23+
endif()
24+
1925
if (USE_CUDA)
2026
caffe2_binary_target("inspect_gpu.cc")
2127
target_link_libraries(inspect_gpu ${CUDA_LIBRARIES})
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/**
2+
* Copyright (c) 2016-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "benchmark/benchmark.h"
18+
19+
#include <c10/util/Logging.h>
20+
21+
#if defined(__GNUC__)
22+
#define NOINLINE __attribute__((noinline))
23+
#else
24+
#define NOINLINE
25+
#endif
26+
27+
// Logged variant: fires the at-most-once API-usage logger, then returns the
// parity of `id`. NOINLINE keeps the benchmark from folding the call away.
NOINLINE int call(int id) {
  C10_LOG_API_USAGE_ONCE("bla");
  const int parity = id % 2;
  return parity;
}
31+
32+
// Baseline variant: identical parity computation, but without any logging,
// so the benchmark pair isolates the cost of C10_LOG_API_USAGE_ONCE.
NOINLINE int call_no_logging(int id) {
  const int parity = id % 2;
  return parity;
}
35+
36+
static void BM_APILogging(benchmark::State& state) {
37+
int id = 0;
38+
while (state.KeepRunning()) {
39+
for (int i = 0; i < 1000; ++i) {
40+
id += 1 + call(id);
41+
}
42+
}
43+
benchmark::DoNotOptimize(id);
44+
}
45+
BENCHMARK(BM_APILogging);
46+
47+
static void BM_NoAPILogging(benchmark::State& state) {
48+
int id = 0;
49+
while (state.KeepRunning()) {
50+
for (int i = 0; i < 1000; ++i) {
51+
id += 1 + call_no_logging(id);
52+
}
53+
}
54+
benchmark::DoNotOptimize(id);
55+
}
56+
BENCHMARK(BM_NoAPILogging);
57+
58+
BENCHMARK_MAIN();

c10/core/TensorImpl.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,12 @@ TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, const caffe2::Ty
4848
data_type_(data_type),
4949
device_opt_(device_opt),
5050
type_id_(type_id) {
51-
AT_ASSERT(type_id == UndefinedTensorId() || data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
52-
device_opt_.has_value());
51+
if (type_id != UndefinedTensorId()) {
52+
AT_ASSERT(data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
53+
device_opt_.has_value());
54+
// UndefinedTensorImpl is a singleton, so we skip logging it
55+
C10_LOG_API_USAGE_ONCE("tensor.create");
56+
}
5357
// we would also like to check that non-cpu devices have an index, but some Caffe2 operators create
5458
// Storages with default devices.
5559
strides_.push_back(1);

c10/macros/Macros.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,21 @@
2929
classname(const classname&) = delete; \
3030
classname& operator=(const classname&) = delete
3131

32-
#define CONCAT_IMPL(x, y) x##y
33-
#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y)
32+
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
33+
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
34+
35+
#define C10_MACRO_EXPAND(args) args
36+
37+
/**
38+
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
39+
* str and ending with a number that varies with the line.
40+
*/
41+
#ifdef __COUNTER__
42+
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
43+
#else
44+
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
45+
#endif
3446

35-
#define MACRO_EXPAND(args) args
3647

3748
/// C10_NODISCARD - Warn if a type or return value is discarded.
3849

c10/util/Logging.cpp

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#include "c10/util/Logging.h"
2-
#include "c10/util/Flags.h"
32
#include "c10/util/Backtrace.h"
3+
#include "c10/util/Flags.h"
44

55
#include <algorithm>
6+
#include <cstdlib>
67
#include <cstring>
78
#include <iostream>
89
#include <numeric>
@@ -24,7 +25,9 @@ namespace enforce_detail {
2425

2526
namespace {
2627
std::function<string(void)>* GetFetchStackTrace() {
27-
static std::function<string(void)> func = []() { return get_backtrace(/*frames_to_skip=*/ 1); };
28+
static std::function<string(void)> func = []() {
29+
return get_backtrace(/*frames_to_skip=*/1);
30+
};
2831
return &func;
2932
};
3033
} // namespace
@@ -49,12 +52,44 @@ void ThrowEnforceNotMet(
4952
// PyTorch-style error message
5053
// (This must be defined here for access to GetFetchStackTrace)
5154
Error::Error(SourceLocation source_location, const std::string& msg)
52-
: Error(
53-
msg,
54-
str(" (",
55-
source_location,
56-
")\n",
57-
(*GetFetchStackTrace())())) {}
55+
: Error(msg, str(" (", source_location, ")\n", (*GetFetchStackTrace())())) {
56+
}
57+
58+
using APIUsageLoggerType = std::function<void(const std::string&)>;
59+
60+
namespace {
61+
// True when PYTORCH_API_USAGE_STDERR is set to any non-empty value;
// enables mirroring of API-usage events to stderr for debugging.
bool IsAPIUsageDebugMode() {
  const char* flag = std::getenv("PYTORCH_API_USAGE_STDERR");
  if (flag == nullptr) {
    return false;
  }
  return flag[0] != '\0';
}
65+
66+
// Debug sink: echoes each API-usage event to stderr. stderr is used rather
// than glog so the output never interleaves with configured log sinks.
void APIUsageDebug(const string& event) {
  std::string line = "PYTORCH_API_USAGE ";
  line += event;
  std::cerr << line << std::endl;
}
70+
71+
// Process-wide logger callback, lazily initialized on first use. Defaults to
// the stderr echo when debug mode is on, otherwise to a no-op.
APIUsageLoggerType* GetAPIUsageLogger() {
  static APIUsageLoggerType func = []() -> APIUsageLoggerType {
    if (IsAPIUsageDebugMode()) {
      return &APIUsageDebug;
    }
    return [](const string&) {};
  }();
  return &func;
}
76+
} // namespace
77+
78+
// Installs a process-wide callback invoked for every API-usage event.
// Rejects an empty std::function so LogAPIUsage never calls a null target.
void SetAPIUsageLogger(std::function<void(const std::string&)> logger) {
  TORCH_CHECK(logger);
  *GetAPIUsageLogger() = std::move(logger);
}
82+
83+
// Forwards one event to the installed logger (a no-op unless configured).
void LogAPIUsage(const std::string& event) {
  auto* logger = GetAPIUsageLogger();
  (*logger)(event);
}
86+
87+
namespace detail {
88+
bool LogAPIUsageFakeReturn(const std::string& event) {
89+
(*GetAPIUsageLogger())(event);
90+
return true;
91+
}
92+
} // namespace detail
5893

5994
} // namespace c10
6095

c10/util/Logging.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,32 @@ BINARY_COMP_HELPER(LessEquals, <=)
257257
#define CAFFE_ENFORCE_GT_WITH_CALLER(x, y, ...) \
258258
CAFFE_ENFORCE_THAT_IMPL_WITH_CALLER( \
259259
Greater((x), (y)), #x " > " #y, __VA_ARGS__)
260+
261+
/**
262+
* Very lightweight logging for the first time API usage. It's beneficial for
263+
* tracking of individual functionality usage in larger applications.
264+
*
265+
* In order to ensure light-weightness of logging, we utilize static variable
266+
* trick - LogAPIUsage will be invoked only once and further invocations will
267+
* just do an atomic check.
268+
*
269+
* Example:
270+
* // Logs caller info with an arbitrary text event, if there is a usage.
271+
* C10_LOG_API_USAGE_ONCE("my_api");
272+
*/
273+
#define C10_LOG_API_USAGE_ONCE(...) \
274+
C10_UNUSED static bool C10_ANONYMOUS_VARIABLE(logFlag) = \
275+
::c10::detail::LogAPIUsageFakeReturn(__VA_ARGS__);
276+
277+
// API usage logging capabilities
278+
C10_API void SetAPIUsageLogger(std::function<void(const std::string&)> logger);
279+
C10_API void LogAPIUsage(const std::string& context);
280+
281+
namespace detail {
282+
// Return value is needed to do the static variable initialization trick
283+
C10_API bool LogAPIUsageFakeReturn(const std::string& context);
284+
}
285+
260286
} // namespace c10
261287

262288
#endif // C10_UTIL_LOGGING_H_

c10/util/Registry.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -185,18 +185,6 @@ class Registerer {
185185
}
186186
};
187187

188-
/**
189-
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
190-
* str and ending with a number that varies with the line.
191-
*/
192-
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
193-
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
194-
#ifdef __COUNTER__
195-
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
196-
#else
197-
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
198-
#endif
199-
200188
/**
201189
* C10_DECLARE_TYPED_REGISTRY is a macro that expands to a function
202190
* declaration, as well as creating a convenient typename for its corresponding

c10/util/typeid.h

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
#include <c10/util/Exception.h>
2424
#include <c10/util/Half.h>
2525
#include <c10/util/IdWrapper.h>
26+
#include <c10/util/Type.h>
27+
#include <c10/util/qint32.h>
2628
#include <c10/util/qint8.h>
2729
#include <c10/util/quint8.h>
28-
#include <c10/util/qint32.h>
29-
#include <c10/util/Type.h>
3030

3131
/*
3232
* TypeIdentifier is a small type containing an id.
@@ -498,15 +498,15 @@ inline std::ostream& operator<<(
498498
#define EXPORT_IF_NOT_GCC
499499
#endif
500500

501-
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
502-
namespace detail { \
503-
const TypeMetaData MACRO_CONCAT(_typeMetaDataInstance_, Counter) = \
504-
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
505-
} \
506-
template <> \
507-
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
508-
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
509-
return &MACRO_CONCAT(detail::_typeMetaDataInstance_, Counter); \
501+
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
502+
namespace detail { \
503+
const TypeMetaData C10_CONCATENATE(_typeMetaDataInstance_, Counter) = \
504+
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
505+
} \
506+
template <> \
507+
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
508+
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
509+
return &C10_CONCATENATE(detail::_typeMetaDataInstance_, Counter); \
510510
}
511511
#define CAFFE_KNOWN_TYPE(T) \
512512
template <> \
@@ -529,20 +529,20 @@ inline std::ostream& operator<<(
529529
return TypeIdentifier(PreallocatedId); \
530530
} \
531531
namespace detail { \
532-
C10_API extern const TypeMetaData MACRO_CONCAT( \
532+
C10_API extern const TypeMetaData C10_CONCATENATE( \
533533
_typeMetaDataInstance_preallocated_, \
534534
PreallocatedId); \
535535
}
536536
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
537537
namespace detail { \
538-
C10_EXPORT const TypeMetaData MACRO_CONCAT( \
538+
C10_EXPORT const TypeMetaData C10_CONCATENATE( \
539539
_typeMetaDataInstance_preallocated_, \
540540
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
541541
} \
542542
template <> \
543543
C10_EXPORT const detail::TypeMetaData* \
544544
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
545-
return &MACRO_CONCAT( \
545+
return &C10_CONCATENATE( \
546546
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
547547
}
548548
#else // _MSC_VER
@@ -552,19 +552,19 @@ inline std::ostream& operator<<(
552552
return TypeIdentifier(PreallocatedId); \
553553
} \
554554
namespace detail { \
555-
C10_EXPORT extern const TypeMetaData MACRO_CONCAT( \
555+
C10_EXPORT extern const TypeMetaData C10_CONCATENATE( \
556556
_typeMetaDataInstance_preallocated_, \
557557
PreallocatedId); \
558558
} \
559559
template <> \
560560
inline const detail::TypeMetaData* \
561561
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
562-
return &MACRO_CONCAT( \
562+
return &C10_CONCATENATE( \
563563
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
564564
}
565565
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
566566
namespace detail { \
567-
const TypeMetaData MACRO_CONCAT( \
567+
const TypeMetaData C10_CONCATENATE( \
568568
_typeMetaDataInstance_preallocated_, \
569569
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
570570
}

caffe2/core/context_gpu.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ static void Caffe2InitializeCuda() {
201201
VLOG(1) << "No cuda gpu present. Skipping.";
202202
return;
203203
}
204+
C10_LOG_API_USAGE_ONCE("caffe2.init.cuda");
204205
// Check if the number of GPUs matches the expected compile-time max number
205206
// of GPUs.
206207
CAFFE_ENFORCE_LE(

0 commit comments

Comments
 (0)