Skip to content

Commit c25e337

Browse files
Dmytro Dzhulgakov authored and facebook-github-bot committed
Lightweight at-most-once logging for API usage (#20745)
Summary: Resubmit #20698 which got messed up. Idea is that when PyTorch is used in a custom build environment (e.g. Facebook), it's useful to track usage of various APIs centrally. This PR introduces a simple very lightweight mechanism to do so - only first invocation of a trigger point would be logged. This is significantly more lightweight than #18235 and thus we can allow to put logging in e.g. TensorImpl. Also adds an initial list of trigger points. Trigger points are added in such a way that no static initialization triggers them, i.e. just linking with libtorch.so will not cause any logging. Further suggestions of what to log are welcomed. Pull Request resolved: #20745 Differential Revision: D15429196 Pulled By: dzhulgakov fbshipit-source-id: a5e41a709a65b7ebccc6b95f93854e583cf20aca
1 parent 8cde4c4 commit c25e337

File tree

29 files changed

+247
-45
lines changed

29 files changed

+247
-45
lines changed

aten/src/ATen/cuda/detail/CUDAHooks.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ namespace detail {
3939
// compilation unit (alt is to have another method in hooks, but
4040
// let's not if we don't need to!)
4141
std::unique_ptr<THCState, void (*)(THCState*)> CUDAHooks::initCUDA() const {
42+
C10_LOG_API_USAGE_ONCE("aten.init.cuda");
4243
THCState* thc_state = THCState_alloc();
4344

4445
THCudaInit(thc_state);

binaries/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ caffe2_binary_target("split_db.cc")
1616
caffe2_binary_target("db_throughput.cc")
1717

1818

19+
if (BUILD_TEST AND NOT ANDROID)
20+
# Core overhead benchmark
21+
caffe2_binary_target("core_overhead_benchmark.cc")
22+
target_link_libraries(core_overhead_benchmark benchmark)
23+
endif()
24+
1925
if (USE_CUDA)
2026
caffe2_binary_target("inspect_gpu.cc")
2127
target_link_libraries(inspect_gpu ${CUDA_LIBRARIES})
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/**
2+
* Copyright (c) 2016-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "benchmark/benchmark.h"
18+
19+
#include <c10/util/Logging.h>
20+
21+
#if defined(__GNUC__)
22+
#define NOINLINE __attribute__((noinline))
23+
#else
24+
#define NOINLINE
25+
#endif
26+
27+
// Logged variant: fires the at-most-once API-usage logger, then returns the
// parity of `id`. NOINLINE keeps the benchmark from folding the call away.
NOINLINE int call(int id) {
  C10_LOG_API_USAGE_ONCE("bla");
  const int parity = id % 2;
  return parity;
}
31+
32+
// Baseline variant: identical parity computation, but without any logging,
// so the benchmark pair isolates the cost of C10_LOG_API_USAGE_ONCE.
NOINLINE int call_no_logging(int id) {
  const int parity = id % 2;
  return parity;
}
35+
36+
static void BM_APILogging(benchmark::State& state) {
37+
int id = 0;
38+
while (state.KeepRunning()) {
39+
for (int i = 0; i < 1000; ++i) {
40+
id += 1 + call(id);
41+
}
42+
}
43+
benchmark::DoNotOptimize(id);
44+
}
45+
BENCHMARK(BM_APILogging);
46+
47+
static void BM_NoAPILogging(benchmark::State& state) {
48+
int id = 0;
49+
while (state.KeepRunning()) {
50+
for (int i = 0; i < 1000; ++i) {
51+
id += 1 + call_no_logging(id);
52+
}
53+
}
54+
benchmark::DoNotOptimize(id);
55+
}
56+
BENCHMARK(BM_NoAPILogging);
57+
58+
BENCHMARK_MAIN();

c10/core/TensorImpl.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,12 @@ TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, const caffe2::Ty
4848
data_type_(data_type),
4949
device_opt_(device_opt),
5050
type_id_(type_id) {
51-
AT_ASSERT(type_id == UndefinedTensorId() || data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
52-
device_opt_.has_value());
51+
if (type_id != UndefinedTensorId()) {
52+
AT_ASSERT(data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
53+
device_opt_.has_value());
54+
// UndefinedTensorImpl is a singleton, so we skip logging it
55+
C10_LOG_API_USAGE_ONCE("tensor.create");
56+
}
5357
// we would also like to check that non-cpu devices have an index, but some Caffe2 operators create
5458
// Storages with default devices.
5559
strides_.push_back(1);

c10/macros/Macros.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,21 @@
2929
classname(const classname&) = delete; \
3030
classname& operator=(const classname&) = delete
3131

32-
#define CONCAT_IMPL(x, y) x##y
33-
#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y)
32+
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
33+
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
34+
35+
#define C10_MACRO_EXPAND(args) args
36+
37+
/**
38+
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
39+
* str and ending with a number that varies with the line.
40+
*/
41+
#ifdef __COUNTER__
42+
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
43+
#else
44+
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
45+
#endif
3446

35-
#define MACRO_EXPAND(args) args
3647

3748
/// C10_NODISCARD - Warn if a type or return value is discarded.
3849

c10/util/Logging.cpp

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#include "c10/util/Logging.h"
2-
#include "c10/util/Flags.h"
32
#include "c10/util/Backtrace.h"
3+
#include "c10/util/Flags.h"
44

55
#include <algorithm>
6+
#include <cstdlib>
67
#include <cstring>
78
#include <iostream>
89
#include <numeric>
@@ -24,7 +25,9 @@ namespace enforce_detail {
2425

2526
namespace {
2627
std::function<string(void)>* GetFetchStackTrace() {
27-
static std::function<string(void)> func = []() { return get_backtrace(/*frames_to_skip=*/ 1); };
28+
static std::function<string(void)> func = []() {
29+
return get_backtrace(/*frames_to_skip=*/1);
30+
};
2831
return &func;
2932
};
3033
} // namespace
@@ -49,12 +52,44 @@ void ThrowEnforceNotMet(
4952
// PyTorch-style error message
5053
// (This must be defined here for access to GetFetchStackTrace)
5154
Error::Error(SourceLocation source_location, const std::string& msg)
52-
: Error(
53-
msg,
54-
str(" (",
55-
source_location,
56-
")\n",
57-
(*GetFetchStackTrace())())) {}
55+
: Error(msg, str(" (", source_location, ")\n", (*GetFetchStackTrace())())) {
56+
}
57+
58+
using APIUsageLoggerType = std::function<void(const std::string&)>;
59+
60+
namespace {
61+
// True when PYTORCH_API_USAGE_STDERR is set to any non-empty value;
// enables mirroring of API-usage events to stderr for debugging.
bool IsAPIUsageDebugMode() {
  const char* flag = std::getenv("PYTORCH_API_USAGE_STDERR");
  if (flag == nullptr) {
    return false;
  }
  return flag[0] != '\0';
}
65+
66+
// Debug sink: echoes each API-usage event to stderr. stderr is used rather
// than glog so the output never interleaves with configured log sinks.
void APIUsageDebug(const string& event) {
  std::string line = "PYTORCH_API_USAGE ";
  line += event;
  std::cerr << line << std::endl;
}
70+
71+
// Process-wide logger callback, lazily initialized on first use. Defaults to
// the stderr echo when debug mode is on, otherwise to a no-op.
APIUsageLoggerType* GetAPIUsageLogger() {
  static APIUsageLoggerType func = []() -> APIUsageLoggerType {
    if (IsAPIUsageDebugMode()) {
      return &APIUsageDebug;
    }
    return [](const string&) {};
  }();
  return &func;
}
76+
} // namespace
77+
78+
// Installs a process-wide callback invoked for every API-usage event.
// Rejects an empty std::function so LogAPIUsage never calls a null target.
void SetAPIUsageLogger(std::function<void(const std::string&)> logger) {
  TORCH_CHECK(logger);
  *GetAPIUsageLogger() = std::move(logger);
}
82+
83+
// Forwards one event to the installed logger (a no-op unless configured).
void LogAPIUsage(const std::string& event) {
  auto* logger = GetAPIUsageLogger();
  (*logger)(event);
}
86+
87+
namespace detail {
88+
bool LogAPIUsageFakeReturn(const std::string& event) {
89+
(*GetAPIUsageLogger())(event);
90+
return true;
91+
}
92+
} // namespace detail
5893

5994
} // namespace c10
6095

c10/util/Logging.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,32 @@ BINARY_COMP_HELPER(LessEquals, <=)
257257
#define CAFFE_ENFORCE_GT_WITH_CALLER(x, y, ...) \
258258
CAFFE_ENFORCE_THAT_IMPL_WITH_CALLER( \
259259
Greater((x), (y)), #x " > " #y, __VA_ARGS__)
260+
261+
/**
262+
* Very lightweight logging for the first time API usage. It's beneficial for
263+
* tracking of individual functionality usage in larger applications.
264+
*
265+
* In order to ensure light-weightness of logging, we utilize static variable
266+
* trick - LogAPIUsage will be invoked only once and further invocations will
267+
* just do an atomic check.
268+
*
269+
* Example:
270+
* // Logs caller info with an arbitrary text event, if there is a usage.
271+
* C10_LOG_API_USAGE_ONCE("my_api");
272+
*/
273+
#define C10_LOG_API_USAGE_ONCE(...) \
274+
C10_UNUSED static bool C10_ANONYMOUS_VARIABLE(logFlag) = \
275+
::c10::detail::LogAPIUsageFakeReturn(__VA_ARGS__);
276+
277+
// API usage logging capabilities
278+
C10_API void SetAPIUsageLogger(std::function<void(const std::string&)> logger);
279+
C10_API void LogAPIUsage(const std::string& context);
280+
281+
namespace detail {
282+
// Return value is needed to do the static variable initialization trick
283+
C10_API bool LogAPIUsageFakeReturn(const std::string& context);
284+
}
285+
260286
} // namespace c10
261287

262288
#endif // C10_UTIL_LOGGING_H_

c10/util/Registry.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -185,18 +185,6 @@ class Registerer {
185185
}
186186
};
187187

188-
/**
189-
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
190-
* str and ending with a number that varies with the line.
191-
*/
192-
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
193-
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
194-
#ifdef __COUNTER__
195-
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
196-
#else
197-
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
198-
#endif
199-
200188
/**
201189
* C10_DECLARE_TYPED_REGISTRY is a macro that expands to a function
202190
* declaration, as well as creating a convenient typename for its corresponding

c10/util/typeid.h

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
#include <c10/util/Exception.h>
2424
#include <c10/util/Half.h>
2525
#include <c10/util/IdWrapper.h>
26+
#include <c10/util/Type.h>
27+
#include <c10/util/qint32.h>
2628
#include <c10/util/qint8.h>
2729
#include <c10/util/quint8.h>
28-
#include <c10/util/qint32.h>
29-
#include <c10/util/Type.h>
3030

3131
/*
3232
* TypeIdentifier is a small type containing an id.
@@ -498,15 +498,15 @@ inline std::ostream& operator<<(
498498
#define EXPORT_IF_NOT_GCC
499499
#endif
500500

501-
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
502-
namespace detail { \
503-
const TypeMetaData MACRO_CONCAT(_typeMetaDataInstance_, Counter) = \
504-
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
505-
} \
506-
template <> \
507-
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
508-
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
509-
return &MACRO_CONCAT(detail::_typeMetaDataInstance_, Counter); \
501+
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
502+
namespace detail { \
503+
const TypeMetaData C10_CONCATENATE(_typeMetaDataInstance_, Counter) = \
504+
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
505+
} \
506+
template <> \
507+
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
508+
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
509+
return &C10_CONCATENATE(detail::_typeMetaDataInstance_, Counter); \
510510
}
511511
#define CAFFE_KNOWN_TYPE(T) \
512512
template <> \
@@ -529,20 +529,20 @@ inline std::ostream& operator<<(
529529
return TypeIdentifier(PreallocatedId); \
530530
} \
531531
namespace detail { \
532-
C10_API extern const TypeMetaData MACRO_CONCAT( \
532+
C10_API extern const TypeMetaData C10_CONCATENATE( \
533533
_typeMetaDataInstance_preallocated_, \
534534
PreallocatedId); \
535535
}
536536
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
537537
namespace detail { \
538-
C10_EXPORT const TypeMetaData MACRO_CONCAT( \
538+
C10_EXPORT const TypeMetaData C10_CONCATENATE( \
539539
_typeMetaDataInstance_preallocated_, \
540540
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
541541
} \
542542
template <> \
543543
C10_EXPORT const detail::TypeMetaData* \
544544
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
545-
return &MACRO_CONCAT( \
545+
return &C10_CONCATENATE( \
546546
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
547547
}
548548
#else // _MSC_VER
@@ -552,19 +552,19 @@ inline std::ostream& operator<<(
552552
return TypeIdentifier(PreallocatedId); \
553553
} \
554554
namespace detail { \
555-
C10_EXPORT extern const TypeMetaData MACRO_CONCAT( \
555+
C10_EXPORT extern const TypeMetaData C10_CONCATENATE( \
556556
_typeMetaDataInstance_preallocated_, \
557557
PreallocatedId); \
558558
} \
559559
template <> \
560560
inline const detail::TypeMetaData* \
561561
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
562-
return &MACRO_CONCAT( \
562+
return &C10_CONCATENATE( \
563563
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
564564
}
565565
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
566566
namespace detail { \
567-
const TypeMetaData MACRO_CONCAT( \
567+
const TypeMetaData C10_CONCATENATE( \
568568
_typeMetaDataInstance_preallocated_, \
569569
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
570570
}

caffe2/core/context_gpu.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ static void Caffe2InitializeCuda() {
201201
VLOG(1) << "No cuda gpu present. Skipping.";
202202
return;
203203
}
204+
C10_LOG_API_USAGE_ONCE("caffe2.init.cuda");
204205
// Check if the number of GPUs matches the expected compile-time max number
205206
// of GPUs.
206207
CAFFE_ENFORCE_LE(

0 commit comments

Comments
 (0)