Skip to content

Commit 76e6109

Browse files
JXRiver and tensorflower-gardener
authored and committed
Remove unnecessary GPU allocator wrapped in GPUcudaMallocAllocator.
GPUcudaMallocAllocator doesn't use the passed-in GPU allocator, and its GetStats doesn't return the correct value. Removing both to avoid confusion.

PiperOrigin-RevId: 377383677
Change-Id: I09ee5def36588d70fc3c1ffa465ce28a2f2af018
1 parent 4233932 commit 76e6109

File tree

3 files changed

+16
-19
lines changed

3 files changed

+16
-19
lines changed

tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,12 @@ limitations under the License.
2727
namespace tensorflow {
2828

2929
GPUcudaMallocAllocator::GPUcudaMallocAllocator(
30-
Allocator* allocator, PlatformDeviceId platform_device_id)
31-
: base_allocator_(allocator) {
30+
PlatformDeviceId platform_device_id) {
3231
stream_exec_ = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
3332
platform_device_id)
3433
.ValueOrDie();
3534
}
3635

37-
GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; }
38-
3936
void* GPUcudaMallocAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
4037
#ifdef GOOGLE_CUDA
4138
// allocate with cudaMalloc
@@ -80,10 +77,6 @@ void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) {
8077
#endif // GOOGLE_CUDA
8178
}
8279

83-
absl::optional<AllocatorStats> GPUcudaMallocAllocator::GetStats() {
84-
return base_allocator_->GetStats();
85-
}
86-
8780
bool GPUcudaMallocAllocator::TracksAllocationSizes() const { return false; }
8881

8982
} // namespace tensorflow

tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,17 @@ limitations under the License.
2626

2727
namespace tensorflow {
2828

29-
// An allocator that wraps a GPU allocator and adds debugging
30-
// functionality that verifies that users do not write outside their
31-
// allocated memory.
29+
// An allocator which directly uses cuMemAlloc and cuMemFree to allocate and
30+
// free memory.
3231
class GPUcudaMallocAllocator : public Allocator {
3332
public:
34-
explicit GPUcudaMallocAllocator(Allocator* allocator,
35-
PlatformDeviceId platform_device_id);
36-
~GPUcudaMallocAllocator() override;
33+
explicit GPUcudaMallocAllocator(PlatformDeviceId platform_device_id);
3734
string Name() override { return "gpu_debug"; }
3835
void* AllocateRaw(size_t alignment, size_t num_bytes) override;
3936
void DeallocateRaw(void* ptr) override;
4037
bool TracksAllocationSizes() const override;
41-
absl::optional<AllocatorStats> GetStats() override;
4238

4339
private:
44-
Allocator* base_allocator_ = nullptr; // owned
45-
4640
se::StreamExecutor* stream_exec_; // Not owned.
4741

4842
TF_DISALLOW_COPY_AND_ASSIGN(GPUcudaMallocAllocator);

tensorflow/core/common_runtime/gpu/gpu_process_state.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,15 +204,22 @@ Allocator* GPUProcessState::GetGPUAllocator(
204204
// If true, passes all allocation requests through to cudaMalloc
205205
// useful for doing memory debugging with tools like cuda-memcheck
206206
// **WARNING** probably will not work in a multi-gpu scenario
207-
gpu_allocator =
208-
new GPUcudaMallocAllocator(gpu_allocator, platform_device_id);
207+
delete gpu_bfc_allocator;
208+
delete sub_allocator;
209+
gpu_bfc_allocator = nullptr;
210+
sub_allocator = nullptr;
211+
gpu_allocator = new GPUcudaMallocAllocator(platform_device_id);
209212
} else if (UseCudaMallocAsyncAllocator()) {
210213
LOG(INFO) << "Using CUDA malloc Async allocator for GPU: "
211214
<< platform_device_id;
212215
// If true, passes all allocation requests through to cudaMallocAsync
213216
// TODO: useful for doing memory debugging with tools like
214217
// compute-sanitizer.
215218
// TODO: **WARNING** probably will not work in a multi-gpu scenario
219+
delete gpu_bfc_allocator;
220+
delete sub_allocator;
221+
gpu_bfc_allocator = nullptr;
222+
sub_allocator = nullptr;
216223
gpu_allocator =
217224
new GpuCudaMallocAsyncAllocator(platform_device_id, total_bytes);
218225
}
@@ -259,6 +266,9 @@ SharedCounter* GPUProcessState::GPUAllocatorCounter(TfDeviceId tf_device_id) {
259266

260267
AllocatorParts& allocator_parts = gpu_allocators_[tf_device_id.value()];
261268
if (allocator_parts.counter.get() == nullptr) {
269+
if (allocator_parts.bfc_allocator == nullptr) {
270+
return nullptr;
271+
}
262272
SharedCounter* timing_counter = new SharedCounter;
263273
allocator_parts.bfc_allocator->SetTimingCounter(timing_counter);
264274
allocator_parts.counter.reset(timing_counter);

0 commit comments

Comments (0)