@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_device.h"
 
 #include "tensorflow/core/common_runtime/device/device_id_utils.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -66,14 +67,17 @@ class GPUDeviceTest : public ::testing::Test {
       const string& visible_device_list = "",
       double per_process_gpu_memory_fraction = 0, int gpu_device_count = 1,
       const std::vector<std::vector<float>>& memory_limit_mb = {},
-      const std::vector<std::vector<int32>>& priority = {}) {
+      const std::vector<std::vector<int32>>& priority = {},
+      const bool use_cuda_malloc_async = false) {
     SessionOptions options;
     ConfigProto* config = &options.config;
     (*config->mutable_device_count())["GPU"] = gpu_device_count;
     GPUOptions* gpu_options = config->mutable_gpu_options();
     gpu_options->set_visible_device_list(visible_device_list);
     gpu_options->set_per_process_gpu_memory_fraction(
         per_process_gpu_memory_fraction);
+    gpu_options->mutable_experimental()->set_use_cuda_malloc_async(
+        use_cuda_malloc_async);
     for (int i = 0; i < memory_limit_mb.size(); ++i) {
       auto virtual_devices =
           gpu_options->mutable_experimental()->add_virtual_devices();
@@ -109,6 +113,33 @@ class GPUDeviceTest : public ::testing::Test {
   }
 };
 
+TEST_F(GPUDeviceTest, CudaMallocAsync) {
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {}, {},
+                                           /*use_cuda_malloc_async=*/true);
+  std::vector<std::unique_ptr<Device>> devices;
+  Status status;
+  int number_instantiated =
+      GpuCudaMallocAsyncAllocator::GetInstantiatedCountTestOnly();
+  {  // The new scope is to trigger the destruction of the object.
+    status = DeviceFactory::GetFactory("GPU")->CreateDevices(
+        opts, kDeviceNamePrefix, &devices);
+    EXPECT_EQ(devices.size(), 1);
+    Device* device = devices[0].get();
+    auto* device_info = device->tensorflow_gpu_device_info();
+    EXPECT_NE(device_info, nullptr);
+
+    AllocatorAttributes allocator_attributes = AllocatorAttributes();
+    allocator_attributes.set_gpu_compatible(true);
+    Allocator* allocator = devices[0]->GetAllocator(allocator_attributes);
+    void* ptr = allocator->AllocateRaw(Allocator::kAllocatorAlignment, 1024);
+    EXPECT_NE(ptr, nullptr);
+    allocator->DeallocateRaw(ptr);
+  }
+  EXPECT_EQ(number_instantiated + 1,
+            GpuCudaMallocAsyncAllocator::GetInstantiatedCountTestOnly());
+  EXPECT_EQ(status.code(), error::OK);
+}
+
 TEST_F(GPUDeviceTest, FailedToParseVisibleDeviceList) {
   SessionOptions opts = MakeSessionOptions("0,abc");
   std::vector<std::unique_ptr<Device>> devices;
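For reference, a minimal sketch (not part of the diff above) of how a caller could opt in to the new allocator through SessionOptions, mirroring the MakeSessionOptions() change in this commit. Only the set_use_cuda_malloc_async() field comes from the diff; the header path and helper name below are illustrative assumptions.

// Sketch only: enable the cudaMallocAsync allocator via SessionOptions,
// using the same experimental GPUOptions field the test above exercises.
// Header path and function name are illustrative, not from the commit.
#include "tensorflow/core/public/session_options.h"

tensorflow::SessionOptions CudaMallocAsyncSessionOptions() {
  tensorflow::SessionOptions options;
  tensorflow::GPUOptions* gpu_options = options.config.mutable_gpu_options();
  // Opt in to the CUDA stream-ordered (cudaMallocAsync) allocator.
  gpu_options->mutable_experimental()->set_use_cuda_malloc_async(true);
  return options;
}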