#include <c10/core/CPUCachingAllocator.h>

namespace c10 {

namespace {
thread_local CPUCachingAllocator* caching_allocator_ptr{nullptr};
} // namespace

// Both the mutex and the allocation map are static, i.e. shared across
// all CPUCachingAllocator instances on all threads.
std::mutex CPUCachingAllocator::mutex_;
ska::flat_hash_map<void*, size_t> CPUCachingAllocator::allocation_map_;

inline void* CPUCachingAllocator::allocate_and_cache(const size_t bytes) {
  void* ptr;
  try {
    ptr = c10::alloc_cpu(bytes);
  } catch (c10::Error& e) {
    // If allocation fails, try freeing cached available blocks.
    // For now we free all available cached blocks; if the retry below
    // fails as well, the exception propagates to the caller.
    free_cached();
    // Further consideration: if we ever get here because we ran out of
    // memory, it is perhaps best to disable caching, since this is
    // likely to happen again.
    // Try again.
    ptr = c10::alloc_cpu(bytes);
  }
  allocation_map_[ptr] = bytes;
  return ptr;
}

void* CPUCachingAllocator::allocate(const size_t bytes) {
  std::lock_guard<std::mutex> guard(mutex_);
  // Look for a cached block of exactly the requested size; if none is
  // available, fall back to a fresh allocation, which is then tracked
  // in allocation_map_.
  const auto& it = available_map_.find(bytes);
  if (it == available_map_.end() || it->second.empty()) {
    return allocate_and_cache(bytes);
  }
  return it->second.pop_back_val();
}

void CPUCachingAllocator::free(void* ptr) {
  // NB: Since we do not really free the memory, use cases such as
  // quantization code freeing original weights on mobile will not quite
  // work, as we are likely to hold onto that memory.
  // NB: We could also enforce a maximum amount of cached memory, so that
  // free actually frees the memory when we are nearing or above the
  // watermark (see the sketch after this function).
  std::lock_guard<std::mutex> guard(mutex_);
  // If this allocation was made before the caching allocator was
  // enabled, free it the regular way.
  const auto& it = allocation_map_.find(ptr);
  if (it == allocation_map_.end()) {
    c10::free_cpu(ptr);
    return;
  }
  const size_t alloc_size = it->second;
  available_map_[alloc_size].push_back(ptr);
}
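
// A possible shape for the watermark check mentioned above, sketched in
// comments only (kMaxCachedBytes and cached_bytes_ are hypothetical and
// not members of this class). Inside free(), after looking up
// alloc_size, one could do:
//
//   if (cached_bytes_ + alloc_size > kMaxCachedBytes) {
//     allocation_map_.erase(it);
//     c10::free_cpu(ptr);
//     return;
//   }
//   cached_bytes_ += alloc_size;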

void CPUCachingAllocator::record_free(void* ptr) {
  // This function captures the case where memory allocated by this
  // allocator is freed outside its scope. At the moment the only way to
  // capture this is to have the allocator that uses this caching
  // allocator as its backing allocator call this function explicitly
  // upon freeing the memory while outside the scope of the caching
  // allocator.
  // If the memory is freed in some other way, we will likely get
  // undefined behavior or a page fault. But that can be the case
  // without the caching allocator as well.
  std::lock_guard<std::mutex> guard(mutex_);
  const auto& it = allocation_map_.find(ptr);
  if (it != allocation_map_.end()) {
    allocation_map_.erase(it);
  }
}
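
// For example, a backing allocator's deleter might notify the caching
// allocator before releasing memory through its own path. A minimal
// sketch (wrapped_delete is hypothetical, not part of this file):
//
//   void wrapped_delete(void* ptr) {
//     if (auto* ca = c10::GetThreadLocalCachingAllocator()) {
//       ca->record_free(ptr);
//     }
//     c10::free_cpu(ptr);
//   }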

void CPUCachingAllocator::free_cached() {
  for (const auto& it : available_map_) {
    for (const auto ptr : it.second) {
      c10::free_cpu(ptr);
      // When cached memory is returned to the OS, it must also be
      // removed from allocation_map_.
      allocation_map_.erase(ptr);
    }
  }
  available_map_.clear();
}

CPUCachingAllocator::~CPUCachingAllocator() {
  free_cached();
}

CPUCachingAllocator* GetThreadLocalCachingAllocator() {
  return caching_allocator_ptr;
}

WithCPUCachingAllocatorGuard::WithCPUCachingAllocatorGuard(
    CPUCachingAllocator* allocator) {
  prev_caching_allocator_ptr_ = GetThreadLocalCachingAllocator();
  // Install the given allocator for this thread; the destructor
  // restores the previous one.
  caching_allocator_ptr = allocator;
}

WithCPUCachingAllocatorGuard::~WithCPUCachingAllocatorGuard() {
  caching_allocator_ptr = prev_caching_allocator_ptr_;
}

} // namespace c10
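
// A minimal usage sketch (illustrative only; assumes the surrounding
// code routes its allocations through GetThreadLocalCachingAllocator()):
//
//   c10::CPUCachingAllocator caching_allocator;
//   {
//     c10::WithCPUCachingAllocatorGuard guard(&caching_allocator);
//     void* buf = caching_allocator.allocate(4096);
//     // ... use buf ...
//     caching_allocator.free(buf); // returned to the cache, not the OS
//   } // previous thread-local allocator restored here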