-
Notifications
You must be signed in to change notification settings - Fork 59
Expand file tree
/
Copy pathlarge_objects.h
More file actions
380 lines (317 loc) · 15 KB
/
large_objects.h
File metadata and controls
380 lines (317 loc) · 15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
/*
Copyright (c) 2005-2024 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_tbbmalloc_internal_H
#error tbbmalloc_internal.h must be included at this point
#endif
#ifndef __TBB_large_objects_H
#define __TBB_large_objects_H
//! The list of possible Cache Bin Aggregator operations.
/* Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
enum CacheBinOperationType {
CBOP_INVALID = 0,
CBOP_GET,
CBOP_PUT_LIST,
CBOP_CLEAN_TO_THRESHOLD,
CBOP_CLEAN_ALL,
CBOP_UPDATE_USED_SIZE
};
// The Cache Bin Aggregator operation status list.
// CBST_NOWAIT can be specified for non-blocking operations.
enum CacheBinOperationStatus {
CBST_WAIT = 0,
CBST_NOWAIT,
CBST_DONE
};
/*
* Bins that grow with arithmetic step
*/
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct LargeBinStructureProps {
public:
static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
static const size_t CacheStep = 8 * 1024;
static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;
static size_t alignToBin(size_t size) {
return alignUp(size, CacheStep);
}
static int sizeToIdx(size_t size) {
MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
return (size - MinSize) / CacheStep;
}
};
/*
* Bins that grow with special geometric progression.
*/
template<size_t MIN_SIZE, size_t MAX_SIZE>
struct HugeBinStructureProps {
private:
// Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor))
// There are StepFactor bins (8 be default) between each power of 2 bin
static const int MaxSizeExp = Log2<MAX_SIZE>::value;
static const int MinSizeExp = Log2<MIN_SIZE>::value;
static const int StepFactor = 8;
static const int StepFactorExp = Log2<StepFactor>::value;
public:
static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;
static size_t alignToBin(size_t size) {
MALLOC_ASSERT(size >= StepFactor, "Size must not be less than the StepFactor");
int sizeExp = (int)BitScanRev(size);
MALLOC_ASSERT(sizeExp >= 0, "BitScanRev() cannot return -1, as size >= stepfactor > 0");
MALLOC_ASSERT(sizeExp >= StepFactorExp, "sizeExp >= StepFactorExp, because size >= stepFactor");
int minorStepExp = sizeExp - StepFactorExp;
return alignUp(size, 1ULL << minorStepExp);
}
// Sizes between the power of 2 values are approximated to StepFactor.
static int sizeToIdx(size_t size) {
MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);
int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
MALLOC_ASSERT(sizeExp >= 0, "BitScanRev() cannot return -1, as size >= stepfactor > 0");
MALLOC_ASSERT(sizeExp >= StepFactorExp, "sizeExp >= StepFactorExp, because size >= stepFactor");
int minorStepExp = sizeExp - StepFactorExp;
size_t majorStepSize = 1ULL << sizeExp;
int minorIdx = (size - majorStepSize) >> minorStepExp;
MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
"Size is not aligned on the bin");
return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
}
};
/*
* Cache properties accessor
*
* TooLargeFactor -- when cache size treated "too large" in comparison to user data size
* OnMissFactor -- If cache miss occurred and cache was cleaned,
* set ageThreshold to OnMissFactor * the difference
* between current time and last time cache was cleaned.
* LongWaitFactor -- to detect rarely-used bins and forget about their usage history
*/
template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
struct LargeObjectCacheProps : public StructureProps {
static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
};
template<typename Props>
class LargeObjectCacheImpl {
private:
// Current sizes of used and cached objects. It's calculated while we are
// traversing bins, and used for isLOCTooLarge() check at the same time.
class BinsSummary {
size_t usedSz;
size_t cachedSz;
public:
BinsSummary() : usedSz(0), cachedSz(0) {}
// "too large" criteria
bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
void update(size_t usedSize, size_t cachedSize) {
usedSz += usedSize;
cachedSz += cachedSize;
}
void reset() { usedSz = cachedSz = 0; }
};
public:
// The number of bins to cache large/huge objects.
static const uint32_t numBins = Props::NumBins;
typedef BitMaskMax<numBins> BinBitMask;
// 2-linked list of same-size cached blocks ordered by age (oldest on top)
// TODO: are we really want the list to be 2-linked? This allows us
// reduce memory consumption and do less operations under lock.
// TODO: try to switch to 32-bit logical time to save space in CacheBin
// and move bins to different cache lines.
class CacheBin {
private:
LargeMemoryBlock* first;
std::atomic<LargeMemoryBlock*> last;
/* age of an oldest block in the list; equal to last->age, if last defined,
used for quick checking it without acquiring the lock. */
std::atomic<uintptr_t> oldest;
/* currAge when something was excluded out of list because of the age,
not because of cache hit */
uintptr_t lastCleanedAge;
/* Current threshold value for the blocks of a particular size.
Set on cache miss. */
std::atomic<intptr_t> ageThreshold;
/* total size of all objects corresponding to the bin and allocated by user */
std::atomic<size_t> usedSize;
/* total size of all objects cached in the bin */
std::atomic<size_t> cachedSize;
/* mean time of presence of block in the bin before successful reuse */
std::atomic<intptr_t> meanHitRange;
/* time of last get called for the bin */
uintptr_t lastGet;
typename MallocAggregator<CacheBinOperation>::type aggregator;
void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true);
/* should be placed in zero-initialized memory, ctor not needed. */
CacheBin();
public:
void init() {
memset(static_cast<void*>(this), 0, sizeof(CacheBin));
}
/* ---------- Cache accessors ---------- */
void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
/* ---------- Cleanup functions -------- */
bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
/* ------------------------------------- */
void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
void decreaseThreshold() {
intptr_t threshold = ageThreshold.load(std::memory_order_relaxed);
if (threshold)
ageThreshold.store((threshold + meanHitRange.load(std::memory_order_relaxed)) / 2, std::memory_order_relaxed);
}
void updateBinsSummary(BinsSummary *binsSummary) const {
binsSummary->update(usedSize.load(std::memory_order_relaxed), cachedSize.load(std::memory_order_relaxed));
}
size_t getSize() const { return cachedSize.load(std::memory_order_relaxed); }
size_t getUsedSize() const { return usedSize.load(std::memory_order_relaxed); }
size_t reportStat(int num, FILE *f);
/* --------- Unsafe methods used with the aggregator ------- */
void forgetOutdatedState(uintptr_t currTime);
LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
int idx, int num, size_t hugeObjectThreshold);
LargeMemoryBlock *get();
LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
if (!usedSize.load(std::memory_order_relaxed)) bitMask->set(idx, true);
usedSize.store(usedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
if (!usedSize.load(std::memory_order_relaxed) && !first) bitMask->set(idx, false);
}
void updateMeanHitRange( intptr_t hitRange ) {
hitRange = hitRange >= 0 ? hitRange : 0;
intptr_t mean = meanHitRange.load(std::memory_order_relaxed);
mean = mean ? (mean + hitRange) / 2 : hitRange;
meanHitRange.store(mean, std::memory_order_relaxed);
}
void updateAgeThreshold( uintptr_t currTime ) {
if (lastCleanedAge)
ageThreshold.store(Props::OnMissFactor * (currTime - lastCleanedAge), std::memory_order_relaxed);
}
void updateCachedSize(size_t size) {
cachedSize.store(cachedSize.load(std::memory_order_relaxed) + size, std::memory_order_relaxed);
}
void setLastGet( uintptr_t newLastGet ) {
lastGet = newLastGet;
}
/* -------------------------------------------------------- */
};
// Huge bins index for fast regular cleanup searching in case of
// the "huge size threshold" setting defined
intptr_t hugeSizeThresholdIdx;
private:
// How many times LOC was "too large"
std::atomic<intptr_t> tooLargeLOC;
// for fast finding of used bins and bins with non-zero usedSize;
// indexed from the end, as we need largest 1st
BinBitMask bitMask;
// bins with lists of recently freed large blocks cached for reuse
CacheBin bin[numBins];
public:
/* ------------ CacheBin structure dependent stuff ------------ */
static size_t alignToBin(size_t size) {
return Props::alignToBin(size);
}
static int sizeToIdx(size_t size) {
return Props::sizeToIdx(size);
}
/* --------- Main cache functions (put, get object) ------------ */
void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);
/* ------------------------ Cleanup ---------------------------- */
bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
bool cleanAll(ExtMemoryPool *extMemPool);
/* -------------------------- Other ---------------------------- */
void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);
void reset();
void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
size_t getLOCSize() const;
size_t getUsedSize() const;
#endif
};
class LargeObjectCache {
private:
// Large bins [minLargeSize, maxLargeSize)
// Huge bins [maxLargeSize, maxHugeSize)
static const size_t minLargeSize = 8 * 1024,
maxLargeSize = 8 * 1024 * 1024,
// Cache memory up to 1TB (or 2GB for 32-bit arch), but sieve objects from the special threshold
maxHugeSize = tbb::detail::select_size_t_constant<2147483648U, 1099511627776ULL>::value;
public:
// Upper bound threshold for caching size. After that size all objects sieve through cache
// By default - 64MB, previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
// After that size large object interpreted as huge and does not participate in regular cleanup.
// Can be changed during the program execution.
size_t hugeSizeThreshold;
private:
// Large objects cache properties
typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;
// Huge objects cache properties
typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;
// Cache implementation type with properties
typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;
// Beginning of largeCache is more actively used and smaller than hugeCache,
// so put hugeCache first to prevent false sharing
// with LargeObjectCache's predecessor
HugeCacheType hugeCache;
LargeCacheType largeCache;
/* logical time, incremented on each put/get operation
To prevent starvation between pools, keep separately for each pool.
Overflow is OK, as we only want difference between
its current value and some recent.
Both malloc and free should increment logical time, as in
a different case multiple cached blocks would have same age,
and accuracy of predictors suffers.
*/
std::atomic<uintptr_t> cacheCurrTime;
// Memory pool that owns this LargeObjectCache.
// strict 1:1 relation, never changed
ExtMemoryPool *extMemPool;
// Returns artificial bin index,
// it's used only during sorting and never saved
static int sizeToIdx(size_t size);
// Our friends
friend class Backend;
public:
void init(ExtMemoryPool *memPool);
// Item accessors
void put(LargeMemoryBlock *largeBlock);
void putList(LargeMemoryBlock *head);
LargeMemoryBlock *get(size_t size);
void updateCacheState(DecreaseOrIncrease op, size_t size);
bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);
// Cleanup operations
bool doCleanup(uintptr_t currTime, bool doThreshDecr);
bool decreasingCleanup();
bool regularCleanup();
bool cleanAll();
void reset();
void reportStat(FILE *f);
#if __TBB_MALLOC_WHITEBOX_TEST
size_t getLOCSize() const;
size_t getUsedSize() const;
#endif
// Cache deals with exact-fit sizes, so need to align each size
// to the specific bin when put object to cache
static size_t alignToBin(size_t size);
void setHugeSizeThreshold(size_t value);
// Check if we should cache or sieve this size
bool sizeInCacheRange(size_t size);
uintptr_t getCurrTimeRange(uintptr_t range);
void registerRealloc(size_t oldSize, size_t newSize);
};
#endif // __TBB_large_objects_H