Skip to content

Commit 57fd612

Browse files
author
yangjingjing
committed
init blog
1 parent bebfe9f commit 57fd612

19 files changed

+12249
-0
lines changed

_posts/2021-01-23-Linux源码内存管理10直接页面回收.md

Lines changed: 1163 additions & 0 deletions
Large diffs are not rendered by default.

_posts/2021-01-23-Linux源码内存管理11内存规整过程分析.md

Lines changed: 1152 additions & 0 deletions
Large diffs are not rendered by default.

_posts/2021-01-23-Linux源码内存管理12内存OOM触发分析.md

Lines changed: 791 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
---
2+
layout: post
3+
categories: [Linux]
4+
description: none
5+
keywords: Linux
6+
---
7+
# Linux源码内存管理13内存池
8+
9+
## 内存池原理
10+
平时我们直接使用 malloc、new、free、delete 等 API 来申请和释放内存,这样做的缺点在于:由于所申请内存块的大小不定,频繁使用时会造成大量的内存碎片,进而降低性能。
11+
memory pool(内存池)是一种内存分配方式,又被称为固定大小区块规划。内存池是在真正使用内存之前,先申请分配一定数量的、大小相等的内存块留作备用。当有新的内存需求时,就直接从内存池中分出一部分内存块,若内存块不够再继续申请新的内存。这样做的优势在于使内存分配效率得到提升。
12+
13+
## 内存池源码分析
14+
1.内存池数据结构源码
15+
先看看内存池主要的数据结构,结构体位于include/linux/mempool.h文件中:
16+
```
17+
typedef struct mempool_s {
18+
spinlock_t lock;//防止多处理器并发而引入的锁
19+
int min_nr; //elements数组中的成员数量
20+
int curr_nr;//当前elements数组中空闲的成员数量
21+
void **elements;//指针数组,共min_nr项,每一项指向一个预先分配的内存对象
22+
23+
//内存池与内核缓冲区结合使用的指针(这个指针专门用来指向这种内存对象对应的缓存区的指针)
24+
void *pool_data;
25+
mempool_alloc_t *alloc;//内存分配函数
26+
mempool_free_t *free;//内存释放函数
27+
wait_queue_head_t wait;//任务等待队列
28+
} mempool_t;
29+
30+
```
31+
32+
## 内存池创建函数源码
33+
内核里使用mempool_create()创建一个内存池,使用mempool_destroy()销毁一个内存池,使用mempool_alloc()申请内存,使用mempool_free()释放内存。mempool_create函数位于mm/mempool.c文件中:
34+
```
35+
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
36+
mempool_free_t *free_fn, void *pool_data)
37+
{
38+
return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,
39+
GFP_KERNEL, NUMA_NO_NODE);
40+
}
41+
EXPORT_SYMBOL(mempool_create);
42+
43+
/******************
44+
创建一个内存池对象
45+
参数:
46+
min_nr : 为内存池分配的最小内存成员数量
47+
alloc_fn : 用户自定义内存分配函数(可以使用系统定义函数)
48+
free_fn : 用户自定义内存释放函数(可以使用系统定义函数)
49+
pool_data :根据用户自定义内存分配函数所提供的可选私有数据,一般是缓存区指针
50+
gfp_mask : 内存分配掩码
51+
node_id : 内存节点的id
52+
******************/
53+
mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
54+
mempool_free_t *free_fn, void *pool_data,
55+
gfp_t gfp_mask, int node_id)
56+
{
57+
mempool_t *pool;
58+
59+
//为内存池对象分配内存
60+
pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
61+
if (!pool)
62+
return NULL;
63+
64+
//初始化内存池
65+
if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data,
66+
gfp_mask, node_id)) {
67+
kfree(pool);
68+
return NULL;
69+
}
70+
71+
return pool;//返回内存池结构体
72+
}
73+
EXPORT_SYMBOL(mempool_create_node);
74+
75+
```
76+
mempool_create主要通过mempool_create_node来创建内存池,mempool_create_node首先分配内存池对象的内存后使用mempool_init_node初始化内存池,mempool_init_node:
77+
```
78+
int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
79+
mempool_free_t *free_fn, void *pool_data,
80+
gfp_t gfp_mask, int node_id)
81+
{
82+
//初始化内存池的相关参数
83+
spin_lock_init(&pool->lock);//初始化锁
84+
pool->min_nr = min_nr;
85+
pool->pool_data = pool_data;
86+
pool->alloc = alloc_fn;
87+
pool->free = free_fn;
88+
init_waitqueue_head(&pool->wait);//初始化等待队列
89+
90+
//分配一个长度为min_nr的数组用于存放申请后对象的指针
91+
pool->elements = kmalloc_array_node(min_nr, sizeof(void *),
92+
gfp_mask, node_id);
93+
if (!pool->elements)
94+
return -ENOMEM;
95+
96+
/*
97+
* First pre-allocate the guaranteed number of buffers.
98+
*/
99+
//首先保证预分配的缓冲区数量
100+
while (pool->curr_nr < pool->min_nr) {
101+
void *element;
102+
103+
//调用pool->alloc函数min_nr次
104+
element = pool->alloc(gfp_mask, pool->pool_data);
105+
if (unlikely(!element)) {//如果申请不到element,则直接销毁此内存池
106+
mempool_exit(pool);
107+
return -ENOMEM;
108+
}
109+
add_element(pool, element);//添加到elements指针数组中
110+
}
111+
112+
return 0;
113+
}
114+
EXPORT_SYMBOL(mempool_init_node);
115+
116+
```
117+
118+
## 内存池销毁函数源码
119+
我们再看看mempool_destroy以及它所调用的mempool_exit:
120+
```
121+
//销毁一个内存池
122+
void mempool_destroy(mempool_t *pool)
123+
{
124+
if (unlikely(!pool))
125+
return;
126+
127+
mempool_exit(pool);//释放内存池中的内存块
128+
kfree(pool);//释放内存池结构体
129+
}
130+
EXPORT_SYMBOL(mempool_destroy);
131+
132+
void mempool_exit(mempool_t *pool)
133+
{
134+
while (pool->curr_nr) {
135+
void *element = remove_element(pool);//把elements指针数组中的内存移除
136+
pool->free(element, pool->pool_data);//释放elements数组中的所有对象
137+
}
138+
kfree(pool->elements);//销毁elements指针数组
139+
pool->elements = NULL;
140+
}
141+
EXPORT_SYMBOL(mempool_exit);
142+
143+
```
144+
mempool_destroy也很简单:首先调用mempool_exit将elements中存放的内存对象逐个释放掉,再释放elements指针数组,最后将mempool_t结构体也释放掉。
145+
146+
## 内存池分配内存函数
147+
现在我们看mempool_alloc()函数
148+
```
149+
//内存池分配对象
150+
void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
151+
{
152+
void *element;
153+
unsigned long flags;
154+
wait_queue_entry_t wait;
155+
gfp_t gfp_temp;
156+
157+
//形参gfp_mask中不能包含__GFP_ZERO
158+
VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
159+
160+
//如果gfp_mask带有__GFP_DIRECT_RECLAIM标志,说明本次分配可能会睡眠;might_sleep_if只是对此做标注和检查(在不允许睡眠的上下文中调用时会告警),并不是在这里主动阻塞
161+
//#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
162+
might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
163+
164+
gfp_mask |= __GFP_NOMEMALLOC;//不使用预留内存
165+
gfp_mask |= __GFP_NORETRY;//分配页时如果失败则返回,不进行重试
166+
gfp_mask |= __GFP_NOWARN;//分配失败不提供警告
167+
168+
//gfp_temp是gfp_mask去掉__GFP_DIRECT_RECLAIM和__GFP_IO标志后的掩码:第一轮分配先不做直接回收和IO
169+
gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
170+
171+
repeat_alloc:
172+
173+
//使用内存池中的alloc函数进行分配对象
174+
element = pool->alloc(gfp_temp, pool->pool_data);
175+
if (likely(element != NULL))
176+
return element;
177+
178+
//给内存池上锁,获取后此段临界区禁止中断和抢占
179+
spin_lock_irqsave(&pool->lock, flags);
180+
181+
//如果当前内存池中有空闲数量
182+
if (likely(pool->curr_nr)) {
183+
element = remove_element(pool);//从内存池中获取内存对象
184+
spin_unlock_irqrestore(&pool->lock, flags);//解锁
185+
/* paired with rmb in mempool_free(), read comment there */
186+
smp_wmb();//写内存屏障,保证之前的写操作已经完成
187+
/*
188+
* Update the allocation stack trace as this is more useful
189+
* for debugging.
190+
*/
191+
kmemleak_update_trace(element);//用于debug
192+
return element;
193+
}
194+
195+
/*
196+
* We use gfp mask w/o direct reclaim or IO for the first round. If
197+
* alloc failed with that and @pool was empty, retry immediately.
198+
*/
199+
//这里是内存池中也没有空闲内存对象的时候进行的操作
200+
201+
//如果gfp_temp != gfp_mask
202+
if (gfp_temp != gfp_mask) {
203+
spin_unlock_irqrestore(&pool->lock, flags);
204+
gfp_temp = gfp_mask;
205+
goto repeat_alloc;//跳到repeat_alloc重新获取一次
206+
}
207+
208+
/* We must not sleep if !__GFP_DIRECT_RECLAIM */
209+
//传入的参数gfp_mask不允许回收的等待,分配不到内存则直接退出
210+
if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
211+
spin_unlock_irqrestore(&pool->lock, flags);
212+
return NULL;
213+
}
214+
215+
/* Let's wait for someone else to return an element to @pool */
216+
init_wait(&wait);//初始化wait等待进程
217+
//加入到内存池的等待队列中,等待当内存池中有空闲对象或者等待超时
218+
prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
219+
220+
spin_unlock_irqrestore(&pool->lock, flags);
221+
222+
/*
223+
* FIXME: this should be io_schedule(). The timeout is there as a
224+
* workaround for some DM problems in 2.6.18.
225+
*/
226+
io_schedule_timeout(5*HZ);//阻塞等待5秒
227+
228+
finish_wait(&pool->wait, &wait);//从内存池的等待队列删除此进程
229+
goto repeat_alloc;//跳转到repeat_alloc,重新尝试获取内存对象
230+
}
231+
EXPORT_SYMBOL(mempool_alloc);
232+
233+
```
234+
当模块从此内存池中获取内存对象时,会调用此函数。此函数优先通过pool->alloc从伙伴系统或slab缓冲区获取需要的内存对象;当内存不足导致无法获取内存对象时,才会从内存池的elements数组中获取。如果elements中也没有空闲的内存对象,则根据传入的分配标识进行相应的处理:不允许直接回收(不能睡眠)时直接返回NULL;否则加入内存池的等待队列睡眠,被唤醒或最多等待5秒后重新尝试分配。
235+
236+
## 内存池释放内存函数
237+
```
238+
// 内存池释放内存对象操作
239+
void mempool_free(void *element, mempool_t *pool)
240+
{
241+
unsigned long flags;
242+
243+
//传入的对象为空,则直接退出
244+
if (unlikely(element == NULL))
245+
return;
246+
247+
/*
248+
* Paired with the wmb in mempool_alloc(). The preceding read is
249+
* for @element and the following @pool->curr_nr. This ensures
250+
* that the visible value of @pool->curr_nr is from after the
251+
* allocation of @element. This is necessary for fringe cases
252+
* where @element was passed to this task without going through
253+
* barriers.
254+
*
255+
* For example, assume @p is %NULL at the beginning and one task
256+
* performs "p = mempool_alloc(...);" while another task is doing
257+
* "while (!p) cpu_relax(); mempool_free(p, ...);". This function
258+
* may end up using curr_nr value which is from before allocation
259+
* of @p without the following rmb.
260+
*/
261+
smp_rmb();//读内存屏障
262+
263+
/*
264+
* For correctness, we need a test which is guaranteed to trigger
265+
* if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr
266+
* without locking achieves that and refilling as soon as possible
267+
* is desirable.
268+
*
269+
* Because curr_nr visible here is always a value after the
270+
* allocation of @element, any task which decremented curr_nr below
271+
* min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets
272+
* incremented to min_nr afterwards. If curr_nr gets incremented
273+
* to min_nr after the allocation of @element, the elements
274+
* allocated after that are subject to the same guarantee.
275+
*
276+
* Waiters happen iff curr_nr is 0 and the above guarantee also
277+
* ensures that there will be frees which return elements to the
278+
* pool waking up the waiters.
279+
*/
280+
//如果当前内存池中空闲的内存对象少于内存池中应当保存的内存对象的数量时,优先把释放的对象加入到内存池空闲数组中
281+
if (unlikely(pool->curr_nr < pool->min_nr)) {
282+
spin_lock_irqsave(&pool->lock, flags);
283+
if (likely(pool->curr_nr < pool->min_nr)) {
284+
add_element(pool, element);//将用户释放的element重新加到内存池当中
285+
spin_unlock_irqrestore(&pool->lock, flags);
286+
wake_up(&pool->wait);//唤醒等待队列,目前已经有人释放内存,可以再次申请这个内存来使用
287+
return;
288+
}
289+
spin_unlock_irqrestore(&pool->lock, flags);
290+
}
291+
pool->free(element, pool->pool_data);//直接调用释放函数
292+
}
293+
EXPORT_SYMBOL(mempool_free);
294+
295+
```
296+
mempool_free将空闲内存对象释放到内存池中,当内存池中空闲对象不足时,优先将空闲内存对象放到elements数组中,把mempool填满,否则直接释放掉,让内存返回到伙伴系统或slab缓冲区中。
297+
298+

0 commit comments

Comments
 (0)