Skip to content

Commit 64c4e8b

Browse files
stefanbellergitster
authored andcommitted
xdiff/xhistogram: move index allocation into find_lcs
This fixes a memory issue when recursing a lot, which can be reproduced as seq 1 100000 >one seq 1 4 100000 >two git diff --no-index --histogram one two Before this patch, histogram_diff would call itself recursively before calling free_index, which would mean a lot of memory is allocated during the recursion and only freed afterwards. By moving the memory allocation (and its free call) into find_lcs, the memory is free'd before we recurse, such that memory is reused in the next step of the recursion instead of using new memory. This addresses only the memory pressure, not the run time complexity, that is also awful for the corner case outlined above. Helpful in understanding the code (in addition to the sparse history of this file), was https://stackoverflow.com/a/32367597 which reproduces most of the code comments of the JGit implementation. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent c671d4b commit 64c4e8b

File tree

1 file changed

+53
-43
lines changed

1 file changed

+53
-43
lines changed

xdiff/xhistogram.c

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -251,44 +251,13 @@ static inline void free_index(struct histindex *index)
251251
xdl_cha_free(&index->rcha);
252252
}
253253

254-
static int find_lcs(struct histindex *index, struct region *lcs,
255-
int line1, int count1, int line2, int count2) {
256-
int b_ptr;
257-
258-
if (scanA(index, line1, count1))
259-
return -1;
260-
261-
index->cnt = index->max_chain_length + 1;
262-
263-
for (b_ptr = line2; b_ptr <= LINE_END(2); )
264-
b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2);
265-
266-
return index->has_common && index->max_chain_length < index->cnt;
267-
}
268-
269-
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
270-
int line1, int count1, int line2, int count2)
254+
static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
255+
struct region *lcs,
256+
int line1, int count1, int line2, int count2)
271257
{
258+
int b_ptr;
259+
int sz, ret = -1;
272260
struct histindex index;
273-
struct region lcs;
274-
int sz;
275-
int result = -1;
276-
277-
if (count1 <= 0 && count2 <= 0)
278-
return 0;
279-
280-
if (LINE_END(1) >= MAX_PTR)
281-
return -1;
282-
283-
if (!count1) {
284-
while(count2--)
285-
env->xdf2.rchg[line2++ - 1] = 1;
286-
return 0;
287-
} else if (!count2) {
288-
while(count1--)
289-
env->xdf1.rchg[line1++ - 1] = 1;
290-
return 0;
291-
}
292261

293262
memset(&index, 0, sizeof(index));
294263

@@ -326,8 +295,52 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
326295
index.ptr_shift = line1;
327296
index.max_chain_length = 64;
328297

298+
if (scanA(&index, line1, count1))
299+
goto cleanup;
300+
301+
index.cnt = index.max_chain_length + 1;
302+
303+
for (b_ptr = line2; b_ptr <= LINE_END(2); )
304+
b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2);
305+
306+
if (index.has_common && index.max_chain_length < index.cnt)
307+
ret = 1;
308+
else
309+
ret = 0;
310+
311+
cleanup:
312+
free_index(&index);
313+
return ret;
314+
}
315+
316+
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
317+
int line1, int count1, int line2, int count2)
318+
{
319+
struct region lcs;
320+
int lcs_found;
321+
int result = -1;
322+
323+
if (count1 <= 0 && count2 <= 0)
324+
return 0;
325+
326+
if (LINE_END(1) >= MAX_PTR)
327+
return -1;
328+
329+
if (!count1) {
330+
while(count2--)
331+
env->xdf2.rchg[line2++ - 1] = 1;
332+
return 0;
333+
} else if (!count2) {
334+
while(count1--)
335+
env->xdf1.rchg[line1++ - 1] = 1;
336+
return 0;
337+
}
338+
329339
memset(&lcs, 0, sizeof(lcs));
330-
if (find_lcs(&index, &lcs, line1, count1, line2, count2))
340+
lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
341+
if (lcs_found < 0)
342+
goto out;
343+
else if (lcs_found)
331344
result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);
332345
else {
333346
if (lcs.begin1 == 0 && lcs.begin2 == 0) {
@@ -341,18 +354,15 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
341354
line1, lcs.begin1 - line1,
342355
line2, lcs.begin2 - line2);
343356
if (result)
344-
goto cleanup;
357+
goto out;
345358
result = histogram_diff(xpp, env,
346359
lcs.end1 + 1, LINE_END(1) - lcs.end1,
347360
lcs.end2 + 1, LINE_END(2) - lcs.end2);
348361
if (result)
349-
goto cleanup;
362+
goto out;
350363
}
351364
}
352-
353-
cleanup:
354-
free_index(&index);
355-
365+
out:
356366
return result;
357367
}
358368

0 commit comments

Comments
 (0)