Skip to content

Commit 7195fbf

Browse files
Kirill Smelkovgitster
authored andcommitted
combine-diff: speed it up, by using multiparent diff tree-walker directly
As was recently shown in "combine-diff: optimize combine_diff_path sets intersection", combine-diff runs very slowly. In that commit we optimized paths sets intersection, but that accounted only for ~ 25% of the slowness, and as my tracing showed, for linux.git v3.10..v3.11, for merges a lot of time is spent computing diff(commit,commit^2) just to only then intersect that huge diff to almost small set of files from diff(commit,commit^1). In previous commit, we described the problem in more details, and reworked the diff tree-walker to be general one - i.e. to work in multiple parent case too. Now is the time to take advantage of it for finding paths for combine diff. The implementation is straightforward - if we know, we can get generated diff paths directly, and at present that means no diff filtering or rename/copy detection was requested(*), we can call multiparent tree-walker directly and get ready paths. (*) because e.g. at present, all diffcore transformations work on diff_filepair queues, but in the future, that limitation can be lifted, if filters would operate directly on combine_diff_paths. Timings for `git log --raw --no-abbrev --no-renames` without `-c` ("git log") and with `-c` ("git log -c") and with `-c --merges` ("git log -c --merges") before and after the patch are as follows: linux.git v3.10..v3.11 log log -c log -c --merges before 1.9s 16.4s 15.2s after 1.9s 2.4s 1.1s The result stayed the same. Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 72441af commit 7195fbf

File tree

2 files changed

+84
-5
lines changed

2 files changed

+84
-5
lines changed

combine-diff.c

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,7 @@ static const char *path_path(void *obj)
13031303

13041304

13051305
/* find set of paths that every parent touches */
1306-
static struct combine_diff_path *find_paths(const unsigned char *sha1,
1306+
static struct combine_diff_path *find_paths_generic(const unsigned char *sha1,
13071307
const struct sha1_array *parents, struct diff_options *opt)
13081308
{
13091309
struct combine_diff_path *paths = NULL;
@@ -1316,6 +1316,7 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1,
13161316
/* tell diff_tree to emit paths in sorted (=tree) order */
13171317
opt->orderfile = NULL;
13181318

1319+
/* D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (wrt paths) */
13191320
for (i = 0; i < num_parent; i++) {
13201321
/*
13211322
* show stat against the first parent even when doing
@@ -1346,6 +1347,35 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1,
13461347
}
13471348

13481349

1350+
/*
1351+
* find set of paths that everybody touches, assuming diff is run without
1352+
* rename/copy detection, etc, comparing all trees simultaneously (= faster).
1353+
*/
1354+
static struct combine_diff_path *find_paths_multitree(
1355+
const unsigned char *sha1, const struct sha1_array *parents,
1356+
struct diff_options *opt)
1357+
{
1358+
int i, nparent = parents->nr;
1359+
const unsigned char **parents_sha1;
1360+
struct combine_diff_path paths_head;
1361+
struct strbuf base;
1362+
1363+
parents_sha1 = xmalloc(nparent * sizeof(parents_sha1[0]));
1364+
for (i = 0; i < nparent; i++)
1365+
parents_sha1[i] = parents->sha1[i];
1366+
1367+
/* fake list head, so worker can assume it is non-NULL */
1368+
paths_head.next = NULL;
1369+
1370+
strbuf_init(&base, PATH_MAX);
1371+
diff_tree_paths(&paths_head, sha1, parents_sha1, nparent, &base, opt);
1372+
1373+
strbuf_release(&base);
1374+
free(parents_sha1);
1375+
return paths_head.next;
1376+
}
1377+
1378+
13491379
void diff_tree_combined(const unsigned char *sha1,
13501380
const struct sha1_array *parents,
13511381
int dense,
@@ -1355,6 +1385,7 @@ void diff_tree_combined(const unsigned char *sha1,
13551385
struct diff_options diffopts;
13561386
struct combine_diff_path *p, *paths;
13571387
int i, num_paths, needsep, show_log_first, num_parent = parents->nr;
1388+
int need_generic_pathscan;
13581389

13591390
/* nothing to do, if no parents */
13601391
if (!num_parent)
@@ -1377,11 +1408,58 @@ void diff_tree_combined(const unsigned char *sha1,
13771408

13781409
/* find set of paths that everybody touches
13791410
*
1380-
* NOTE find_paths() also handles --stat, as it computes
1381-
* diff(sha1,parent_i) for all i to do the job, specifically
1382-
* for parent0.
1411+
* NOTE
1412+
*
1413+
* Diffcore transformations are bound to diff_filespec and logic
1414+
* comparing two entries - i.e. they do not apply directly to combine
1415+
* diff.
1416+
*
1417+
* If some of such transformations is requested - we launch generic
1418+
* path scanning, which works significantly slower compared to
1419+
* simultaneous all-trees-in-one-go scan in find_paths_multitree().
1420+
*
1421+
* TODO some of the filters could be ported to work on
1422+
* combine_diff_paths - i.e. all functionality that skips paths, so in
1423+
* theory, we could end up having only multitree path scanning.
1424+
*
1425+
* NOTE please keep this semantically in sync with diffcore_std()
13831426
*/
1384-
paths = find_paths(sha1, parents, &diffopts);
1427+
need_generic_pathscan = opt->skip_stat_unmatch ||
1428+
DIFF_OPT_TST(opt, FOLLOW_RENAMES) ||
1429+
opt->break_opt != -1 ||
1430+
opt->detect_rename ||
1431+
opt->pickaxe ||
1432+
opt->filter;
1433+
1434+
1435+
if (need_generic_pathscan) {
1436+
/*
1437+
* NOTE generic case also handles --stat, as it computes
1438+
* diff(sha1,parent_i) for all i to do the job, specifically
1439+
* for parent0.
1440+
*/
1441+
paths = find_paths_generic(sha1, parents, &diffopts);
1442+
}
1443+
else {
1444+
int stat_opt;
1445+
paths = find_paths_multitree(sha1, parents, &diffopts);
1446+
1447+
/*
1448+
* show stat against the first parent even
1449+
* when doing combined diff.
1450+
*/
1451+
stat_opt = (opt->output_format &
1452+
(DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT));
1453+
if (stat_opt) {
1454+
diffopts.output_format = stat_opt;
1455+
1456+
diff_tree_sha1(parents->sha1[0], sha1, "", &diffopts);
1457+
diffcore_std(&diffopts);
1458+
if (opt->orderfile)
1459+
diffcore_order(opt->orderfile);
1460+
diff_flush(&diffopts);
1461+
}
1462+
}
13851463

13861464
/* find out number of surviving paths */
13871465
for (num_paths = 0, p = paths; p; p = p->next)

diff.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4764,6 +4764,7 @@ void diffcore_fix_diff_index(struct diff_options *options)
47644764

47654765
void diffcore_std(struct diff_options *options)
47664766
{
4767+
/* NOTE please keep the following in sync with diff_tree_combined() */
47674768
if (options->skip_stat_unmatch)
47684769
diffcore_skip_stat_unmatch(options);
47694770
if (!options->found_follow) {

0 commit comments

Comments
 (0)