Skip to content

Commit 5fda343

Browse files
committed
Merge branch 'ds/push-sparse-tree-walk'
"git pack-objects" learned another algorithm to compute the set of objects to send, one that trades a possibly larger resulting packfile for a lower traversal cost, which favors small pushes.

* ds/push-sparse-tree-walk:
  - pack-objects: create GIT_TEST_PACK_SPARSE
  - pack-objects: create pack.useSparse setting
  - revision: implement sparse algorithm
  - list-objects: consume sparse tree walk
  - revision: add mark_tree_uninteresting_sparse
2 parents d8d62e6 + 99dbbfa commit 5fda343

File tree

12 files changed

+378
-17
lines changed

12 files changed

+378
-17
lines changed

Documentation/config/pack.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ pack.useBitmaps::
105105
true. You should not generally need to turn this off unless
106106
you are debugging pack bitmaps.
107107

108+
pack.useSparse::
109+
When true, git will default to using the '--sparse' option in
110+
'git pack-objects' when the '--revs' option is present. This
111+
algorithm only walks trees that appear in paths that introduce new
112+
objects. This can have significant performance benefits when
113+
computing a pack to send a small change. However, it is possible
114+
that extra objects are added to the pack-file if the included
115+
commits contain certain types of direct renames.
116+
108117
pack.writeBitmaps (deprecated)::
109118
This is a deprecated synonym for `repack.writeBitmaps`.
110119

Documentation/git-pack-objects.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ SYNOPSIS
1414
[--local] [--incremental] [--window=<n>] [--depth=<n>]
1515
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
1616
[--stdout [--filter=<filter-spec>] | base-name]
17-
[--shallow] [--keep-true-parents] < object-list
17+
[--shallow] [--keep-true-parents] [--sparse] < object-list
1818

1919

2020
DESCRIPTION
@@ -196,6 +196,15 @@ depth is 4095.
196196
Add --no-reuse-object if you want to force a uniform compression
197197
level on all data no matter the source.
198198

199+
--sparse::
200+
Use the "sparse" algorithm to determine which objects to include in
201+
the pack, when combined with the "--revs" option. This algorithm
202+
only walks trees that appear in paths that introduce new objects.
203+
This can have significant performance benefits when computing
204+
a pack to send a small change. However, it is possible that extra
205+
objects are added to the pack-file if the included commits contain
206+
certain types of direct renames.
207+
199208
--thin::
200209
Create a "thin" pack by omitting the common objects between a
201210
sender and a receiver in order to reduce network transfer. This

bisect.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,7 @@ static void bisect_common(struct rev_info *revs)
658658
if (prepare_revision_walk(revs))
659659
die("revision walk setup failed");
660660
if (revs->tree_objects)
661-
mark_edges_uninteresting(revs, NULL);
661+
mark_edges_uninteresting(revs, NULL, 0);
662662
}
663663

664664
static void exit_if_skipped_commits(struct commit_list *tried,

builtin/pack-objects.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ static unsigned long pack_size_limit;
8484
static int depth = 50;
8585
static int delta_search_threads;
8686
static int pack_to_stdout;
87+
static int sparse;
8788
static int thin;
8889
static int num_preferred_base;
8990
static struct progress *progress_state;
@@ -2703,6 +2704,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
27032704
use_bitmap_index_default = git_config_bool(k, v);
27042705
return 0;
27052706
}
2707+
if (!strcmp(k, "pack.usesparse")) {
2708+
sparse = git_config_bool(k, v);
2709+
return 0;
2710+
}
27062711
if (!strcmp(k, "pack.threads")) {
27072712
delta_search_threads = git_config_int(k, v);
27082713
if (delta_search_threads < 0)
@@ -3130,7 +3135,7 @@ static void get_object_list(int ac, const char **av)
31303135

31313136
if (prepare_revision_walk(&revs))
31323137
die(_("revision walk setup failed"));
3133-
mark_edges_uninteresting(&revs, show_edge);
3138+
mark_edges_uninteresting(&revs, show_edge, sparse);
31343139

31353140
if (!fn_show_object)
31363141
fn_show_object = show_object;
@@ -3287,6 +3292,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
32873292
{ OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
32883293
N_("unpack unreachable objects newer than <time>"),
32893294
PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
3295+
OPT_BOOL(0, "sparse", &sparse,
3296+
N_("use the sparse reachability algorithm")),
32903297
OPT_BOOL(0, "thin", &thin,
32913298
N_("create thin packs")),
32923299
OPT_BOOL(0, "shallow", &shallow,
@@ -3319,6 +3326,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
33193326

33203327
read_replace_refs = 0;
33213328

3329+
sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0);
33223330
reset_pack_idx_option(&pack_idx_opts);
33233331
git_config(git_pack_config, NULL);
33243332

builtin/rev-list.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
546546
if (prepare_revision_walk(&revs))
547547
die("revision walk setup failed");
548548
if (revs.tree_objects)
549-
mark_edges_uninteresting(&revs, show_edge);
549+
mark_edges_uninteresting(&revs, show_edge, 0);
550550

551551
if (bisect_list) {
552552
int reaches, all;

http-push.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1933,7 +1933,7 @@ int cmd_main(int argc, const char **argv)
19331933
pushing = 0;
19341934
if (prepare_revision_walk(&revs))
19351935
die("revision walk setup failed");
1936-
mark_edges_uninteresting(&revs, NULL);
1936+
mark_edges_uninteresting(&revs, NULL, 0);
19371937
objects_to_send = get_delta(&revs, ref_lock);
19381938
finish_all_active_slots();
19391939

list-objects.c

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -226,25 +226,73 @@ static void mark_edge_parents_uninteresting(struct commit *commit,
226226
}
227227
}
228228

229-
void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
229+
/*
 * Collect the root trees of every parent of 'commit' into 'set'.
 *
 * For each parent that has a tree, the tree's object id is inserted into
 * 'set'. If the parent is flagged UNINTERESTING, its root tree is flagged
 * UNINTERESTING as well, and when revs->edge_hint is set the parent is
 * reported once through 'show_edge' (guarded by the SHOWN flag).
 *
 * NOTE(review): 'show_edge' is called without a NULL check. In this diff
 * the callers that pass NULL also pass sparse == 0, and this helper is
 * only reached from the sparse branch -- confirm that stays true.
 */
static void add_edge_parents(struct commit *commit,
230+
struct rev_info *revs,
231+
show_edge_fn show_edge,
232+
struct oidset *set)
233+
{
234+
struct commit_list *parents;
235+
236+
for (parents = commit->parents; parents; parents = parents->next) {
237+
struct commit *parent = parents->item;
238+
struct tree *tree = get_commit_tree(parent);
239+
240+
/* A parent without a tree contributes nothing to the set. */
if (!tree)
241+
continue;
242+
243+
/* Every parent tree is recorded, interesting or not. */
oidset_insert(set, &tree->object.oid);
244+
245+
/* Only UNINTERESTING parents propagate the flag and act as edges. */
if (!(parent->object.flags & UNINTERESTING))
246+
continue;
247+
tree->object.flags |= UNINTERESTING;
248+
249+
/* SHOWN ensures each edge commit is reported at most once. */
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
250+
parent->object.flags |= SHOWN;
251+
show_edge(parent);
252+
}
253+
}
254+
}
255+
256+
void mark_edges_uninteresting(struct rev_info *revs,
257+
show_edge_fn show_edge,
258+
int sparse)
230259
{
231260
struct commit_list *list;
232261
int i;
233262

234-
for (list = revs->commits; list; list = list->next) {
235-
struct commit *commit = list->item;
263+
if (sparse) {
264+
struct oidset set;
265+
oidset_init(&set, 16);
236266

237-
if (commit->object.flags & UNINTERESTING) {
238-
mark_tree_uninteresting(revs->repo,
239-
get_commit_tree(commit));
240-
if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
241-
commit->object.flags |= SHOWN;
242-
show_edge(commit);
267+
for (list = revs->commits; list; list = list->next) {
268+
struct commit *commit = list->item;
269+
struct tree *tree = get_commit_tree(commit);
270+
271+
if (commit->object.flags & UNINTERESTING)
272+
tree->object.flags |= UNINTERESTING;
273+
274+
oidset_insert(&set, &tree->object.oid);
275+
add_edge_parents(commit, revs, show_edge, &set);
276+
}
277+
278+
mark_trees_uninteresting_sparse(revs->repo, &set);
279+
oidset_clear(&set);
280+
} else {
281+
for (list = revs->commits; list; list = list->next) {
282+
struct commit *commit = list->item;
283+
if (commit->object.flags & UNINTERESTING) {
284+
mark_tree_uninteresting(revs->repo,
285+
get_commit_tree(commit));
286+
if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
287+
commit->object.flags |= SHOWN;
288+
show_edge(commit);
289+
}
290+
continue;
243291
}
244-
continue;
292+
mark_edge_parents_uninteresting(commit, revs, show_edge);
245293
}
246-
mark_edge_parents_uninteresting(commit, revs, show_edge);
247294
}
295+
248296
if (revs->edge_hint_aggressive) {
249297
for (i = 0; i < revs->cmdline.nr; i++) {
250298
struct object *obj = revs->cmdline.rev[i].item;

list-objects.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ typedef void (*show_object_fn)(struct object *, const char *, void *);
1010
void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
1111

1212
typedef void (*show_edge_fn)(struct commit *);
13-
void mark_edges_uninteresting(struct rev_info *, show_edge_fn);
13+
void mark_edges_uninteresting(struct rev_info *revs,
14+
show_edge_fn show_edge,
15+
int sparse);
1416

1517
struct oidset;
1618
struct list_objects_filter_options;

revision.c

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "commit-reach.h"
2828
#include "commit-graph.h"
2929
#include "prio-queue.h"
30+
#include "hashmap.h"
3031

3132
volatile show_early_output_fn_t show_early_output;
3233

@@ -99,6 +100,148 @@ void mark_tree_uninteresting(struct repository *r, struct tree *tree)
99100
mark_tree_contents_uninteresting(r, tree);
100101
}
101102

103+
/*
 * Hashmap entry that groups tree object ids by the path name under which
 * they were found. Entries are keyed by 'path' (see path_and_oids_cmp).
 */
struct path_and_oids_entry {
104+
/* Embedded hashmap bookkeeping; first member so the entry can be cast. */
struct hashmap_entry ent;
105+
/* Heap copy of the path name (xstrdup'd on insert, freed on clear). */
char *path;
106+
/* Object ids of the trees seen at this path. */
struct oidset trees;
107+
};
108+
109+
/*
 * Hashmap comparison callback for path_and_oids_entry: two entries match
 * when their 'path' strings are equal. Returns the strcmp() result, so 0
 * means "same key". 'hashmap_cmp_fn_data' and 'keydata' are unused.
 */
static int path_and_oids_cmp(const void *hashmap_cmp_fn_data,
110+
const struct path_and_oids_entry *e1,
111+
const struct path_and_oids_entry *e2,
112+
const void *keydata)
113+
{
114+
return strcmp(e1->path, e2->path);
115+
}
116+
117+
/*
 * Initialize 'map' as a path -> oidset table that compares keys with
 * path_and_oids_cmp. No comparison data is supplied (NULL); the trailing
 * 0 is presumably the initial size hint -- confirm against hashmap.h.
 */
static void paths_and_oids_init(struct hashmap *map)
118+
{
119+
hashmap_init(map, (hashmap_cmp_fn) path_and_oids_cmp, NULL, 0);
120+
}
121+
122+
/*
 * Release everything owned by 'map': each entry's oidset and its
 * duplicated path string are released first, then the map itself.
 */
static void paths_and_oids_clear(struct hashmap *map)
123+
{
124+
struct hashmap_iter iter;
125+
struct path_and_oids_entry *entry;
126+
hashmap_iter_init(map, &iter);
127+
128+
/* Free the per-entry resources the hashmap does not know about. */
while ((entry = (struct path_and_oids_entry *)hashmap_iter_next(&iter))) {
129+
oidset_clear(&entry->trees);
130+
free(entry->path);
131+
}
132+
133+
/* Presumably the 1 asks hashmap_free to free the entries too -- confirm. */
hashmap_free(map, 1);
134+
}
135+
136+
/*
 * Record that tree 'oid' was seen under the name 'path'.
 *
 * Looks up the entry for 'path' in 'map', creating it (with its own copy
 * of the path string and a fresh oidset) if it does not exist yet, and
 * then adds 'oid' to that entry's oidset.
 */
static void paths_and_oids_insert(struct hashmap *map,
137+
const char *path,
138+
const struct object_id *oid)
139+
{
140+
int hash = strhash(path);
141+
struct path_and_oids_entry key;
142+
struct path_and_oids_entry *entry;
143+
144+
hashmap_entry_init(&key, hash);
145+
146+
/* use a shallow copy for the lookup */
147+
key.path = (char *)path;
148+
/*
 * NOTE(review): key.trees is initialized but never cleared; presumably
 * oidset_init() with size 0 allocates nothing -- confirm.
 */
oidset_init(&key.trees, 0);
149+
150+
/* First sighting of this path: allocate and register a real entry. */
if (!(entry = (struct path_and_oids_entry *)hashmap_get(map, &key, NULL))) {
151+
entry = xcalloc(1, sizeof(struct path_and_oids_entry));
152+
hashmap_entry_init(entry, hash);
153+
entry->path = xstrdup(key.path);
154+
oidset_init(&entry->trees, 16);
155+
hashmap_put(map, entry);
156+
}
157+
158+
oidset_insert(&entry->trees, oid);
159+
}
160+
161+
/*
 * Walk the direct entries of 'tree' and file each subtree into 'map'
 * under its entry name.
 *
 * If 'tree' itself is flagged UNINTERESTING, that flag is pushed down to
 * every direct child tree and blob that can be looked up. Does nothing
 * when 'tree' is NULL or cannot be parsed. The tree buffer is released
 * before returning.
 */
static void add_children_by_path(struct repository *r,
162+
struct tree *tree,
163+
struct hashmap *map)
164+
{
165+
struct tree_desc desc;
166+
struct name_entry entry;
167+
168+
if (!tree)
169+
return;
170+
171+
/* A tree that fails to parse is silently skipped. */
if (parse_tree_gently(tree, 1) < 0)
172+
return;
173+
174+
init_tree_desc(&desc, tree->buffer, tree->size);
175+
while (tree_entry(&desc, &entry)) {
176+
switch (object_type(entry.mode)) {
177+
case OBJ_TREE:
178+
/* Subtrees are always grouped by name, whatever their flags. */
paths_and_oids_insert(map, entry.path, &entry.oid);
179+
180+
if (tree->object.flags & UNINTERESTING) {
181+
struct tree *child = lookup_tree(r, &entry.oid);
182+
if (child)
183+
child->object.flags |= UNINTERESTING;
184+
}
185+
break;
186+
case OBJ_BLOB:
187+
/* Blobs are not added to the map; they only inherit the flag. */
if (tree->object.flags & UNINTERESTING) {
188+
struct blob *child = lookup_blob(r, &entry.oid);
189+
if (child)
190+
child->object.flags |= UNINTERESTING;
191+
}
192+
break;
193+
default:
194+
/* Subproject commit - not in this repository */
195+
break;
196+
}
197+
}
198+
199+
free_tree_buffer(tree);
200+
}
201+
202+
/*
 * Sparse propagation of the UNINTERESTING flag through a set of trees.
 *
 * 'trees' holds the object ids of trees that sit at the same path. If
 * the set contains both UNINTERESTING and interesting trees, the direct
 * children of every tree in the set are grouped by entry name (pushing
 * the flag down from UNINTERESTING parents), and the function recurses
 * into each per-name group. If the set is all of one kind, nothing
 * below it is walked.
 */
void mark_trees_uninteresting_sparse(struct repository *r,
203+
struct oidset *trees)
204+
{
205+
unsigned has_interesting = 0, has_uninteresting = 0;
206+
struct hashmap map;
207+
struct hashmap_iter map_iter;
208+
struct path_and_oids_entry *entry;
209+
struct object_id *oid;
210+
struct oidset_iter iter;
211+
212+
/* First pass: stop scanning as soon as both kinds have been seen. */
oidset_iter_init(trees, &iter);
213+
while ((!has_interesting || !has_uninteresting) &&
214+
(oid = oidset_iter_next(&iter))) {
215+
struct tree *tree = lookup_tree(r, oid);
216+
217+
if (!tree)
218+
continue;
219+
220+
if (tree->object.flags & UNINTERESTING)
221+
has_uninteresting = 1;
222+
else
223+
has_interesting = 1;
224+
}
225+
226+
/* Do not walk unless we have both types of trees. */
227+
if (!has_uninteresting || !has_interesting)
228+
return;
229+
230+
paths_and_oids_init(&map);
231+
232+
/* Second pass: bucket the children of every tree by entry name. */
oidset_iter_init(trees, &iter);
233+
while ((oid = oidset_iter_next(&iter))) {
234+
struct tree *tree = lookup_tree(r, oid);
235+
add_children_by_path(r, tree, &map);
236+
}
237+
238+
/* Recurse one level down, one bucket (path name) at a time. */
hashmap_iter_init(&map, &map_iter);
239+
while ((entry = hashmap_iter_next(&map_iter)))
240+
mark_trees_uninteresting_sparse(r, &entry->trees);
241+
242+
paths_and_oids_clear(&map);
243+
}
244+
102245
struct commit_stack {
103246
struct commit **items;
104247
size_t nr, alloc;

revision.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ struct rev_cmdline_info {
6767
#define REVISION_WALK_NO_WALK_SORTED 1
6868
#define REVISION_WALK_NO_WALK_UNSORTED 2
6969

70+
struct oidset;
7071
struct topo_walk_info;
7172

7273
struct rev_info {
@@ -327,6 +328,7 @@ void put_revision_mark(const struct rev_info *revs,
327328

328329
void mark_parents_uninteresting(struct commit *commit);
329330
void mark_tree_uninteresting(struct repository *r, struct tree *tree);
331+
void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees);
330332

331333
void show_object_with_name(FILE *, struct object *, const char *);
332334

0 commit comments

Comments
 (0)