Skip to content

Commit b8a2486

Browse files
pcloudsgitster
authored and committed
index-pack: support multithreaded delta resolving
This puts the delta resolving for each base on a separate thread, with one base cache per thread. Per-thread data is grouped in struct thread_local. When running with nr_threads == 1, no pthreads calls are made; the system essentially runs in non-threaded mode.

An experiment on a 24-core Xeon machine with git.git shows that performance does not increase proportionally to the number of cores, so by default we use a maximum of 3 cores. Some numbers with --threads from 1 to 16:

1..4
real 0m8.003s 0m5.307s 0m4.321s 0m3.830s
user 0m7.720s 0m8.009s 0m8.133s 0m8.305s
sys 0m0.224s 0m0.372s 0m0.360s 0m0.360s

5..8
real 0m3.727s 0m3.604s 0m3.332s 0m3.369s
user 0m9.361s 0m9.817s 0m9.525s 0m9.769s
sys 0m0.584s 0m0.624s 0m0.540s 0m0.560s

9..12
real 0m3.036s 0m3.139s 0m3.177s 0m2.961s
user 0m8.977s 0m10.205s 0m9.737s 0m10.073s
sys 0m0.596s 0m0.680s 0m0.684s 0m0.680s

13..16
real 0m2.985s 0m2.894s 0m2.975s 0m2.971s
user 0m9.825s 0m10.573s 0m10.833s 0m11.361s
sys 0m0.788s 0m0.732s 0m0.904s 0m1.016s

On an Intel dual core and linux-2.6.git:

1..4
real 2m37.789s 2m7.963s 2m0.920s 1m58.213s
user 2m28.415s 2m52.325s 2m50.176s 2m41.187s
sys 0m7.808s 0m11.181s 0m11.224s 0m10.731s

Thanks to Ramsay Jones for troubleshooting and support on the MinGW platform.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 5272f75 commit b8a2486

File tree

4 files changed

+244
-12
lines changed

4 files changed

+244
-12
lines changed

Documentation/git-index-pack.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ OPTIONS
7474
--strict::
7575
Die, if the pack contains broken objects or links.
7676

77+
--threads=<n>::
78+
Specifies the number of threads to spawn when resolving
79+
deltas. This requires that index-pack be compiled with
80+
pthreads; otherwise this option is ignored with a warning.
81+
This is meant to reduce packing time on multiprocessor
82+
machines. The required amount of memory for the delta search
83+
window is however multiplied by the number of threads.
84+
Specifying 0 will cause git to auto-detect the number of CPUs
85+
and use a maximum of 3 threads.
86+
7787

7888
Note
7989
----

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2160,7 +2160,7 @@ builtin/branch.o builtin/checkout.o builtin/clone.o builtin/reset.o branch.o tra
21602160
builtin/bundle.o bundle.o transport.o: bundle.h
21612161
builtin/bisect--helper.o builtin/rev-list.o bisect.o: bisect.h
21622162
builtin/clone.o builtin/fetch-pack.o transport.o: fetch-pack.h
2163-
builtin/grep.o builtin/pack-objects.o transport-helper.o thread-utils.o: thread-utils.h
2163+
builtin/index-pack.o builtin/grep.o builtin/pack-objects.o transport-helper.o thread-utils.o: thread-utils.h
21642164
builtin/send-pack.o transport.o: send-pack.h
21652165
builtin/log.o builtin/shortlog.o: shortlog.h
21662166
builtin/prune.o builtin/reflog.o reachable.o: reachable.h

builtin/index-pack.c

Lines changed: 193 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "progress.h"
1010
#include "fsck.h"
1111
#include "exec_cmd.h"
12+
#include "thread-utils.h"
1213

1314
static const char index_pack_usage[] =
1415
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
@@ -38,6 +39,14 @@ struct base_data {
3839
int ofs_first, ofs_last;
3940
};
4041

42+
/*
 * State owned by a single delta-resolving thread. In the non-threaded case
 * one static instance of this struct is used instead.
 */
struct thread_local {
#ifndef NO_PTHREADS
	/* Thread handle, filled in by pthread_create() and later joined. */
	pthread_t thread;
#endif
	/* Head of this thread's chain of cached base objects. */
	struct base_data *base_cache;
	/* Running total of the ->size of cached bases that currently hold data. */
	size_t base_cache_used;
};
49+
4150
/*
4251
* Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
4352
* to memcmp() only the first 20 bytes.
@@ -54,11 +63,11 @@ struct delta_entry {
5463

5564
static struct object_entry *objects;
5665
static struct delta_entry *deltas;
57-
static struct base_data *base_cache;
58-
static size_t base_cache_used;
66+
static struct thread_local nothread_data;
5967
static int nr_objects;
6068
static int nr_deltas;
6169
static int nr_resolved_deltas;
70+
static int nr_threads;
6271

6372
static int from_stdin;
6473
static int strict;
@@ -75,6 +84,77 @@ static git_SHA_CTX input_ctx;
7584
static uint32_t input_crc32;
7685
static int input_fd, output_fd, pack_fd;
7786

87+
#ifndef NO_PTHREADS
88+
89+
static struct thread_local *thread_data;
90+
static int nr_dispatched;
91+
static int threads_active;
92+
93+
static pthread_mutex_t read_mutex;
94+
#define read_lock() lock_mutex(&read_mutex)
95+
#define read_unlock() unlock_mutex(&read_mutex)
96+
97+
static pthread_mutex_t counter_mutex;
98+
#define counter_lock() lock_mutex(&counter_mutex)
99+
#define counter_unlock() unlock_mutex(&counter_mutex)
100+
101+
static pthread_mutex_t work_mutex;
102+
#define work_lock() lock_mutex(&work_mutex)
103+
#define work_unlock() unlock_mutex(&work_mutex)
104+
105+
static pthread_key_t key;
106+
107+
static inline void lock_mutex(pthread_mutex_t *mutex)
108+
{
109+
if (threads_active)
110+
pthread_mutex_lock(mutex);
111+
}
112+
113+
static inline void unlock_mutex(pthread_mutex_t *mutex)
114+
{
115+
if (threads_active)
116+
pthread_mutex_unlock(mutex);
117+
}
118+
119+
/*
120+
* Mutexes and condition variables can't be statically initialized on Windows.
121+
*/
122+
static void init_thread(void)
123+
{
124+
init_recursive_mutex(&read_mutex);
125+
pthread_mutex_init(&counter_mutex, NULL);
126+
pthread_mutex_init(&work_mutex, NULL);
127+
pthread_key_create(&key, NULL);
128+
thread_data = xcalloc(nr_threads, sizeof(*thread_data));
129+
threads_active = 1;
130+
}
131+
132+
static void cleanup_thread(void)
133+
{
134+
if (!threads_active)
135+
return;
136+
threads_active = 0;
137+
pthread_mutex_destroy(&read_mutex);
138+
pthread_mutex_destroy(&counter_mutex);
139+
pthread_mutex_destroy(&work_mutex);
140+
pthread_key_delete(key);
141+
free(thread_data);
142+
}
143+
144+
#else
145+
146+
#define read_lock()
147+
#define read_unlock()
148+
149+
#define counter_lock()
150+
#define counter_unlock()
151+
152+
#define work_lock()
153+
#define work_unlock()
154+
155+
#endif
156+
157+
78158
static int mark_link(struct object *obj, int type, void *data)
79159
{
80160
if (!obj)
@@ -223,6 +303,25 @@ static NORETURN void bad_object(unsigned long offset, const char *format, ...)
223303
die("pack has bad object at offset %lu: %s", offset, buf);
224304
}
225305

306+
static inline struct thread_local *get_thread_data(void)
307+
{
308+
#ifndef NO_PTHREADS
309+
if (threads_active)
310+
return pthread_getspecific(key);
311+
assert(!threads_active &&
312+
"This should only be reached when all threads are gone");
313+
#endif
314+
return &nothread_data;
315+
}
316+
317+
#ifndef NO_PTHREADS
318+
static void set_thread_data(struct thread_local *data)
319+
{
320+
if (threads_active)
321+
pthread_setspecific(key, data);
322+
}
323+
#endif
324+
226325
static struct base_data *alloc_base_data(void)
227326
{
228327
struct base_data *base = xmalloc(sizeof(struct base_data));
@@ -237,15 +336,16 @@ static void free_base_data(struct base_data *c)
237336
if (c->data) {
238337
free(c->data);
239338
c->data = NULL;
240-
base_cache_used -= c->size;
339+
get_thread_data()->base_cache_used -= c->size;
241340
}
242341
}
243342

244343
static void prune_base_data(struct base_data *retain)
245344
{
246345
struct base_data *b;
247-
for (b = base_cache;
248-
base_cache_used > delta_base_cache_limit && b;
346+
struct thread_local *data = get_thread_data();
347+
for (b = data->base_cache;
348+
data->base_cache_used > delta_base_cache_limit && b;
249349
b = b->child) {
250350
if (b->data && b != retain)
251351
free_base_data(b);
@@ -257,12 +357,12 @@ static void link_base_data(struct base_data *base, struct base_data *c)
257357
if (base)
258358
base->child = c;
259359
else
260-
base_cache = c;
360+
get_thread_data()->base_cache = c;
261361

262362
c->base = base;
263363
c->child = NULL;
264364
if (c->data)
265-
base_cache_used += c->size;
365+
get_thread_data()->base_cache_used += c->size;
266366
prune_base_data(c);
267367
}
268368

@@ -272,7 +372,7 @@ static void unlink_base_data(struct base_data *c)
272372
if (base)
273373
base->child = NULL;
274374
else
275-
base_cache = NULL;
375+
get_thread_data()->base_cache = NULL;
276376
free_base_data(c);
277377
}
278378

@@ -461,19 +561,24 @@ static void sha1_object(const void *data, unsigned long size,
461561
enum object_type type, unsigned char *sha1)
462562
{
463563
hash_sha1_file(data, size, typename(type), sha1);
564+
read_lock();
464565
if (has_sha1_file(sha1)) {
465566
void *has_data;
466567
enum object_type has_type;
467568
unsigned long has_size;
468569
has_data = read_sha1_file(sha1, &has_type, &has_size);
570+
read_unlock();
469571
if (!has_data)
470572
die("cannot read existing object %s", sha1_to_hex(sha1));
471573
if (size != has_size || type != has_type ||
472574
memcmp(data, has_data, size) != 0)
473575
die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1));
474576
free(has_data);
475-
}
577+
} else
578+
read_unlock();
579+
476580
if (strict) {
581+
read_lock();
477582
if (type == OBJ_BLOB) {
478583
struct blob *blob = lookup_blob(sha1);
479584
if (blob)
@@ -507,6 +612,7 @@ static void sha1_object(const void *data, unsigned long size,
507612
}
508613
obj->flags |= FLAG_CHECKED;
509614
}
615+
read_unlock();
510616
}
511617
}
512618

@@ -552,7 +658,7 @@ static void *get_base_data(struct base_data *c)
552658
if (!delta_nr) {
553659
c->data = get_data_from_pack(obj);
554660
c->size = obj->size;
555-
base_cache_used += c->size;
661+
get_thread_data()->base_cache_used += c->size;
556662
prune_base_data(c);
557663
}
558664
for (; delta_nr > 0; delta_nr--) {
@@ -568,7 +674,7 @@ static void *get_base_data(struct base_data *c)
568674
free(raw);
569675
if (!c->data)
570676
bad_object(obj->idx.offset, "failed to apply delta");
571-
base_cache_used += c->size;
677+
get_thread_data()->base_cache_used += c->size;
572678
prune_base_data(c);
573679
}
574680
free(delta);
@@ -596,7 +702,9 @@ static void resolve_delta(struct object_entry *delta_obj,
596702
bad_object(delta_obj->idx.offset, "failed to apply delta");
597703
sha1_object(result->data, result->size, delta_obj->real_type,
598704
delta_obj->idx.sha1);
705+
counter_lock();
599706
nr_resolved_deltas++;
707+
counter_unlock();
600708
}
601709

602710
static struct base_data *find_unresolved_deltas_1(struct base_data *base,
@@ -690,6 +798,30 @@ static void resolve_base(struct object_entry *obj)
690798
find_unresolved_deltas(base_obj);
691799
}
692800

801+
#ifndef NO_PTHREADS
802+
static void *threaded_second_pass(void *data)
803+
{
804+
set_thread_data(data);
805+
for (;;) {
806+
int i;
807+
work_lock();
808+
display_progress(progress, nr_resolved_deltas);
809+
while (nr_dispatched < nr_objects &&
810+
is_delta_type(objects[nr_dispatched].type))
811+
nr_dispatched++;
812+
if (nr_dispatched >= nr_objects) {
813+
work_unlock();
814+
break;
815+
}
816+
i = nr_dispatched++;
817+
work_unlock();
818+
819+
resolve_base(&objects[i]);
820+
}
821+
return NULL;
822+
}
823+
#endif
824+
693825
/*
694826
* First pass:
695827
* - find locations of all objects;
@@ -758,6 +890,24 @@ static void resolve_deltas(void)
758890

759891
if (verbose)
760892
progress = start_progress("Resolving deltas", nr_deltas);
893+
894+
#ifndef NO_PTHREADS
895+
nr_dispatched = 0;
896+
if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) {
897+
init_thread();
898+
for (i = 0; i < nr_threads; i++) {
899+
int ret = pthread_create(&thread_data[i].thread, NULL,
900+
threaded_second_pass, thread_data + i);
901+
if (ret)
902+
die("unable to create thread: %s", strerror(ret));
903+
}
904+
for (i = 0; i < nr_threads; i++)
905+
pthread_join(thread_data[i].thread, NULL);
906+
cleanup_thread();
907+
return;
908+
}
909+
#endif
910+
761911
for (i = 0; i < nr_objects; i++) {
762912
struct object_entry *obj = &objects[i];
763913

@@ -1016,6 +1166,18 @@ static int git_index_pack_config(const char *k, const char *v, void *cb)
10161166
die("bad pack.indexversion=%"PRIu32, opts->version);
10171167
return 0;
10181168
}
1169+
if (!strcmp(k, "pack.threads")) {
1170+
nr_threads = git_config_int(k, v);
1171+
if (nr_threads < 0)
1172+
die("invalid number of threads specified (%d)",
1173+
nr_threads);
1174+
#ifdef NO_PTHREADS
1175+
if (nr_threads != 1)
1176+
warning("no threads support, ignoring %s", k);
1177+
nr_threads = 1;
1178+
#endif
1179+
return 0;
1180+
}
10191181
return git_default_config(k, v, cb);
10201182
}
10211183

@@ -1174,6 +1336,17 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
11741336
keep_msg = "";
11751337
} else if (!prefixcmp(arg, "--keep=")) {
11761338
keep_msg = arg + 7;
1339+
} else if (!prefixcmp(arg, "--threads=")) {
1340+
char *end;
1341+
nr_threads = strtoul(arg+10, &end, 0);
1342+
if (!arg[10] || *end || nr_threads < 0)
1343+
usage(index_pack_usage);
1344+
#ifdef NO_PTHREADS
1345+
if (nr_threads != 1)
1346+
warning("no threads support, "
1347+
"ignoring %s", arg);
1348+
nr_threads = 1;
1349+
#endif
11771350
} else if (!prefixcmp(arg, "--pack_header=")) {
11781351
struct pack_header *hdr;
11791352
char *c;
@@ -1245,6 +1418,15 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
12451418
if (strict)
12461419
opts.flags |= WRITE_IDX_STRICT;
12471420

1421+
#ifndef NO_PTHREADS
1422+
if (!nr_threads) {
1423+
nr_threads = online_cpus();
1424+
/* An experiment showed that more threads does not mean faster */
1425+
if (nr_threads > 3)
1426+
nr_threads = 3;
1427+
}
1428+
#endif
1429+
12481430
curr_pack = open_pack_file(pack_name);
12491431
parse_pack_header();
12501432
objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));

0 commit comments

Comments
 (0)