Skip to content

Commit 5379a5c

Browse files
author
Junio C Hamano
committed
Thin pack generation: optimization.
Jens Axboe noticed that recent "git push" has become very slow since we made --thin transfer the default. Thin pack generation was stupid when pushing a handful of revisions that touch a relatively small number of paths out of a huge tree; it registered _everything_ from the excluded revisions. As a result, the "Counting objects" phase was unnecessarily expensive. This changes the logic to register the blobs and trees from excluded revisions only for the paths we are actually going to send to the other end. Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent 9760662 commit 5379a5c

File tree

1 file changed

+236
-48
lines changed

1 file changed

+236
-48
lines changed

pack-objects.c

Lines changed: 236 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ static void rehash_objects(void)
453453
if (object_ix_hashsz < 1024)
454454
object_ix_hashsz = 1024;
455455
object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
456-
object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
456+
memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
457457
for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
458458
int ix = locate_object_entry_hash(oe->sha1);
459459
if (0 <= ix)
@@ -505,21 +505,6 @@ static unsigned name_hash(struct name_path *path, const char *name)
505505
* but close enough.
506506
*/
507507
hash = (name_hash<<DIRBITS) | (hash & ((1U<<DIRBITS )-1));
508-
509-
if (0) { /* debug */
510-
n = name + strlen(name);
511-
if (n != name && n[-1] == '\n')
512-
n--;
513-
while (name <= --n)
514-
fputc(*n, stderr);
515-
for (p = path; p; p = p->up) {
516-
fputc('/', stderr);
517-
n = p->elem + p->len;
518-
while (p->elem <= --n)
519-
fputc(*n, stderr);
520-
}
521-
fprintf(stderr, "\t%08x\n", hash);
522-
}
523508
return hash;
524509
}
525510

@@ -587,56 +572,254 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud
587572
return status;
588573
}
589574

590-
static void add_pbase_tree(struct tree_desc *tree, struct name_path *up)
/*
 * One tree object read on behalf of the preferred-base logic.
 * Instances either sit in the pbase_tree_cache[] table or are
 * throwaway copies handed out when no table slot could be used.
 */
struct pbase_tree_cache {
	/* object name of the tree held in tree_data */
	unsigned char sha1[20];
	/*
	 * Outstanding users: bumped by pbase_tree_get(), dropped by
	 * pbase_tree_put().  A table entry is only evicted while
	 * this is zero.
	 */
	int ref;
	/*
	 * Nonzero for a throwaway copy that is not stored in the
	 * table; pbase_tree_put() frees such an entry outright.
	 */
	int temporary;
	/* raw tree buffer and its length, as returned by read_sha1_file() */
	void *tree_data;
	unsigned long tree_size;
};
582+
/* Fixed-size table of cached trees; an empty slot holds NULL. */
static struct pbase_tree_cache *pbase_tree_cache[256];

/* Home slot of an object: its first sha1 byte, reduced to the table size. */
static int pbase_tree_cache_ix(const unsigned char *sha1)
{
	unsigned char first = sha1[0];

	return first % ARRAY_SIZE(pbase_tree_cache);
}

/* Slot following "ix", wrapping around at the end of the table. */
static int pbase_tree_cache_ix_incr(int ix)
{
	int next = ix + 1;

	return next % ARRAY_SIZE(pbase_tree_cache);
}
592+
593+
static struct pbase_tree {
594+
struct pbase_tree *next;
595+
/* This is a phony "cache" entry; we are not
596+
* going to evict it nor find it through _get()
597+
* mechanism -- this is for the toplevel node that
598+
* would almost always change with any commit.
599+
*/
600+
struct pbase_tree_cache pcache;
601+
} *pbase_tree;
602+
603+
static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
604+
{
605+
struct pbase_tree_cache *ent, *nent;
606+
void *data;
607+
unsigned long size;
608+
char type[20];
609+
int neigh;
610+
int my_ix = pbase_tree_cache_ix(sha1);
611+
int available_ix = -1;
612+
613+
/* pbase-tree-cache acts as a limited hashtable.
614+
* your object will be found at your index or within a few
615+
* slots after that slot if it is cached.
616+
*/
617+
for (neigh = 0; neigh < 8; neigh++) {
618+
ent = pbase_tree_cache[my_ix];
619+
if (ent && !memcmp(ent->sha1, sha1, 20)) {
620+
ent->ref++;
621+
return ent;
622+
}
623+
else if (((available_ix < 0) && (!ent || !ent->ref)) ||
624+
((0 <= available_ix) &&
625+
(!ent && pbase_tree_cache[available_ix])))
626+
available_ix = my_ix;
627+
if (!ent)
628+
break;
629+
my_ix = pbase_tree_cache_ix_incr(my_ix);
630+
}
631+
632+
/* Did not find one. Either we got a bogus request or
633+
* we need to read and perhaps cache.
634+
*/
635+
data = read_sha1_file(sha1, type, &size);
636+
if (!data)
637+
return NULL;
638+
if (strcmp(type, tree_type)) {
639+
free(data);
640+
return NULL;
641+
}
642+
643+
/* We need to either cache or return a throwaway copy */
644+
645+
if (available_ix < 0)
646+
ent = NULL;
647+
else {
648+
ent = pbase_tree_cache[available_ix];
649+
my_ix = available_ix;
650+
}
651+
652+
if (!ent) {
653+
nent = xmalloc(sizeof(*nent));
654+
nent->temporary = (available_ix < 0);
655+
}
656+
else {
657+
/* evict and reuse */
658+
free(ent->tree_data);
659+
nent = ent;
660+
}
661+
memcpy(nent->sha1, sha1, 20);
662+
nent->tree_data = data;
663+
nent->tree_size = size;
664+
nent->ref = 1;
665+
if (!nent->temporary)
666+
pbase_tree_cache[my_ix] = nent;
667+
return nent;
668+
}
669+
670+
static void pbase_tree_put(struct pbase_tree_cache *cache)
671+
{
672+
if (!cache->temporary) {
673+
cache->ref--;
674+
return;
675+
}
676+
free(cache->tree_data);
677+
free(cache);
678+
}
679+
/*
 * Length of the leading path component of "name": the number of
 * bytes before the first '/', newline, or terminating NUL.
 */
static int name_cmp_len(const char *name)
{
	const char *end = name;

	while (*end != '\0' && *end != '\n' && *end != '/')
		end++;
	return (int)(end - name);
}
687+
688+
static void add_pbase_object(struct tree_desc *tree,
689+
struct name_path *up,
690+
const char *name,
691+
int cmplen)
591692
{
592693
while (tree->size) {
593694
const unsigned char *sha1;
594-
const char *name;
595-
unsigned mode, hash;
695+
const char *entry_name;
696+
int entry_len;
697+
unsigned mode;
596698
unsigned long size;
597699
char type[20];
598700

599-
sha1 = tree_entry_extract(tree, &name, &mode);
701+
sha1 = tree_entry_extract(tree, &entry_name, &mode);
600702
update_tree_entry(tree);
601-
if (!has_sha1_file(sha1))
602-
continue;
603-
if (sha1_object_info(sha1, type, &size))
703+
entry_len = strlen(entry_name);
704+
if (entry_len != cmplen ||
705+
memcmp(entry_name, name, cmplen) ||
706+
!has_sha1_file(sha1) ||
707+
sha1_object_info(sha1, type, &size))
604708
continue;
605-
606-
hash = name_hash(up, name);
607-
if (!add_object_entry(sha1, hash, 1))
608-
continue;
609-
709+
if (name[cmplen] != '/') {
710+
unsigned hash = name_hash(up, name);
711+
add_object_entry(sha1, hash, 1);
712+
return;
713+
}
610714
if (!strcmp(type, tree_type)) {
611715
struct tree_desc sub;
612-
void *elem;
613716
struct name_path me;
717+
struct pbase_tree_cache *tree;
718+
const char *down = name+cmplen+1;
719+
int downlen = name_cmp_len(down);
720+
721+
tree = pbase_tree_get(sha1);
722+
if (!tree)
723+
return;
724+
sub.buf = tree->tree_data;
725+
sub.size = tree->tree_size;
726+
727+
me.up = up;
728+
me.elem = entry_name;
729+
me.len = entry_len;
730+
add_pbase_object(&sub, &me, down, downlen);
731+
pbase_tree_put(tree);
732+
}
733+
}
734+
}
614735

615-
elem = read_sha1_file(sha1, type, &sub.size);
616-
sub.buf = elem;
617-
if (sub.buf) {
618-
me.up = up;
619-
me.elem = name;
620-
me.len = strlen(name);
621-
add_pbase_tree(&sub, &me);
622-
free(elem);
623-
}
/*
 * Path hashes that have already been handled, so each one is
 * processed only once.  The array is kept sorted in DESCENDING
 * order; done_pbase_path_pos() relies on that ordering.
 */
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;

/*
 * Binary-search done_pbase_paths[] for "hash".
 *
 * Returns the index of the match (>= 0) when present.  Otherwise
 * returns -(pos) - 1, where "pos" is the index at which "hash" has
 * to be inserted to keep the array in descending order.
 */
static int done_pbase_path_pos(unsigned hash)
{
	int lo = 0;
	int hi = done_pbase_paths_num;

	while (lo < hi) {
		/* lo + (hi - lo) / 2 cannot overflow, unlike (hi + lo) / 2 */
		int mi = lo + (hi - lo) / 2;

		if (done_pbase_paths[mi] == hash)
			return mi;
		if (done_pbase_paths[mi] < hash)
			hi = mi;	/* larger values live at lower indices */
		else
			lo = mi + 1;
	}
	return -lo-1;
}
754+
755+
static int check_pbase_path(unsigned hash)
756+
{
757+
int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
758+
if (0 <= pos)
759+
return 1;
760+
pos = -pos - 1;
761+
if (done_pbase_paths_alloc <= done_pbase_paths_num) {
762+
done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc);
763+
done_pbase_paths = xrealloc(done_pbase_paths,
764+
done_pbase_paths_alloc *
765+
sizeof(unsigned));
766+
}
767+
done_pbase_paths_num++;
768+
if (pos < done_pbase_paths_num)
769+
memmove(done_pbase_paths + pos + 1,
770+
done_pbase_paths + pos,
771+
(done_pbase_paths_num - pos - 1) * sizeof(unsigned));
772+
done_pbase_paths[pos] = hash;
773+
return 0;
774+
}
775+
776+
static void add_preferred_base_object(char *name, unsigned hash)
777+
{
778+
struct pbase_tree *it;
779+
int cmplen = name_cmp_len(name);
780+
781+
if (check_pbase_path(hash))
782+
return;
783+
784+
for (it = pbase_tree; it; it = it->next) {
785+
if (cmplen == 0) {
786+
hash = name_hash(NULL, "");
787+
add_object_entry(it->pcache.sha1, hash, 1);
788+
}
789+
else {
790+
struct tree_desc tree;
791+
tree.buf = it->pcache.tree_data;
792+
tree.size = it->pcache.tree_size;
793+
add_pbase_object(&tree, NULL, name, cmplen);
624794
}
625795
}
626796
}
627797

628798
static void add_preferred_base(unsigned char *sha1)
629799
{
630-
struct tree_desc tree;
631-
void *elem;
800+
struct pbase_tree *it;
801+
void *data;
802+
unsigned long size;
803+
unsigned char tree_sha1[20];
632804

633-
elem = read_object_with_reference(sha1, tree_type, &tree.size, NULL);
634-
tree.buf = elem;
635-
if (!tree.buf)
805+
data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
806+
if (!data)
636807
return;
637-
if (add_object_entry(sha1, name_hash(NULL, ""), 1))
638-
add_pbase_tree(&tree, NULL);
639-
free(elem);
808+
809+
for (it = pbase_tree; it; it = it->next) {
810+
if (!memcmp(it->pcache.sha1, tree_sha1, 20)) {
811+
free(data);
812+
return;
813+
}
814+
}
815+
816+
it = xcalloc(1, sizeof(*it));
817+
it->next = pbase_tree;
818+
pbase_tree = it;
819+
820+
memcpy(it->pcache.sha1, tree_sha1, 20);
821+
it->pcache.tree_data = data;
822+
it->pcache.tree_size = size;
640823
}
641824

642825
static void check_object(struct object_entry *entry)
@@ -1051,6 +1234,7 @@ int main(int argc, char **argv)
10511234
char line[PATH_MAX + 20];
10521235
int window = 10, depth = 10, pack_to_stdout = 0;
10531236
struct object_entry **list;
1237+
int num_preferred_base = 0;
10541238
int i;
10551239

10561240
setup_git_directory();
@@ -1116,6 +1300,7 @@ int main(int argc, char **argv)
11161300

11171301
for (;;) {
11181302
unsigned char sha1[20];
1303+
unsigned hash;
11191304

11201305
if (!fgets(line, sizeof(line), stdin)) {
11211306
if (feof(stdin))
@@ -1132,12 +1317,15 @@ int main(int argc, char **argv)
11321317
if (get_sha1_hex(line+1, sha1))
11331318
die("expected edge sha1, got garbage:\n %s",
11341319
line+1);
1135-
add_preferred_base(sha1);
1320+
if (num_preferred_base++ < window)
1321+
add_preferred_base(sha1);
11361322
continue;
11371323
}
11381324
if (get_sha1_hex(line, sha1))
11391325
die("expected sha1, got garbage:\n %s", line);
1140-
add_object_entry(sha1, name_hash(NULL, line+41), 0);
1326+
hash = name_hash(NULL, line+41);
1327+
add_preferred_base_object(line+41, hash);
1328+
add_object_entry(sha1, hash, 0);
11411329
}
11421330
if (progress)
11431331
fprintf(stderr, "Done counting %d objects.\n", nr_objects);

0 commit comments

Comments
 (0)