Skip to content

Commit cf2999e

Browse files
author
Junio C Hamano
committed
Merge branch 'sp/mmap'
* sp/mmap: (27 commits)
  Spell default packedgitlimit slightly differently
  Increase packedGit{Limit,WindowSize} on 64 bit systems.
  Update packedGit config option documentation.
  mmap: set FD_CLOEXEC for file descriptors we keep open for mmap()
  pack-objects: fix use of use_pack().
  Fix random segfaults in pack-objects.
  Cleanup read_cache_from error handling.
  Replace mmap with xmmap, better handling MAP_FAILED.
  Release pack windows before reporting out of memory.
  Default core.packdGitWindowSize to 1 MiB if NO_MMAP.
  Test suite for sliding window mmap implementation.
  Create pack_report() as a debugging aid.
  Support unmapping windows on 'temporary' packfiles.
  Improve error message when packfile mmap fails.
  Ensure core.packedGitWindowSize cannot be less than 2 pages.
  Load core configuration in git-verify-pack.
  Fully activate the sliding window pack access.
  Unmap individual windows rather than entire files.
  Document why header parsing won't exceed a window.
  Loop over pack_windows when inflating/accessing data.
  ...

Conflicts:
  cache.h
  pack-check.c
2 parents e7bb17a + ecaebf4 commit cf2999e

File tree

14 files changed

+582
-231
lines changed

14 files changed

+582
-231
lines changed

Documentation/config.txt

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,34 @@ core.legacyheaders::
118118
database directly (where the "http://" and "rsync://" protocols
119119
count as direct access).
120120

121+
core.packedGitWindowSize::
122+
Number of bytes of a pack file to map into memory in a
123+
single mapping operation. Larger window sizes may allow
124+
your system to process a smaller number of large pack files
125+
more quickly. Smaller window sizes will negatively affect
126+
performance due to increased calls to the operating system's
127+
memory manager, but may improve performance when accessing
128+
a large number of large pack files.
129+
+
130+
Default is 1 MiB if NO_MMAP was set at compile time, otherwise 32
131+
MiB on 32 bit platforms and 1 GiB on 64 bit platforms. This should
132+
be reasonable for all users/operating systems. You probably do
133+
not need to adjust this value.
134+
+
135+
Common unit suffixes of 'k', 'm', or 'g' are supported.
136+
137+
core.packedGitLimit::
138+
Maximum number of bytes to map simultaneously into memory
139+
from pack files. If Git needs to access more than this many
140+
bytes at once to complete an operation it will unmap existing
141+
regions to reclaim virtual address space within the process.
142+
+
143+
Default is 256 MiB on 32 bit platforms and 8 GiB on 64 bit platforms.
144+
This should be reasonable for all users/operating systems, except on
145+
the largest projects. You probably do not need to adjust this value.
146+
+
147+
Common unit suffixes of 'k', 'm', or 'g' are supported.
148+
121149
alias.*::
122150
Command aliases for the gitlink:git[1] command wrapper - e.g.
123151
after defining "alias.last = cat-file commit HEAD", the invocation

builtin-pack-objects.c

Lines changed: 66 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -276,7 +276,52 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
276276
* we are going to reuse the existing object data as is. make
277277
* sure it is not corrupt.
278278
*/
279-
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
279+
static int check_pack_inflate(struct packed_git *p,
280+
struct pack_window **w_curs,
281+
unsigned long offset,
282+
unsigned long len,
283+
unsigned long expect)
284+
{
285+
z_stream stream;
286+
unsigned char fakebuf[4096], *in;
287+
int st;
288+
289+
memset(&stream, 0, sizeof(stream));
290+
inflateInit(&stream);
291+
do {
292+
in = use_pack(p, w_curs, offset, &stream.avail_in);
293+
stream.next_in = in;
294+
stream.next_out = fakebuf;
295+
stream.avail_out = sizeof(fakebuf);
296+
st = inflate(&stream, Z_FINISH);
297+
offset += stream.next_in - in;
298+
} while (st == Z_OK || st == Z_BUF_ERROR);
299+
inflateEnd(&stream);
300+
return (st == Z_STREAM_END &&
301+
stream.total_out == expect &&
302+
stream.total_in == len) ? 0 : -1;
303+
}
304+
305+
static void copy_pack_data(struct sha1file *f,
306+
struct packed_git *p,
307+
struct pack_window **w_curs,
308+
unsigned long offset,
309+
unsigned long len)
310+
{
311+
unsigned char *in;
312+
unsigned int avail;
313+
314+
while (len) {
315+
in = use_pack(p, w_curs, offset, &avail);
316+
if (avail > len)
317+
avail = len;
318+
sha1write(f, in, avail);
319+
offset += avail;
320+
len -= avail;
321+
}
322+
}
323+
324+
static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect)
280325
{
281326
z_stream stream;
282327
unsigned char fakebuf[4096];
@@ -323,7 +368,7 @@ static int revalidate_loose_object(struct object_entry *entry,
323368
return -1;
324369
map += used;
325370
mapsize -= used;
326-
return check_inflate(map, mapsize, size);
371+
return check_loose_inflate(map, mapsize, size);
327372
}
328373

329374
static unsigned long write_object(struct sha1file *f,
@@ -416,6 +461,8 @@ static unsigned long write_object(struct sha1file *f,
416461
}
417462
else {
418463
struct packed_git *p = entry->in_pack;
464+
struct pack_window *w_curs = NULL;
465+
unsigned long offset;
419466

420467
if (entry->delta) {
421468
obj_type = (allow_ofs_delta && entry->delta->offset) ?
@@ -437,16 +484,14 @@ static unsigned long write_object(struct sha1file *f,
437484
hdrlen += 20;
438485
}
439486

440-
use_packed_git(p);
441-
buf = (char *) p->pack_base
442-
+ entry->in_pack_offset
443-
+ entry->in_pack_header_size;
487+
offset = entry->in_pack_offset + entry->in_pack_header_size;
444488
datalen = find_packed_object_size(p, entry->in_pack_offset)
445489
- entry->in_pack_header_size;
446-
if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
490+
if (!pack_to_stdout && check_pack_inflate(p, &w_curs,
491+
offset, datalen, entry->size))
447492
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
448-
sha1write(f, buf, datalen);
449-
unuse_packed_git(p);
493+
copy_pack_data(f, p, &w_curs, offset, datalen);
494+
unuse_pack(&w_curs);
450495
reused++;
451496
}
452497
if (entry->delta)
@@ -937,22 +982,19 @@ static void check_object(struct object_entry *entry)
937982

938983
if (entry->in_pack && !entry->preferred_base) {
939984
struct packed_git *p = entry->in_pack;
985+
struct pack_window *w_curs = NULL;
940986
unsigned long left = p->pack_size - entry->in_pack_offset;
941987
unsigned long size, used;
942988
unsigned char *buf;
943989
struct object_entry *base_entry = NULL;
944990

945-
use_packed_git(p);
946-
buf = p->pack_base;
947-
buf += entry->in_pack_offset;
991+
buf = use_pack(p, &w_curs, entry->in_pack_offset, NULL);
948992

949993
/* We want in_pack_type even if we do not reuse delta.
950994
* There is no point not reusing non-delta representations.
951995
*/
952996
used = unpack_object_header_gently(buf, left,
953997
&entry->in_pack_type, &size);
954-
if (!used || left - used <= 20)
955-
die("corrupt pack for %s", sha1_to_hex(entry->sha1));
956998

957999
/* Check if it is delta, and the base is also an object
9581000
* we are going to pack. If so we will reuse the existing
@@ -961,36 +1003,42 @@ static void check_object(struct object_entry *entry)
9611003
if (!no_reuse_delta) {
9621004
unsigned char c, *base_name;
9631005
unsigned long ofs;
1006+
unsigned long used_0;
9641007
/* there is at least 20 bytes left in the pack */
9651008
switch (entry->in_pack_type) {
9661009
case OBJ_REF_DELTA:
967-
base_name = buf + used;
1010+
base_name = use_pack(p, &w_curs,
1011+
entry->in_pack_offset + used, NULL);
9681012
used += 20;
9691013
break;
9701014
case OBJ_OFS_DELTA:
971-
c = buf[used++];
1015+
buf = use_pack(p, &w_curs,
1016+
entry->in_pack_offset + used, NULL);
1017+
used_0 = 0;
1018+
c = buf[used_0++];
9721019
ofs = c & 127;
9731020
while (c & 128) {
9741021
ofs += 1;
9751022
if (!ofs || ofs & ~(~0UL >> 7))
9761023
die("delta base offset overflow in pack for %s",
9771024
sha1_to_hex(entry->sha1));
978-
c = buf[used++];
1025+
c = buf[used_0++];
9791026
ofs = (ofs << 7) + (c & 127);
9801027
}
9811028
if (ofs >= entry->in_pack_offset)
9821029
die("delta base offset out of bound for %s",
9831030
sha1_to_hex(entry->sha1));
9841031
ofs = entry->in_pack_offset - ofs;
9851032
base_name = find_packed_object_name(p, ofs);
1033+
used += used_0;
9861034
break;
9871035
default:
9881036
base_name = NULL;
9891037
}
9901038
if (base_name)
9911039
base_entry = locate_object_entry(base_name);
9921040
}
993-
unuse_packed_git(p);
1041+
unuse_pack(&w_curs);
9941042
entry->in_pack_header_size = used;
9951043

9961044
if (base_entry) {

builtin-verify-pack.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix)
5555
int no_more_options = 0;
5656
int nothing_done = 1;
5757

58+
git_config(git_default_config);
5859
while (1 < argc) {
5960
if (!no_more_options && argv[1][0] == '-') {
6061
if (!strcmp("-v", argv[1]))

cache.h

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,8 @@ extern int warn_ambiguous_refs;
197197
extern int shared_repository;
198198
extern const char *apply_default_whitespace;
199199
extern int zlib_compression_level;
200+
extern size_t packed_git_window_size;
201+
extern size_t packed_git_limit;
200202

201203
#define GIT_REPO_VERSION 0
202204
extern int repository_format_version;
@@ -336,14 +338,22 @@ extern struct alternate_object_database {
336338
} *alt_odb_list;
337339
extern void prepare_alt_odb(void);
338340

341+
struct pack_window {
342+
struct pack_window *next;
343+
unsigned char *base;
344+
off_t offset;
345+
size_t len;
346+
unsigned int last_used;
347+
unsigned int inuse_cnt;
348+
};
349+
339350
extern struct packed_git {
340351
struct packed_git *next;
341-
unsigned long index_size;
342-
unsigned long pack_size;
352+
struct pack_window *windows;
343353
unsigned int *index_base;
344-
void *pack_base;
345-
unsigned int pack_last_used;
346-
unsigned int pack_use_cnt;
354+
off_t index_size;
355+
off_t pack_size;
356+
int pack_fd;
347357
int pack_local;
348358
unsigned char sha1[20];
349359
/* something like ".git/objects/pack/xxxxx.pack" */
@@ -389,13 +399,14 @@ extern void install_packed_git(struct packed_git *pack);
389399
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
390400
struct packed_git *packs);
391401

392-
extern int use_packed_git(struct packed_git *);
393-
extern void unuse_packed_git(struct packed_git *);
402+
extern void pack_report();
403+
extern unsigned char* use_pack(struct packed_git *, struct pack_window **, unsigned long, unsigned int *);
404+
extern void unuse_pack(struct pack_window **);
394405
extern struct packed_git *add_packed_git(char *, int, int);
395406
extern int num_packed_objects(const struct packed_git *p);
396407
extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*);
397408
extern unsigned long find_pack_entry_one(const unsigned char *, struct packed_git *);
398-
extern void *unpack_entry_gently(struct packed_git *, unsigned long, char *, unsigned long *);
409+
extern void *unpack_entry(struct packed_git *, unsigned long, char *, unsigned long *);
399410
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
400411
extern void packed_object_info_detail(struct packed_git *, unsigned long, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
401412

@@ -421,6 +432,7 @@ extern char *git_commit_encoding;
421432
extern char *git_log_output_encoding;
422433

423434
extern int copy_fd(int ifd, int ofd);
435+
extern void read_or_die(int fd, void *buf, size_t count);
424436
extern int write_in_full(int fd, const void *buf, size_t count, const char *);
425437
extern void write_or_die(int fd, const void *buf, size_t count);
426438
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);

config.c

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,21 @@ int git_default_config(const char *var, const char *value)
304304
return 0;
305305
}
306306

307+
if (!strcmp(var, "core.packedgitwindowsize")) {
308+
int pgsz = getpagesize();
309+
packed_git_window_size = git_config_int(var, value);
310+
packed_git_window_size /= pgsz;
311+
if (packed_git_window_size < 2)
312+
packed_git_window_size = 2;
313+
packed_git_window_size *= pgsz;
314+
return 0;
315+
}
316+
317+
if (!strcmp(var, "core.packedgitlimit")) {
318+
packed_git_limit = git_config_int(var, value);
319+
return 0;
320+
}
321+
307322
if (!strcmp(var, "user.name")) {
308323
strlcpy(git_default_name, value, sizeof(git_default_name));
309324
return 0;
@@ -695,7 +710,7 @@ int git_config_set_multivar(const char* key, const char* value,
695710
}
696711

697712
fstat(in_fd, &st);
698-
contents = mmap(NULL, st.st_size, PROT_READ,
713+
contents = xmmap(NULL, st.st_size, PROT_READ,
699714
MAP_PRIVATE, in_fd, 0);
700715
close(in_fd);
701716

diff.c

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1341,10 +1341,8 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
13411341
fd = open(s->path, O_RDONLY);
13421342
if (fd < 0)
13431343
goto err_empty;
1344-
s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
1344+
s->data = xmmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
13451345
close(fd);
1346-
if (s->data == MAP_FAILED)
1347-
goto err_empty;
13481346
s->should_munmap = 1;
13491347
}
13501348
else {

environment.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@ char *git_log_output_encoding;
2323
int shared_repository = PERM_UMASK;
2424
const char *apply_default_whitespace;
2525
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
26+
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
27+
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
2628
int pager_in_use;
2729
int pager_use_color = 1;
2830

0 commit comments

Comments
 (0)