Skip to content

Commit 65c2e0c

Browse files
Junio C Hamano authored and Linus Torvalds committed
[PATCH] Find size of SHA1 object without inflating everything.
This adds the sha1_file_size() helper function and uses it in the rename/copy similarity estimator. The helper function handles deltified objects as well. Signed-off-by: Junio C Hamano <junkio@cox.net> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent 4a62b61 commit 65c2e0c

File tree

3 files changed

+67
-5
lines changed

3 files changed

+67
-5
lines changed

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ extern void * map_sha1_file(const unsigned char *sha1, unsigned long *size);
154154
extern int unpack_sha1_header(z_stream *stream, void *map, unsigned long mapsize, void *buffer, unsigned long size);
155155
extern int parse_sha1_header(char *hdr, char *type, unsigned long *sizep);
156156
extern int sha1_delta_base(const unsigned char *, unsigned char *);
157+
extern int sha1_file_size(const unsigned char *, unsigned long *);
157158
extern void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
158159
extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size);
159160
extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);

diff.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,6 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
333333
close(fd);
334334
}
335335
else {
336-
/* We cannot do size only for SHA1 blobs */
337336
char type[20];
338337
struct sha1_size_cache *e;
339338

@@ -343,11 +342,13 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
343342
s->size = e->size;
344343
return 0;
345344
}
345+
if (!sha1_file_size(s->sha1, &s->size))
346+
locate_size_cache(s->sha1, s->size);
347+
}
348+
else {
349+
s->data = read_sha1_file(s->sha1, type, &s->size);
350+
s->should_free = 1;
346351
}
347-
s->data = read_sha1_file(s->sha1, type, &s->size);
348-
s->should_free = 1;
349-
if (s->data && size_only)
350-
locate_size_cache(s->sha1, s->size);
351352
}
352353
return 0;
353354
}

sha1_file.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,66 @@ int sha1_delta_base(const unsigned char *sha1, unsigned char *base_sha1)
432432
return ret;
433433
}
434434

435+
int sha1_file_size(const unsigned char *sha1, unsigned long *sizep)
436+
{
437+
int ret, status;
438+
unsigned long mapsize, size;
439+
void *map;
440+
z_stream stream;
441+
char hdr[64], type[20];
442+
const unsigned char *data;
443+
unsigned char cmd;
444+
int i;
445+
446+
map = map_sha1_file(sha1, &mapsize);
447+
if (!map)
448+
return -1;
449+
ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
450+
status = -1;
451+
if (ret < Z_OK || parse_sha1_header(hdr, type, &size) < 0)
452+
goto out;
453+
if (strcmp(type, "delta")) {
454+
*sizep = size;
455+
status = 0;
456+
goto out;
457+
}
458+
459+
/* We are dealing with a delta object. Inflated, the first
460+
* 20 bytes hold the base object SHA1, and delta data follows
461+
* immediately after it.
462+
*
463+
* The initial part of the delta starts at delta_data_head +
464+
* 20. Borrow code from patch-delta to read the result size.
465+
*/
466+
data = hdr + strlen(hdr) + 1 + 20;
467+
468+
/* Skip over the source size; we are not interested in
469+
* it and we cannot verify it because we do not want
470+
* to read the base object.
471+
*/
472+
cmd = *data++;
473+
while (cmd) {
474+
if (cmd & 1)
475+
data++;
476+
cmd >>= 1;
477+
}
478+
/* Read the result size */
479+
size = i = 0;
480+
cmd = *data++;
481+
while (cmd) {
482+
if (cmd & 1)
483+
size |= *data++ << i;
484+
i += 8;
485+
cmd >>= 1;
486+
}
487+
*sizep = size;
488+
status = 0;
489+
out:
490+
inflateEnd(&stream);
491+
munmap(map, mapsize);
492+
return status;
493+
}
494+
435495
void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size)
436496
{
437497
unsigned long mapsize;

0 commit comments

Comments
 (0)