Skip to content

Commit 4614043

Browse files
pclouds authored and gitster committed
index-pack: use streaming interface for collision test on large blobs
When putting whole objects in core is unavoidable, try to match the object type and size first before actually inflating.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 8a2e163 commit 4614043

File tree

2 files changed

+82
-5
lines changed

2 files changed

+82
-5
lines changed

builtin/index-pack.c

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "progress.h"
1010
#include "fsck.h"
1111
#include "exec_cmd.h"
12+
#include "streaming.h"
1213
#include "thread-utils.h"
1314

1415
static const char index_pack_usage[] =
@@ -621,31 +622,102 @@ static void find_delta_children(const union delta_base *base,
621622
*last_index = last;
622623
}
623624

625+
/*
 * State shared between check_collison() and its compare_objects()
 * callback while a pack entry is compared against a stream.
 */
struct compare_data {
	/* Pack entry under test; its SHA-1 is quoted in error messages. */
	struct object_entry *entry;

	/* Stream that compare_objects() reads the comparison data from. */
	struct git_istream *st;

	/* Scratch buffer that receives the bytes read from "st". */
	unsigned char *buf;

	/* Number of bytes currently allocated for "buf". */
	unsigned long buf_size;
};
631+
632+
static int compare_objects(const unsigned char *buf, unsigned long size,
633+
void *cb_data)
634+
{
635+
struct compare_data *data = cb_data;
636+
637+
if (data->buf_size < size) {
638+
free(data->buf);
639+
data->buf = xmalloc(size);
640+
data->buf_size = size;
641+
}
642+
643+
while (size) {
644+
ssize_t len = read_istream(data->st, data->buf, size);
645+
if (len == 0)
646+
die(_("SHA1 COLLISION FOUND WITH %s !"),
647+
sha1_to_hex(data->entry->idx.sha1));
648+
if (len < 0)
649+
die(_("unable to read %s"),
650+
sha1_to_hex(data->entry->idx.sha1));
651+
if (memcmp(buf, data->buf, len))
652+
die(_("SHA1 COLLISION FOUND WITH %s !"),
653+
sha1_to_hex(data->entry->idx.sha1));
654+
size -= len;
655+
buf += len;
656+
}
657+
return 0;
658+
}
659+
660+
static int check_collison(struct object_entry *entry)
661+
{
662+
struct compare_data data;
663+
enum object_type type;
664+
unsigned long size;
665+
666+
if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB)
667+
return -1;
668+
669+
memset(&data, 0, sizeof(data));
670+
data.entry = entry;
671+
data.st = open_istream(entry->idx.sha1, &type, &size, NULL);
672+
if (!data.st)
673+
return -1;
674+
if (size != entry->size || type != entry->type)
675+
die(_("SHA1 COLLISION FOUND WITH %s !"),
676+
sha1_to_hex(entry->idx.sha1));
677+
unpack_data(entry, compare_objects, &data);
678+
close_istream(data.st);
679+
free(data.buf);
680+
return 0;
681+
}
682+
624683
static void sha1_object(const void *data, struct object_entry *obj_entry,
625684
unsigned long size, enum object_type type,
626685
const unsigned char *sha1)
627686
{
628687
void *new_data = NULL;
688+
int collision_test_needed;
629689

630690
assert(data || obj_entry);
631691

632692
read_lock();
633-
if (has_sha1_file(sha1)) {
693+
collision_test_needed = has_sha1_file(sha1);
694+
read_unlock();
695+
696+
if (collision_test_needed && !data) {
697+
read_lock();
698+
if (!check_collison(obj_entry))
699+
collision_test_needed = 0;
700+
read_unlock();
701+
}
702+
if (collision_test_needed) {
634703
void *has_data;
635704
enum object_type has_type;
636705
unsigned long has_size;
637-
if (!data)
638-
data = new_data = get_data_from_pack(obj_entry);
706+
read_lock();
707+
has_type = sha1_object_info(sha1, &has_size);
708+
if (has_type != type || has_size != size)
709+
die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
639710
has_data = read_sha1_file(sha1, &has_type, &has_size);
640711
read_unlock();
712+
if (!data)
713+
data = new_data = get_data_from_pack(obj_entry);
641714
if (!has_data)
642715
die(_("cannot read existing object %s"), sha1_to_hex(sha1));
643716
if (size != has_size || type != has_type ||
644717
memcmp(data, has_data, size) != 0)
645718
die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
646719
free(has_data);
647-
} else
648-
read_unlock();
720+
}
649721

650722
if (strict) {
651723
read_lock();

t/t5300-pack-object.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,4 +418,9 @@ test_expect_success \
418418
'test_must_fail git index-pack -o bad.idx test-3.pack 2>msg &&
419419
grep "SHA1 COLLISION FOUND" msg'
420420

421+
test_expect_success \
422+
'make sure index-pack detects the SHA1 collision (large blobs)' \
423+
'test_must_fail git -c core.bigfilethreshold=1 index-pack -o bad.idx test-3.pack 2>msg &&
424+
grep "SHA1 COLLISION FOUND" msg'
425+
421426
test_done

0 commit comments

Comments
 (0)