Skip to content

Commit 090ea12

Browse files
pclouds authored and gitster committed
parse_object: avoid putting whole blob in core
Traditionally, all the callers of check_sha1_signature() first called read_sha1_file() to prepare the whole object data in core, and called this function. The function is used to revalidate what we read from the object database actually matches the object name we used to ask for the data from the object database. Update the API to allow callers to pass NULL as the object data, and have the function read and hash the object data using streaming API to recompute the object name, without having to hold everything in core at the same time. This is most useful in parse_object() that parses a blob object, because this caller does not have to keep the actual blob data around in memory after a "struct blob" is returned. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 00c8fd4 commit 090ea12

File tree

2 files changed

+51
-2
lines changed

2 files changed

+51
-2
lines changed

object.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,17 @@ struct object *parse_object(const unsigned char *sha1)
198198
if (obj && obj->parsed)
199199
return obj;
200200

201+
if ((obj && obj->type == OBJ_BLOB) ||
202+
(!obj && has_sha1_file(sha1) &&
203+
sha1_object_info(sha1, NULL) == OBJ_BLOB)) {
204+
if (check_sha1_signature(repl, NULL, 0, NULL) < 0) {
205+
error("sha1 mismatch %s\n", sha1_to_hex(repl));
206+
return NULL;
207+
}
208+
parse_blob_buffer(lookup_blob(sha1), NULL, 0);
209+
return lookup_object(sha1);
210+
}
211+
201212
buffer = read_sha1_file(sha1, &type, &size);
202213
if (buffer) {
203214
if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) {

sha1_file.c

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "pack-revindex.h"
2020
#include "sha1-lookup.h"
2121
#include "bulk-checkin.h"
22+
#include "streaming.h"
2223

2324
#ifndef O_NOATIME
2425
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1146,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
11461147
return NULL;
11471148
}
11481149

1149-
int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
1150+
/*
1151+
* With an in-core object data in "map", rehash it to make sure the
1152+
* object name actually matches "sha1" to detect object corruption.
1153+
* With "map" == NULL, try reading the object named with "sha1" using
1154+
* the streaming interface and rehash it to do the same.
1155+
*/
1156+
int check_sha1_signature(const unsigned char *sha1, void *map,
1157+
unsigned long size, const char *type)
11501158
{
11511159
unsigned char real_sha1[20];
1152-
hash_sha1_file(map, size, type, real_sha1);
1160+
enum object_type obj_type;
1161+
struct git_istream *st;
1162+
git_SHA_CTX c;
1163+
char hdr[32];
1164+
int hdrlen;
1165+
1166+
if (map) {
1167+
hash_sha1_file(map, size, type, real_sha1);
1168+
return hashcmp(sha1, real_sha1) ? -1 : 0;
1169+
}
1170+
1171+
st = open_istream(sha1, &obj_type, &size, NULL);
1172+
if (!st)
1173+
return -1;
1174+
1175+
/* Generate the header */
1176+
hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
1177+
1178+
/* Sha1.. */
1179+
git_SHA1_Init(&c);
1180+
git_SHA1_Update(&c, hdr, hdrlen);
1181+
for (;;) {
1182+
char buf[1024 * 16];
1183+
ssize_t readlen = read_istream(st, buf, sizeof(buf));
1184+
1185+
if (!readlen)
1186+
break;
1187+
git_SHA1_Update(&c, buf, readlen);
1188+
}
1189+
git_SHA1_Final(real_sha1, &c);
1190+
close_istream(st);
11531191
return hashcmp(sha1, real_sha1) ? -1 : 0;
11541192
}
11551193

0 commit comments

Comments
 (0)