Skip to content

Commit c4fb06c

Browse files
author
Linus Torvalds
committed
Fix object packing/unpacking.
This actually successfully packed and unpacked a git archive down to 1.3MB (17MB unpacked). Right now unpacking is way too noisy; lots of debug messages are still left in.
1 parent 8ee378a commit c4fb06c

File tree

2 files changed

+49
-62
lines changed

2 files changed

+49
-62
lines changed

pack-objects.c

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ static unsigned long write_object(FILE *f, struct object_entry *entry)
9696
unsigned long size;
9797
char type[10];
9898
void *buf = read_sha1_file(entry->sha1, type, &size);
99-
char header[21];
99+
char header[25];
100100
unsigned hdrlen, datalen;
101101

102102
if (!buf)
@@ -110,16 +110,16 @@ static unsigned long write_object(FILE *f, struct object_entry *entry)
110110
* instead.
111111
*/
112112
header[0] = ".CTB"[entry->type];
113-
datalen = htonl(size);
114-
memcpy(header+1, &datalen, 4);
115113
hdrlen = 5;
116114
if (entry->delta) {
117115
header[0] = 'D';
118-
memcpy(header+1, entry->delta, 20);
116+
memcpy(header+5, entry->delta, 20);
119117
buf = delta_against(buf, size, entry);
120118
size = entry->delta_size;
121-
hdrlen = 21;
119+
hdrlen = 25;
122120
}
121+
datalen = htonl(size);
122+
memcpy(header+1, &datalen, 4);
123123
fwrite(header, hdrlen, 1, f);
124124
datalen = fwrite_compressed(buf, size, f);
125125
free(buf);

unpack-objects.c

Lines changed: 44 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -88,26 +88,26 @@ static int check_index(void)
8888
}
8989

9090
static int unpack_non_delta_entry(struct pack_entry *entry,
91-
unsigned char *pack)
91+
int kind,
92+
unsigned char *data,
93+
unsigned long size,
94+
unsigned long left)
9295
{
93-
int st, kind;
94-
unsigned long size;
96+
int st;
9597
z_stream stream;
9698
char *buffer;
9799
unsigned char sha1[20];
98100
char *type_s;
99-
unsigned long offset = ntohl(entry->offset);
100101

101-
kind = pack[0];
102-
size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
103102
printf("%s %c %lu\n", sha1_to_hex(entry->sha1), kind, size);
104-
pack += 5;
103+
if (dry_run)
104+
return 0;
105105

106106
buffer = xmalloc(size + 1);
107107
buffer[size] = 0;
108108
memset(&stream, 0, sizeof(stream));
109-
stream.next_in = pack;
110-
stream.avail_in = pack_size - offset; /* sheesh. */
109+
stream.next_in = data;
110+
stream.avail_in = left;
111111
stream.next_out = buffer;
112112
stream.avail_out = size;
113113

@@ -148,85 +148,71 @@ static int find_pack_entry(unsigned char *sha1, struct pack_entry **ent)
148148
do {
149149
int mi = (lo + hi) / 2;
150150
int cmp = memcmp(index + 24 * mi + 4, sha1, 20);
151+
printf("lo=%d mi=%d hi=%d cmp=%d\n", lo, mi, hi, cmp);
151152
if (!cmp) {
152153
*ent = index + 24 * mi;
153154
return 1;
154155
}
155-
if (cmp < 0)
156+
if (cmp > 0)
156157
hi = mi;
157158
else
158-
lo = mi;
159+
lo = mi+1;
159160
} while (lo < hi);
160161
return 0;
161162
}
162163

163164
/* forward declaration for a mutually recursive function */
164165
static void unpack_entry(struct pack_entry *);
165166

166-
static int unpack_delta_entry(struct pack_entry *entry, unsigned char *pack)
167+
static int unpack_delta_entry(struct pack_entry *entry,
168+
unsigned char *base_sha1,
169+
unsigned long delta_size,
170+
unsigned long left)
167171
{
168-
void *delta_data, *result, *base;
169-
unsigned long delta_alloc, delta_size, result_size, base_size;
172+
void *data, *delta_data, *result, *base;
173+
unsigned long data_size, result_size, base_size;
170174
z_stream stream;
171175
int st;
172176
char type[20];
173177
unsigned char sha1[20];
174178

175-
printf("%s D", sha1_to_hex(entry->sha1));
176-
printf(" %s\n", sha1_to_hex(pack+1));
179+
if (left < 20)
180+
die("truncated pack file");
181+
data = base_sha1 + 20;
182+
data_size = left - 20;
183+
printf("%s D %lu", sha1_to_hex(entry->sha1), delta_size);
184+
printf(" %s\n", sha1_to_hex(base_sha1));
185+
186+
if (dry_run)
187+
return 0;
177188

178-
/* pack+1 is the base sha1, unless we have it, we need to
189+
/* pack+5 is the base sha1, unless we have it, we need to
179190
* unpack it first.
180191
*/
181-
if (!has_sha1_file(pack+1)) {
192+
if (!has_sha1_file(base_sha1)) {
182193
struct pack_entry *base;
183-
if (!find_pack_entry(pack+1, &base))
194+
if (!find_pack_entry(base_sha1, &base))
184195
die("cannot find delta-pack base object");
185196
unpack_entry(base);
186197
}
187-
188-
/* pack+1 thru pack+20 is the base sha1 and
189-
* pack+21 thru unknown number is the delta data.
190-
* we do not even have size of the delta data uncompressed.
191-
* sheesh!
192-
*/
193-
delta_alloc = 1024;
194-
delta_data = xmalloc(delta_alloc);
198+
delta_data = xmalloc(delta_size);
195199

196200
memset(&stream, 0, sizeof(stream));
197201

198-
stream.next_in = pack + 21;
199-
stream.avail_in = pack_size - ntohl(entry->offset); /* sheesh. */
202+
stream.next_in = data;
203+
stream.avail_in = data_size;
200204
stream.next_out = delta_data;
201-
stream.avail_out = delta_alloc;
202-
delta_size = 0;
205+
stream.avail_out = delta_size;
203206

204207
inflateInit(&stream);
205-
while (1) {
206-
st = inflate(&stream, Z_FINISH);
207-
if (st == Z_STREAM_END) {
208-
delta_size = stream.total_out;
209-
break;
210-
}
211-
if (st < 0)
212-
break;
213-
214-
if (delta_alloc <= stream.total_out) {
215-
delta_alloc = (delta_alloc +1024) * 3 / 2;
216-
delta_data = xrealloc(delta_data, delta_alloc);
217-
stream.next_out = delta_data + stream.total_out;
218-
stream.avail_out = delta_alloc - stream.total_out;
219-
}
220-
}
208+
st = inflate(&stream, Z_FINISH);
221209
inflateEnd(&stream);
222-
if (st != Z_STREAM_END) {
223-
free(delta_data);
224-
return -1;
225-
}
210+
if ((st != Z_STREAM_END) || stream.total_out != delta_size)
211+
die("delta data unpack failed");
226212

227-
base = read_sha1_file(pack+1, type, &base_size);
213+
base = read_sha1_file(base_sha1, type, &base_size);
228214
if (!base)
229-
die("failed to read delta-pack base object");
215+
die("failed to read delta-pack base object %s", sha1_to_hex(base_sha1));
230216
result = patch_delta(base, base_size,
231217
delta_data, delta_size,
232218
&result_size);
@@ -246,7 +232,7 @@ static int unpack_delta_entry(struct pack_entry *entry, unsigned char *pack)
246232

247233
static void unpack_entry(struct pack_entry *entry)
248234
{
249-
unsigned long offset;
235+
unsigned long offset, size, left;
250236
unsigned char *pack;
251237

252238
/* Have we done this one already due to deltas based on it? */
@@ -257,13 +243,14 @@ static void unpack_entry(struct pack_entry *entry)
257243
if (offset > pack_size - 5)
258244
die("object offset outside of pack file");
259245
pack = pack_base + offset;
260-
offset = pack_size - offset;
246+
size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
247+
left = pack_size - offset - 5;
261248
switch (*pack) {
262249
case 'C': case 'T': case 'B':
263-
unpack_non_delta_entry(entry, pack);
250+
unpack_non_delta_entry(entry, *pack, pack+5, size, left);
264251
break;
265252
case 'D':
266-
unpack_delta_entry(entry, pack);
253+
unpack_delta_entry(entry, pack+5, size, left);
267254
break;
268255
default:
269256
die("corrupted pack file");

0 commit comments

Comments
 (0)