Skip to content

Commit 8ee378a

Browse files
Junio C Hamano authored and Linus Torvalds committed
[PATCH] Finish initial cut of git-pack-object/git-unpack-object pair.
This finishes the initial round of the git-pack-object / git-unpack-object pair. They are now good enough to be used as a transport medium:

- Fix delta direction in pack-objects; the original was computing the delta to create the base object from the object to be squashed, which was quite unfriendly for the unpacker ;-).
- Add a script to test the very basics.
- Implement unpacker for both regular and deltified objects.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent d116a45 commit 8ee378a

File tree

3 files changed

+261
-18
lines changed

3 files changed

+261
-18
lines changed

pack-objects.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e
8282

8383
if (!otherbuf)
8484
die("unable to read %s", sha1_to_hex(entry->delta->sha1));
85-
delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size, ~0UL);
85+
delta_buf = diff_delta(otherbuf, othersize,
86+
buf, size, &delta_size, ~0UL);
8687
if (!delta_buf || delta_size != entry->delta_size)
8788
die("delta size changed");
8889
free(buf);
@@ -318,7 +319,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
318319
max_size = size / 2 - 20;
319320
if (cur_entry->delta)
320321
max_size = cur_entry->delta_size-1;
321-
delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size, max_size);
322+
delta_buf = diff_delta(old->data, oldsize,
323+
cur->data, size, &delta_size, max_size);
322324
if (!delta_buf)
323325
return 0;
324326
cur_entry->delta = old_entry;

t/t5300-pack-object.sh

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/bin/sh
#
# Copyright (c) 2005 Junio C Hamano
#

test_description='git-pack-object

'
. ./test-lib.sh

# Remember where the test started so that the checks below can cd into
# the scratch object store and come back again.
TRASH=`pwd`

# Build four blobs (a, b, c filled with a single repeated letter, and d,
# which is c plus one extra line), write a tree that holds them, and
# record the tree name followed by the object names git-ls-tree reports
# in obj-list.
test_expect_success \
    'setup' \
    'rm -f .git/index*
     for i in a b c
     do
	     dd if=/dev/zero bs=4k count=1 | tr "\\0" $i >$i &&
	     git-update-cache --add $i || exit
     done &&
     cat c >d && echo foo >>d && git-update-cache --add d &&
     tree=`git-write-tree` && {
	 echo $tree &&
	 git-ls-tree $tree | sed -e "s/.* \\([0-9a-f]*\\) .*/\\1/"
     } >obj-list'

# Round 1: pack with --window=0, then unpack into a fresh .git2.
test_expect_success \
    'pack without delta' \
    'git-pack-objects --window=0 test-1 <obj-list'

rm -fr .git2
mkdir .git2

test_expect_success \
    'unpack without delta' \
    'GIT_OBJECT_DIRECTORY=.git2/objects &&
     export GIT_OBJECT_DIRECTORY &&
     git-init-db &&
     git-unpack-objects test-1'

unset GIT_OBJECT_DIRECTORY
# Quoted so that a trash directory path containing whitespace still works.
cd "$TRASH/.git2"

# Every object file under .git/objects must have a byte-identical file at
# the same relative path under .git2.
test_expect_success \
    'check unpack without delta' \
    '(cd ../.git && find objects -type f -print) |
     while read path
     do
         cmp $path ../.git/$path || {
	     echo $path differs.
	     exit 1
	 }
     done'
cd "$TRASH"

# Round 2: the same cycle, this time without --window=0.
test_expect_success \
    'pack with delta' \
    'pwd &&
     git-pack-objects test-2 <obj-list'

rm -fr .git2
mkdir .git2

test_expect_success \
    'unpack with delta' \
    'GIT_OBJECT_DIRECTORY=.git2/objects &&
     export GIT_OBJECT_DIRECTORY &&
     git-init-db &&
     git-unpack-objects test-2'

unset GIT_OBJECT_DIRECTORY
cd "$TRASH/.git2"
test_expect_success \
    'check unpack with delta' \
    '(cd ../.git && find objects -type f -print) |
     while read path
     do
         cmp $path ../.git/$path || {
	     echo $path differs.
	     exit 1
	 }
     done'
cd "$TRASH"

test_done

unpack-objects.c

Lines changed: 172 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
#include "cache.h"
22
#include "object.h"
3+
#include "delta.h"
34

45
static int dry_run;
56
static int nr_entries;
67
static const char *base_name;
78
static const char unpack_usage[] = "git-unpack-objects basename";
89

910
struct pack_entry {
10-
unsigned int offset;
11+
unsigned int offset; /* network byte order */
1112
unsigned char sha1[20];
1213
};
1314

1415
static void *pack_base;
1516
static unsigned long pack_size;
17+
static void *index_base;
18+
static unsigned long index_size;
1619

1720
static struct pack_entry **pack_list;
1821

@@ -52,13 +55,13 @@ static int sort_by_offset(const void *_a, const void *_b)
5255
return o1 < o2 ? -1 : 1;
5356
}
5457

55-
static int check_index(void *index, unsigned long idx_size)
58+
static int check_index(void)
5659
{
57-
unsigned int *array = index;
60+
unsigned int *array = index_base;
5861
unsigned int nr;
5962
int i;
6063

61-
if (idx_size < 4*256)
64+
if (index_size < 4*256)
6265
return error("index file too small");
6366
nr = 0;
6467
for (i = 0; i < 256; i++) {
@@ -67,25 +70,182 @@ static int check_index(void *index, unsigned long idx_size)
6770
return error("non-monotonic index");
6871
nr = n;
6972
}
70-
if (idx_size != 4*256 + nr * 24) {
71-
printf("idx_size=%lu, expected %u (%u)\n", idx_size, 4*256 + nr * 24, nr);
73+
if (index_size != 4*256 + nr * 24) {
74+
printf("index_size=%lu, expected %u (%u)\n",
75+
index_size, 4*256 + nr * 24, nr);
7276
return error("wrong index file size");
7377
}
7478

7579
nr_entries = nr;
7680
pack_list = xmalloc(nr * sizeof(struct pack_entry *));
7781
for (i = 0; i < nr; i++)
78-
pack_list[i] = index + 4*256 + i*24;
82+
pack_list[i] = index_base + 4*256 + i*24;
7983

8084
qsort(pack_list, nr, sizeof(*pack_list), sort_by_offset);
8185

8286
printf("%d entries\n", nr);
8387
return 0;
8488
}
8589

86-
static void unpack_entry(struct pack_entry *entry)
90+
static int unpack_non_delta_entry(struct pack_entry *entry,
91+
unsigned char *pack)
8792
{
93+
int st, kind;
8894
unsigned long size;
95+
z_stream stream;
96+
char *buffer;
97+
unsigned char sha1[20];
98+
char *type_s;
99+
unsigned long offset = ntohl(entry->offset);
100+
101+
kind = pack[0];
102+
size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
103+
printf("%s %c %lu\n", sha1_to_hex(entry->sha1), kind, size);
104+
pack += 5;
105+
106+
buffer = xmalloc(size + 1);
107+
buffer[size] = 0;
108+
memset(&stream, 0, sizeof(stream));
109+
stream.next_in = pack;
110+
stream.avail_in = pack_size - offset; /* sheesh. */
111+
stream.next_out = buffer;
112+
stream.avail_out = size;
113+
114+
inflateInit(&stream);
115+
st = inflate(&stream, Z_FINISH);
116+
inflateEnd(&stream);
117+
if ((st != Z_STREAM_END) || stream.total_out != size)
118+
goto err_finish;
119+
switch (kind) {
120+
case 'C': type_s = "commit"; break;
121+
case 'T': type_s = "tree"; break;
122+
case 'B': type_s = "blob"; break;
123+
default: goto err_finish;
124+
}
125+
if (write_sha1_file(buffer, size, type_s, sha1) < 0)
126+
die("failed to write %s (%s)",
127+
sha1_to_hex(entry->sha1), type_s);
128+
printf("%s %s\n", sha1_to_hex(sha1), type_s);
129+
if (memcmp(sha1, entry->sha1, 20))
130+
die("resulting %s have wrong SHA1", type_s);
131+
132+
finish:
133+
st = 0;
134+
free(buffer);
135+
return st;
136+
err_finish:
137+
st = -1;
138+
goto finish;
139+
}
140+
141+
static int find_pack_entry(unsigned char *sha1, struct pack_entry **ent)
142+
{
143+
int *level1_ofs = index_base;
144+
int hi = ntohl(level1_ofs[*sha1]);
145+
int lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
146+
void *index = index_base + 4*256;
147+
148+
do {
149+
int mi = (lo + hi) / 2;
150+
int cmp = memcmp(index + 24 * mi + 4, sha1, 20);
151+
if (!cmp) {
152+
*ent = index + 24 * mi;
153+
return 1;
154+
}
155+
if (cmp < 0)
156+
hi = mi;
157+
else
158+
lo = mi;
159+
} while (lo < hi);
160+
return 0;
161+
}
162+
163+
/* forward declaration for a mutually recursive function */
164+
static void unpack_entry(struct pack_entry *);
165+
166+
static int unpack_delta_entry(struct pack_entry *entry, unsigned char *pack)
167+
{
168+
void *delta_data, *result, *base;
169+
unsigned long delta_alloc, delta_size, result_size, base_size;
170+
z_stream stream;
171+
int st;
172+
char type[20];
173+
unsigned char sha1[20];
174+
175+
printf("%s D", sha1_to_hex(entry->sha1));
176+
printf(" %s\n", sha1_to_hex(pack+1));
177+
178+
/* pack+1 is the base sha1, unless we have it, we need to
179+
* unpack it first.
180+
*/
181+
if (!has_sha1_file(pack+1)) {
182+
struct pack_entry *base;
183+
if (!find_pack_entry(pack+1, &base))
184+
die("cannot find delta-pack base object");
185+
unpack_entry(base);
186+
}
187+
188+
/* pack+1 thru pack+20 is the base sha1 and
189+
* pack+21 thru unknown number is the delta data.
190+
* we do not even have size of the delta data uncompressed.
191+
* sheesh!
192+
*/
193+
delta_alloc = 1024;
194+
delta_data = xmalloc(delta_alloc);
195+
196+
memset(&stream, 0, sizeof(stream));
197+
198+
stream.next_in = pack + 21;
199+
stream.avail_in = pack_size - ntohl(entry->offset); /* sheesh. */
200+
stream.next_out = delta_data;
201+
stream.avail_out = delta_alloc;
202+
delta_size = 0;
203+
204+
inflateInit(&stream);
205+
while (1) {
206+
st = inflate(&stream, Z_FINISH);
207+
if (st == Z_STREAM_END) {
208+
delta_size = stream.total_out;
209+
break;
210+
}
211+
if (st < 0)
212+
break;
213+
214+
if (delta_alloc <= stream.total_out) {
215+
delta_alloc = (delta_alloc +1024) * 3 / 2;
216+
delta_data = xrealloc(delta_data, delta_alloc);
217+
stream.next_out = delta_data + stream.total_out;
218+
stream.avail_out = delta_alloc - stream.total_out;
219+
}
220+
}
221+
inflateEnd(&stream);
222+
if (st != Z_STREAM_END) {
223+
free(delta_data);
224+
return -1;
225+
}
226+
227+
base = read_sha1_file(pack+1, type, &base_size);
228+
if (!base)
229+
die("failed to read delta-pack base object");
230+
result = patch_delta(base, base_size,
231+
delta_data, delta_size,
232+
&result_size);
233+
if (!result)
234+
die("failed to apply delta");
235+
free(delta_data);
236+
237+
if (write_sha1_file(result, result_size, type, sha1) < 0)
238+
die("failed to write %s (%s)",
239+
sha1_to_hex(entry->sha1), type);
240+
free(result);
241+
printf("%s %s\n", sha1_to_hex(sha1), type);
242+
if (memcmp(sha1, entry->sha1, 20))
243+
die("resulting %s have wrong SHA1", type);
244+
return 0;
245+
}
246+
247+
static void unpack_entry(struct pack_entry *entry)
248+
{
89249
unsigned long offset;
90250
unsigned char *pack;
91251

@@ -100,12 +260,10 @@ static void unpack_entry(struct pack_entry *entry)
100260
offset = pack_size - offset;
101261
switch (*pack) {
102262
case 'C': case 'T': case 'B':
103-
size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4];
104-
printf("%s %c %lu\n", sha1_to_hex(entry->sha1), *pack, size);
263+
unpack_non_delta_entry(entry, pack);
105264
break;
106265
case 'D':
107-
printf("%s D", sha1_to_hex(entry->sha1));
108-
printf(" %s\n", sha1_to_hex(pack+1));
266+
unpack_delta_entry(entry, pack);
109267
break;
110268
default:
111269
die("corrupted pack file");
@@ -130,8 +288,6 @@ static void unpack_all(void)
130288
int main(int argc, char **argv)
131289
{
132290
int i;
133-
unsigned long idx_size;
134-
void *index;
135291

136292
for (i = 1 ; i < argc; i++) {
137293
const char *arg = argv[i];
@@ -149,9 +305,9 @@ int main(int argc, char **argv)
149305
}
150306
if (!base_name)
151307
usage(unpack_usage);
152-
index = map_file("idx", &idx_size);
308+
index_base = map_file("idx", &index_size);
153309
pack_base = map_file("pack", &pack_size);
154-
if (check_index(index, idx_size) < 0)
310+
if (check_index() < 0)
155311
die("bad index file");
156312
unpack_all();
157313
return 0;

0 commit comments

Comments
 (0)