Skip to content

Commit e002a16

Browse files
Nicolas PitreLinus Torvalds
authored andcommitted
[PATCH] delta creation
This adds the ability to actually create delta objects using a new tool: git-mkdelta. It uses an ordered list of potential objects to deltafy against earlier objects in the list. A cap on the depth of delta references can be provided as well, otherwise the default is to not have any limit. A limit of 0 will also undeltafy any given object. Also provided is the beginning of a script to deltafy an entire repository. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent d1af002 commit e002a16

File tree

3 files changed

+360
-2
lines changed

3 files changed

+360
-2
lines changed

Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ AR=ar
1919
INSTALL=install
2020

2121
SCRIPTS=git-apply-patch-script git-merge-one-file-script git-prune-script \
22-
git-pull-script git-tag-script git-resolve-script git-whatchanged
22+
git-pull-script git-tag-script git-resolve-script git-whatchanged \
23+
git-deltafy-script
2324

2425
PROG= git-update-cache git-diff-files git-init-db git-write-tree \
2526
git-read-tree git-commit-tree git-cat-file git-fsck-cache \
@@ -28,7 +29,7 @@ PROG= git-update-cache git-diff-files git-init-db git-write-tree \
2829
git-unpack-file git-export git-diff-cache git-convert-cache \
2930
git-http-pull git-rpush git-rpull git-rev-list git-mktag \
3031
git-diff-helper git-tar-tree git-local-pull git-write-blob \
31-
git-get-tar-commit-id
32+
git-get-tar-commit-id git-mkdelta
3233

3334
all: $(PROG)
3435

@@ -107,6 +108,7 @@ git-mktag: mktag.c
107108
git-diff-helper: diff-helper.c
108109
git-tar-tree: tar-tree.c
109110
git-write-blob: write-blob.c
111+
git-mkdelta: mkdelta.c
110112

111113
git-http-pull: LIBS += -lcurl
112114

git-deltafy-script

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/bin/bash
2+
3+
# Script to deltafy an entire GIT repository based on the commit list.
4+
# The most recent version of a file is the reference and previous versions
5+
# are made delta against the best earlier version available. And so on for
6+
# successive versions going back in time. This way the delta overhead is
7+
# pushed towards older version of any given file.
8+
#
9+
# NOTE: the "best earlier version" is not implemented in mkdelta yet
10+
# and therefore only the next eariler version is used at this time.
11+
#
12+
# TODO: deltafy tree objects as well.
13+
#
14+
# The -d argument allows to provide a limit on the delta chain depth.
15+
# If 0 is passed then everything is undeltafied.
16+
17+
set -e
18+
19+
depth=
20+
[ "$1" == "-d" ] && depth="--max-depth=$2" && shift 2
21+
22+
curr_file=""
23+
24+
git-rev-list HEAD |
25+
git-diff-tree -r --stdin |
26+
sed -n '/^\*/ s/^.*->\(.\{41\}\)\(.*\)$/\2 \1/p' | sort | uniq |
27+
while read file sha1; do
28+
if [ "$file" == "$curr_file" ]; then
29+
list="$list $sha1"
30+
else
31+
if [ "$list" ]; then
32+
echo "Processing $curr_file"
33+
echo "$head $list" | xargs git-mkdelta $depth -v
34+
fi
35+
curr_file="$file"
36+
list=""
37+
head="$sha1"
38+
fi
39+
done

mkdelta.c

Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
/*
2+
* Deltafication of a GIT database.
3+
*
4+
* (C) 2005 Nicolas Pitre <nico@cam.org>
5+
*
6+
* This code is free software; you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License version 2 as
8+
* published by the Free Software Foundation.
9+
*/
10+
11+
#include "cache.h"
12+
#include "delta.h"
13+
14+
static int replace_object(char *buf, unsigned long size, unsigned char *sha1)
15+
{
16+
char tmpfile[PATH_MAX];
17+
int fd;
18+
19+
snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", get_object_directory());
20+
fd = mkstemp(tmpfile);
21+
if (fd < 0)
22+
return error("%s: %s\n", tmpfile, strerror(errno));
23+
if (write(fd, buf, size) != size) {
24+
perror("unable to write file");
25+
close(fd);
26+
unlink(tmpfile);
27+
return -1;
28+
}
29+
fchmod(fd, 0444);
30+
close(fd);
31+
if (rename(tmpfile, sha1_file_name(sha1))) {
32+
perror("unable to replace original object");
33+
unlink(tmpfile);
34+
return -1;
35+
}
36+
return 0;
37+
}
38+
39+
static void *create_object(char *buf, unsigned long len, char *hdr, int hdrlen,
40+
unsigned long *retsize)
41+
{
42+
char *compressed;
43+
unsigned long size;
44+
z_stream stream;
45+
46+
/* Set it up */
47+
memset(&stream, 0, sizeof(stream));
48+
deflateInit(&stream, Z_BEST_COMPRESSION);
49+
size = deflateBound(&stream, len+hdrlen);
50+
compressed = xmalloc(size);
51+
52+
/* Compress it */
53+
stream.next_out = compressed;
54+
stream.avail_out = size;
55+
56+
/* First header.. */
57+
stream.next_in = hdr;
58+
stream.avail_in = hdrlen;
59+
while (deflate(&stream, 0) == Z_OK)
60+
/* nothing */;
61+
62+
/* Then the data itself.. */
63+
stream.next_in = buf;
64+
stream.avail_in = len;
65+
while (deflate(&stream, Z_FINISH) == Z_OK)
66+
/* nothing */;
67+
deflateEnd(&stream);
68+
*retsize = stream.total_out;
69+
return compressed;
70+
}
71+
72+
static int restore_original_object(char *buf, unsigned long len,
73+
char *type, unsigned char *sha1)
74+
{
75+
char hdr[50];
76+
int hdrlen, ret;
77+
void *compressed;
78+
unsigned long size;
79+
80+
hdrlen = sprintf(hdr, "%s %lu", type, len)+1;
81+
compressed = create_object(buf, len, hdr, hdrlen, &size);
82+
ret = replace_object(compressed, size, sha1);
83+
free(compressed);
84+
return ret;
85+
}
86+
87+
static void *create_delta_object(char *buf, unsigned long len,
88+
unsigned char *sha1_ref, unsigned long *size)
89+
{
90+
char hdr[50];
91+
int hdrlen;
92+
93+
/* Generate the header + sha1 of reference for delta */
94+
hdrlen = sprintf(hdr, "delta %lu", len+20)+1;
95+
memcpy(hdr + hdrlen, sha1_ref, 20);
96+
hdrlen += 20;
97+
98+
return create_object(buf, len, hdr, hdrlen, size);
99+
}
100+
101+
static unsigned long get_object_size(unsigned char *sha1)
102+
{
103+
struct stat st;
104+
if (stat(sha1_file_name(sha1), &st))
105+
die("%s: %s", sha1_to_hex(sha1), strerror(errno));
106+
return st.st_size;
107+
}
108+
109+
static void *get_buffer(unsigned char *sha1, char *type, unsigned long *size)
110+
{
111+
unsigned long mapsize;
112+
void *map = map_sha1_file(sha1, &mapsize);
113+
if (map) {
114+
void *buffer = unpack_sha1_file(map, mapsize, type, size);
115+
munmap(map, mapsize);
116+
if (buffer)
117+
return buffer;
118+
}
119+
error("unable to get object %s", sha1_to_hex(sha1));
120+
return NULL;
121+
}
122+
123+
static void *expand_delta(void *delta, unsigned long delta_size, char *type,
124+
unsigned long *size, unsigned int *depth, char *head)
125+
{
126+
void *buf = NULL;
127+
*depth++;
128+
if (delta_size < 20) {
129+
error("delta object is bad");
130+
free(delta);
131+
} else {
132+
unsigned long ref_size;
133+
void *ref = get_buffer(delta, type, &ref_size);
134+
if (ref && !strcmp(type, "delta"))
135+
ref = expand_delta(ref, ref_size, type, &ref_size,
136+
depth, head);
137+
else
138+
memcpy(head, delta, 20);
139+
if (ref)
140+
buf = patch_delta(ref, ref_size, delta+20,
141+
delta_size-20, size);
142+
free(ref);
143+
free(delta);
144+
}
145+
return buf;
146+
}
147+
148+
static char *mkdelta_usage =
149+
"mkdelta [ --max-depth=N ] <reference_sha1> <target_sha1> [ <next_sha1> ... ]";
150+
151+
int main(int argc, char **argv)
152+
{
153+
unsigned char sha1_ref[20], sha1_trg[20], head_ref[20], head_trg[20];
154+
char type_ref[20], type_trg[20];
155+
void *buf_ref, *buf_trg, *buf_delta;
156+
unsigned long size_ref, size_trg, size_orig, size_delta;
157+
unsigned int depth_ref, depth_trg, depth_max = -1;
158+
int i, verbose = 0;
159+
160+
for (i = 1; i < argc; i++) {
161+
if (!strcmp(argv[i], "-v")) {
162+
verbose = 1;
163+
} else if (!strcmp(argv[i], "-d") && i+1 < argc) {
164+
depth_max = atoi(argv[++i]);
165+
} else if (!strncmp(argv[i], "--max-depth=", 12)) {
166+
depth_max = atoi(argv[i]+12);
167+
} else
168+
break;
169+
}
170+
171+
if (i + (depth_max != 0) >= argc)
172+
usage(mkdelta_usage);
173+
174+
if (get_sha1(argv[i], sha1_ref))
175+
die("bad sha1 %s", argv[i]);
176+
depth_ref = 0;
177+
buf_ref = get_buffer(sha1_ref, type_ref, &size_ref);
178+
if (buf_ref && !strcmp(type_ref, "delta"))
179+
buf_ref = expand_delta(buf_ref, size_ref, type_ref,
180+
&size_ref, &depth_ref, head_ref);
181+
else
182+
memcpy(head_ref, sha1_ref, 20);
183+
if (!buf_ref)
184+
die("unable to obtain initial object %s", argv[i]);
185+
186+
if (depth_ref > depth_max) {
187+
if (restore_original_object(buf_ref, size_ref, type_ref, sha1_ref))
188+
die("unable to restore %s", argv[i]);
189+
if (verbose)
190+
printf("undelta %s (depth was %d)\n", argv[i], depth_ref);
191+
depth_ref = 0;
192+
}
193+
194+
/*
195+
* TODO: deltafication should be tried against any early object
196+
* in the object list and not only the previous object.
197+
*/
198+
199+
while (++i < argc) {
200+
if (get_sha1(argv[i], sha1_trg))
201+
die("bad sha1 %s", argv[i]);
202+
depth_trg = 0;
203+
buf_trg = get_buffer(sha1_trg, type_trg, &size_trg);
204+
if (buf_trg && !size_trg) {
205+
if (verbose)
206+
printf("skip %s (object is empty)\n", argv[i]);
207+
continue;
208+
}
209+
size_orig = size_trg;
210+
if (buf_trg && !strcmp(type_trg, "delta")) {
211+
if (!memcmp(buf_trg, sha1_ref, 20)) {
212+
/* delta already in place */
213+
depth_ref++;
214+
memcpy(sha1_ref, sha1_trg, 20);
215+
buf_ref = patch_delta(buf_ref, size_ref,
216+
buf_trg+20, size_trg-20,
217+
&size_ref);
218+
if (!buf_ref)
219+
die("unable to apply delta %s", argv[i]);
220+
if (depth_ref > depth_max) {
221+
if (restore_original_object(buf_ref, size_ref,
222+
type_ref, sha1_ref))
223+
die("unable to restore %s", argv[i]);
224+
if (verbose)
225+
printf("undelta %s (depth was %d)\n", argv[i], depth_ref);
226+
depth_ref = 0;
227+
continue;
228+
}
229+
if (verbose)
230+
printf("skip %s (delta already in place)\n", argv[i]);
231+
continue;
232+
}
233+
buf_trg = expand_delta(buf_trg, size_trg, type_trg,
234+
&size_trg, &depth_trg, head_trg);
235+
} else
236+
memcpy(head_trg, sha1_trg, 20);
237+
if (!buf_trg)
238+
die("unable to read target object %s", argv[i]);
239+
240+
if (depth_trg > depth_max) {
241+
if (restore_original_object(buf_trg, size_trg, type_trg, sha1_trg))
242+
die("unable to restore %s", argv[i]);
243+
if (verbose)
244+
printf("undelta %s (depth was %d)\n", argv[i], depth_trg);
245+
depth_trg = 0;
246+
size_orig = size_trg;
247+
}
248+
249+
if (depth_max == 0)
250+
goto skip;
251+
252+
if (strcmp(type_ref, type_trg))
253+
die("type mismatch for object %s", argv[i]);
254+
255+
if (!size_ref) {
256+
if (verbose)
257+
printf("skip %s (initial object is empty)\n", argv[i]);
258+
goto skip;
259+
}
260+
261+
if (depth_ref + 1 > depth_max) {
262+
if (verbose)
263+
printf("skip %s (exceeding max link depth)\n", argv[i]);
264+
goto skip;
265+
}
266+
267+
if (!memcmp(head_ref, sha1_trg, 20)) {
268+
if (verbose)
269+
printf("skip %s (would create a loop)\n", argv[i]);
270+
goto skip;
271+
}
272+
273+
buf_delta = diff_delta(buf_ref, size_ref, buf_trg, size_trg, &size_delta);
274+
if (!buf_delta)
275+
die("out of memory");
276+
277+
/* no need to even try to compress if original
278+
uncompressed is already smaller */
279+
if (size_delta+20 < size_orig) {
280+
void *buf_obj;
281+
unsigned long size_obj;
282+
buf_obj = create_delta_object(buf_delta, size_delta,
283+
sha1_ref, &size_obj);
284+
free(buf_delta);
285+
size_orig = get_object_size(sha1_trg);
286+
if (size_obj >= size_orig) {
287+
free(buf_obj);
288+
if (verbose)
289+
printf("skip %s (original is smaller)\n", argv[i]);
290+
goto skip;
291+
}
292+
if (replace_object(buf_obj, size_obj, sha1_trg))
293+
die("unable to write delta for %s", argv[i]);
294+
free(buf_obj);
295+
depth_ref++;
296+
if (verbose)
297+
printf("delta %s (size=%ld.%02ld%%, depth=%d)\n",
298+
argv[i], size_obj*100 / size_orig,
299+
(size_obj*10000 / size_orig)%100,
300+
depth_ref);
301+
} else {
302+
free(buf_delta);
303+
if (verbose)
304+
printf("skip %s (original is smaller)\n", argv[i]);
305+
skip:
306+
depth_ref = depth_trg;
307+
memcpy(head_ref, head_trg, 20);
308+
}
309+
310+
free(buf_ref);
311+
buf_ref = buf_trg;
312+
size_ref = size_trg;
313+
memcpy(sha1_ref, sha1_trg, 20);
314+
}
315+
316+
return 0;
317+
}

0 commit comments

Comments
 (0)