Skip to content

Commit 0660626

Browse files
author
Junio C Hamano
committed
binary diff: further updates.
This updates the user interface and the generated diff data format. * "diff --binary" is used to signal that we want an e-mailable binary patch. It implies --full-index and -p. * "apply --allow-binary-replacement" acquired a short synonym, "apply --binary". * After the "GIT binary patch\n" header line there is a token that records which binary patch mechanism was used, so that we can extend it later. Currently two mechanisms are defined: "literal" and "delta". The former records the deflated postimage and the latter records the deflated delta from the preimage to the postimage. Purely for implementation convenience, I added the length of the data before deflation after these "literal/delta" tokens (otherwise the decoding side would need to guess and reallocate the buffer while inflating). Improvement patches are very welcome. Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent 051308f commit 0660626

File tree

5 files changed

+304
-96
lines changed

5 files changed

+304
-96
lines changed

apply.c

Lines changed: 97 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ struct patch {
114114
char *new_name, *old_name, *def_name;
115115
unsigned int old_mode, new_mode;
116116
int is_rename, is_copy, is_new, is_delete, is_binary;
117+
#define BINARY_DELTA_DEFLATED 1
118+
#define BINARY_LITERAL_DEFLATED 2
119+
unsigned long deflate_origlen;
117120
int lines_added, lines_deleted;
118121
int score;
119122
struct fragment *fragments;
@@ -969,9 +972,11 @@ static inline int metadata_changes(struct patch *patch)
969972

970973
static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
971974
{
972-
/* We have read "GIT binary patch\n"; what follows is a
973-
* sequence of 'length-byte' followed by base-85 encoded
974-
* delta data.
975+
/* We have read "GIT binary patch\n"; what follows is a line
976+
* that says the patch method (currently, either "deflated
977+
* literal" or "deflated delta") and the length of data before
978+
* deflating; a sequence of 'length-byte' followed by base-85
979+
* encoded data follows.
975980
*
976981
* Each 5-byte sequence of base-85 encodes up to 4 bytes,
977982
* and we would limit the patch line to 66 characters,
@@ -982,11 +987,27 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
982987
*/
983988
int llen, used;
984989
struct fragment *fragment;
985-
char *delta = NULL;
990+
char *data = NULL;
986991

987-
patch->is_binary = 1;
988992
patch->fragments = fragment = xcalloc(1, sizeof(*fragment));
989-
used = 0;
993+
994+
/* Grab the type of patch */
995+
llen = linelen(buffer, size);
996+
used = llen;
997+
linenr++;
998+
999+
if (!strncmp(buffer, "delta ", 6)) {
1000+
patch->is_binary = BINARY_DELTA_DEFLATED;
1001+
patch->deflate_origlen = strtoul(buffer + 6, NULL, 10);
1002+
}
1003+
else if (!strncmp(buffer, "literal ", 8)) {
1004+
patch->is_binary = BINARY_LITERAL_DEFLATED;
1005+
patch->deflate_origlen = strtoul(buffer + 8, NULL, 10);
1006+
}
1007+
else
1008+
return error("unrecognized binary patch at line %d: %.*s",
1009+
linenr-1, llen-1, buffer);
1010+
buffer += llen;
9901011
while (1) {
9911012
int byte_length, max_byte_length, newsize;
9921013
llen = linelen(buffer, size);
@@ -1015,16 +1036,16 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch)
10151036
byte_length <= max_byte_length - 4)
10161037
goto corrupt;
10171038
newsize = fragment->size + byte_length;
1018-
delta = xrealloc(delta, newsize);
1019-
if (decode_85(delta + fragment->size,
1039+
data = xrealloc(data, newsize);
1040+
if (decode_85(data + fragment->size,
10201041
buffer + 1,
10211042
byte_length))
10221043
goto corrupt;
10231044
fragment->size = newsize;
10241045
buffer += llen;
10251046
size -= llen;
10261047
}
1027-
fragment->patch = delta;
1048+
fragment->patch = data;
10281049
return used;
10291050
corrupt:
10301051
return error("corrupt binary patch at line %d: %.*s",
@@ -1425,6 +1446,61 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag)
14251446
return offset;
14261447
}
14271448

1449+
static char *inflate_it(const void *data, unsigned long size,
1450+
unsigned long inflated_size)
1451+
{
1452+
z_stream stream;
1453+
void *out;
1454+
int st;
1455+
1456+
memset(&stream, 0, sizeof(stream));
1457+
1458+
stream.next_in = (unsigned char *)data;
1459+
stream.avail_in = size;
1460+
stream.next_out = out = xmalloc(inflated_size);
1461+
stream.avail_out = inflated_size;
1462+
inflateInit(&stream);
1463+
st = inflate(&stream, Z_FINISH);
1464+
if ((st != Z_STREAM_END) || stream.total_out != inflated_size) {
1465+
free(out);
1466+
return NULL;
1467+
}
1468+
return out;
1469+
}
1470+
1471+
static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch)
1472+
{
1473+
unsigned long dst_size;
1474+
struct fragment *fragment = patch->fragments;
1475+
void *data;
1476+
void *result;
1477+
1478+
data = inflate_it(fragment->patch, fragment->size,
1479+
patch->deflate_origlen);
1480+
if (!data)
1481+
return error("corrupt patch data");
1482+
switch (patch->is_binary) {
1483+
case BINARY_DELTA_DEFLATED:
1484+
result = patch_delta(desc->buffer, desc->size,
1485+
data,
1486+
patch->deflate_origlen,
1487+
&dst_size);
1488+
free(desc->buffer);
1489+
desc->buffer = result;
1490+
free(data);
1491+
break;
1492+
case BINARY_LITERAL_DEFLATED:
1493+
free(desc->buffer);
1494+
desc->buffer = data;
1495+
dst_size = patch->deflate_origlen;
1496+
break;
1497+
}
1498+
if (!desc->buffer)
1499+
return -1;
1500+
desc->size = desc->alloc = dst_size;
1501+
return 0;
1502+
}
1503+
14281504
static int apply_binary(struct buffer_desc *desc, struct patch *patch)
14291505
{
14301506
const char *name = patch->old_name ? patch->old_name : patch->new_name;
@@ -1466,18 +1542,20 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
14661542
"'%s' but it is not empty", name);
14671543
}
14681544

1469-
if (desc->buffer) {
1545+
get_sha1_hex(patch->new_sha1_prefix, sha1);
1546+
if (!memcmp(sha1, null_sha1, 20)) {
14701547
free(desc->buffer);
14711548
desc->alloc = desc->size = 0;
1472-
}
1473-
get_sha1_hex(patch->new_sha1_prefix, sha1);
1474-
if (!memcmp(sha1, null_sha1, 20))
1549+
desc->buffer = NULL;
14751550
return 0; /* deletion patch */
1551+
}
14761552

14771553
if (has_sha1_file(sha1)) {
1554+
/* We already have the postimage */
14781555
char type[10];
14791556
unsigned long size;
14801557

1558+
free(desc->buffer);
14811559
desc->buffer = read_sha1_file(sha1, type, &size);
14821560
if (!desc->buffer)
14831561
return error("the necessary postimage %s for "
@@ -1486,28 +1564,13 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch)
14861564
desc->alloc = desc->size = size;
14871565
}
14881566
else {
1489-
char type[10];
1490-
unsigned long src_size, dst_size;
1491-
void *src;
1492-
1493-
get_sha1_hex(patch->old_sha1_prefix, sha1);
1494-
src = read_sha1_file(sha1, type, &src_size);
1495-
if (!src)
1496-
return error("the necessary preimage %s for "
1497-
"'%s' cannot be read",
1498-
patch->old_sha1_prefix, name);
1499-
1500-
/* patch->fragment->patch has the delta data and
1501-
* we should apply it to the preimage.
1567+
/* We have verified desc matches the preimage;
1568+
* apply the patch data to it, which is stored
1569+
* in the patch->fragments->{patch,size}.
15021570
*/
1503-
desc->buffer = patch_delta(src, src_size,
1504-
(void*) patch->fragments->patch,
1505-
patch->fragments->size,
1506-
&dst_size);
1507-
if (!desc->buffer)
1571+
if (apply_binary_fragment(desc, patch))
15081572
return error("binary patch does not apply to '%s'",
15091573
name);
1510-
desc->size = desc->alloc = dst_size;
15111574

15121575
/* verify that the result matches */
15131576
write_sha1_file_prepare(desc->buffer, desc->size, blob_type,
@@ -2102,7 +2165,8 @@ int main(int argc, char **argv)
21022165
diffstat = 1;
21032166
continue;
21042167
}
2105-
if (!strcmp(arg, "--allow-binary-replacement")) {
2168+
if (!strcmp(arg, "--allow-binary-replacement") ||
2169+
!strcmp(arg, "--binary")) {
21062170
allow_binary_replacement = 1;
21072171
continue;
21082172
}

base85.c

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#include "cache.h"
2+
3+
#undef DEBUG_85

/*
 * Trace helpers: say/say1/say2 print to stderr with zero, one or two
 * format arguments when DEBUG_85 is defined, and expand to an empty
 * statement otherwise.
 */
#ifndef DEBUG_85
#define say(a) do {} while(0)
#define say1(a,b) do {} while(0)
#define say2(a,b,c) do {} while(0)
#else
#define say(a) fprintf(stderr, a)
#define say1(a,b) fprintf(stderr, a, b)
#define say2(a,b,c) fprintf(stderr, a, b, c)
#endif
14+
15+
/*
 * Encoding alphabet: en85[d] is the character that represents the
 * base85 digit d (0..84).
 */
static const char en85[] = {
	/* digits 0..9 */
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	/* digits 10..35 */
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
	'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
	/* digits 36..61 */
	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
	'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	/* digits 62..84 */
	'!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<',
	'=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~'
};
27+
28+
static char de85[256];
29+
static void prep_base85(void)
30+
{
31+
int i;
32+
if (de85['Z'])
33+
return;
34+
for (i = 0; i < ARRAY_SIZE(en85); i++) {
35+
int ch = en85[i];
36+
de85[ch] = i + 1;
37+
}
38+
}
39+
40+
int decode_85(char *dst, char *buffer, int len)
41+
{
42+
prep_base85();
43+
44+
say2("decode 85 <%.*s>", len/4*5, buffer);
45+
while (len) {
46+
unsigned acc = 0;
47+
int cnt;
48+
for (cnt = 0; cnt < 5; cnt++, buffer++) {
49+
int ch = *((unsigned char *)buffer);
50+
int de = de85[ch];
51+
if (!de)
52+
return error("invalid base85 alphabet %c", ch);
53+
de--;
54+
if (cnt == 4) {
55+
/*
56+
* Detect overflow. The largest
57+
* 5-letter possible is "|NsC0" to
58+
* encode 0xffffffff, and "|NsC" gives
59+
* 0x03030303 at this point (i.e.
60+
* 0xffffffff = 0x03030303 * 85).
61+
*/
62+
if (0x03030303 < acc ||
63+
(0x03030303 == acc && de))
64+
error("invalid base85 sequence %.5s",
65+
buffer-3);
66+
}
67+
acc = acc * 85 + de;
68+
say1(" <%08x>", acc);
69+
}
70+
say1(" %08x", acc);
71+
for (cnt = 0; cnt < 4 && len; cnt++, len--) {
72+
*dst++ = (acc >> 24) & 0xff;
73+
acc = acc << 8;
74+
}
75+
}
76+
say("\n");
77+
78+
return 0;
79+
}
80+
81+
void encode_85(char *buf, unsigned char *data, int bytes)
82+
{
83+
prep_base85();
84+
85+
say("encode 85");
86+
while (bytes) {
87+
unsigned acc = 0;
88+
int cnt;
89+
for (cnt = 0; cnt < 4 && bytes; cnt++, bytes--) {
90+
int ch = *data++;
91+
acc |= ch << ((3-cnt)*8);
92+
}
93+
say1(" %08x", acc);
94+
for (cnt = 0; cnt < 5; cnt++) {
95+
int val = acc % 85;
96+
acc /= 85;
97+
buf[4-cnt] = en85[val];
98+
}
99+
buf += 5;
100+
}
101+
say("\n");
102+
103+
*buf = 0;
104+
}
105+
106+
#ifdef DEBUG_85
/*
 * Stand-alone test driver, built only when DEBUG_85 is defined.
 *
 *   -e <text>  encode <text> (1..52 bytes) and print it prefixed with
 *              its length letter ('A'..'Z' for 1..26, 'a'..'z' for
 *              27..52)
 *   -d <line>  decode a line consisting of a length letter followed
 *              by base85 text
 *   -t         encode four 0xff bytes
 *
 * Returns 0 on success and 1 on a usage or decoding error.
 */
int main(int ac, char **av)
{
	char buf[1024];

	if (ac < 2) {
		fprintf(stderr, "usage: (-e <text> | -d <line> | -t)\n");
		return 1;
	}
	if (!strcmp(av[1], "-e") && 2 < ac) {
		int len = strlen(av[2]);
		/* One length letter can only describe 1..52 bytes. */
		if (len < 1 || 52 < len) {
			fprintf(stderr, "-e takes 1 to 52 bytes\n");
			return 1;
		}
		encode_85(buf, (unsigned char *)av[2], len);
		if (len <= 26) len = len + 'A' - 1;
		else len = len + 'a' - 26 - 1;
		printf("encoded: %c%s\n", len, buf);
		return 0;
	}
	if (!strcmp(av[1], "-d") && 2 < ac) {
		int len = *av[2];
		if ('A' <= len && len <= 'Z') len = len - 'A' + 1;
		else if ('a' <= len && len <= 'z') len = len - 'a' + 26 + 1;
		else {
			fprintf(stderr, "bad length letter\n");
			return 1;
		}
		/* decode_85() reads five letters per (partial) 4 bytes. */
		if (strlen(av[2] + 1) < (size_t)((len + 3) / 4 * 5)) {
			fprintf(stderr, "encoded text too short\n");
			return 1;
		}
		if (decode_85(buf, av[2]+1, len))
			return 1;
		printf("decoded: %.*s\n", len, buf);
		return 0;
	}
	if (!strcmp(av[1], "-t")) {
		unsigned char t[4] = { 0xff, 0xff, 0xff, 0xff };
		encode_85(buf, t, 4);
		printf("encoded: D%s\n", buf);
		return 0;
	}
	fprintf(stderr, "usage: (-e <text> | -d <line> | -t)\n");
	return 1;
}
#endif

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,5 +365,6 @@ extern void setup_pager(void);
365365

366366
/* base85 */
367367
int decode_85(char *dst, char *line, int linelen);
368+
void encode_85(char *buf, unsigned char *data, int bytes);
368369

369370
#endif /* CACHE_H */

0 commit comments

Comments
 (0)