Skip to content

Commit 55ac692

Browse files
committed
Merge branch 'jc/streaming' into next
* jc/streaming:
  sha1_file: use the correct type (ssize_t, not size_t) for read-style function
  streaming: read loose objects incrementally
  sha1_file.c: expose helpers to read loose objects
  streaming: read non-delta incrementally from a pack
  streaming_write_entry(): support files with holes
  convert: CRLF_INPUT is a no-op in the output codepath
  streaming_write_entry(): use streaming API in write_entry()
  streaming: a new API to read from the object store
  write_entry(): separate two helper functions out
  unpack_object_header(): make it public
  sha1_object_info_extended(): hint about objects in delta-base cache
  sha1_object_info_extended(): expose a bit more info
  packed_object_info_detail(): do not return a string
2 parents 61f4472 + 23c7df6 commit 55ac692

File tree

8 files changed

+593
-41
lines changed

8 files changed

+593
-41
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,7 @@ LIB_H += sha1-lookup.h
556556
LIB_H += sideband.h
557557
LIB_H += sigchain.h
558558
LIB_H += strbuf.h
559+
LIB_H += streaming.h
559560
LIB_H += string-list.h
560561
LIB_H += submodule.h
561562
LIB_H += tag.h
@@ -662,6 +663,7 @@ LIB_OBJS += shallow.o
662663
LIB_OBJS += sideband.o
663664
LIB_OBJS += sigchain.o
664665
LIB_OBJS += strbuf.o
666+
LIB_OBJS += streaming.o
665667
LIB_OBJS += string-list.o
666668
LIB_OBJS += submodule.o
667669
LIB_OBJS += symlinks.o

builtin/verify-pack.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ static void show_pack_info(struct packed_git *p, unsigned int flags)
3333
if (!sha1)
3434
die("internal error pack-check nth-packed-object");
3535
offset = nth_packed_object_offset(p, i);
36-
type = packed_object_info_detail(p, offset, &size, &store_size,
36+
type = typename(packed_object_info_detail(p, offset, &size, &store_size,
3737
&delta_chain_length,
38-
base_sha1);
38+
base_sha1));
3939
if (!stat_only)
4040
printf("%s ", sha1_to_hex(sha1));
4141
if (!delta_chain_length) {

cache.h

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,9 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type,
780780
extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
781781
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
782782
extern int force_object_loose(const unsigned char *sha1, time_t mtime);
783+
extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
784+
extern int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
785+
extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
783786

784787
/* global flag to enable extra checks when accessing packed objects */
785788
extern int do_check_packed_object_crc;
@@ -1021,7 +1024,37 @@ extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
10211024
extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
10221025
extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
10231026
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
1024-
extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
1027+
extern int packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
1028+
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
1029+
1030+
/*
 * Parameter/result record taken by sha1_object_info_extended().
 * NOTE(review): which members are inputs and which are outputs is
 * taken from the "Request"/"Response" labels below -- confirm against
 * the implementation.
 */
struct object_info {
1031+
/* Request */
1032+
/* NOTE(review): presumably where the object size is stored; whether NULL is allowed is not visible here -- confirm */
unsigned long *sizep;
1033+
1034+
/* Response */
1035+
/*
 * Tag for the result. NOTE(review): by their names the values
 * presumably mean cached, loose, packed, and delta-base cache --
 * confirm against the code that sets whence.
 */
enum {
1036+
OI_CACHED,
1037+
OI_LOOSE,
1038+
OI_PACKED,
1039+
OI_DBCACHED
1040+
} whence;
1041+
/* NOTE(review): presumably only the member matching whence is meaningful -- confirm */
union {
1042+
/*
1043+
* struct {
1044+
* ... Nothing to expose in this case
1045+
* } cached;
1046+
* struct {
1047+
* ... Nothing to expose in this case
1048+
* } loose;
1049+
*/
1050+
/* NOTE(review): presumably valid when whence == OI_PACKED -- confirm */
struct {
1051+
struct packed_git *pack;
1052+
off_t offset;
1053+
unsigned int is_delta;
1054+
} packed;
1055+
} u;
1056+
};
1057+
extern int sha1_object_info_extended(const unsigned char *, struct object_info *);
10251058

10261059
/* Dumb servers support */
10271060
extern int update_server_info(int);
@@ -1135,6 +1168,7 @@ extern int convert_to_git(const char *path, const char *src, size_t len,
11351168
struct strbuf *dst, enum safe_crlf checksafe);
11361169
extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
11371170
extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
1171+
extern int can_bypass_conversion(const char *path);
11381172

11391173
/* add */
11401174
/*

convert.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,3 +813,26 @@ int renormalize_buffer(const char *path, const char *src, size_t len, struct str
813813
}
814814
return ret | convert_to_git(path, src, len, dst, 0);
815815
}
816+
817+
/*
818+
* You would be crazy to set CRLF, smudge/clean or ident to
819+
* a large binary blob you would want us not to slurp into
820+
* the memory!
821+
*/
822+
int can_bypass_conversion(const char *path)
823+
{
824+
struct conv_attrs ca;
825+
enum crlf_action crlf_action;
826+
827+
convert_attrs(&ca, path);
828+
829+
if (ca.ident ||
830+
(ca.drv && (ca.drv->smudge || ca.drv->clean)))
831+
return 0;
832+
833+
crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
834+
if ((crlf_action == CRLF_BINARY) || (crlf_action == CRLF_INPUT) ||
835+
(crlf_action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE))
836+
return 1;
837+
return 0;
838+
}

entry.c

Lines changed: 95 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "cache.h"
22
#include "blob.h"
33
#include "dir.h"
4+
#include "streaming.h"
45

56
static void create_directories(const char *path, int path_len,
67
const struct checkout *state)
@@ -91,6 +92,90 @@ static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
9192
return NULL;
9293
}
9394

95+
static int open_output_fd(char *path, struct cache_entry *ce, int to_tempfile)
96+
{
97+
int symlink = (ce->ce_mode & S_IFMT) != S_IFREG;
98+
if (to_tempfile) {
99+
strcpy(path, symlink
100+
? ".merge_link_XXXXXX" : ".merge_file_XXXXXX");
101+
return mkstemp(path);
102+
} else {
103+
return create_file(path, !symlink ? ce->ce_mode : 0666);
104+
}
105+
}
106+
107+
static int fstat_output(int fd, const struct checkout *state, struct stat *st)
108+
{
109+
/* use fstat() only when path == ce->name */
110+
if (fstat_is_reliable() &&
111+
state->refresh_cache && !state->base_dir_len) {
112+
fstat(fd, st);
113+
return 1;
114+
}
115+
return 0;
116+
}
117+
118+
static int streaming_write_entry(struct cache_entry *ce, char *path,
119+
const struct checkout *state, int to_tempfile,
120+
int *fstat_done, struct stat *statbuf)
121+
{
122+
struct git_istream *st;
123+
enum object_type type;
124+
unsigned long sz;
125+
int result = -1;
126+
ssize_t kept = 0;
127+
int fd = -1;
128+
129+
st = open_istream(ce->sha1, &type, &sz);
130+
if (!st)
131+
return -1;
132+
if (type != OBJ_BLOB)
133+
goto close_and_exit;
134+
135+
fd = open_output_fd(path, ce, to_tempfile);
136+
if (fd < 0)
137+
goto close_and_exit;
138+
139+
for (;;) {
140+
char buf[1024 * 16];
141+
ssize_t wrote, holeto;
142+
ssize_t readlen = read_istream(st, buf, sizeof(buf));
143+
144+
if (!readlen)
145+
break;
146+
if (sizeof(buf) == readlen) {
147+
for (holeto = 0; holeto < readlen; holeto++)
148+
if (buf[holeto])
149+
break;
150+
if (readlen == holeto) {
151+
kept += holeto;
152+
continue;
153+
}
154+
}
155+
156+
if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
157+
goto close_and_exit;
158+
else
159+
kept = 0;
160+
wrote = write_in_full(fd, buf, readlen);
161+
162+
if (wrote != readlen)
163+
goto close_and_exit;
164+
}
165+
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
166+
write(fd, "", 1) != 1))
167+
goto close_and_exit;
168+
*fstat_done = fstat_output(fd, state, statbuf);
169+
170+
close_and_exit:
171+
close_istream(st);
172+
if (0 <= fd)
173+
result = close(fd);
174+
if (result && 0 <= fd)
175+
unlink(path);
176+
return result;
177+
}
178+
94179
static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
95180
{
96181
unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
@@ -101,6 +186,12 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
101186
size_t wrote, newsize = 0;
102187
struct stat st;
103188

189+
if ((ce_mode_s_ifmt == S_IFREG) &&
190+
can_bypass_conversion(path) &&
191+
!streaming_write_entry(ce, path, state, to_tempfile,
192+
&fstat_done, &st))
193+
goto finish;
194+
104195
switch (ce_mode_s_ifmt) {
105196
case S_IFREG:
106197
case S_IFLNK:
@@ -128,30 +219,16 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
128219
size = newsize;
129220
}
130221

131-
if (to_tempfile) {
132-
if (ce_mode_s_ifmt == S_IFREG)
133-
strcpy(path, ".merge_file_XXXXXX");
134-
else
135-
strcpy(path, ".merge_link_XXXXXX");
136-
fd = mkstemp(path);
137-
} else if (ce_mode_s_ifmt == S_IFREG) {
138-
fd = create_file(path, ce->ce_mode);
139-
} else {
140-
fd = create_file(path, 0666);
141-
}
222+
fd = open_output_fd(path, ce, to_tempfile);
142223
if (fd < 0) {
143224
free(new);
144225
return error("unable to create file %s (%s)",
145226
path, strerror(errno));
146227
}
147228

148229
wrote = write_in_full(fd, new, size);
149-
/* use fstat() only when path == ce->name */
150-
if (fstat_is_reliable() &&
151-
state->refresh_cache && !to_tempfile && !state->base_dir_len) {
152-
fstat(fd, &st);
153-
fstat_done = 1;
154-
}
230+
if (!to_tempfile)
231+
fstat_done = fstat_output(fd, state, &st);
155232
close(fd);
156233
free(new);
157234
if (wrote != size)
@@ -167,6 +244,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
167244
return error("unknown file mode for %s in index", path);
168245
}
169246

247+
finish:
170248
if (state->refresh_cache) {
171249
if (!fstat_done)
172250
lstat(ce->name, &st);

0 commit comments

Comments
 (0)