Skip to content

Commit 058b141

Browse files
committed
Py_UniversalNewlineFread(): Many changes.
+ Continued looping until n bytes in the buffer have been filled, not just when n bytes have been read from the file. This repairs the bug that f.readlines() only sucked up the first 8192 bytes of the file on Windows when universal newlines were enabled and f was opened in U mode (see Python-Dev -- this was the ultimate cause of the test_inspect.py failure). + Changed prototype to take a char* buffer (void* doesn't make much sense). + Squashed size_t vs int mismatches (in particular, besides the unsigned vs signed distinction, size_t may be larger than int). + Gets out under all error conditions now (it's possible for fread() to suffer an error even if it returns a number larger than 0 -- any "short read" is an error or EOF condition). + Rearranged and simplified declarations.
1 parent ea572b2 commit 058b141

File tree

2 files changed

+52
-42
lines changed

2 files changed

+52
-42
lines changed

Include/fileobject.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ extern DL_IMPORT(int) PyFile_WriteString(const char *, PyObject *);
4141
extern DL_IMPORT(int) PyObject_AsFileDescriptor(PyObject *);
4242

4343
/* The default encoding used by the platform file system APIs
44-
If non-NULL, this is different than the default encoding for strings
44+
If non-NULL, this is different than the default encoding for strings
4545
*/
4646
extern DL_IMPORT(const char *) Py_FileSystemDefaultEncoding;
4747

@@ -51,12 +51,12 @@ extern DL_IMPORT(const char *) Py_FileSystemDefaultEncoding;
5151
*/
5252
#define PY_STDIOTEXTMODE "b"
5353
char *Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
54-
size_t Py_UniversalNewlineFread(void *, size_t, FILE *, PyObject *);
54+
size_t Py_UniversalNewlineFread(char *, size_t, FILE *, PyObject *);
5555
#else
5656
#define PY_STDIOTEXTMODE ""
57-
#define Py_UniversalNewlineFgets(buf, len, fp, obj) (fgets((buf), (len), (fp)))
58-
#define Py_UniversalNewlineFread(buf, len, fp, obj) \
59-
(fread((buf), 1, (len), (fp)))
57+
#define Py_UniversalNewlineFgets(buf, len, fp, obj) fgets((buf), (len), (fp))
58+
#define Py_UniversalNewlineFread(buf, len, fp, obj)
59+
fread((buf), 1, (len), (fp))
6060
#endif /* WITH_UNIVERSAL_NEWLINES */
6161
#ifdef __cplusplus
6262
}

Objects/fileobject.c

Lines changed: 47 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,7 +1228,7 @@ file_readlines(PyFileObject *f, PyObject *args)
12281228
else {
12291229
Py_BEGIN_ALLOW_THREADS
12301230
errno = 0;
1231-
nread = Py_UniversalNewlineFread(buffer+nfilled,
1231+
nread = Py_UniversalNewlineFread(buffer+nfilled,
12321232
buffersize-nfilled, f->f_fp, (PyObject *)f);
12331233
Py_END_ALLOW_THREADS
12341234
shortread = (nread < buffersize-nfilled);
@@ -1943,7 +1943,7 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
19431943
int newlinetypes = 0;
19441944
int skipnextlf = 0;
19451945
int univ_newline = 1;
1946-
1946+
19471947
if (fobj) {
19481948
if (!PyFile_Check(fobj)) {
19491949
errno = ENXIO; /* What can you do... */
@@ -2024,61 +2024,71 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
20242024
** the different types of newlines seen.
20252025
*/
20262026
size_t
2027-
Py_UniversalNewlineFread(void *buf, size_t n,
2027+
Py_UniversalNewlineFread(char *buf, size_t n,
20282028
FILE *stream, PyObject *fobj)
20292029
{
2030-
char *src = buf, *dst = buf, c;
2031-
int nread, ntodo=n;
2032-
int newlinetypes, skipnextlf, univ_newline;
2033-
2030+
char *dst = buf;
2031+
PyFileObject *f = (PyFileObject *)fobj;
2032+
int newlinetypes, skipnextlf;
2033+
2034+
assert(buf != NULL);
2035+
assert(stream != NULL);
2036+
20342037
if (!fobj || !PyFile_Check(fobj)) {
20352038
errno = ENXIO; /* What can you do... */
20362039
return -1;
20372040
}
2038-
univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2039-
if ( !univ_newline )
2041+
if (!f->f_univ_newline)
20402042
return fread(buf, 1, n, stream);
2041-
newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2042-
skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2043-
while (ntodo > 0) {
2044-
if (ferror(stream))
2045-
break;
2046-
nread = fread(dst, 1, ntodo, stream);
2047-
src = dst;
2048-
if (nread <= 0) {
2049-
if (skipnextlf)
2050-
newlinetypes |= NEWLINE_CR;
2051-
break;
2052-
}
2053-
ntodo -= nread;
2054-
while ( nread-- ) {
2055-
c = *src++;
2043+
newlinetypes = f->f_newlinetypes;
2044+
skipnextlf = f->f_skipnextlf;
2045+
/* Invariant: n is the number of bytes remaining to be filled
2046+
* in the buffer.
2047+
*/
2048+
while (n) {
2049+
size_t nread;
2050+
int shortread;
2051+
char *src = dst;
2052+
2053+
nread = fread(dst, 1, n, stream);
2054+
assert(nread <= n);
2055+
shortread = nread != n; /* true iff EOF or error */
2056+
while (nread--) {
2057+
char c = *src++;
20562058
if (c == '\r') {
2057-
/* Save CR as LF and set flag to skip next newline
2058-
*/
2059+
/* Save as LF and set flag to skip next LF. */
20592060
*dst++ = '\n';
2061+
--n;
20602062
skipnextlf = 1;
2061-
} else if (skipnextlf && c == '\n') {
2062-
/* Skip an LF, and remember that we saw CR LF
2063-
*/
2063+
}
2064+
else if (skipnextlf && c == '\n') {
2065+
/* Skip LF, and remember we saw CR LF. */
20642066
skipnextlf = 0;
20652067
newlinetypes |= NEWLINE_CRLF;
2066-
} else {
2067-
/* Normal char to be stored in buffer. Also update
2068-
** the newlinetypes flag if either this is an LF
2069-
** or the previous char was a CR.
2070-
*/
2068+
}
2069+
else {
2070+
/* Normal char to be stored in buffer. Also
2071+
* update the newlinetypes flag if either this
2072+
* is an LF or the previous char was a CR.
2073+
*/
20712074
if (c == '\n')
20722075
newlinetypes |= NEWLINE_LF;
20732076
else if (skipnextlf)
20742077
newlinetypes |= NEWLINE_CR;
20752078
*dst++ = c;
2079+
--n;
20762080
skipnextlf = 0;
20772081
}
20782082
}
2083+
if (shortread) {
2084+
/* If this is EOF, update type flags. */
2085+
if (skipnextlf && feof(stream))
2086+
newlinetypes |= NEWLINE_CR;
2087+
break;
2088+
}
20792089
}
2080-
((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2081-
((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2082-
return dst - (char *)buf;
2090+
f->f_newlinetypes = newlinetypes;
2091+
f->f_skipnextlf = skipnextlf;
2092+
return dst - buf;
20832093
}
20842094
#endif

0 commit comments

Comments
 (0)