-
Notifications
You must be signed in to change notification settings - Fork 227
Expand file tree
/
Copy path_csv.java
More file actions
234 lines (206 loc) · 9.45 KB
/
_csv.java
File metadata and controls
234 lines (206 loc) · 9.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
/* Copyright (c) Jython Developers */
package org.python.modules._csv;
import org.python.core.ArgParser;
import org.python.core.ClassDictInit;
import org.python.core.Py;
import org.python.core.PyBaseString;
import org.python.core.PyDictionary;
import org.python.core.PyException;
import org.python.core.PyInteger;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.core.PyStringMap;
/**
 * The Python _csv module.
 *
 * Provides the low-level underpinnings of a CSV reading/writing module. Users should not
 * use this module directly, but import the csv.py module instead.
 *
 * <p>The dialect registry and the field size limit are kept in static fields, so they are
 * shared by every user of this class (see the XXX notes on those fields).
 *
 * <p>Functions taking {@code (PyObject[] args, String[] keywords)} expect the positional
 * values first in {@code args}, followed by one value per entry of {@code keywords}; the
 * helpers below split the array on that basis.
 */
public class _csv implements ClassDictInit {

    /**
     * Module docstring, installed under {@code __doc__} by {@link #classDictInit}.
     *
     * The lineterminator example escapes its backslashes so that the rendered text shows
     * the escape sequences themselves rather than embedding a real CR/LF pair in the
     * middle of the example.
     */
    public static PyString __doc__ = Py.newString(
        "CSV parsing and writing.\n" +
        "\n" +
        "This module provides classes that assist in the reading and writing\n" +
        "of Comma Separated Value (CSV) files, and implements the interface\n" +
        "described by PEP 305. Although many CSV files are simple to parse,\n" +
        "the format is not formally defined by a stable specification and\n" +
        "is subtle enough that parsing lines of a CSV file with something\n" +
        "like line.split(\",\") is bound to fail. The module supports three\n" +
        "basic APIs: reading, writing, and registration of dialects.\n" +
        "\n" +
        "\n" +
        "DIALECT REGISTRATION:\n" +
        "\n" +
        "Readers and writers support a dialect argument, which is a convenient\n" +
        "handle on a group of settings. When the dialect argument is a string,\n" +
        "it identifies one of the dialects previously registered with the module.\n" +
        "If it is a class or instance, the attributes of the argument are used as\n" +
        "the settings for the reader or writer:\n" +
        "\n" +
        " class excel:\n" +
        " delimiter = ','\n" +
        " quotechar = '\"'\n" +
        " escapechar = None\n" +
        " doublequote = True\n" +
        " skipinitialspace = False\n" +
        " lineterminator = '\\r\\n'\n" +
        " quoting = QUOTE_MINIMAL\n" +
        "\n" +
        "SETTINGS:\n" +
        "\n" +
        " * quotechar - specifies a one-character string to use as the \n" +
        " quoting character. It defaults to '\"'.\n" +
        " * delimiter - specifies a one-character string to use as the \n" +
        " field separator. It defaults to ','.\n" +
        " * skipinitialspace - specifies how to interpret whitespace which\n" +
        " immediately follows a delimiter. It defaults to False, which\n" +
        " means that whitespace immediately following a delimiter is part\n" +
        " of the following field.\n" +
        " * lineterminator - specifies the character sequence which should \n" +
        " terminate rows.\n" +
        " * quoting - controls when quotes should be generated by the writer.\n" +
        " It can take on any of the following module constants:\n" +
        "\n" +
        " csv.QUOTE_MINIMAL means only when required, for example, when a\n" +
        " field contains either the quotechar or the delimiter\n" +
        " csv.QUOTE_ALL means that quotes are always placed around fields.\n" +
        " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" +
        " fields which do not parse as integers or floating point\n" +
        " numbers.\n" +
        " csv.QUOTE_NONE means that quotes are never placed around fields.\n" +
        " * escapechar - specifies a one-character string used to escape \n" +
        " the delimiter when quoting is set to QUOTE_NONE.\n" +
        " * doublequote - controls the handling of quotes inside fields. When\n" +
        " True, two consecutive quotes are interpreted as one during read,\n" +
        " and when writing, each quote character embedded in the data is\n" +
        " written as two quotes\n" +
        "\n");

    // XXX: should be per PySystemState
    /** Dialect registry: maps a dialect name to the dialect object registered for it. */
    public static PyDictionary _dialects = new PyDictionary();

    // XXX: should be per PySystemState
    /** Max parsed field size; read and replaced through {@code field_size_limit}. */
    public static volatile int field_limit = 128 * 1024;

    /** _csv.Error exception. */
    public static final PyObject Error = Py.makeClass("Error", Py.Exception, exceptionNamespace());

    /**
     * Create a {@code _csv.Error} exception instance.
     *
     * @param message the text to attach to the exception
     * @return a new exception of the {@link #Error} class; the caller is expected to throw it
     */
    public static PyException Error(String message) {
        return new PyException(Error, message);
    }

    /** Module version. */
    public static PyString __version__ = new PyString("1.0");

    /**
     * Populate the module dictionary.
     *
     * Installs the module name, docstring, the Dialect type and the Error class, then
     * one integer constant per {@code QuoteStyle} value, named after the enum constant
     * and valued by its ordinal.
     *
     * @param dict the module dictionary to fill in
     */
    public static void classDictInit(PyObject dict) {
        dict.__setitem__("__name__", Py.newString("_csv"));
        dict.__setitem__("__doc__", __doc__);
        dict.__setitem__("Dialect", PyDialect.TYPE);
        dict.__setitem__("Error", Error);

        for (QuoteStyle style : QuoteStyle.values()) {
            dict.__setitem__(style.name(), Py.newInteger(style.ordinal()));
        }

        // NOTE(review): storing null presumably hides these Java members from the
        // Python-visible namespace -- confirm against the ClassDictInit convention.
        dict.__setitem__("classDictInit", null);
        dict.__setitem__("field_limit", null);
    }

    /**
     * Register a dialect under a name, replacing any entry already stored for that name.
     *
     * Accepts at most two positional arguments: the name and an optional dialect.
     * Keyword arguments are forwarded to the Dialect constructor.
     *
     * @param args positional values followed by keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     * @throws PyException TypeError if more than two positional arguments are given or
     *         the name is not a {@code PyBaseString}
     */
    public static void register_dialect(PyObject[] args, String[] keywords) {
        int argc = args.length - keywords.length;
        if (argc > 2) {
            throw Py.TypeError("register_dialect() expected at most 2 arguments, got " + argc);
        }

        ArgParser ap = parseArgs("register_dialect", args, keywords);
        PyObject name = ap.getPyObject(0);
        PyObject dialect = ap.getPyObject(1, null);

        if (!(name instanceof PyBaseString)) {
            throw Py.TypeError("dialect name must be a string or unicode");
        }

        _dialects.__setitem__(name, dialectFromKwargs(dialect, args, keywords));
    }

    /**
     * Remove a dialect from the registry.
     *
     * @param name the registered dialect name
     * @throws PyException {@code _csv.Error} if no dialect is registered under {@code name}
     */
    public static void unregister_dialect(PyObject name) {
        if (!_dialects.has_key(name)) {
            throw Error("unknown dialect");
        }
        _dialects.__delitem__(name);
    }

    /**
     * Look up a registered dialect.
     *
     * @param name the registered dialect name
     * @return the object stored in the registry for {@code name}
     * @throws PyException {@code _csv.Error} if no dialect is registered under {@code name}
     */
    public static PyObject get_dialect(PyObject name) {
        return get_dialect_from_registry(name);
    }

    /**
     * @return the keys of the dialect registry, i.e. the registered dialect names
     */
    public static PyObject list_dialects() {
        return _dialects.keys();
    }

    /**
     * Create a CSV reader.
     *
     * The first positional argument is turned into an iterator; the optional second one
     * is the dialect. Keyword arguments are forwarded to the Dialect constructor.
     *
     * @param args positional values followed by keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     * @return a new {@code PyReader} over the iterator and the resolved dialect
     */
    public static PyObject reader(PyObject[] args, String[] keywords) {
        ArgParser ap = parseArgs("reader", args, keywords);
        PyObject iterator = Py.iter(ap.getPyObject(0), "argument 1 must be an iterator");
        PyObject dialect = ap.getPyObject(1, null);
        return new PyReader(iterator, dialectFromKwargs(dialect, args, keywords));
    }

    /**
     * Create a CSV writer.
     *
     * The first positional argument must expose a callable {@code write} attribute; that
     * bound callable, not the object itself, is what the writer receives. The optional
     * second positional argument is the dialect, and keyword arguments are forwarded to
     * the Dialect constructor.
     *
     * @param args positional values followed by keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     * @return a new {@code PyWriter} bound to the output object's {@code write} callable
     * @throws PyException TypeError if the first argument has no callable {@code write}
     */
    public static PyObject writer(PyObject[] args, String[] keywords) {
        ArgParser ap = parseArgs("writer", args, keywords);
        PyObject outputFile = ap.getPyObject(0);
        PyObject dialect = ap.getPyObject(1, null);

        PyObject writeline = outputFile.__findattr__("write");
        if (writeline == null || !writeline.isCallable()) {
            throw Py.TypeError("argument 1 must have a \"write\" method");
        }
        return new PyWriter(writeline, dialectFromKwargs(dialect, args, keywords));
    }

    /**
     * @return the current maximum parsed field size
     */
    public static PyInteger field_size_limit() {
        return Py.newInteger(field_limit);
    }

    /**
     * Replace the maximum parsed field size.
     *
     * @param new_limit the new limit; must be a {@code PyInteger}
     * @return the limit that was in effect before this call
     * @throws PyException TypeError if {@code new_limit} is not a {@code PyInteger}
     */
    public static PyInteger field_size_limit(PyObject new_limit) {
        if (!(new_limit instanceof PyInteger)) {
            throw Py.TypeError("limit must be an integer");
        }
        int old_limit = field_limit;
        field_limit = new_limit.asInt();
        return Py.newInteger(old_limit);
    }

    /**
     * Fetch a dialect from the registry, failing if it is absent.
     *
     * @param name the registered dialect name
     * @return the object stored in the registry for {@code name}
     * @throws PyException {@code _csv.Error} if no dialect is registered under {@code name}
     */
    static PyObject get_dialect_from_registry(PyObject name) {
        PyObject dialect = _dialects.__finditem__(name);
        if (dialect == null) {
            throw Error("unknown dialect");
        }
        return dialect;
    }

    /**
     * Return an ArgParser that ignores keyword args.
     *
     * When keywords are present, only the leading positional slice of {@code args} is
     * handed to the parser; the keyword values are dealt with by
     * {@code dialectFromKwargs} instead.
     */
    private static ArgParser parseArgs(String funcName, PyObject[] args, String[] keywords) {
        // XXX: _weakref.ReferenceType has the same code
        if (keywords.length > 0) {
            int argc = args.length - keywords.length;
            PyObject[] justArgs = new PyObject[argc];
            System.arraycopy(args, 0, justArgs, 0, argc);
            args = justArgs;
        }
        return new ArgParser(funcName, args, Py.NoKeywords, Py.NoKeywords);
    }

    /**
     * Return a Dialect instance created or updated from keyword arguments.
     *
     * The Dialect type is called with every keyword value from {@code args}. The
     * positional {@code dialect} is prepended as its first argument only when it is
     * non-null and no {@code dialect} keyword was supplied; if both are given, the
     * positional one is dropped and the keyword value is used.
     *
     * @param dialect the positionally supplied dialect, or null if there was none
     * @param args positional values followed by keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     */
    private static PyDialect dialectFromKwargs(PyObject dialect, PyObject[] args,
                                               String[] keywords) {
        int argc = args.length - keywords.length;

        // was a dialect keyword specified?
        boolean dialectKeyword = false;
        for (String keyword : keywords) {
            if (keyword.equals("dialect")) {
                dialectKeyword = true;
                break;
            }
        }

        PyObject[] dialectArgs;
        if (dialect == null || dialectKeyword) {
            // dialect wasn't passed as a positional arg
            dialectArgs = new PyObject[keywords.length];
            System.arraycopy(args, argc, dialectArgs, 0, keywords.length);
        } else {
            // have dialect -- pass it to dialect_new as a positional arg
            dialectArgs = new PyObject[1 + keywords.length];
            dialectArgs[0] = dialect;
            System.arraycopy(args, argc, dialectArgs, 1, keywords.length);
        }
        return (PyDialect)PyDialect.TYPE.__call__(dialectArgs, keywords);
    }

    /**
     * Build the class namespace for the Error exception, setting its
     * {@code __module__} to {@code _csv}.
     */
    private static PyObject exceptionNamespace() {
        PyObject dict = new PyStringMap();
        dict.__setitem__("__module__", new PyString("_csv"));
        return dict;
    }
}