@@ -23,14 +23,24 @@ import * as fs from 'fs';
2323import * as stream from 'stream' ;
2424import { valueToString } from '../util/pretty' ;
2525import { RecordBatch , RecordBatchReader , AsyncByteQueue } from '../Arrow.node' ;
26+ import { Schema } from '../schema' ;
2627
2728const padLeft = require ( 'pad-left' ) ;
2829const bignumJSONParse = require ( 'json-bignum' ) . parse ;
2930const pipeline = require ( 'util' ) . promisify ( stream . pipeline ) ;
3031const argv = require ( `command-line-args` ) ( cliOpts ( ) , { partial : true } ) ;
3132const files = argv . help ? [ ] : [ ...( argv . file || [ ] ) , ...( argv . _unknown || [ ] ) ] . filter ( Boolean ) ;
3233
33- const state = { ...argv , closed : false , hasRecords : false } ;
34+ const state = { ...argv , closed : false , maxColWidths : [ 10 ] } ;
35+
/** Options and mutable state shared by the table-printing transform streams. */
interface ToStringState {
    /** Character used to draw horizontal rules. */
    hr: string;
    /** Separator placed between columns. */
    sep: string;
    /** When non-empty, spread into `batch.select(...)` to choose which columns to print. */
    schema: any;
    /** Set once the output pipeline has failed; printing stops when true. */
    closed: boolean;
    /** Whether to print the Schema's metadata. */
    metadata: boolean;
    /** Most recently measured width of each printed column. */
    maxColWidths: number[];
}
3444
3545( async ( ) => {
3646
@@ -40,20 +50,22 @@ const state = { ...argv, closed: false, hasRecords: false };
4050 ] . filter ( Boolean ) as ( ( ) => NodeJS . ReadableStream ) [ ] ;
4151
4252 let reader : RecordBatchReader | null ;
53+ let hasReaders = false ;
4354
4455 for ( const source of sources ) {
4556 if ( state . closed ) { break ; }
4657 for await ( reader of recordBatchReaders ( source ) ) {
58+ hasReaders = true ;
4759 const source = reader . toNodeStream ( ) ;
48- const xform = batchesToString ( state ) ;
60+ const xform = batchesToString ( state , reader . schema ) ;
4961 const sink = new stream . PassThrough ( ) ;
5062 sink . pipe ( process . stdout , { end : false } ) ;
5163 await pipeline ( source , xform , sink ) . catch ( ( ) => state . closed = true ) ;
5264 }
5365 if ( state . closed ) { break ; }
5466 }
5567
56- return state . hasRecords ? 0 : print_usage ( ) ;
68+ return hasReaders ? 0 : print_usage ( ) ;
5769} ) ( )
5870. then ( ( x ) => + x || 0 , ( err ) => {
5971 if ( err ) {
@@ -93,44 +105,96 @@ async function *recordBatchReaders(createSourceStream: () => NodeJS.ReadableStre
93105 }
94106}
95107
/**
 * Creates a Transform stream that consumes RecordBatches (object mode) and
 * emits a utf8 text table: a top rule, optional Schema metadata, a header row
 * (repeated every 350 rows or whenever the column widths change), one line per
 * row, and a closing rule.
 *
 * @param state - Shared CLI state; `state.maxColWidths` is updated as batches are measured.
 * @param schema - Schema of the reader being printed; supplies the header names and the
 *                 metadata shown when the stream contains no batches.
 * @returns The Transform stream to place between the reader and the output sink.
 */
function batchesToString(state: ToStringState, schema: Schema) {

    let rowId = 0;
    let batchId = -1;
    let maxColWidths = [10];
    const { hr, sep } = state;

    // NOTE(review): the header is built from the full reader schema, while rows come from
    // `batch.select(...state.schema)` below -- confirm the header should not be narrowed
    // to the selected columns as well.
    const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(valueToString);

    state.maxColWidths = header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length));

    return new stream.Transform({
        transform,
        encoding: 'utf8',
        writableObjectMode: true,
        readableObjectMode: false,
        final(this: stream.Transform, cb: (error?: Error | null) => void) {
            // If there were no batches, print the top rule, the header, and the metadata here.
            if (batchId === -1) {
                this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n\n`);
                // Use the same widths as the rule above; the local `maxColWidths` is still
                // the initial `[10]` because no batch was ever measured.
                this.push(`${formatRow(header, state.maxColWidths, sep)}\n`);
                if (state.metadata && schema.metadata.size > 0) {
                    this.push(`metadata:\n${formatMetadata(schema.metadata)}\n`);
                }
            }
            this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n\n`);
            cb();
        }
    });

    function transform(this: stream.Transform, batch: RecordBatch, _enc: string, cb: (error?: Error, data?: any) => void) {

        batch = !(state.schema && state.schema.length) ? batch : batch.select(...state.schema);

        if (state.closed) { return cb(undefined, null); }

        // Pass one to convert to strings and count max column widths
        state.maxColWidths = measureColumnWidths(rowId, batch, header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length)));

        // If this is the first batch in a stream, print a top horizontal rule, the schema
        // metadata (when requested), and -- if the batch has no rows -- the header.
        if (++batchId === 0) {
            this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`);
            if (state.metadata && batch.schema.metadata.size > 0) {
                this.push(`metadata:\n${formatMetadata(batch.schema.metadata)}\n`);
                this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`);
            }
            if (batch.length <= 0 || batch.numCols <= 0) {
                maxColWidths = state.maxColWidths;
                this.push(`${formatRow(header, maxColWidths, sep)}\n`);
            }
        }

        if (batch.length > 0 && batch.numCols > 0) {
            // If any of the column widths changed, print the header again
            if (rowId % 350 !== 0 && JSON.stringify(state.maxColWidths) !== JSON.stringify(maxColWidths)) {
                this.push(`${formatRow(header, state.maxColWidths, sep)}\n`);
            }
            maxColWidths = state.maxColWidths;
            for (const row of batch) {
                if (state.closed) { break; } else if (!row) { continue; }
                // Repeat the header every 350 rows.
                if (rowId % 350 === 0) {
                    this.push(`${formatRow(header, maxColWidths, sep)}\n`);
                }
                // Print the id before incrementing so row ids start at 0.
                this.push(`${formatRow([rowId++, ...row].map(valueToString), maxColWidths, sep)}\n`);
            }
        }
        cb();
    }
}
131176
132- function formatRow ( row : string [ ] = [ ] , maxColWidths : number [ ] = [ ] , separator : string = ' |' ) {
133- return row . map ( ( x , j ) => padLeft ( x , maxColWidths [ j ] ) ) . join ( separator ) + '\n' ;
/**
 * Builds the horizontal border line printed above and below the table.
 *
 * The result is one leading space followed by `hr` repeated to fill the sum of
 * the column widths plus one separator width per column, minus two.
 *
 * @param maxColWidths - Width of each column, in characters.
 * @param hr - Character used to draw the rule.
 * @param sep - Column separator; only its length is used.
 * @returns The rule, without a trailing newline.
 */
function horizontalRule(maxColWidths: number[], hr = '-', sep = ' |') {
    const separatorsWidth = maxColWidths.length * sep.length;
    const ruleWidth = maxColWidths.reduce((total, width) => total + width, separatorsWidth - 2);
    // Clamp so an empty column list cannot request a negative width.
    return ` ${''.padStart(Math.max(ruleWidth, 0), hr)}`;
}
180+
/**
 * Formats one table row: each cell is left-padded with spaces to its column
 * width and the cells are joined with `sep`.
 *
 * @param row - Cell values, already converted to strings.
 * @param maxColWidths - Width of each column; a column with no width is not padded.
 * @param sep - Separator placed between cells.
 * @returns The formatted row, without a trailing newline.
 */
function formatRow(row: string[] = [], maxColWidths: number[] = [], sep = ' |') {
    return row.map((cell, j) => String(cell).padStart(maxColWidths[j] || 0)).join(sep);
}
184+
/**
 * Renders Schema metadata as indented `key: value` lines joined by `', \n'`.
 *
 * @param metadata - Metadata entries to print.
 * @returns The formatted entries, without a trailing newline.
 */
function formatMetadata(metadata: Map<string, string>) {

    return [...metadata].map(([key, val]) =>
        ` ${key}: ${formatMetadataValue(val)}`
    ).join(', \n');

    /**
     * Pretty-prints a value that parses as JSON (2-space indent) and leaves any
     * other value as-is, then indents continuation lines by one space.
     */
    function formatMetadataValue(value: string = '') {
        let parsed = value;
        try {
            parsed = JSON.stringify(JSON.parse(value), null, 2);
        } catch (e) { /* not JSON: keep the raw value */ }
        return valueToString(parsed).split('\n').join('\n ');
    }
}
135199
136200function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: number[] = []) {
@@ -201,8 +265,19 @@ function cliOpts() {
201265 } ,
202266 {
203267 type : String ,
204- name : 'sep' , optional : true , default : '|' ,
205- description : 'The column separator character'
268+ name : 'sep' , optional : true , default : ' |' ,
269+ description : 'The column separator character (default: " |")'
270+ } ,
271+ {
272+ type : String ,
273+ name : 'hr' , optional : true , default : '-' ,
274+ description : 'The horizontal border character (default: "-")'
275+ } ,
276+ {
277+ type : Boolean ,
278+ name : 'metadata' , alias : 'm' ,
279+ optional : true , default : false ,
280+ description : 'Flag to print Schema metadata (default: false)'
206281 } ,
207282 {
208283 type : Boolean ,
@@ -234,14 +309,15 @@ function print_usage() {
234309 {
235310 header : 'Example' ,
236311 content : [
237- '$ arrow2csv --schema foo baz -f simple.arrow --sep ","' ,
238- ' ' ,
239- '> "row_id", "foo: Int32", "bar: Float64", "baz: Utf8"' ,
240- '> 0, 1, 1, "aa"' ,
241- '> 1, null, null, null' ,
242- '> 2, 3, null, null' ,
243- '> 3, 4, 4, "bbb"' ,
244- '> 4, 5, 5, "cccc"' ,
312+ '$ arrow2csv --schema foo baz --sep "," -f simple.arrow' ,
313+ '>--------------------------------------' ,
314+ '> "row_id", "foo: Int32", "baz: Utf8"' ,
315+ '> 0, 1, "aa"' ,
316+ '> 1, null, null' ,
317+ '> 2, 3, null' ,
318+ '> 3, 4, "bbb"' ,
319+ '> 4, 5, "cccc"' ,
320+ '>--------------------------------------' ,
245321 ]
246322 }
247323 ] ) ) ;
0 commit comments