@@ -32,7 +32,21 @@ import { JSONVectorAssembler } from '../visitor/jsonvectorassembler';
3232import { ArrayBufferViewInput , toUint8Array } from '../util/buffer' ;
3333import { RecordBatch , _InternalEmptyPlaceholderRecordBatch } from '../recordbatch' ;
3434import { Writable , ReadableInterop , ReadableDOMStreamOptions } from '../io/interfaces' ;
35- import { isPromise , isAsyncIterable , isWritableDOMStream , isWritableNodeStream , isIterable } from '../util/compat' ;
35+ import { isPromise , isAsyncIterable , isWritableDOMStream , isWritableNodeStream , isIterable , isObject } from '../util/compat' ;
36+
37+ export interface RecordBatchStreamWriterOptions { // options accepted by the RecordBatchWriter constructor and RecordBatchStreamWriter.writeAll
38+ /**
39+ * Defaults to `true` when omitted or not a boolean. NOTE(review): presumably controls whether the writer closes itself once its input ends — confirm against the rest of the file.
40+ */
41+ autoDestroy ?: boolean ;
42+ /**
43+ * A flag indicating whether the RecordBatchWriter should construct pre-0.15.0
44+ * encapsulated IPC Messages, which prefix the Message metadata with 4 bytes (its
45+ * int32 length) instead of 8 (a -1 int32 continuation indicator, then the length). Defaults to `false`.
46+ * @see https://issues.apache.org/jira/browse/ARROW-6313
47+ */
48+ writeLegacyIpcFormat ?: boolean ;
49+ }
3650
3751export class RecordBatchWriter < T extends { [ key : string ] : DataType } = any > extends ReadableInterop < Uint8Array > implements Writable < RecordBatch < T > > {
3852
@@ -51,14 +65,17 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
5165 throw new Error ( `"throughDOM" not available in this environment` ) ;
5266 }
5367
54- constructor ( options ?: { autoDestroy : boolean } ) {
68+ constructor ( options ?: RecordBatchStreamWriterOptions ) {
5569 super ( ) ;
56- this . _autoDestroy = options && ( typeof options . autoDestroy === 'boolean' ) ? options . autoDestroy : true ;
70+ isObject ( options ) || ( options = { autoDestroy : true , writeLegacyIpcFormat : false } ) ; // substitute a defaults object when isObject(options) is falsy, so the property reads below never hit undefined
71+ this . _autoDestroy = ( typeof options . autoDestroy === 'boolean' ) ? options . autoDestroy : true ; // any non-boolean value falls back to true
72+ this . _writeLegacyIpcFormat = ( typeof options . writeLegacyIpcFormat === 'boolean' ) ? options . writeLegacyIpcFormat : false ; // any non-boolean value falls back to false (the 8-byte prefix format)
5773 }
5874
5975 protected _position = 0 ;
6076 protected _started = false ;
6177 protected _autoDestroy : boolean ;
78+ protected _writeLegacyIpcFormat : boolean ;
6279 // @ts -ignore
6380 protected _sink = new AsyncByteQueue ( ) ;
6481 protected _schema : Schema | null = null ;
@@ -178,17 +195,22 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
178195 const a = alignment - 1 ;
179196 const buffer = Message . encode ( message ) ;
180197 const flatbufferSize = buffer . byteLength ;
181- const alignedSize = ( flatbufferSize + 4 + a ) & ~ a ;
182- const nPaddingBytes = alignedSize - flatbufferSize - 4 ;
198+ const prefixSize = ! this . _writeLegacyIpcFormat ? 8 : 4 ;
199+ const alignedSize = ( flatbufferSize + prefixSize + a ) & ~ a ;
200+ const nPaddingBytes = alignedSize - flatbufferSize - prefixSize ;
183201
184202 if ( message . headerType === MessageHeader . RecordBatch ) {
185203 this . _recordBatchBlocks . push ( new FileBlock ( alignedSize , message . bodyLength , this . _position ) ) ;
186204 } else if ( message . headerType === MessageHeader . DictionaryBatch ) {
187205 this . _dictionaryBlocks . push ( new FileBlock ( alignedSize , message . bodyLength , this . _position ) ) ;
188206 }
189207
208+ // If not in legacy pre-0.15.0 mode, write the stream continuation indicator
209+ if ( ! this . _writeLegacyIpcFormat ) {
210+ this . _write ( Int32Array . of ( - 1 ) ) ;
211+ }
190212 // Write the flatbuffer size prefix including padding
191- this . _write ( Int32Array . of ( alignedSize - 4 ) ) ;
213+ this . _write ( Int32Array . of ( alignedSize - prefixSize ) ) ;
192214 // Write the flatbuffer
193215 if ( flatbufferSize > 0 ) { this . _write ( buffer ) ; }
194216 // Write any padding
@@ -212,7 +234,10 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
212234
213235 // @ts -ignore
214236 protected _writeFooter ( schema : Schema < T > ) {
215- return this . _writePadding ( 4 ) ; // eos bytes
237+ // eos bytes: a lone zero int32 in legacy mode; otherwise the -1 continuation indicator followed by a zero int32
238+ return this . _writeLegacyIpcFormat
239+ ? this . _write ( Int32Array . of ( 0 ) )
240+ : this . _write ( Int32Array . of ( - 1 , 0 ) ) ;
216241 }
217242
218243 protected _writeMagic ( ) {
@@ -275,12 +300,12 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
275300
276301/** @ignore */
277302export class RecordBatchStreamWriter < T extends { [ key : string ] : DataType } = any > extends RecordBatchWriter < T > {
278- public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : Table < T > | Iterable < RecordBatch < T > > , options ?: { autoDestroy : true } ) : RecordBatchStreamWriter < T > ;
279- public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : AsyncIterable < RecordBatch < T > > , options ?: { autoDestroy : true } ) : Promise < RecordBatchStreamWriter < T > > ;
280- public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : PromiseLike < AsyncIterable < RecordBatch < T > > > , options ?: { autoDestroy : true } ) : Promise < RecordBatchStreamWriter < T > > ;
281- public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : PromiseLike < Table < T > | Iterable < RecordBatch < T > > > , options ?: { autoDestroy : true } ) : Promise < RecordBatchStreamWriter < T > > ;
303+ public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : Table < T > | Iterable < RecordBatch < T > > , options ?: RecordBatchStreamWriterOptions ) : RecordBatchStreamWriter < T > ;
304+ public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : AsyncIterable < RecordBatch < T > > , options ?: RecordBatchStreamWriterOptions ) : Promise < RecordBatchStreamWriter < T > > ;
305+ public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : PromiseLike < AsyncIterable < RecordBatch < T > > > , options ?: RecordBatchStreamWriterOptions ) : Promise < RecordBatchStreamWriter < T > > ;
306+ public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : PromiseLike < Table < T > | Iterable < RecordBatch < T > > > , options ?: RecordBatchStreamWriterOptions ) : Promise < RecordBatchStreamWriter < T > > ;
282307 /** @nocollapse */
283- public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : any , options ?: { autoDestroy : true } ) {
308+ public static writeAll < T extends { [ key : string ] : DataType } = any > ( input : any , options ?: RecordBatchStreamWriterOptions ) {
284309 const writer = new RecordBatchStreamWriter < T > ( options ) ;
285310 if ( isPromise < any > ( input ) ) {
286311 return input . then ( ( x ) => writer . writeAll ( x ) ) ;
@@ -323,8 +348,8 @@ export class RecordBatchFileWriter<T extends { [key: string]: DataType } = any>
323348 schema , MetadataVersion . V4 ,
324349 this . _recordBatchBlocks , this . _dictionaryBlocks
325350 ) ) ;
326- return this
327- . _writePadding ( 4 ) // EOS bytes for sequential readers
351+ return super
352+ . _writeFooter ( schema ) // EOS bytes for sequential readers
328353 . _write ( buffer ) // Write the flatbuffer
329354 . _write ( Int32Array . of ( buffer . byteLength ) ) // then the footer size suffix
330355 . _writeMagic ( ) ; // then the magic suffix
@@ -355,6 +380,8 @@ export class RecordBatchJSONWriter<T extends { [key: string]: DataType } = any>
355380 }
356381
357382 protected _writeMessage ( ) { return this ; } // no-op override: writes nothing and returns the writer for chaining
383+ // @ts -ignore
384+ protected _writeFooter ( schema : Schema < T > ) { return this ; } // no-op override: ignores `schema`, writes no end-of-stream bytes, and returns the writer for chaining
358385 protected _writeSchema ( schema : Schema < T > ) {
359386 return this . _write ( `{\n "schema": ${
360387 JSON . stringify ( { fields : schema . fields . map ( fieldToJSON ) } , null , 2 )
0 commit comments