1- use crate :: datasource:: schema_adapter:: { SchemaAdapter , SchemaMapper } ;
2- use arrow_array:: RecordBatch ;
3- use arrow_schema:: { Fields , Schema , SchemaRef } ;
4- use datafusion_common:: deep:: { can_rewrite_field, try_rewrite_record_batch, try_rewrite_record_batch_with_mappings} ;
5- use datafusion_common:: plan_err;
6- use std:: sync:: Arc ;
7- use log:: trace;
8-
9- #[ derive( Clone , Debug ) ]
10- pub ( crate ) struct NestedSchemaAdapter {
11- /// The schema for the table, projected to include only the fields being output (projected) by the
12- /// associated ParquetExec
13- pub projected_table_schema : SchemaRef ,
14- /// The entire table schema for the table we're using this to adapt.
15- ///
16- /// This is used to evaluate any filters pushed down into the scan
17- /// which may refer to columns that are not referred to anywhere
18- /// else in the plan.
19- pub table_schema : SchemaRef ,
20- }
21-
22- impl NestedSchemaAdapter {
23- fn map_schema_nested (
24- & self ,
25- fields : & Fields ,
26- ) -> datafusion_common:: Result < ( Arc < NestedSchemaMapping > , Vec < usize > ) > {
27- let mut projection = Vec :: with_capacity ( fields. len ( ) ) ;
28- let mut field_mappings = vec ! [ None ; self . table_schema. fields( ) . len( ) ] ;
29-
30- // start from the destination fields
31- for ( table_idx, table_field) in self . table_schema . fields . iter ( ) . enumerate ( ) {
32- // if the file exists in the source, check if we can rewrite it to the destination,
33- // and add it to the projections
34- if let Some ( ( file_idx, file_field) ) = fields. find ( table_field. name ( ) ) {
35- if can_rewrite_field ( table_field. clone ( ) , file_field. clone ( ) , true ) {
36- field_mappings[ table_idx] = Some ( projection. len ( ) ) ;
37- projection. push ( file_idx) ;
38- } else {
39- return plan_err ! (
40- "Cannot cast file schema field {} of type {:?} to table schema field of type {:?}" ,
41- file_field. name( ) ,
42- file_field. data_type( ) ,
43- table_field. data_type( )
44- ) ;
45- }
46- }
47- }
48- Ok ( (
49- Arc :: new ( NestedSchemaMapping {
50- projected_table_schema : self . projected_table_schema . clone ( ) ,
51- field_mappings,
52- table_schema : self . table_schema . clone ( ) ,
53- } ) ,
54- projection,
55- ) )
56- }
57- }
58-
59- impl SchemaAdapter for NestedSchemaAdapter {
60- fn map_column_index ( & self , index : usize , file_schema : & Schema ) -> Option < usize > {
61- let field = self . projected_table_schema . field ( index) ;
62- Some ( file_schema. fields . find ( field. name ( ) ) ?. 0 )
63- }
1+ //! TODO: module doc
642
65- fn map_schema (
66- & self ,
67- file_schema : & Schema ,
68- ) -> datafusion_common:: Result < ( Arc < dyn SchemaMapper > , Vec < usize > ) > {
69- // self.map_schema_nested(file_schema.fields())
70- // .map(|(s, v)| (s as Arc<dyn SchemaMapper>, v))
71- trace ! ( target: "deep" , "map_schema: file_schema: {:#?}" , file_schema) ;
72- trace ! ( target: "deep" , "map_schema: table_schema: {:#?}" , self . table_schema) ;
73- trace ! ( target: "deep" , "map_schema: projected_table_schema: {:#?}" , self . projected_table_schema) ;
74-
75- let mut projection = Vec :: with_capacity ( file_schema. fields ( ) . len ( ) ) ;
76- let mut field_mappings = vec ! [ None ; self . projected_table_schema. fields( ) . len( ) ] ;
77-
78- for ( file_idx, file_field) in file_schema. fields . iter ( ) . enumerate ( ) {
79- if let Some ( ( table_idx, table_field) ) =
80- self . projected_table_schema . fields ( ) . find ( file_field. name ( ) )
81- {
82- match can_rewrite_field ( table_field. clone ( ) , file_field. clone ( ) , true ) {
83- true => {
84- field_mappings[ table_idx] = Some ( projection. len ( ) ) ;
85- projection. push ( file_idx) ;
86- }
87- false => {
88- return plan_err ! (
89- "Cannot cast file schema field {} of type {:?} to table schema field of type {:?}" ,
90- file_field. name( ) ,
91- file_field. data_type( ) ,
92- table_field. data_type( )
93- )
94- }
95- }
96- }
97- }
98-
99- Ok ( (
100- Arc :: new ( NestedSchemaMapping {
101- projected_table_schema : self . projected_table_schema . clone ( ) ,
102- field_mappings,
103- table_schema : self . table_schema . clone ( ) ,
104- } ) ,
105- projection,
106- ) )
107- }
108- }
3+ use crate :: datasource:: schema_adapter:: SchemaMapper ;
4+ use arrow_array:: RecordBatch ;
5+ use arrow_schema:: SchemaRef ;
6+ use datafusion_common:: deep:: { try_rewrite_record_batch, try_rewrite_record_batch_with_mappings} ;
1097
8+ /// TODO: struct doc
1109#[ derive( Debug ) ]
11110pub struct NestedSchemaMapping {
11211 /// The schema of the table. This is the expected schema after conversion and it should match
@@ -221,7 +120,7 @@ mod tests {
221120 true ,
222121 ) ,
223122 ] ) ) ;
224- let out = rewrite_schema (
123+ let _ = rewrite_schema (
225124 schema,
226125 & vec ! [ 1 ] ,
227126 & HashMap :: from ( [
@@ -237,7 +136,7 @@ mod tests {
237136 async fn test_rewrite ( ) -> crate :: error:: Result < ( ) > {
238137 let _ = env_logger:: try_init ( ) ;
239138
240- let message_type = "
139+ let _message_type = "
241140 message schema {
242141 REQUIRED INT32 int1;
243142 OPTIONAL INT32 int2;
@@ -634,13 +533,13 @@ mod tests {
634533 let _ = env_logger:: try_init ( ) ;
635534 let ctx = SessionContext :: new ( ) ;
636535
637- let dfr = ctx
536+ let _dfr = ctx
638537 . sql (
639538 r#"
640539 create external table
641540 test
642541 stored as parquet
643- location '/Users/adragomi/work/arrow/ benchmark/profile_export_prod_delta/part-00001-1b493913-ef97-4da6-9f8c-da1506b378f1-c000.snappy .parquet'
542+ location '../ benchmark/adobe_1day_sorted .parquet'
644543 "# ,
645544 )
646545 . await
@@ -672,7 +571,7 @@ mod tests {
672571 let results = df
673572 . collect ( )
674573 . await ?;
675- print_batches ( results. as_slice ( ) ) ;
574+ print_batches ( results. as_slice ( ) ) . ok ( ) ;
676575 info ! ( "results: {}" , results. len( ) ) ;
677576
678577 Ok ( ( ) )
0 commit comments