@@ -84,6 +84,7 @@ static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = DEFAULT_PAGE_SIZE;
// Default values for writer settings. In this diff rendering a fused number
// pair (e.g. "8484") is the old/new line number of an unchanged line, and
// "NN+" marks a line added by the change.
8484static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024 ;
8585static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 64 * 1024 * 1024 ;
8686static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true ;
// Added by this change: default value of the max_stats_size constructor
// argument of ColumnProperties.
// NOTE(review): declared int64_t while that argument is size_t, so the default
// is implicitly converted; the unit is presumably bytes -- confirm.
87+ static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096 ;
8788static constexpr Encoding::type DEFAULT_ENCODING = Encoding::PLAIN ;
// The default writer version is PARQUET_1_0.
8889static constexpr ParquetVersion::type DEFAULT_WRITER_VERSION =
8990 ParquetVersion::PARQUET_1_0 ;
// ColumnProperties holds five settings: encoding, compression codec,
// dictionary-enabled flag, statistics-enabled flag and maximum statistics size.
// This hunk replaces the former public data members ("-" lines) with private,
// underscore-suffixed members reached through setters and const getters
// ("+" lines), and introduces the max_stats_size setting.
@@ -95,16 +96,46 @@ class PARQUET_EXPORT ColumnProperties {
// Constructor. Every argument has a default taken from a DEFAULT_* constant,
// so the class can be constructed with no arguments.
9596 ColumnProperties (Encoding::type encoding = DEFAULT_ENCODING ,
9697 Compression::type codec = DEFAULT_COMPRESSION_TYPE ,
9798 bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED ,
// Removed version: four arguments initializing four public fields.
98- bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED )
99- : encoding(encoding),
100- codec (codec),
101- dictionary_enabled(dictionary_enabled),
102- statistics_enabled(statistics_enabled) {}
103-
104- Encoding::type encoding;
105- Compression::type codec;
106- bool dictionary_enabled;
107- bool statistics_enabled;
// Added version: a fifth argument, max_stats_size, is appended after the
// existing ones, and the initializer list now targets the private members.
99+ bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED ,
100+ size_t max_stats_size = DEFAULT_MAX_STATISTICS_SIZE )
101+ : encoding_(encoding),
102+ codec_ (codec),
103+ dictionary_enabled_(dictionary_enabled),
104+ statistics_enabled_(statistics_enabled),
105+ max_stats_size_(max_stats_size) {}
106+
// Setters: each one overwrites exactly one member with the given value.
107+ void set_encoding (Encoding::type encoding) { encoding_ = encoding; }
108+
// Named set_compression, although the member it writes is codec_.
109+ void set_compression (Compression::type codec) { codec_ = codec; }
110+
111+ void set_dictionary_enabled (bool dictionary_enabled) {
112+ dictionary_enabled_ = dictionary_enabled;
113+ }
114+
115+ void set_statistics_enabled (bool statistics_enabled) {
116+ statistics_enabled_ = statistics_enabled;
117+ }
118+
119+ void set_max_statistics_size (size_t max_stats_size) {
120+ max_stats_size_ = max_stats_size;
121+ }
122+
// Getters: const member functions returning the stored value by copy.
123+ Encoding::type encoding () const { return encoding_; }
124+
// Returns codec_ (the getter is named compression, matching set_compression).
125+ Compression::type compression () const { return codec_; }
126+
127+ bool dictionary_enabled () const { return dictionary_enabled_; }
128+
129+ bool statistics_enabled () const { return statistics_enabled_; }
130+
131+ size_t max_statistics_size () const { return max_stats_size_; }
132+
// Storage for the five settings; private as of this change.
133+ private:
134+ Encoding::type encoding_;
135+ Compression::type codec_;
136+ bool dictionary_enabled_;
137+ bool statistics_enabled_;
138+ size_t max_stats_size_;
108139};
109140
110141class PARQUET_EXPORT WriterProperties {
@@ -127,12 +158,12 @@ class PARQUET_EXPORT WriterProperties {
127158 }
128159
// Sets the dictionary-enabled flag of default_column_properties_ to true and
// returns this Builder, so calls can be chained. The "-" line is the former
// direct field write; the "+" line is its setter-based replacement.
129160 Builder* enable_dictionary () {
130- default_column_properties_.dictionary_enabled = true ;
161+ default_column_properties_.set_dictionary_enabled ( true ) ;
131162 return this ;
132163 }
133164
// Sets the dictionary-enabled flag of default_column_properties_ to false and
// returns this Builder, so calls can be chained. The "-" line is the former
// direct field write; the "+" line is its setter-based replacement.
134165 Builder* disable_dictionary () {
135- default_column_properties_.dictionary_enabled = false ;
166+ default_column_properties_.set_dictionary_enabled ( false ) ;
136167 return this ;
137168 }
138169
@@ -196,7 +227,7 @@ class PARQUET_EXPORT WriterProperties {
196227 throw ParquetException (" Can't use dictionary encoding as fallback encoding" );
197228 }
198229
199- default_column_properties_.encoding = encoding_type;
230+ default_column_properties_.set_encoding ( encoding_type) ;
200231 return this ;
201232 }
202233
@@ -228,7 +259,12 @@ class PARQUET_EXPORT WriterProperties {
228259 }
229260
// Stores `codec` as the compression codec of default_column_properties_ and
// returns this Builder. The "-" line is the former direct field write. The
// "return this" and closing brace appear as "+" lines because the diff reuses
// the old closing lines for the method inserted right after this one.
230261 Builder* compression (Compression::type codec) {
231- default_column_properties_.codec = codec;
262+ default_column_properties_.set_compression (codec);
263+ return this ;
264+ }
265+
// New in this change: stores `max_stats_sz` as the maximum statistics size of
// default_column_properties_ and returns this Builder, so calls can be
// chained. The last two lines are unchanged context lines of the diff.
266+ Builder* max_statistics_size (size_t max_stats_sz) {
267+ default_column_properties_.set_max_statistics_size (max_stats_sz);
232268 return this ;
233269 }
234270
@@ -243,12 +279,12 @@ class PARQUET_EXPORT WriterProperties {
243279 }
244280
// Sets the statistics-enabled flag of default_column_properties_ to true and
// returns this Builder, so calls can be chained. The "-" line is the former
// direct field write; the "+" line is its setter-based replacement.
245281 Builder* enable_statistics () {
246- default_column_properties_.statistics_enabled = true ;
282+ default_column_properties_.set_statistics_enabled ( true ) ;
247283 return this ;
248284 }
249285
// Sets the statistics-enabled flag of default_column_properties_ to false and
// returns this Builder, so calls can be chained. The "-" line is the former
// direct field write; the "+" line is its setter-based replacement.
250286 Builder* disable_statistics () {
251- default_column_properties_.statistics_enabled = false ;
287+ default_column_properties_.set_statistics_enabled ( false ) ;
251288 return this ;
253289 }
254290
@@ -280,12 +316,12 @@ class PARQUET_EXPORT WriterProperties {
280316 return it->second ;
281317 };
282318
283- for (const auto & item : encodings_) get (item.first ).encoding = item.second ;
284- for (const auto & item : codecs_) get (item.first ).codec = item.second ;
319+ for (const auto & item : encodings_) get (item.first ).set_encoding ( item.second ) ;
320+ for (const auto & item : codecs_) get (item.first ).set_compression ( item.second ) ;
285321 for (const auto & item : dictionary_enabled_)
286- get (item.first ).dictionary_enabled = item.second ;
322+ get (item.first ).set_dictionary_enabled ( item.second ) ;
287323 for (const auto & item : statistics_enabled_)
288- get (item.first ).statistics_enabled = item.second ;
324+ get (item.first ).set_statistics_enabled ( item.second ) ;
289325
290326 return std::shared_ptr<WriterProperties>(
291327 new WriterProperties (pool_, dictionary_pagesize_limit_, write_batch_size_,
@@ -348,19 +384,23 @@ class PARQUET_EXPORT WriterProperties {
348384 }
349385
// Returns the encoding reported by column_properties(path). The "-" line read
// the former public field; the "+" line calls the accessor that replaced it.
350386 Encoding::type encoding (const std::shared_ptr<schema::ColumnPath>& path) const {
351- return column_properties (path).encoding ;
387+ return column_properties (path).encoding () ;
352388 }
353389
// Returns the compression codec reported by column_properties(path). The old
// code read the public field `codec`; the new code calls compression().
354390 Compression::type compression (const std::shared_ptr<schema::ColumnPath>& path) const {
355- return column_properties (path).codec ;
391+ return column_properties (path).compression () ;
356392 }
357393
// Returns the dictionary-enabled flag reported by column_properties(path).
// The "-" line read the former public field; the "+" line calls the accessor.
358394 bool dictionary_enabled (const std::shared_ptr<schema::ColumnPath>& path) const {
359- return column_properties (path).dictionary_enabled ;
395+ return column_properties (path).dictionary_enabled () ;
360396 }
361397
// Returns the statistics-enabled flag reported by column_properties(path).
// The "-" line read the former public field; the "+" line calls the accessor.
// The closing brace is a "+" line because the diff reuses the old closing
// brace for the getter inserted right after this one.
362398 bool statistics_enabled (const std::shared_ptr<schema::ColumnPath>& path) const {
363- return column_properties (path).statistics_enabled ;
399+ return column_properties (path).statistics_enabled ();
400+ }
401+
// New in this change: returns the maximum statistics size reported by
// column_properties(path). The closing brace is an unchanged context line.
402+ size_t max_statistics_size (const std::shared_ptr<schema::ColumnPath>& path) const {
403+ return column_properties (path).max_statistics_size ();
364404 }
365405
366406 private:
0 commit comments