Skip to content

Commit 654b5e4

Browse files
committed
feat: make online_write_batch_size configurable in MaterializationConfig
Signed-off-by: cutoutsy <cutoutsy@gmail.com>
1 parent de67bdd commit 654b5e4

3 files changed

Lines changed: 34 additions & 10 deletions

File tree

sdk/python/feast/infra/compute_engines/local/nodes.py

Lines changed: 25 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -374,16 +374,32 @@ def execute(self, context: ExecutionContext) -> ArrowTableValue:
374374
for entity in self.feature_view.entity_columns
375375
}
376376

377-
rows_to_write = _convert_arrow_to_proto(
378-
input_table, self.feature_view, join_key_to_value_type
379-
)
380-
381-
online_store.online_write_batch(
382-
config=context.repo_config,
383-
table=self.feature_view,
384-
data=rows_to_write,
385-
progress=lambda x: None,
377+
batch_size = (
378+
context.repo_config.materialization_config.online_write_batch_size
386379
)
380+
if batch_size is None:
381+
# Default: write all rows in a single batch (backward compatible)
382+
rows_to_write = _convert_arrow_to_proto(
383+
input_table, self.feature_view, join_key_to_value_type
384+
)
385+
online_store.online_write_batch(
386+
config=context.repo_config,
387+
table=self.feature_view,
388+
data=rows_to_write,
389+
progress=lambda x: None,
390+
)
391+
else:
392+
# Batched writes when batch_size is configured
393+
for batch in input_table.to_batches(max_chunksize=batch_size):
394+
rows_to_write = _convert_arrow_to_proto(
395+
batch, self.feature_view, join_key_to_value_type
396+
)
397+
online_store.online_write_batch(
398+
config=context.repo_config,
399+
table=self.feature_view,
400+
data=rows_to_write,
401+
progress=lambda x: None,
402+
)
387403

388404
if self.feature_view.offline:
389405
offline_store = context.offline_store

sdk/python/feast/repo_config.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -214,6 +214,11 @@ class MaterializationConfig(BaseModel):
214214
""" bool: If true, feature retrieval jobs will only pull the latest feature values for each entity.
215215
If false, feature retrieval jobs will pull all feature values within the specified time range. """
216216

217+
online_write_batch_size: Optional[int] = Field(default=None, gt=0)
218+
""" int: Number of rows to write to online store per batch during materialization.
219+
If None (default), all rows are written in a single batch for backward compatibility.
220+
Set to a positive integer (e.g., 10000) to enable batched writes. """
221+
217222

218223
class OpenLineageConfig(FeastBaseModel):
219224
"""Configuration for OpenLineage integration.

sdk/python/tests/unit/infra/compute_engines/local/test_nodes.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
LocalOutputNode,
1616
LocalTransformationNode,
1717
)
18+
from feast.repo_config import MaterializationConfig
1819

1920
backend = PandasBackend()
2021
now = pd.Timestamp.utcnow()
@@ -37,9 +38,11 @@
3738

3839
def create_context(node_outputs):
3940
# Setup execution context
41+
repo_config = MagicMock()
42+
repo_config.materialization_config = MaterializationConfig()
4043
return ExecutionContext(
4144
project="test_proj",
42-
repo_config=MagicMock(),
45+
repo_config=repo_config,
4346
offline_store=MagicMock(),
4447
online_store=MagicMock(),
4548
entity_defs=MagicMock(),

0 commit comments

Comments
 (0)