Skip to content

Commit 59dbb33

Browse files
authored
fix: Add possibility to overwrite send_receive_timeout for clickhouse offline store (#5792)
* add changes + fix linting issues Signed-off-by: lukas.valatka <lukas.valatka@cast.ai> * generalize based on pr suggestion Signed-off-by: lukas.valatka <lukas.valatka@cast.ai> * add test verifying config parsing Signed-off-by: lukas.valatka <lukas.valatka@cast.ai> * rework test into integration, since CH instance is needed Signed-off-by: lukas.valatka <lukas.valatka@cast.ai> --------- Signed-off-by: lukas.valatka <lukas.valatka@cast.ai>
1 parent edefc3f commit 59dbb33

File tree

4 files changed

+110
-7
lines changed

4 files changed

+110
-7
lines changed

sdk/python/feast/infra/offline_stores/contrib/clickhouse_offline_store/tests/data_source.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from feast.infra.offline_stores.contrib.clickhouse_offline_store.clickhouse_source import (
1616
ClickhouseSource,
1717
)
18+
from feast.infra.utils.clickhouse.clickhouse_config import ClickhouseConfig
19+
from feast.infra.utils.clickhouse.connection_utils import get_client
1820
from tests.integration.feature_repos.universal.data_source_creator import (
1921
DataSourceCreator,
2022
)
@@ -114,3 +116,29 @@ def create_saved_dataset_destination(self):
114116

115117
def teardown(self):
116118
pass
119+
120+
121+
def test_get_client_with_additional_params(clickhouse_container):
122+
"""
123+
Test that get_client works with a real ClickHouse container and properly passes
124+
additional settings like send_receive_timeout.
125+
"""
126+
# Create config with custom send_receive_timeout
127+
config = ClickhouseConfig(
128+
host=clickhouse_container.get_container_host_ip(),
129+
port=clickhouse_container.get_exposed_port(8123),
130+
user=CLICKHOUSE_USER,
131+
password=CLICKHOUSE_PASSWORD,
132+
database=CLICKHOUSE_OFFLINE_DB,
133+
additional_client_args={"send_receive_timeout": 60},
134+
)
135+
136+
# Get client and verify it works
137+
client = get_client(config)
138+
139+
# Verify client is connected and functional by running a simple query
140+
result = client.query("SELECT 1 AS test_value")
141+
assert result.result_rows == [(1,)]
142+
143+
# Verify the send_receive_timeout was applied
144+
assert client.timeout._read == 60

sdk/python/feast/infra/utils/clickhouse/clickhouse_config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Any
2+
13
from pydantic import ConfigDict, StrictStr
24

35
from feast.repo_config import FeastConfigBaseModel
@@ -11,4 +13,8 @@ class ClickhouseConfig(FeastConfigBaseModel):
1113
password: StrictStr
1214
use_temporary_tables_for_entity_df: bool = True
1315

16+
# See https://github.com/ClickHouse/clickhouse-connect/blob/main/clickhouse_connect/driver/__init__.py#L51
17+
# Some typical ones e.g. send_receive_timeout (read_timeout), etc
18+
additional_client_args: dict[str, Any] | None = None
19+
1420
model_config = ConfigDict(frozen=True)

sdk/python/feast/infra/utils/clickhouse/connection_utils.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,24 @@
1111
def get_client(config: ClickhouseConfig) -> Client:
1212
# Clickhouse client is not thread-safe, so we need to create a separate instance for each thread.
1313
if not hasattr(thread_local, "clickhouse_client"):
14-
thread_local.clickhouse_client = clickhouse_connect.get_client(
15-
host=config.host,
16-
port=config.port,
17-
user=config.user,
18-
password=config.password,
19-
database=config.database,
20-
)
14+
additional_client_args = config.additional_client_args
15+
16+
if additional_client_args:
17+
thread_local.clickhouse_client = clickhouse_connect.get_client(
18+
host=config.host,
19+
port=config.port,
20+
user=config.user,
21+
password=config.password,
22+
database=config.database,
23+
**additional_client_args,
24+
)
25+
else:
26+
thread_local.clickhouse_client = clickhouse_connect.get_client(
27+
host=config.host,
28+
port=config.port,
29+
user=config.user,
30+
password=config.password,
31+
database=config.database,
32+
)
2133

2234
return thread_local.clickhouse_client

sdk/python/tests/unit/infra/offline_stores/test_clickhouse.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import threading
23
from unittest.mock import MagicMock, patch
34

@@ -6,6 +7,8 @@
67
from feast.infra.utils.clickhouse.clickhouse_config import ClickhouseConfig
78
from feast.infra.utils.clickhouse.connection_utils import get_client, thread_local
89

10+
logger = logging.getLogger(__name__)
11+
912

1013
@pytest.fixture
1114
def clickhouse_config():
@@ -76,3 +79,57 @@ def thread_2_work():
7679
assert client_1a is not client_2, (
7780
"Different threads should get different client instances (not cached)"
7881
)
82+
83+
84+
def test_clickhouse_config_parses_additional_client_args():
85+
"""
86+
Test that ClickhouseConfig correctly parses additional_client_args from a dict,
87+
simulating how it would be parsed from YAML by Pydantic.
88+
"""
89+
# This simulates the dict that would come from yaml.safe_load()
90+
raw_config = {
91+
"host": "localhost",
92+
"port": 8123,
93+
"database": "default",
94+
"user": "default",
95+
"password": "password",
96+
"additional_client_args": {
97+
"send_receive_timeout": 60,
98+
"compress": True,
99+
"client_name": "feast_test",
100+
},
101+
}
102+
103+
# Pydantic should parse this dict into a ClickhouseConfig object
104+
config = ClickhouseConfig(**raw_config)
105+
106+
# Verify all fields are correctly parsed
107+
assert config.host == "localhost"
108+
assert config.port == 8123
109+
assert config.database == "default"
110+
assert config.user == "default"
111+
assert config.password == "password"
112+
113+
# Verify additional_client_args is correctly parsed as a dict
114+
assert config.additional_client_args is not None
115+
assert isinstance(config.additional_client_args, dict)
116+
assert config.additional_client_args["send_receive_timeout"] == 60
117+
assert config.additional_client_args["compress"] is True
118+
assert config.additional_client_args["client_name"] == "feast_test"
119+
120+
121+
def test_clickhouse_config_handles_none_additional_client_args():
122+
"""
123+
Test that ClickhouseConfig correctly handles when additional_client_args is not provided.
124+
"""
125+
raw_config = {
126+
"host": "localhost",
127+
"port": 8123,
128+
"database": "default",
129+
"user": "default",
130+
"password": "password",
131+
}
132+
133+
config = ClickhouseConfig(**raw_config)
134+
135+
assert config.additional_client_args is None

0 commit comments

Comments
 (0)