Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ The Python SDK documentation is available at: https://docs.feldera.com/python
uv pip install feldera
```

For Arrow IPC query support, install the optional Arrow extra:

```bash
uv pip install 'feldera[arrow]'
```

### Example usage

The Python client interacts with the API server of the Feldera instance.
Expand Down
24 changes: 23 additions & 1 deletion python/feldera/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import deque
from datetime import datetime
from threading import Event
from typing import Any, Callable, Dict, Generator, List, Mapping, Optional
from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, List, Mapping, Optional
from uuid import UUID

import pandas
Expand Down Expand Up @@ -36,6 +36,9 @@
from feldera.stats import InputEndpointStatus, OutputEndpointStatus, PipelineStatistics
from feldera.types import CheckpointMetadata

if TYPE_CHECKING:
import pyarrow as pa


class Pipeline:
def __init__(self, client: FelderaClient):
Expand Down Expand Up @@ -975,6 +978,25 @@ def query_parquet(self, query: str, path: str):

self.client.query_as_parquet(self.name, query, path)

def query_arrow(self, query: str) -> Generator["pa.RecordBatch", None, None]:
    """
    Run an ad-hoc SQL query against this pipeline, streaming the result
    back as PyArrow RecordBatches.

    Note:
        Only materialized tables and views can be ``SELECT``-ed from.

    :param query: The SQL query to execute.

    :raises FelderaAPIError: If the pipeline is not in a RUNNING or PAUSED
        state.
    :raises FelderaAPIError: If a non materialized table or view is queried.
    :raises FelderaAPIError: If the query itself is invalid.

    :return: A generator that yields ``pyarrow.RecordBatch`` objects.
    """
    # Pure delegation: the client owns the HTTP/IPC streaming machinery.
    batches = self.client.query_as_arrow(self.name, query)
    return batches

def query_tabular(self, query: str) -> Generator[str, None, None]:
"""
Executes a SQL query on this pipeline and returns the result as a
Expand Down
47 changes: 46 additions & 1 deletion python/feldera/rest/feldera_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pathlib
import time
from decimal import Decimal
from typing import Any, Dict, Generator, Mapping, Optional
from typing import TYPE_CHECKING, Any, Dict, Generator, Mapping, Optional
from urllib.parse import quote

import requests
Expand All @@ -18,6 +18,9 @@

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
import pyarrow as pa


def _validate_no_none_keys_in_map(data):
def validate_no_none_keys(d: Dict[Any, Any]) -> None:
Expand All @@ -38,6 +41,17 @@ def _prepare_boolean_input(value: bool) -> str:
return "true" if value else "false"


def _import_pyarrow_ipc():
try:
import pyarrow.ipc as ipc
except ImportError as exc:
raise ImportError(
"pyarrow is required for Arrow IPC queries. Install it with `pip install feldera[arrow]`."
) from exc

return ipc


class FelderaClient:
"""
A client for the Feldera HTTP API.
Expand Down Expand Up @@ -1217,6 +1231,37 @@ def query_as_parquet(self, pipeline_name: str, query: str, path: str):
file.write(chunk)
file.close()

def query_as_arrow(
    self, pipeline_name: str, query: str
) -> Generator["pa.RecordBatch", None, None]:
    """
    Run an ad-hoc query on the given pipeline and stream the result back
    as PyArrow RecordBatches.

    :param pipeline_name: The name of the pipeline to query.
    :param query: The SQL query to be executed.
    :return: A generator that yields each query batch as a ``pyarrow.RecordBatch``.
    """
    # Fail fast with an actionable message if pyarrow is missing, before
    # any network traffic happens.
    arrow_ipc = _import_pyarrow_ipc()

    response: requests.Response = self.http.get(
        path=f"/pipelines/{pipeline_name}/query",
        params={
            "pipeline_name": pipeline_name,
            "sql": query,
            "format": "arrow_ipc",
        },
        stream=True,
    )

    try:
        # Decode the Arrow IPC stream straight off the raw socket so batches
        # are yielded incrementally instead of buffering the whole result.
        with arrow_ipc.open_stream(response.raw) as reader:
            yield from reader
    finally:
        # Release the HTTP connection even if the consumer stops iterating
        # early (generator close) or decoding fails mid-stream.
        response.close()

def query_as_json(
self, pipeline_name: str, query: str
) -> Generator[Mapping[str, Any], None, None]:
Expand Down
9 changes: 8 additions & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ dependencies = [
"ruff>=0.6.9",
"PyJWT>=2.12.0",
]

[project.optional-dependencies]
arrow = [
"pyarrow>=14.0",
]

[project.urls]
Homepage = "https://www.feldera.com"
Documentation = "https://docs.feldera.com/python"
Expand All @@ -43,7 +49,8 @@ dev = [
"sphinx-rtd-theme==2.0.0",
"sphinx==7.3.7",
"simplejson==3.20.1",
"confluent-kafka>=2.2.0"
"confluent-kafka>=2.2.0",
"pyarrow>=14.0",
]

[tool.pytest.ini_options]
Expand Down
33 changes: 32 additions & 1 deletion python/tests/platform/test_shared_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import gzip
import io
import json
import os
Expand All @@ -6,9 +7,9 @@
import time
import unittest
import zipfile
import gzip

import pandas as pd
import pytest

from feldera import Pipeline
from feldera.enums import CompletionTokenStatus, PipelineFieldSelector, PipelineStatus
Expand Down Expand Up @@ -167,6 +168,36 @@ def test_adhoc_query_json(self):
got = list(resp)
self.assertCountEqual(got, expected)

def test_adhoc_query_arrow(self):
    """Arrow IPC ad-hoc queries must agree with the JSON query results."""
    pa = pytest.importorskip("pyarrow")

    self.pipeline.start()
    TEST_CLIENT.push_to_pipeline(self.pipeline.name, "tbl", "csv", "1\n2\n")

    # The JSON code path is the reference: whatever ids it reports, both
    # Arrow entry points must reproduce.
    reference_ids = [
        row["id"]
        for row in TEST_CLIENT.query_as_json(
            self.pipeline.name,
            "SELECT * FROM tbl ORDER BY id",
        )
    ]

    client_table = pa.Table.from_batches(
        list(
            TEST_CLIENT.query_as_arrow(
                self.pipeline.name,
                "SELECT * FROM tbl ORDER BY id",
            )
        )
    )
    assert client_table.column("id").to_pylist() == reference_ids

    pipeline_table = pa.Table.from_batches(
        list(self.pipeline.query_arrow("SELECT * FROM tbl ORDER BY id"))
    )
    assert pipeline_table.column("id").to_pylist() == reference_ids

def test_local(self):
"""
CREATE TABLE students (
Expand Down
143 changes: 143 additions & 0 deletions python/tests/unit/test_query_as_arrow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""Unit tests for FelderaClient.query_as_arrow and Pipeline.query_arrow."""

import builtins
import io
import sys
from unittest.mock import MagicMock

import pytest

from feldera.rest.feldera_client import FelderaClient


def _import_arrow_modules():
    """Return ``(pyarrow, pyarrow.ipc)``, skipping the test when unavailable."""
    arrow = pytest.importorskip("pyarrow")
    arrow_ipc = pytest.importorskip("pyarrow.ipc")
    return arrow, arrow_ipc


def _make_ipc_bytes(table) -> bytes:
    """Encode a ``pyarrow.Table`` as Arrow IPC stream bytes."""
    _, arrow_ipc = _import_arrow_modules()
    sink = io.BytesIO()
    with arrow_ipc.new_stream(sink, table.schema) as writer:
        # An empty table still produces a valid schema-only stream, so only
        # the row data itself is conditional.
        if table.num_rows > 0:
            writer.write_table(table)
    return sink.getvalue()


def _mock_response(ipc_bytes: bytes) -> MagicMock:
"""Return a mock response whose ``raw`` is an Arrow IPC byte stream."""
resp = MagicMock()
resp.raw = io.BytesIO(ipc_bytes)
return resp


@pytest.fixture()
def client() -> FelderaClient:
    """A ``FelderaClient`` whose HTTP layer is fully mocked (no network)."""
    # __new__ sidesteps __init__, so no connection attempt is made.
    instance = FelderaClient.__new__(FelderaClient)
    instance.http = MagicMock()
    return instance


class TestQueryAsArrow:
    """Behavioural tests for ``FelderaClient.query_as_arrow`` over a mocked HTTP layer."""

    def test_non_empty_result_yields_correct_data(self, client: FelderaClient):
        # Round-trip: serialise a known table to IPC bytes, serve them through
        # the mocked response, and check the decoded batches reproduce it.
        pa, _ = _import_arrow_modules()
        schema = pa.schema([("id", pa.int64()), ("name", pa.utf8())])
        expected = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]}, schema=schema)
        client.http.get.return_value = _mock_response(_make_ipc_bytes(expected))

        batches = list(client.query_as_arrow("my_pipeline", "SELECT id, name FROM t"))
        result = pa.Table.from_batches(batches, schema=schema)

        assert len(batches) > 0
        assert result.schema == schema
        assert result.num_rows == 3
        assert result.column("id").to_pylist() == [1, 2, 3]
        assert result.column("name").to_pylist() == ["a", "b", "c"]

    def test_http_called_with_correct_params(self, client: FelderaClient):
        # Pin the exact request shape: path, query params, and streaming mode.
        pa, _ = _import_arrow_modules()
        schema = pa.schema([("id", pa.int64())])
        table = pa.table({"id": [42]}, schema=schema)
        client.http.get.return_value = _mock_response(_make_ipc_bytes(table))

        # query_as_arrow is a generator: list() forces it to run and issue the GET.
        list(client.query_as_arrow("my_pipeline", "SELECT id FROM t"))

        client.http.get.assert_called_once_with(
            path="/pipelines/my_pipeline/query",
            params={
                "pipeline_name": "my_pipeline",
                "sql": "SELECT id FROM t",
                "format": "arrow_ipc",
            },
            stream=True,
        )

    def test_empty_result_yields_no_batches(self, client: FelderaClient):
        # A zero-row table serialises to a schema-only IPC stream; decoding it
        # must yield no RecordBatches at all (not a single empty batch).
        pa, _ = _import_arrow_modules()
        schema = pa.schema([("id", pa.int64()), ("value", pa.float64())])
        empty = pa.table(
            {
                "id": pa.array([], type=pa.int64()),
                "value": pa.array([], type=pa.float64()),
            },
            schema=schema,
        )
        client.http.get.return_value = _mock_response(_make_ipc_bytes(empty))

        result_batches = list(
            client.query_as_arrow("my_pipeline", "SELECT id, value FROM t WHERE false")
        )

        assert result_batches == []

    def test_missing_pyarrow_raises_helpful_import_error(
        self, client: FelderaClient, monkeypatch
    ):
        # Simulate pyarrow being absent by intercepting the import machinery.
        real_import = builtins.__import__

        def _import(name, globals=None, locals=None, fromlist=(), level=0):
            if name == "pyarrow" or name.startswith("pyarrow."):
                raise ImportError("No module named 'pyarrow'")
            return real_import(name, globals, locals, fromlist, level)

        # Order matters: purge any cached pyarrow modules BEFORE patching
        # __import__, otherwise `import pyarrow.ipc` could hit sys.modules
        # and never reach our interceptor.
        monkeypatch.delitem(sys.modules, "pyarrow", raising=False)
        monkeypatch.delitem(sys.modules, "pyarrow.ipc", raising=False)
        monkeypatch.setattr(builtins, "__import__", _import)

        # next() drives the generator far enough to trigger the import check.
        with pytest.raises(ImportError, match="pip install feldera\\[arrow\\]"):
            next(client.query_as_arrow("my_pipeline", "SELECT 1"))

        # The import failure must happen before any network call is attempted.
        client.http.get.assert_not_called()

    def test_response_closed_after_full_consumption(self, client: FelderaClient):
        # The generator's finally-block must release the HTTP response once
        # the stream is fully consumed.
        pa, _ = _import_arrow_modules()
        schema = pa.schema([("id", pa.int64())])
        table = pa.table({"id": [1, 2]}, schema=schema)
        resp = _mock_response(_make_ipc_bytes(table))
        client.http.get.return_value = resp

        list(client.query_as_arrow("my_pipeline", "SELECT id FROM t"))

        resp.close.assert_called_once()


class TestPipelineQueryArrow:
    """Tests for the thin ``Pipeline.query_arrow`` wrapper."""

    def test_query_arrow_delegates_to_client(self):
        """Pipeline.query_arrow must forward to client.query_as_arrow."""
        from feldera.pipeline import Pipeline

        # __new__ avoids Pipeline.__init__, which would need a live client.
        pipeline = Pipeline.__new__(Pipeline)
        pipeline.client = MagicMock()
        pipeline._inner = MagicMock()
        pipeline._inner.name = "pipe1"

        sentinel = object()
        pipeline.client.query_as_arrow.return_value = sentinel

        returned = pipeline.query_arrow("SELECT x FROM v")

        pipeline.client.query_as_arrow.assert_called_once_with(
            "pipe1", "SELECT x FROM v"
        )
        assert returned is sentinel
Loading