feldera · abhizer · Jun 25, 2025 · Jun 25, 2025 · Jun 25, 2025 · Jul 3, 2025
diff --git a/python/feldera/_callback_runner.py b/python/feldera/_callback_runner.py
@@ -21,6 +21,7 @@ def __init__(
         view_name: str,
         callback: Callable[[pd.DataFrame, int], None],
         queue: Optional[Queue],
+        case_sensitive: bool,
     ):
         super().__init__()
         self.daemon = True
@@ -30,6 +31,7 @@ def __init__(
         self.callback: Callable[[pd.DataFrame, int], None] = callback
         self.queue: Optional[Queue] = queue
         self.schema: Optional[dict] = None
+        self.case_sensitive: bool = case_sensitive
 
     def run(self):
         """
@@ -66,7 +68,10 @@ def run(self):
             case _CallbackRunnerInstruction.PipelineStarted:
                 # listen to the pipeline
                 gen_obj = self.client.listen_to_pipeline(
-                    self.pipeline_name, self.view_name, format="json"
+                    self.pipeline_name,
+                    self.view_name,
+                    format="json",
+                    case_sensitive=self.case_sensitive,
                 )
 
                 # if there is a queue set up, inform the main thread that the listener has been started, and it can

diff --git a/python/feldera/output_handler.py b/python/feldera/output_handler.py
@@ -13,6 +13,7 @@ def __init__(
         pipeline_name: str,
         view_name: str,
         queue: Optional[Queue],
+        case_sensitive: bool,
     ):
         """
         Initializes the output handler, but doesn't start it.
@@ -32,7 +33,12 @@ def callback(df: pd.DataFrame, _: int):
 
         # sets up the callback runner
         self.handler = CallbackRunner(
-            self.client, self.pipeline_name, self.view_name, callback, queue
+            self.client,
+            self.pipeline_name,
+            self.view_name,
+            callback,
+            queue,
+            case_sensitive,
         )
 
     def start(self):

diff --git a/python/feldera/pipeline.py b/python/feldera/pipeline.py
@@ -3,8 +3,10 @@
 from datetime import datetime
 
 import pandas
+import warnings
+import pyarrow
 
-from typing import List, Dict, Callable, Optional, Generator, Mapping, Any
+from typing import List, Dict, Callable, Optional, Generator, Mapping, Any, Tuple
 from collections import deque
 from queue import Queue
 
@@ -204,7 +206,7 @@ def resume_connector(self, table_name: str, connector_name: str):
 
         self.client.resume_connector(self.name, table_name, connector_name)
 
-    def listen(self, view_name: str) -> OutputHandler:
+    def listen(self, view_name: str, case_sensitive: bool = False) -> OutputHandler:
         """
         Follow the change stream (i.e., the output) of the provided view.
         Returns an output handler to read the changes.
@@ -215,6 +217,7 @@ def listen(self, view_name: str) -> OutputHandler:
         If this method is called once the pipeline has started, you will only get the output from that point onwards.
 
         :param view_name: The name of the view to listen to.
+        :param case_sensitive: True if the view name is case sensitive.
         """
 
         queue: Optional[Queue] = None
@@ -223,13 +226,18 @@ def listen(self, view_name: str) -> OutputHandler:
             queue = Queue(maxsize=1)
             self.views_tx.append({view_name: queue})
 
-        handler = OutputHandler(self.client, self.name, view_name, queue)
+        handler = OutputHandler(
+            self.client, self.name, view_name, queue, case_sensitive
+        )
         handler.start()
 
         return handler
 
     def foreach_chunk(
-        self, view_name: str, callback: Callable[[pandas.DataFrame, int], None]
+        self,
+        view_name: str,
+        callback: Callable[[pandas.DataFrame, int], None],
+        case_sensitive: bool = False,
     ):
         """
         Run the given callback on each chunk of the output of the specified view.
@@ -244,6 +252,8 @@ def foreach_chunk(
                 - **seq_no** -> The sequence number. The sequence number is a monotonically increasing integer that
                   starts from 0. Note that the sequence number is unique for each chunk, but not necessarily contiguous.
 
+        :param case_sensitive: True if the view name is case sensitive.
+
         Please note that the callback is run in a separate thread, so it should be thread-safe.
         Please note that the callback should not block for a long time, as by default, backpressure is enabled and
         will block the pipeline.
@@ -259,7 +269,9 @@ def foreach_chunk(
             queue = Queue(maxsize=1)
             self.views_tx.append({view_name: queue})
 
-        handler = CallbackRunner(self.client, self.name, view_name, callback, queue)
+        handler = CallbackRunner(
+            self.client, self.name, view_name, callback, queue, case_sensitive
+        )
         handler.start()
 
     def wait_for_completion(
@@ -692,9 +704,56 @@ def query(self, query: str) -> Generator[Mapping[str, Any], None, None]:
         :raises FelderaAPIError: If querying a non materialized table or view.
         :raises FelderaAPIError: If the query is invalid.
         """
+        warnings.warn(
+            "function query is deprecated and will be removed in a future version",
+            category=DeprecationWarning,
+            stacklevel=2,
+        )
 
         return self.client.query_as_json(self.name, query)
 
+    def query_pyarrow(self, query: str) -> pyarrow.Table:
+        """
+        Runs an ad-hoc SQL query against this pipeline and collects the whole
+        result set into a single :class:`pyarrow.Table`.
+
+        Note:
+            Only materialized tables and views can be used in ``SELECT``.
+
+        :param query: The SQL query to run.
+        :return: A :class:`pyarrow.Table` with all rows of the result.
+
+        :raises FelderaAPIError: If the pipeline is neither RUNNING nor PAUSED.
+        :raises FelderaAPIError: If a non-materialized table or view is queried.
+        :raises FelderaAPIError: If the query is invalid.
+        """
+        result = self.client.query_as_pyarrow(pipeline_name=self.name, query=query)
+        return result
+
+    def query_pylist(self, query: str) -> List[List[Tuple[str, Any]]]:
+        """
+        Runs an ad-hoc SQL query against this pipeline and returns the result
+        as plain Python objects: one list per row, holding a
+        (column_name, value) tuple for every column.
+
+        Note:
+            Only materialized tables and views can be used in ``SELECT``.
+
+        :param query: The SQL query to run.
+
+        :return: The rows of the result, each one a list of
+            (column_name, value) tuples in column order.
+
+        :raises FelderaAPIError: If the pipeline is neither RUNNING nor PAUSED.
+        :raises FelderaAPIError: If a non-materialized table or view is queried.
+        :raises FelderaAPIError: If the query is invalid.
+        """
+        result = self.query_pyarrow(query)
+        names = result.schema.names
+        # Convert column-wise, then transpose the columns into rows.
+        per_column = [column.to_pylist() for column in result.itercolumns()]
+        return [list(zip(names, values)) for values in zip(*per_column)]
+
     def query_parquet(self, query: str, path: str):
         """
         Executes an ad-hoc SQL query on this pipeline and saves the result to the specified path as a parquet file.

diff --git a/python/feldera/rest/feldera_client.py b/python/feldera/rest/feldera_client.py
@@ -1,11 +1,14 @@
 import pathlib
 from typing import Any, Dict, Optional
 import logging
+import warnings
 import time
 import json
 from decimal import Decimal
 from typing import Generator
 
+import pyarrow
+
 from feldera.rest.config import Config
 from feldera.rest.feldera_config import FelderaConfig
 from feldera.rest.errors import FelderaTimeoutError
@@ -340,7 +343,9 @@ def shutdown_pipeline(self, pipeline_name: str, timeout_s: Optional[float] = 300
             time.sleep(0.1)
 
         raise FelderaTimeoutError(
-            f"timeout error: pipeline '{pipeline_name}' did not shutdown in {timeout_s} seconds"
+            f"timeout error: pipeline '{pipeline_name}' did not shutdown in {
+                timeout_s
+            } seconds"
         )
 
     def suspend_pipeline(self, pipeline_name: str, timeout_s: Optional[float] = 300):
@@ -379,7 +384,9 @@ def suspend_pipeline(self, pipeline_name: str, timeout_s: Optional[float] = 300)
             time.sleep(0.1)
 
         raise FelderaTimeoutError(
-            f"timeout error: pipeline '{pipeline_name}' did not suspend in {timeout_s} seconds"
+            f"timeout error: pipeline '{pipeline_name}' did not suspend in {
+                timeout_s
+            } seconds"
         )
 
     def checkpoint_pipeline(self, pipeline_name: str) -> int:
@@ -531,6 +538,7 @@ def listen_to_pipeline(
         pipeline_name: str,
         table_name: str,
         format: str,
+        case_sensitive: bool = False,
         backpressure: bool = True,
         array: bool = False,
         timeout: Optional[float] = None,
@@ -559,6 +567,9 @@ def listen_to_pipeline(
         if format == "json":
             params["array"] = _prepare_boolean_input(array)
 
+        if case_sensitive:
+            table_name = f'"{table_name}"'
+
         resp = self.http.post(
             path=f"/pipelines/{pipeline_name}/egress/{table_name}",
             params=params,
@@ -650,6 +661,13 @@ def query_as_json(
         :param query: The SQL query to be executed.
         :return: A generator that yields each row of the result as a Python dictionary, deserialized from JSON.
         """
+
+        warnings.warn(
+            "function query_as_json is deprecated and will be removed in a future version",
+            category=DeprecationWarning,
+            stacklevel=2,
+        )
+
         params = {
             "pipeline_name": pipeline_name,
             "sql": query,
@@ -666,6 +684,30 @@ def query_as_json(
             if chunk:
                 yield json.loads(chunk, parse_float=Decimal)
 
+    def query_as_pyarrow(self, pipeline_name: str, query: str) -> pyarrow.Table:
+        """
+        Executes an ad-hoc query on the specified pipeline, reads the complete
+        Arrow IPC stream of the response and returns it as one pyarrow Table.
+
+        :param pipeline_name: The name of the pipeline to query.
+        :param query: The SQL query to be executed.
+        :return: A pyarrow.Table consisting of all the records.
+        """
+        params = {
+            "pipeline_name": pipeline_name,
+            "sql": query,
+            "format": "arrow_ipc",
+        }
+
+        resp = self.http.get(
+            path=f"/pipelines/{pipeline_name}/query",
+            params=params,
+            stream=True,
+        )
+        # Closing the streamed response releases its connection even on errors.
+        with resp, pyarrow.ipc.RecordBatchStreamReader(resp.raw) as reader:
+            return reader.read_all()
+
     def pause_connector(self, pipeline_name, table_name, connector_name):
         """
         Pause the specified input connector.
@@ -685,7 +727,9 @@ def pause_connector(self, pipeline_name, table_name, connector_name):
         """
 
         self.http.post(
-            path=f"/pipelines/{pipeline_name}/tables/{table_name}/connectors/{connector_name}/pause",
+            path=f"/pipelines/{pipeline_name}/tables/{table_name}/connectors/{
+                connector_name
+            }/pause",
         )
 
     def resume_connector(
@@ -709,7 +753,9 @@ def resume_connector(
         """
 
         self.http.post(
-            path=f"/pipelines/{pipeline_name}/tables/{table_name}/connectors/{connector_name}/start",
+            path=f"/pipelines/{pipeline_name}/tables/{table_name}/connectors/{
+                connector_name
+            }/start",
         )
 
     def get_config(self) -> FelderaConfig:

diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
     "numpy>=2.2.4",
     "pretty-errors",
     "ruff>=0.6.9",
+    "pyarrow>=20.0.0",
 ]
 [project.urls]
 Homepage = "https://www.feldera.com"

diff --git a/python/tests/test_pipeline.py b/python/tests/test_pipeline.py
@@ -4,6 +4,7 @@
 import uuid
 import threading
 
+from decimal import Decimal
 from tests import TEST_CLIENT
 
 from feldera.rest.pipeline import Pipeline
@@ -236,6 +237,33 @@ def test_adhoc_query_json(self):
         TEST_CLIENT.shutdown_pipeline(name)
         TEST_CLIENT.delete_pipeline(name)
 
+    def test_adhoc_query_pyarrow(self):
+        # Push two ids, query the view via query_as_pyarrow, compare all rows.
+        name = str(uuid.uuid4())
+        csv_rows = "1\n2\n"
+
+        sql = """
+        CREATE TABLE tbl(id INT) with ('materialized' = 'true');
+        CREATE MATERIALIZED VIEW v0 AS SELECT id, '3.14'::DECIMAL(5, 2) as d, '3.14159'::DOUBLE as dbl, MAP[1, 10] as m FROM tbl;
+        """
+
+        created = TEST_CLIENT.create_pipeline(Pipeline(name, sql, "", "", {}, {}))
+        TEST_CLIENT.start_pipeline(name)
+        TEST_CLIENT.push_to_pipeline(name, "tbl", "csv", csv_rows)
+
+        table = TEST_CLIENT.query_as_pyarrow(created.name, "select * from v0")
+
+        # One output row per pushed id; the other columns are constants.
+        # The MAP column is expected as a list of (key, value) tuples.
+        expected = [
+            {"id": row_id, "d": Decimal("3.14"), "dbl": 3.14159, "m": [(1, 10)]}
+            for row_id in (2, 1)
+        ]
+        self.assertCountEqual(table.to_pylist(), expected)
+
+        TEST_CLIENT.shutdown_pipeline(name)
+        TEST_CLIENT.delete_pipeline(name)
+
 
 if __name__ == "__main__":
     unittest.main()