Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 71 additions & 1 deletion docs/reference/feature-servers/python-feature-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,14 @@ feature_server:
push: true # push request counters
materialization: true # materialization counters & duration
freshness: true # feature freshness gauges
offline_features: true # offline store retrieval counters & latency
audit_logging: false # structured JSON audit logs (see below)
```

Any category set to `false` will emit no metrics and start no background
threads (e.g., setting `freshness: false` prevents the registry polling
thread from starting). All categories default to `true`.
thread from starting). All categories default to `true` except
`audit_logging`, which defaults to `false`.

### Available metrics

Expand All @@ -375,6 +378,9 @@ thread from starting). All categories default to `true`.
| `feast_materialization_result_total` | Counter | `feature_view`, `status` | `materialization` | Materialization runs (success/failure) |
| `feast_materialization_duration_seconds` | Histogram | `feature_view` | `materialization` | Materialization duration per feature view |
| `feast_feature_freshness_seconds` | Gauge | `feature_view`, `project` | `freshness` | Seconds since last materialization |
| `feast_offline_store_request_total` | Counter | `method`, `status` | `offline_features` | Total offline store retrieval requests |
| `feast_offline_store_request_latency_seconds` | Histogram | `method` | `offline_features` | Latency of offline store retrieval operations |
| `feast_offline_store_row_count` | Histogram | `method` | `offline_features` | Rows returned by offline store retrieval |

### Per-ODFV transformation metrics

Expand Down Expand Up @@ -405,6 +411,70 @@ The `odfv_name` label lets you filter or group by individual ODFV,
and the `mode` label (`python`, `pandas`, `substrait`) lets you compare
transformation engines.

### Audit logging

Feast can emit structured JSON audit log entries for every online and offline
feature retrieval. These are written via the standard `feast.audit` Python
logger, so you can route them to a dedicated file, SIEM, or log aggregator
independently of application logs.

Audit logging is **disabled by default**. Enable it in `feature_store.yaml`:

```yaml
feature_server:
  type: local
  metrics:
    enabled: true
    audit_logging: true
```

**Online audit log** (emitted per `/get-online-features` call):

```json
{
"event": "online_feature_request",
"timestamp": "2026-05-11T08:30:00.123456+00:00",
"requestor_id": "user@example.com",
"entity_keys": ["driver_id"],
"entity_count": 3,
"feature_views": ["driver_hourly_stats"],
"feature_count": 3,
"status": "success",
"latency_ms": 12.34
}
```

**Offline audit log** (emitted per `RetrievalJob.to_arrow()` call):

```json
{
"event": "offline_feature_retrieval",
"timestamp": "2026-05-11T08:31:00.456789+00:00",
"method": "to_arrow",
"start_time": "2026-05-11T08:30:59.226789+00:00",
"end_time": "2026-05-11T08:31:00.456789+00:00",
"feature_views": ["driver_hourly_stats"],
"feature_count": 3,
"row_count": 500,
"status": "success",
"duration_ms": 1230.0
}
```

The `requestor_id` field in online audit logs is populated from the
security manager's current user when authentication is configured, and
falls back to `"anonymous"` otherwise.

To route audit logs to a separate file:

```python
import logging

handler = logging.FileHandler("/var/log/feast/audit.log")
handler.setFormatter(logging.Formatter("%(message)s"))
logging.getLogger("feast.audit").addHandler(handler)
```

### Scraping with Prometheus

```yaml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ spec:
push: true # push/write request counters
materialization: true # materialization counters and duration histograms
freshness: false # feature freshness gauges (can be expensive at scale)
# Example: when a future SDK adds "registry_sync", enable it here
# registry_sync: false
offline_features: true # offline store retrieval counters, latency, row count
audit_logging: false # structured JSON audit logs via the feast.audit logger
offlinePushBatching:
enabled: true
batchSize: 1000 # max rows per offline write batch
Expand Down
78 changes: 66 additions & 12 deletions sdk/python/feast/feature_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,28 +152,71 @@ class ChatRequest(BaseModel):
messages: List[ChatMessage]


def _resolve_feature_counts(
def _parse_feature_info(
features: Union[List[str], "feast.FeatureService"],
) -> tuple:
"""Return (feature_count, feature_view_count) from the resolved features.
"""Return ``(feature_view_names, feature_count)`` from resolved features.

``features`` is either a list of ``"feature_view:feature"`` strings or
a ``FeatureService`` with ``feature_view_projections``.

Returns:
(fv_names, feat_count) where fv_names is a list of unique feature
view name strings and feat_count is the total number of features.
"""
from feast.feature_service import FeatureService
from feast.utils import _parse_feature_ref

if isinstance(features, FeatureService):
projections = features.feature_view_projections
fv_count = len(projections)
fv_names = [p.name for p in projections]
feat_count = sum(len(p.features) for p in projections)
elif isinstance(features, list):
feat_count = len(features)
fv_names = {ref.split(":")[0].split("@")[0] for ref in features if ":" in ref}
fv_count = len(fv_names)
fv_names = list({_parse_feature_ref(ref)[0] for ref in features if ":" in ref})
else:
fv_names = []
feat_count = 0
fv_count = 0
return str(feat_count), str(fv_count)
return fv_names, feat_count


def _resolve_feature_counts(
    features: Union[List[str], "feast.FeatureService"],
) -> tuple:
    """Build the stringified count pair used as Prometheus label values.

    Args:
        features: The resolved features, passed straight through to
            ``_parse_feature_info``.

    Returns:
        ``(feature_count_str, feature_view_count_str)`` -- the total number
        of features and the number of feature view names reported by
        ``_parse_feature_info``, each converted with ``str``.
    """
    view_names, n_features = _parse_feature_info(features)
    # Prometheus label values must be strings, so convert both counts here.
    label_values = (n_features, len(view_names))
    return tuple(str(value) for value in label_values)


def _emit_online_audit(
    request: GetOnlineFeaturesRequest,
    features: Union[List[str], "feast.FeatureService"],
    entity_count: int,
    status: str,
    latency_ms: float,
):
    """Best-effort audit log emission for online feature requests.

    Args:
        request: The incoming request; only the keys of ``request.entities``
            are recorded as ``entity_keys``.
        features: The resolved features, parsed with ``_parse_feature_info``
            into feature view names and a feature count.
        entity_count: Number of entities in the request, forwarded as-is.
        status: Outcome label for the request (the caller passes
            ``"success"`` or ``"error"``).
        latency_ms: Elapsed time of the retrieval in milliseconds.

    This function never raises: any failure while building or emitting the
    record is logged as a warning so that auditing cannot break serving.
    """
    try:
        # Gate on the same flag the offline path checks, so that a server
        # with audit logging disabled does not pay for the requestor lookup
        # and feature parsing on every online request.
        if not feast_metrics._config.audit_logging:
            return

        from feast.permissions.security_manager import get_security_manager

        # Fall back to "anonymous" when there is no security manager, no
        # current user, or the user has an empty username.
        requestor_id = "anonymous"
        sm = get_security_manager()
        if sm and sm.current_user:
            requestor_id = sm.current_user.username or "anonymous"

        fv_names, feat_count = _parse_feature_info(features)

        feast_metrics.emit_online_audit_log(
            requestor_id=requestor_id,
            entity_keys=list(request.entities.keys()),
            entity_count=entity_count,
            feature_views=fv_names,
            feature_count=feat_count,
            status=status,
            latency_ms=latency_ms,
        )
    except Exception:
        logger.warning("Failed to emit online audit log", exc_info=True)


async def _get_features(
Expand Down Expand Up @@ -390,11 +433,22 @@ async def get_online_features(request: GetOnlineFeaturesRequest) -> Any:
include_feature_view_version_metadata=request.include_feature_view_version_metadata,
)

if store._get_provider().async_supported.online.read:
response = await store.get_online_features_async(**read_params) # type: ignore
else:
response = await run_in_threadpool(
lambda: store.get_online_features(**read_params) # type: ignore
audit_start_ms = time.monotonic() * 1000
audit_status = "success"
try:
if store._get_provider().async_supported.online.read:
response = await store.get_online_features_async(**read_params) # type: ignore
else:
response = await run_in_threadpool(
lambda: store.get_online_features(**read_params) # type: ignore
)
except Exception:
audit_status = "error"
raise
finally:
audit_latency_ms = time.monotonic() * 1000 - audit_start_ms
_emit_online_audit(
request, features, entity_count, audit_status, audit_latency_ms
)

response_dict = await run_in_threadpool(
Expand Down
11 changes: 11 additions & 0 deletions sdk/python/feast/infra/feature_servers/base_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ class MetricsConfig(FeastConfigBaseModel):
"""Emit per-feature-view freshness gauges
(feast_feature_freshness_seconds)."""

offline_features: StrictBool = True
"""Emit offline store retrieval metrics
(feast_offline_store_request_total,
feast_offline_store_request_latency_seconds,
feast_offline_store_row_count)."""

audit_logging: StrictBool = False
Comment thread
jyejare marked this conversation as resolved.
"""Emit structured JSON audit log entries for online and offline
feature requests via the ``feast.audit`` logger. Captures requestor
identity, entity keys, feature views, row counts, and latency."""


class BaseFeatureServerConfig(FeastConfigBaseModel):
"""Base Feature Server config that should be extended"""
Expand Down
67 changes: 65 additions & 2 deletions sdk/python/feast/infra/offline_stores/offline_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import time
import warnings
from abc import ABC
from datetime import datetime
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -70,6 +72,23 @@ def __init__(
self.max_event_timestamp = max_event_timestamp


def _extract_retrieval_metadata(job: "RetrievalJob") -> tuple:
    """Return ``(feature_view_names, feature_count)`` from a RetrievalJob's metadata.

    Args:
        job: The retrieval job whose ``metadata`` attribute is inspected.

    Returns:
        A ``(feature_views, feature_count)`` tuple. ``feature_views`` lists
        the distinct feature view names (the first element returned by
        ``_parse_feature_ref``) of every ``"view:feature"`` reference, in
        first-seen order; ``feature_count`` is the number of feature
        references. ``([], 0)`` is returned when the job has no metadata,
        the metadata carries no features, or reading the metadata raises
        ``NotImplementedError`` or ``AttributeError``.
    """
    from feast.utils import _parse_feature_ref

    try:
        meta = job.metadata
        # Treat a missing/None feature list like "no metadata" instead of
        # letting len(None) raise and drop the whole audit record.
        features = meta.features if meta else None
        if features:
            # dict.fromkeys de-duplicates while keeping insertion order, so
            # the emitted list is stable across runs (a set is not).
            feature_views = list(
                dict.fromkeys(
                    _parse_feature_ref(ref)[0] for ref in features if ":" in ref
                )
            )
            return feature_views, len(features)
    except (NotImplementedError, AttributeError):
        pass
    return [], 0


class RetrievalJob(ABC):
"""A RetrievalJob manages the execution of a query to retrieve data from the offline store."""

Expand Down Expand Up @@ -152,7 +171,51 @@ def to_arrow(
validation_reference (optional): The validation to apply against the retrieved dataframe.
timeout (optional): The query timeout if applicable.
"""
features_table = self._to_arrow_internal(timeout=timeout)
start_wall = time.monotonic()
status_label = "success"
row_count = 0
try:
features_table = self._to_arrow_internal(timeout=timeout)
row_count = features_table.num_rows
except Exception:
status_label = "error"
raise
finally:
try:
from feast import metrics as feast_metrics

elapsed = time.monotonic() - start_wall

if feast_metrics._config.offline_features:
feast_metrics.offline_store_request_total.labels(
method="to_arrow", status=status_label
).inc()
feast_metrics.offline_store_request_latency_seconds.labels(
method="to_arrow"
).observe(elapsed)
feast_metrics.offline_store_row_count.labels(
method="to_arrow"
).observe(row_count)

if feast_metrics._config.audit_logging:
feature_views, feature_count = _extract_retrieval_metadata(self)
end_dt = datetime.now(tz=timezone.utc)
start_dt = end_dt - timedelta(seconds=elapsed)
feast_metrics.emit_offline_audit_log(
method="to_arrow",
feature_views=feature_views,
feature_count=feature_count,
row_count=row_count,
status=status_label,
start_time=start_dt.isoformat(),
end_time=end_dt.isoformat(),
duration_ms=elapsed * 1000,
)
except Exception:
logging.getLogger(__name__).debug(
"Failed to record offline store metrics", exc_info=True
)

if self.on_demand_feature_views:
# Build a mapping of ODFV name to requested feature names
# This ensures we only return the features that were explicitly requested
Expand Down
Loading
Loading