83 changes: 83 additions & 0 deletions .github/workflows/dbt-integration-tests.yml
@@ -0,0 +1,83 @@
name: dbt-integration-tests

# Run dbt integration tests on PRs
on:
  pull_request:
    paths:
      - 'sdk/python/feast/dbt/**'
      - 'sdk/python/tests/integration/dbt/**'
      - 'sdk/python/tests/unit/dbt/**'
      - '.github/workflows/dbt-integration-tests.yml'

jobs:
  dbt-integration-test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11", "3.12"]
    env:
      PYTHON: ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

      - name: Install dependencies
        run: make install-python-dependencies-ci

      - name: Install dbt and dbt-duckdb
        run: |
          uv pip install --system dbt-core dbt-duckdb

      - name: Run dbt commands
        run: |
          cd sdk/python/tests/integration/dbt/test_dbt_project
          dbt deps
          dbt build

      - name: Setup Feast project for dbt import test
        run: |
          cd sdk/python/tests/integration/dbt
          mkdir -p feast_repo
          cd feast_repo
          cat > feature_store.yaml << EOF
          project: feast_dbt_test
          registry: data/registry.db
          provider: local
          online_store:
            type: sqlite
            path: data/online_store.db
          EOF
          mkdir -p data

      - name: Test feast dbt import
        run: |
          cd sdk/python/tests/integration/dbt/feast_repo
          feast dbt import \
            -m ../test_dbt_project/target/manifest.json \
            -e driver_id \
            -d file \
            --tag feast

      - name: Verify feast objects were created
        run: |
          cd sdk/python/tests/integration/dbt/feast_repo
          feast feature-views list
          feast entities list

      - name: Run dbt integration tests
        run: |
          cd sdk/python
          python -m pytest tests/integration/dbt/test_dbt_integration.py -v --tb=short

      - name: Minimize uv cache
        run: uv cache prune --ci
50 changes: 48 additions & 2 deletions docs/how-to-guides/dbt-integration.md
@@ -5,7 +5,6 @@

**Current Limitations**:
- Supported data sources: BigQuery, Snowflake, and File-based sources only
- Single entity per model
- Manual entity column specification required

Breaking changes may occur in future releases.
@@ -185,6 +184,53 @@ driver_features_fv = FeatureView(
```
{% endcode %}

## Multiple Entity Support

The dbt integration supports feature views with multiple entities, useful for modeling relationships involving multiple keys.

### Usage

Specify multiple entity columns using repeated `-e` flags:

```bash
feast dbt import \
-m target/manifest.json \
-e user_id \
-e merchant_id \
--tag feast \
-o features/transactions.py
```

This creates a FeatureView with both `user_id` and `merchant_id` as entities, useful for:
- Transaction features keyed by both user and merchant
- Interaction features keyed by multiple parties
- Association tables in many-to-many relationships

Single entity usage:
```bash
feast dbt import -m target/manifest.json -e driver_id --tag feast
```

### Requirements

All specified entity columns must exist in each dbt model being imported. Models missing any entity column will be skipped with a warning.
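
For example, if a hypothetical `orders` model exposes `user_id` but not `merchant_id`, the import prints a warning along the lines of `Warning: Model 'orders' missing entity column(s): merchant_id. Skipping.` and continues with the remaining models.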

### Generated Code

The `--output` flag generates code like:

```python
user_id = Entity(name="user_id", join_keys=["user_id"], ...)
merchant_id = Entity(name="merchant_id", join_keys=["merchant_id"], ...)

transaction_fv = FeatureView(
    name="transactions",
    entities=[user_id, merchant_id],  # Multiple entities
    schema=[...],
    ...
)
```
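
Feature views with multiple entities require every join key at retrieval time. The snippet below is a minimal sketch, not output of the import command: it assumes the generated definitions above have been applied to a local repository and that the `transactions` view exposes a hypothetical `amount` feature.

```python
from datetime import datetime

import pandas as pd
from feast import FeatureStore

store = FeatureStore(repo_path=".")

# Historical retrieval: the entity dataframe needs a column for every join key
# of the feature view (user_id AND merchant_id) plus an event timestamp.
entity_df = pd.DataFrame(
    {
        "user_id": [1001],
        "merchant_id": [2002],
        "event_timestamp": [datetime(2024, 1, 1)],
    }
)
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=["transactions:amount"],  # hypothetical feature name
).to_df()

# Online retrieval: each entity row must likewise supply both join keys.
online_features = store.get_online_features(
    features=["transactions:amount"],
    entity_rows=[{"user_id": 1001, "merchant_id": 2002}],
).to_dict()
```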

## CLI Reference

### `feast dbt list`
@@ -217,7 +263,7 @@ feast dbt import <manifest_path> [OPTIONS]

| Option | Description | Default |
|--------|-------------|---------|
| `--entity-column`, `-e` | Column to use as entity key | (required) |
| `--entity-column`, `-e` | Entity column name (can be specified multiple times) | (required) |
| `--data-source-type`, `-d` | Data source type: `bigquery`, `snowflake`, `file` | `bigquery` |
| `--tag-filter`, `-t` | Filter models by dbt tag | None |
| `--model`, `-m` | Import specific model(s) only | None |
66 changes: 47 additions & 19 deletions sdk/python/feast/cli/dbt_import.py
@@ -30,8 +30,10 @@ def dbt_cmd():
@click.option(
    "--entity-column",
    "-e",
    "entity_columns",
    multiple=True,
    required=True,
    help="Primary key / entity column name (e.g., driver_id, customer_id)",
    help="Entity column name (can be specified multiple times, e.g., -e user_id -e merchant_id)",
)
@click.option(
    "--data-source-type",
@@ -89,7 +91,7 @@ def dbt_cmd():
def import_command(
    ctx: click.Context,
    manifest_path: str,
    entity_column: str,
    entity_columns: tuple,
    data_source_type: str,
    timestamp_field: str,
    tag_filter: Optional[str],
@@ -141,6 +143,28 @@ def import_command(
    if parser.project_name:
        click.echo(f" Project: {parser.project_name}")

    # Convert tuple to list and validate
    entity_cols: List[str] = list(entity_columns) if entity_columns else []

    # Validation: At least one entity required (redundant with required=True but explicit)
    if not entity_cols:
        click.echo(
            f"{Fore.RED}Error: At least one entity column required{Style.RESET_ALL}",
            err=True,
        )
        raise SystemExit(1)

    # Validation: No duplicate entity columns
    if len(entity_cols) != len(set(entity_cols)):
        duplicates = [col for col in entity_cols if entity_cols.count(col) > 1]
        click.echo(
            f"{Fore.RED}Error: Duplicate entity columns: {', '.join(set(duplicates))}{Style.RESET_ALL}",
            err=True,
        )
        raise SystemExit(1)

    click.echo(f"Entity columns: {', '.join(entity_cols)}")

    # Get models with filters
    model_list: Optional[List[str]] = list(model_names) if model_names else None
    models = parser.get_models(model_names=model_list, tag_filter=tag_filter)
@@ -188,24 +212,28 @@ def import_command(
            )
            continue

        # Validate entity column exists
        if entity_column not in column_names:
        # Validate ALL entity columns exist
        missing_entities = [e for e in entity_cols if e not in column_names]
        if missing_entities:
            click.echo(
                f"{Fore.YELLOW}Warning: Model '{model.name}' missing entity "
                f"column '{entity_column}'. Skipping.{Style.RESET_ALL}"
                f"column(s): {', '.join(missing_entities)}. Skipping.{Style.RESET_ALL}"
            )
            continue

        # Create or reuse entity
        if entity_column not in entities_created:
            entity = mapper.create_entity(
                name=entity_column,
                description="Entity key for dbt models",
            )
            entities_created[entity_column] = entity
            all_objects.append(entity)
        else:
            entity = entities_created[entity_column]
        # Create or reuse entities (one per entity column)
        model_entities: List[Any] = []
        for entity_col in entity_cols:
            if entity_col not in entities_created:
                entity = mapper.create_entity(
                    name=entity_col,
                    description="Entity key for dbt models",
                )
                entities_created[entity_col] = entity
                all_objects.append(entity)
            else:
                entity = entities_created[entity_col]
            model_entities.append(entity)

        # Create data source
        data_source = mapper.create_data_source(
@@ -218,8 +246,8 @@ def import_command(
        feature_view = mapper.create_feature_view(
            model=model,
            source=data_source,
            entity_column=entity_column,
            entity=entity,
            entity_columns=entity_cols,
            entities=model_entities,
            timestamp_field=timestamp_field,
            ttl_days=ttl_days,
            exclude_columns=excluded,
@@ -242,7 +270,7 @@ def import_command(
        m
        for m in models
        if timestamp_field in [c.name for c in m.columns]
        and entity_column in [c.name for c in m.columns]
        and all(e in [c.name for c in m.columns] for e in entity_cols)
    ]

    # Summary
@@ -257,7 +285,7 @@ def import_command(

    code = generate_feast_code(
        models=valid_models,
        entity_column=entity_column,
        entity_columns=entity_cols,
        data_source_type=data_source_type,
        timestamp_field=timestamp_field,
        ttl_days=ttl_days,