Skip to content

Commit c2b0824

Browse files
committed
fix: Add get_table_query_string_with_alias() for PostgreSQL subquery aliasing
PostgreSQL requires all subqueries in FROM clauses to have aliases. This adds a new method get_table_query_string_with_alias() to PostgreSQLSource that automatically adds an alias when the source is query-based. The existing get_table_query_string() is preserved for backward compatibility and updated with documentation explaining when to use the new method. Fixes #5605 Signed-off-by: Yassin Nouh <yassinmnouh@gmail.com>
1 parent ce35ce6 commit c2b0824

File tree

2 files changed

+169
-0
lines changed

2 files changed

+169
-0
lines changed

sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,11 +145,44 @@ def get_table_column_names_and_types(
145145
)
146146

147147
def get_table_query_string(self) -> str:
148+
"""Returns a string that can be used to reference this table in SQL.
149+
150+
For query-based sources, returns the query wrapped in parentheses.
151+
152+
Note:
153+
When using the returned string directly in a FROM clause with PostgreSQL,
154+
you may need to add an alias if this is a query-based source. PostgreSQL
155+
requires all subqueries in FROM clauses to have aliases. Consider using
156+
get_table_query_string_with_alias() for automatic aliasing.
157+
"""
148158
if self._postgres_options._table:
149159
return f"{self._postgres_options._table}"
150160
else:
151161
return f"({self._postgres_options._query})"
152162

163+
def get_table_query_string_with_alias(self, alias: str = "subquery") -> str:
164+
"""Returns a string for use in FROM clause with alias for PostgreSQL compatibility.
165+
166+
PostgreSQL requires all subqueries in FROM clauses to have aliases. This method
167+
automatically adds an alias when the source is query-based.
168+
169+
Args:
170+
alias: The alias to use for query-based sources. Defaults to "subquery".
171+
172+
Returns:
173+
For table-based sources: the table name (no alias needed).
174+
For query-based sources: "(query) AS alias".
175+
176+
Example:
177+
>>> source = PostgreSQLSource(query="SELECT * FROM my_table", ...)
178+
>>> entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}"
179+
# Results in: "SELECT id, ts FROM (SELECT * FROM my_table) AS subquery"
180+
"""
181+
if self._postgres_options._table:
182+
return f"{self._postgres_options._table}"
183+
else:
184+
return f"({self._postgres_options._query}) AS {alias}"
185+
153186

154187
class PostgreSQLOptions:
155188
def __init__(

sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,3 +950,139 @@ def test_cli_date_combinations(self):
950950
# Should not fail on parameter validation
951951
stderr_output = result.stderr.decode()
952952
assert "must be provided" not in stderr_output
953+
954+
955+
class TestPostgreSQLSourceQueryStringAlias:
956+
"""
957+
Test suite for get_table_query_string_with_alias() method.
958+
959+
This addresses GitHub issue #5605: PostgreSQL requires all subqueries
960+
in FROM clauses to have aliases.
961+
"""
962+
963+
def test_table_source_get_table_query_string(self):
964+
"""Test get_table_query_string() with table-based source"""
965+
source = PostgreSQLSource(
966+
name="test_source",
967+
table="my_schema.my_table",
968+
timestamp_field="event_timestamp",
969+
)
970+
result = source.get_table_query_string()
971+
assert result == "my_schema.my_table"
972+
973+
def test_query_source_get_table_query_string(self):
974+
"""Test get_table_query_string() with query-based source"""
975+
source = PostgreSQLSource(
976+
name="test_source",
977+
query="SELECT * FROM my_table WHERE active = true",
978+
timestamp_field="event_timestamp",
979+
)
980+
result = source.get_table_query_string()
981+
assert result == "(SELECT * FROM my_table WHERE active = true)"
982+
983+
def test_table_source_with_alias(self):
984+
"""Test get_table_query_string_with_alias() with table-based source returns table without alias"""
985+
source = PostgreSQLSource(
986+
name="test_source",
987+
table="my_schema.my_table",
988+
timestamp_field="event_timestamp",
989+
)
990+
result = source.get_table_query_string_with_alias()
991+
# Table sources don't need aliases
992+
assert result == "my_schema.my_table"
993+
994+
def test_query_source_with_default_alias(self):
995+
"""Test get_table_query_string_with_alias() with query-based source uses default alias"""
996+
source = PostgreSQLSource(
997+
name="test_source",
998+
query="SELECT * FROM my_table WHERE active = true",
999+
timestamp_field="event_timestamp",
1000+
)
1001+
result = source.get_table_query_string_with_alias()
1002+
assert result == "(SELECT * FROM my_table WHERE active = true) AS subquery"
1003+
1004+
def test_query_source_with_custom_alias(self):
1005+
"""Test get_table_query_string_with_alias() with custom alias"""
1006+
source = PostgreSQLSource(
1007+
name="test_source",
1008+
query="SELECT id, name FROM users",
1009+
timestamp_field="event_timestamp",
1010+
)
1011+
result = source.get_table_query_string_with_alias(alias="user_data")
1012+
assert result == "(SELECT id, name FROM users) AS user_data"
1013+
1014+
def test_table_source_with_custom_alias_ignored(self):
1015+
"""Test get_table_query_string_with_alias() ignores alias for table-based sources"""
1016+
source = PostgreSQLSource(
1017+
name="test_source",
1018+
table="events",
1019+
timestamp_field="event_timestamp",
1020+
)
1021+
result = source.get_table_query_string_with_alias(alias="ignored_alias")
1022+
# Alias should be ignored for table sources
1023+
assert result == "events"
1024+
1025+
def test_sql_query_with_alias_is_valid(self):
1026+
"""Test that SQL using get_table_query_string_with_alias() is syntactically valid"""
1027+
source = PostgreSQLSource(
1028+
name="test_source",
1029+
query="SELECT id, ts FROM raw_data",
1030+
timestamp_field="ts",
1031+
)
1032+
1033+
# Construct a SQL query using the new method
1034+
entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}"
1035+
1036+
# Verify SQL is valid using sqlglot
1037+
parsed = sqlglot.parse(entity_sql, dialect="postgres")
1038+
assert len(parsed) == 1
1039+
assert parsed[0] is not None
1040+
1041+
def test_sql_query_without_alias_fails_in_postgres(self):
1042+
"""Test that SQL using get_table_query_string() for query source produces invalid PostgreSQL
1043+
1044+
This demonstrates the issue that get_table_query_string_with_alias() fixes:
1045+
PostgreSQL requires all subqueries in FROM clauses to have aliases.
1046+
"""
1047+
source = PostgreSQLSource(
1048+
name="test_source",
1049+
query="SELECT id, ts FROM raw_data",
1050+
timestamp_field="ts",
1051+
)
1052+
1053+
# Using the old method (without alias) for query-based source
1054+
entity_sql_without_alias = (
1055+
f"SELECT id, ts FROM {source.get_table_query_string()}"
1056+
)
1057+
1058+
# This produces: SELECT id, ts FROM (SELECT id, ts FROM raw_data)
1059+
# which is invalid in PostgreSQL (subquery needs alias)
1060+
# sqlglot is lenient and may parse it, but PostgreSQL would reject it
1061+
assert (
1062+
"AS" not in entity_sql_without_alias
1063+
), "get_table_query_string() should not add alias"
1064+
1065+
# Using the new method (with alias) produces valid SQL
1066+
entity_sql_with_alias = (
1067+
f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}"
1068+
)
1069+
assert "AS subquery" in entity_sql_with_alias
1070+
1071+
def test_complex_query_with_alias(self):
1072+
"""Test get_table_query_string_with_alias() with complex nested query"""
1073+
complex_query = """
1074+
SELECT u.id, u.name, o.total
1075+
FROM users u
1076+
JOIN orders o ON u.id = o.user_id
1077+
WHERE o.created_at > '2023-01-01'
1078+
"""
1079+
source = PostgreSQLSource(
1080+
name="test_source",
1081+
query=complex_query,
1082+
timestamp_field="created_at",
1083+
)
1084+
1085+
result = source.get_table_query_string_with_alias(alias="user_orders")
1086+
assert result.startswith("(")
1087+
assert result.endswith(") AS user_orders")
1088+
assert "SELECT u.id" in result

0 commit comments

Comments
 (0)