Skip to content

Commit 9f5a58a

Browse files
committed
Don't allow CTEs to determine semantic levels of aggregates.
The fix for bug #19055 (commit b0cc0a7) allowed CTE references in sub-selects within aggregate functions to affect the semantic levels assigned to such aggregates. It turns out this broke some related cases, leading to assertion failures or strange planner errors such as "unexpected outer reference in CTE query". After experimenting with some alternative rules for assigning the semantic level in such cases, we've come to the conclusion that changing the level is more likely to break things than be helpful. Therefore, this patch undoes what b0cc0a7 changed, and instead installs logic to throw an error if there is any reference to a CTE that's below the semantic level that standard SQL rules would assign to the aggregate based on its contained Var and Aggref nodes. (The SQL standard disallows sub-selects within aggregate functions, so it can't reach the troublesome case and hence has no rule for what to do.) Perhaps someone will come along with a legitimate query that this logic rejects, and if so probably the example will help us craft a level-adjustment rule that works better than what b0cc0a7 did. I'm not holding my breath for that though, because the previous logic had been there for a very long time before bug #19055 without complaints, and that bug report sure looks to have originated from fuzzing not from real usage. Like b0cc0a7, back-patch to all supported branches, though sadly that no longer includes v13. Bug: #19106 Reported-by: Kamil Monicz <kamil@monicz.dev> Author: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/19106-9dd3668a0734cd72@postgresql.org Backpatch-through: 14
1 parent 3995e4a commit 9f5a58a

File tree

3 files changed

+88
-43
lines changed

3 files changed

+88
-43
lines changed

src/backend/parser/parse_agg.c

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ typedef struct
3535
ParseState *pstate;
3636
int min_varlevel;
3737
int min_agglevel;
38+
int min_ctelevel;
39+
RangeTblEntry *min_cte;
3840
int sublevels_up;
3941
} check_agg_arguments_context;
4042

@@ -54,7 +56,8 @@ typedef struct
5456
static int check_agg_arguments(ParseState *pstate,
5557
List *directargs,
5658
List *args,
57-
Expr *filter);
59+
Expr *filter,
60+
int agglocation);
5861
static bool check_agg_arguments_walker(Node *node,
5962
check_agg_arguments_context *context);
6063
static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
@@ -332,7 +335,8 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
332335
min_varlevel = check_agg_arguments(pstate,
333336
directargs,
334337
args,
335-
filter);
338+
filter,
339+
location);
336340

337341
*p_levelsup = min_varlevel;
338342

@@ -633,14 +637,17 @@ static int
633637
check_agg_arguments(ParseState *pstate,
634638
List *directargs,
635639
List *args,
636-
Expr *filter)
640+
Expr *filter,
641+
int agglocation)
637642
{
638643
int agglevel;
639644
check_agg_arguments_context context;
640645

641646
context.pstate = pstate;
642647
context.min_varlevel = -1; /* signifies nothing found yet */
643648
context.min_agglevel = -1;
649+
context.min_ctelevel = -1;
650+
context.min_cte = NULL;
644651
context.sublevels_up = 0;
645652

646653
(void) check_agg_arguments_walker((Node *) args, &context);
@@ -678,6 +685,20 @@ check_agg_arguments(ParseState *pstate,
678685
parser_errposition(pstate, aggloc)));
679686
}
680687

688+
/*
689+
* If there's a non-local CTE that's below the aggregate's semantic level,
690+
* complain. It's not quite clear what we should do to fix up such a case
691+
* (treating the CTE reference like a Var seems wrong), and it's also
692+
* unclear whether there is a real-world use for such cases.
693+
*/
694+
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
695+
ereport(ERROR,
696+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
697+
errmsg("outer-level aggregate cannot use a nested CTE"),
698+
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
699+
context.min_cte->eref->aliasname),
700+
parser_errposition(pstate, agglocation)));
701+
681702
/*
682703
* Now check for vars/aggs in the direct arguments, and throw error if
683704
* needed. Note that we allow a Var of the agg's semantic level, but not
@@ -691,6 +712,7 @@ check_agg_arguments(ParseState *pstate,
691712
{
692713
context.min_varlevel = -1;
693714
context.min_agglevel = -1;
715+
context.min_ctelevel = -1;
694716
(void) check_agg_arguments_walker((Node *) directargs, &context);
695717
if (context.min_varlevel >= 0 && context.min_varlevel < agglevel)
696718
ereport(ERROR,
@@ -706,6 +728,13 @@ check_agg_arguments(ParseState *pstate,
706728
parser_errposition(pstate,
707729
locate_agg_of_level((Node *) directargs,
708730
context.min_agglevel))));
731+
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
732+
ereport(ERROR,
733+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
734+
errmsg("outer-level aggregate cannot use a nested CTE"),
735+
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
736+
context.min_cte->eref->aliasname),
737+
parser_errposition(pstate, agglocation)));
709738
}
710739
return agglevel;
711740
}
@@ -786,11 +815,6 @@ check_agg_arguments_walker(Node *node,
786815

787816
if (IsA(node, RangeTblEntry))
788817
{
789-
/*
790-
* CTE references act similarly to Vars of the CTE's level. Without
791-
* this we might conclude that the Agg can be evaluated above the CTE,
792-
* leading to trouble.
793-
*/
794818
RangeTblEntry *rte = (RangeTblEntry *) node;
795819

796820
if (rte->rtekind == RTE_CTE)
@@ -802,9 +826,12 @@ check_agg_arguments_walker(Node *node,
802826
/* ignore local CTEs of subqueries */
803827
if (ctelevelsup >= 0)
804828
{
805-
if (context->min_varlevel < 0 ||
806-
context->min_varlevel > ctelevelsup)
807-
context->min_varlevel = ctelevelsup;
829+
if (context->min_ctelevel < 0 ||
830+
context->min_ctelevel > ctelevelsup)
831+
{
832+
context->min_ctelevel = ctelevelsup;
833+
context->min_cte = rte;
834+
}
808835
}
809836
}
810837
return false; /* allow range_table_walker to continue */

src/test/regress/expected/with.out

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2226,36 +2226,44 @@ from int4_tbl;
22262226
--
22272227
-- test for bug #19055: interaction of WITH with aggregates
22282228
--
2229-
-- The reference to cte1 must determine the aggregate's level,
2230-
-- even though it contains no Vars referencing cte1
2231-
explain (verbose, costs off)
2229+
-- For now, we just throw an error if there's a use of a CTE below the
2230+
-- semantic level that the SQL standard assigns to the aggregate.
2231+
-- It's not entirely clear what we could do instead that doesn't risk
2232+
-- breaking more things than it fixes.
22322233
select f1, (with cte1(x,y) as (select 1,2)
22332234
select count((select i4.f1 from cte1))) as ss
22342235
from int4_tbl i4;
2235-
QUERY PLAN
2236-
-----------------------------------
2237-
Seq Scan on public.int4_tbl i4
2238-
Output: i4.f1, (SubPlan 2)
2239-
SubPlan 2
2236+
ERROR: outer-level aggregate cannot use a nested CTE
2237+
LINE 2: select count((select i4.f1 from cte1))) as ss
2238+
^
2239+
DETAIL: CTE "cte1" is below the aggregate's semantic level.
2240+
--
2241+
-- test for bug #19106: interaction of WITH with aggregates
2242+
--
2243+
-- the initial fix for #19055 was too aggressive and broke this case
2244+
explain (verbose, costs off)
2245+
with a as ( select id from (values (1), (2)) as v(id) ),
2246+
b as ( select max((select sum(id) from a)) as agg )
2247+
select agg from b;
2248+
QUERY PLAN
2249+
--------------------------------------------
2250+
Aggregate
2251+
Output: max($0)
2252+
InitPlan 1 (returns $0)
22402253
-> Aggregate
2241-
Output: count($1)
2242-
InitPlan 1 (returns $1)
2243-
-> Result
2244-
Output: i4.f1
2245-
-> Result
2246-
(9 rows)
2247-
2248-
select f1, (with cte1(x,y) as (select 1,2)
2249-
select count((select i4.f1 from cte1))) as ss
2250-
from int4_tbl i4;
2251-
f1 | ss
2252-
-------------+----
2253-
0 | 1
2254-
123456 | 1
2255-
-123456 | 1
2256-
2147483647 | 1
2257-
-2147483647 | 1
2258-
(5 rows)
2254+
Output: sum("*VALUES*".column1)
2255+
-> Values Scan on "*VALUES*"
2256+
Output: "*VALUES*".column1
2257+
-> Result
2258+
(8 rows)
2259+
2260+
with a as ( select id from (values (1), (2)) as v(id) ),
2261+
b as ( select max((select sum(id) from a)) as agg )
2262+
select agg from b;
2263+
agg
2264+
-----
2265+
3
2266+
(1 row)
22592267

22602268
--
22612269
-- test for nested-recursive-WITH bug

src/test/regress/sql/with.sql

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,16 +1067,26 @@ from int4_tbl;
10671067
--
10681068
-- test for bug #19055: interaction of WITH with aggregates
10691069
--
1070-
-- The reference to cte1 must determine the aggregate's level,
1071-
-- even though it contains no Vars referencing cte1
1072-
explain (verbose, costs off)
1070+
-- For now, we just throw an error if there's a use of a CTE below the
1071+
-- semantic level that the SQL standard assigns to the aggregate.
1072+
-- It's not entirely clear what we could do instead that doesn't risk
1073+
-- breaking more things than it fixes.
10731074
select f1, (with cte1(x,y) as (select 1,2)
10741075
select count((select i4.f1 from cte1))) as ss
10751076
from int4_tbl i4;
10761077

1077-
select f1, (with cte1(x,y) as (select 1,2)
1078-
select count((select i4.f1 from cte1))) as ss
1079-
from int4_tbl i4;
1078+
--
1079+
-- test for bug #19106: interaction of WITH with aggregates
1080+
--
1081+
-- the initial fix for #19055 was too aggressive and broke this case
1082+
explain (verbose, costs off)
1083+
with a as ( select id from (values (1), (2)) as v(id) ),
1084+
b as ( select max((select sum(id) from a)) as agg )
1085+
select agg from b;
1086+
1087+
with a as ( select id from (values (1), (2)) as v(id) ),
1088+
b as ( select max((select sum(id) from a)) as agg )
1089+
select agg from b;
10801090

10811091
--
10821092
-- test for nested-recursive-WITH bug

0 commit comments

Comments
 (0)