Skip to content

Commit b853974

Browse files
committed
Don't allow CTEs to determine semantic levels of aggregates.
The fix for bug #19055 (commit b0cc0a7) allowed CTE references in sub-selects within aggregate functions to affect the semantic levels assigned to such aggregates. It turns out this broke some related cases, leading to assertion failures or strange planner errors such as "unexpected outer reference in CTE query". After experimenting with some alternative rules for assigning the semantic level in such cases, we've come to the conclusion that changing the level is more likely to break things than be helpful. Therefore, this patch undoes what b0cc0a7 changed, and instead installs logic to throw an error if there is any reference to a CTE that's below the semantic level that standard SQL rules would assign to the aggregate based on its contained Var and Aggref nodes. (The SQL standard disallows sub-selects within aggregate functions, so it can't reach the troublesome case and hence has no rule for what to do.) Perhaps someone will come along with a legitimate query that this logic rejects, and if so probably the example will help us craft a level-adjustment rule that works better than what b0cc0a7 did. I'm not holding my breath for that though, because the previous logic had been there for a very long time before bug #19055 without complaints, and that bug report sure looks to have originated from fuzzing not from real usage. Like b0cc0a7, back-patch to all supported branches, though sadly that no longer includes v13. Bug: #19106 Reported-by: Kamil Monicz <kamil@monicz.dev> Author: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/19106-9dd3668a0734cd72@postgresql.org Backpatch-through: 14
1 parent 29a3e22 commit b853974

File tree

3 files changed

+88
-43
lines changed

3 files changed

+88
-43
lines changed

src/backend/parser/parse_agg.c

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ typedef struct
3535
ParseState *pstate;
3636
int min_varlevel;
3737
int min_agglevel;
38+
int min_ctelevel;
39+
RangeTblEntry *min_cte;
3840
int sublevels_up;
3941
} check_agg_arguments_context;
4042

@@ -54,7 +56,8 @@ typedef struct
5456
static int check_agg_arguments(ParseState *pstate,
5557
List *directargs,
5658
List *args,
57-
Expr *filter);
59+
Expr *filter,
60+
int agglocation);
5861
static bool check_agg_arguments_walker(Node *node,
5962
check_agg_arguments_context *context);
6063
static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
@@ -332,7 +335,8 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
332335
min_varlevel = check_agg_arguments(pstate,
333336
directargs,
334337
args,
335-
filter);
338+
filter,
339+
location);
336340

337341
*p_levelsup = min_varlevel;
338342

@@ -626,14 +630,17 @@ static int
626630
check_agg_arguments(ParseState *pstate,
627631
List *directargs,
628632
List *args,
629-
Expr *filter)
633+
Expr *filter,
634+
int agglocation)
630635
{
631636
int agglevel;
632637
check_agg_arguments_context context;
633638

634639
context.pstate = pstate;
635640
context.min_varlevel = -1; /* signifies nothing found yet */
636641
context.min_agglevel = -1;
642+
context.min_ctelevel = -1;
643+
context.min_cte = NULL;
637644
context.sublevels_up = 0;
638645

639646
(void) check_agg_arguments_walker((Node *) args, &context);
@@ -671,6 +678,20 @@ check_agg_arguments(ParseState *pstate,
671678
parser_errposition(pstate, aggloc)));
672679
}
673680

681+
/*
682+
* If there's a non-local CTE that's below the aggregate's semantic level,
683+
* complain. It's not quite clear what we should do to fix up such a case
684+
* (treating the CTE reference like a Var seems wrong), and it's also
685+
* unclear whether there is a real-world use for such cases.
686+
*/
687+
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
688+
ereport(ERROR,
689+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
690+
errmsg("outer-level aggregate cannot use a nested CTE"),
691+
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
692+
context.min_cte->eref->aliasname),
693+
parser_errposition(pstate, agglocation)));
694+
674695
/*
675696
* Now check for vars/aggs in the direct arguments, and throw error if
676697
* needed. Note that we allow a Var of the agg's semantic level, but not
@@ -684,6 +705,7 @@ check_agg_arguments(ParseState *pstate,
684705
{
685706
context.min_varlevel = -1;
686707
context.min_agglevel = -1;
708+
context.min_ctelevel = -1;
687709
(void) check_agg_arguments_walker((Node *) directargs, &context);
688710
if (context.min_varlevel >= 0 && context.min_varlevel < agglevel)
689711
ereport(ERROR,
@@ -699,6 +721,13 @@ check_agg_arguments(ParseState *pstate,
699721
parser_errposition(pstate,
700722
locate_agg_of_level((Node *) directargs,
701723
context.min_agglevel))));
724+
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
725+
ereport(ERROR,
726+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
727+
errmsg("outer-level aggregate cannot use a nested CTE"),
728+
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
729+
context.min_cte->eref->aliasname),
730+
parser_errposition(pstate, agglocation)));
702731
}
703732
return agglevel;
704733
}
@@ -779,11 +808,6 @@ check_agg_arguments_walker(Node *node,
779808

780809
if (IsA(node, RangeTblEntry))
781810
{
782-
/*
783-
* CTE references act similarly to Vars of the CTE's level. Without
784-
* this we might conclude that the Agg can be evaluated above the CTE,
785-
* leading to trouble.
786-
*/
787811
RangeTblEntry *rte = (RangeTblEntry *) node;
788812

789813
if (rte->rtekind == RTE_CTE)
@@ -795,9 +819,12 @@ check_agg_arguments_walker(Node *node,
795819
/* ignore local CTEs of subqueries */
796820
if (ctelevelsup >= 0)
797821
{
798-
if (context->min_varlevel < 0 ||
799-
context->min_varlevel > ctelevelsup)
800-
context->min_varlevel = ctelevelsup;
822+
if (context->min_ctelevel < 0 ||
823+
context->min_ctelevel > ctelevelsup)
824+
{
825+
context->min_ctelevel = ctelevelsup;
826+
context->min_cte = rte;
827+
}
801828
}
802829
}
803830
return false; /* allow range_table_walker to continue */

src/test/regress/expected/with.out

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2195,36 +2195,44 @@ from int4_tbl;
21952195
--
21962196
-- test for bug #19055: interaction of WITH with aggregates
21972197
--
2198-
-- The reference to cte1 must determine the aggregate's level,
2199-
-- even though it contains no Vars referencing cte1
2200-
explain (verbose, costs off)
2198+
-- For now, we just throw an error if there's a use of a CTE below the
2199+
-- semantic level that the SQL standard assigns to the aggregate.
2200+
-- It's not entirely clear what we could do instead that doesn't risk
2201+
-- breaking more things than it fixes.
22012202
select f1, (with cte1(x,y) as (select 1,2)
22022203
select count((select i4.f1 from cte1))) as ss
22032204
from int4_tbl i4;
2204-
QUERY PLAN
2205-
-----------------------------------
2206-
Seq Scan on public.int4_tbl i4
2207-
Output: i4.f1, (SubPlan 2)
2208-
SubPlan 2
2205+
ERROR: outer-level aggregate cannot use a nested CTE
2206+
LINE 2: select count((select i4.f1 from cte1))) as ss
2207+
^
2208+
DETAIL: CTE "cte1" is below the aggregate's semantic level.
2209+
--
2210+
-- test for bug #19106: interaction of WITH with aggregates
2211+
--
2212+
-- the initial fix for #19055 was too aggressive and broke this case
2213+
explain (verbose, costs off)
2214+
with a as ( select id from (values (1), (2)) as v(id) ),
2215+
b as ( select max((select sum(id) from a)) as agg )
2216+
select agg from b;
2217+
QUERY PLAN
2218+
--------------------------------------------
2219+
Aggregate
2220+
Output: max($0)
2221+
InitPlan 1 (returns $0)
22092222
-> Aggregate
2210-
Output: count($1)
2211-
InitPlan 1 (returns $1)
2212-
-> Result
2213-
Output: i4.f1
2214-
-> Result
2215-
(9 rows)
2216-
2217-
select f1, (with cte1(x,y) as (select 1,2)
2218-
select count((select i4.f1 from cte1))) as ss
2219-
from int4_tbl i4;
2220-
f1 | ss
2221-
-------------+----
2222-
0 | 1
2223-
123456 | 1
2224-
-123456 | 1
2225-
2147483647 | 1
2226-
-2147483647 | 1
2227-
(5 rows)
2223+
Output: sum("*VALUES*".column1)
2224+
-> Values Scan on "*VALUES*"
2225+
Output: "*VALUES*".column1
2226+
-> Result
2227+
(8 rows)
2228+
2229+
with a as ( select id from (values (1), (2)) as v(id) ),
2230+
b as ( select max((select sum(id) from a)) as agg )
2231+
select agg from b;
2232+
agg
2233+
-----
2234+
3
2235+
(1 row)
22282236

22292237
--
22302238
-- test for nested-recursive-WITH bug

src/test/regress/sql/with.sql

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,16 +1059,26 @@ from int4_tbl;
10591059
--
10601060
-- test for bug #19055: interaction of WITH with aggregates
10611061
--
1062-
-- The reference to cte1 must determine the aggregate's level,
1063-
-- even though it contains no Vars referencing cte1
1064-
explain (verbose, costs off)
1062+
-- For now, we just throw an error if there's a use of a CTE below the
1063+
-- semantic level that the SQL standard assigns to the aggregate.
1064+
-- It's not entirely clear what we could do instead that doesn't risk
1065+
-- breaking more things than it fixes.
10651066
select f1, (with cte1(x,y) as (select 1,2)
10661067
select count((select i4.f1 from cte1))) as ss
10671068
from int4_tbl i4;
10681069

1069-
select f1, (with cte1(x,y) as (select 1,2)
1070-
select count((select i4.f1 from cte1))) as ss
1071-
from int4_tbl i4;
1070+
--
1071+
-- test for bug #19106: interaction of WITH with aggregates
1072+
--
1073+
-- the initial fix for #19055 was too aggressive and broke this case
1074+
explain (verbose, costs off)
1075+
with a as ( select id from (values (1), (2)) as v(id) ),
1076+
b as ( select max((select sum(id) from a)) as agg )
1077+
select agg from b;
1078+
1079+
with a as ( select id from (values (1), (2)) as v(id) ),
1080+
b as ( select max((select sum(id) from a)) as agg )
1081+
select agg from b;
10721082

10731083
--
10741084
-- test for nested-recursive-WITH bug

0 commit comments

Comments
 (0)