Skip to content

Commit 1c8c320

Browse files
committed
Don't allow CTEs to determine semantic levels of aggregates.
The fix for bug #19055 (commit b0cc0a7) allowed CTE references in sub-selects within aggregate functions to affect the semantic levels assigned to such aggregates. It turns out this broke some related cases, leading to assertion failures or strange planner errors such as "unexpected outer reference in CTE query". After experimenting with some alternative rules for assigning the semantic level in such cases, we've come to the conclusion that changing the level is more likely to break things than be helpful. Therefore, this patch undoes what b0cc0a7 changed, and instead installs logic to throw an error if there is any reference to a CTE that's below the semantic level that standard SQL rules would assign to the aggregate based on its contained Var and Aggref nodes. (The SQL standard disallows sub-selects within aggregate functions, so it can't reach the troublesome case and hence has no rule for what to do.) Perhaps someone will come along with a legitimate query that this logic rejects, and if so probably the example will help us craft a level-adjustment rule that works better than what b0cc0a7 did. I'm not holding my breath for that though, because the previous logic had been there for a very long time before bug #19055 without complaints, and that bug report sure looks to have originated from fuzzing not from real usage. Like b0cc0a7, back-patch to all supported branches, though sadly that no longer includes v13. Bug: #19106 Reported-by: Kamil Monicz <kamil@monicz.dev> Author: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/19106-9dd3668a0734cd72@postgresql.org Backpatch-through: 14
1 parent 9a991d4 commit 1c8c320

File tree

3 files changed

+88
-43
lines changed

3 files changed

+88
-43
lines changed

src/backend/parser/parse_agg.c

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ typedef struct
3636
ParseState *pstate;
3737
int min_varlevel;
3838
int min_agglevel;
39+
int min_ctelevel;
40+
RangeTblEntry *min_cte;
3941
int sublevels_up;
4042
} check_agg_arguments_context;
4143

@@ -55,7 +57,8 @@ typedef struct
5557
static int check_agg_arguments(ParseState *pstate,
5658
List *directargs,
5759
List *args,
58-
Expr *filter);
60+
Expr *filter,
61+
int agglocation);
5962
static bool check_agg_arguments_walker(Node *node,
6063
check_agg_arguments_context *context);
6164
static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
@@ -333,7 +336,8 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
333336
min_varlevel = check_agg_arguments(pstate,
334337
directargs,
335338
args,
336-
filter);
339+
filter,
340+
location);
337341

338342
*p_levelsup = min_varlevel;
339343

@@ -634,14 +638,17 @@ static int
634638
check_agg_arguments(ParseState *pstate,
635639
List *directargs,
636640
List *args,
637-
Expr *filter)
641+
Expr *filter,
642+
int agglocation)
638643
{
639644
int agglevel;
640645
check_agg_arguments_context context;
641646

642647
context.pstate = pstate;
643648
context.min_varlevel = -1; /* signifies nothing found yet */
644649
context.min_agglevel = -1;
650+
context.min_ctelevel = -1;
651+
context.min_cte = NULL;
645652
context.sublevels_up = 0;
646653

647654
(void) check_agg_arguments_walker((Node *) args, &context);
@@ -679,6 +686,20 @@ check_agg_arguments(ParseState *pstate,
679686
parser_errposition(pstate, aggloc)));
680687
}
681688

689+
/*
690+
* If there's a non-local CTE that's below the aggregate's semantic level,
691+
* complain. It's not quite clear what we should do to fix up such a case
692+
* (treating the CTE reference like a Var seems wrong), and it's also
693+
* unclear whether there is a real-world use for such cases.
694+
*/
695+
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
696+
ereport(ERROR,
697+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
698+
errmsg("outer-level aggregate cannot use a nested CTE"),
699+
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
700+
context.min_cte->eref->aliasname),
701+
parser_errposition(pstate, agglocation)));
702+
682703
/*
683704
* Now check for vars/aggs in the direct arguments, and throw error if
684705
* needed. Note that we allow a Var of the agg's semantic level, but not
@@ -692,6 +713,7 @@ check_agg_arguments(ParseState *pstate,
692713
{
693714
context.min_varlevel = -1;
694715
context.min_agglevel = -1;
716+
context.min_ctelevel = -1;
695717
(void) check_agg_arguments_walker((Node *) directargs, &context);
696718
if (context.min_varlevel >= 0 && context.min_varlevel < agglevel)
697719
ereport(ERROR,
@@ -707,6 +729,13 @@ check_agg_arguments(ParseState *pstate,
707729
parser_errposition(pstate,
708730
locate_agg_of_level((Node *) directargs,
709731
context.min_agglevel))));
732+
if (context.min_ctelevel >= 0 && context.min_ctelevel < agglevel)
733+
ereport(ERROR,
734+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
735+
errmsg("outer-level aggregate cannot use a nested CTE"),
736+
errdetail("CTE \"%s\" is below the aggregate's semantic level.",
737+
context.min_cte->eref->aliasname),
738+
parser_errposition(pstate, agglocation)));
710739
}
711740
return agglevel;
712741
}
@@ -787,11 +816,6 @@ check_agg_arguments_walker(Node *node,
787816

788817
if (IsA(node, RangeTblEntry))
789818
{
790-
/*
791-
* CTE references act similarly to Vars of the CTE's level. Without
792-
* this we might conclude that the Agg can be evaluated above the CTE,
793-
* leading to trouble.
794-
*/
795819
RangeTblEntry *rte = (RangeTblEntry *) node;
796820

797821
if (rte->rtekind == RTE_CTE)
@@ -803,9 +827,12 @@ check_agg_arguments_walker(Node *node,
803827
/* ignore local CTEs of subqueries */
804828
if (ctelevelsup >= 0)
805829
{
806-
if (context->min_varlevel < 0 ||
807-
context->min_varlevel > ctelevelsup)
808-
context->min_varlevel = ctelevelsup;
830+
if (context->min_ctelevel < 0 ||
831+
context->min_ctelevel > ctelevelsup)
832+
{
833+
context->min_ctelevel = ctelevelsup;
834+
context->min_cte = rte;
835+
}
809836
}
810837
}
811838
return false; /* allow range_table_walker to continue */

src/test/regress/expected/with.out

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2226,36 +2226,44 @@ from int4_tbl;
22262226
--
22272227
-- test for bug #19055: interaction of WITH with aggregates
22282228
--
2229-
-- The reference to cte1 must determine the aggregate's level,
2230-
-- even though it contains no Vars referencing cte1
2231-
explain (verbose, costs off)
2229+
-- For now, we just throw an error if there's a use of a CTE below the
2230+
-- semantic level that the SQL standard assigns to the aggregate.
2231+
-- It's not entirely clear what we could do instead that doesn't risk
2232+
-- breaking more things than it fixes.
22322233
select f1, (with cte1(x,y) as (select 1,2)
22332234
select count((select i4.f1 from cte1))) as ss
22342235
from int4_tbl i4;
2235-
QUERY PLAN
2236-
-----------------------------------
2237-
Seq Scan on public.int4_tbl i4
2238-
Output: i4.f1, (SubPlan 2)
2239-
SubPlan 2
2236+
ERROR: outer-level aggregate cannot use a nested CTE
2237+
LINE 2: select count((select i4.f1 from cte1))) as ss
2238+
^
2239+
DETAIL: CTE "cte1" is below the aggregate's semantic level.
2240+
--
2241+
-- test for bug #19106: interaction of WITH with aggregates
2242+
--
2243+
-- the initial fix for #19055 was too aggressive and broke this case
2244+
explain (verbose, costs off)
2245+
with a as ( select id from (values (1), (2)) as v(id) ),
2246+
b as ( select max((select sum(id) from a)) as agg )
2247+
select agg from b;
2248+
QUERY PLAN
2249+
--------------------------------------------
2250+
Aggregate
2251+
Output: max($0)
2252+
InitPlan 1 (returns $0)
22402253
-> Aggregate
2241-
Output: count($1)
2242-
InitPlan 1 (returns $1)
2243-
-> Result
2244-
Output: i4.f1
2245-
-> Result
2246-
(9 rows)
2247-
2248-
select f1, (with cte1(x,y) as (select 1,2)
2249-
select count((select i4.f1 from cte1))) as ss
2250-
from int4_tbl i4;
2251-
f1 | ss
2252-
-------------+----
2253-
0 | 1
2254-
123456 | 1
2255-
-123456 | 1
2256-
2147483647 | 1
2257-
-2147483647 | 1
2258-
(5 rows)
2254+
Output: sum("*VALUES*".column1)
2255+
-> Values Scan on "*VALUES*"
2256+
Output: "*VALUES*".column1
2257+
-> Result
2258+
(8 rows)
2259+
2260+
with a as ( select id from (values (1), (2)) as v(id) ),
2261+
b as ( select max((select sum(id) from a)) as agg )
2262+
select agg from b;
2263+
agg
2264+
-----
2265+
3
2266+
(1 row)
22592267

22602268
--
22612269
-- test for nested-recursive-WITH bug

src/test/regress/sql/with.sql

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,16 +1067,26 @@ from int4_tbl;
10671067
--
10681068
-- test for bug #19055: interaction of WITH with aggregates
10691069
--
1070-
-- The reference to cte1 must determine the aggregate's level,
1071-
-- even though it contains no Vars referencing cte1
1072-
explain (verbose, costs off)
1070+
-- For now, we just throw an error if there's a use of a CTE below the
1071+
-- semantic level that the SQL standard assigns to the aggregate.
1072+
-- It's not entirely clear what we could do instead that doesn't risk
1073+
-- breaking more things than it fixes.
10731074
select f1, (with cte1(x,y) as (select 1,2)
10741075
select count((select i4.f1 from cte1))) as ss
10751076
from int4_tbl i4;
10761077

1077-
select f1, (with cte1(x,y) as (select 1,2)
1078-
select count((select i4.f1 from cte1))) as ss
1079-
from int4_tbl i4;
1078+
--
1079+
-- test for bug #19106: interaction of WITH with aggregates
1080+
--
1081+
-- the initial fix for #19055 was too aggressive and broke this case
1082+
explain (verbose, costs off)
1083+
with a as ( select id from (values (1), (2)) as v(id) ),
1084+
b as ( select max((select sum(id) from a)) as agg )
1085+
select agg from b;
1086+
1087+
with a as ( select id from (values (1), (2)) as v(id) ),
1088+
b as ( select max((select sum(id) from a)) as agg )
1089+
select agg from b;
10801090

10811091
--
10821092
-- test for nested-recursive-WITH bug

0 commit comments

Comments
 (0)