Skip to content

Commit f00484c

Browse files
author
Richard Guo
committed
Fix distinctness check for queries with grouping sets
query_is_distinct_for() is intended to determine whether a query never returns duplicates of the specified columns. For queries using grouping sets, if there are no grouping expressions, the query may contain one or more empty grouping sets. The goal is to detect whether there is exactly one empty grouping set, in which case the query would return a single row and thus be distinct. The previous logic in query_is_distinct_for() was incomplete because the check was insufficiently thorough and could return false when it could have returned true. It failed to consider cases where the DISTINCT clause is used on the GROUP BY, in which case duplicate empty grouping sets are removed, leaving only one. It also did not correctly handle all possible structures of GroupingSet nodes that represent a single empty grouping set. To fix, add a check for the groupDistinct flag, and expand the query's groupingSets tree into a flat list, then verify that the expanded list contains only one element. No backpatch as this could result in plan changes. Author: Richard Guo <guofenglinux@gmail.com> Reviewed-by: David Rowley <dgrowleyml@gmail.com> Discussion: https://postgr.es/m/CAMbWs480Z04NtP8-O55uROq2Zego309+h3hhaZhz6ztmgWLEBw@mail.gmail.com
1 parent c925ad3 commit f00484c

File tree

3 files changed

+127
-7
lines changed

3 files changed

+127
-7
lines changed

src/backend/optimizer/plan/analyzejoins.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "optimizer/placeholder.h"
3232
#include "optimizer/planmain.h"
3333
#include "optimizer/restrictinfo.h"
34+
#include "parser/parse_agg.h"
3435
#include "rewrite/rewriteManip.h"
3536
#include "utils/lsyscache.h"
3637

@@ -1175,6 +1176,8 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
11751176
}
11761177
else if (query->groupingSets)
11771178
{
1179+
List *gsets;
1180+
11781181
/*
11791182
* If we have grouping sets with expressions, we probably don't have
11801183
* uniqueness and analysis would be hard. Punt.
@@ -1184,15 +1187,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
11841187

11851188
/*
11861189
* If we have no groupClause (therefore no grouping expressions), we
1187-
* might have one or many empty grouping sets. If there's just one,
1188-
* then we're returning only one row and are certainly unique. But
1189-
* otherwise, we know we're certainly not unique.
1190+
* might have one or many empty grouping sets. If there's just one,
1191+
* or if the DISTINCT clause is used on the GROUP BY, then we're
1192+
* returning only one row and are certainly unique. But otherwise, we
1193+
* know we're certainly not unique.
11901194
*/
1191-
if (list_length(query->groupingSets) == 1 &&
1192-
((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
1195+
if (query->groupDistinct)
11931196
return true;
1194-
else
1195-
return false;
1197+
1198+
gsets = expand_grouping_sets(query->groupingSets, false, -1);
1199+
1200+
return (list_length(gsets) == 1);
11961201
}
11971202
else
11981203
{

src/test/regress/expected/join.out

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6161,6 +6161,40 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
61616161
Seq Scan on d
61626162
(1 row)
61636163

6164+
-- check that join removal works for a left join when joining a subquery
6165+
-- that is guaranteed to be unique by GROUPING SETS
6166+
explain (costs off)
6167+
select d.* from d left join (select 1 as x from b group by ()) s
6168+
on d.a = s.x;
6169+
QUERY PLAN
6170+
---------------
6171+
Seq Scan on d
6172+
(1 row)
6173+
6174+
explain (costs off)
6175+
select d.* from d left join (select 1 as x from b group by grouping sets(())) s
6176+
on d.a = s.x;
6177+
QUERY PLAN
6178+
---------------
6179+
Seq Scan on d
6180+
(1 row)
6181+
6182+
explain (costs off)
6183+
select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s
6184+
on d.a = s.x;
6185+
QUERY PLAN
6186+
---------------
6187+
Seq Scan on d
6188+
(1 row)
6189+
6190+
explain (costs off)
6191+
select d.* from d left join (select 1 as x from b group by distinct grouping sets((), ())) s
6192+
on d.a = s.x;
6193+
QUERY PLAN
6194+
---------------
6195+
Seq Scan on d
6196+
(1 row)
6197+
61646198
-- similarly, but keying off a DISTINCT clause
61656199
explain (costs off)
61666200
select d.* from d left join (select distinct * from b) s
@@ -6189,6 +6223,55 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
61896223
-> Seq Scan on d
61906224
(8 rows)
61916225

6226+
-- join removal is not possible when the GROUP BY contains non-empty grouping
6227+
-- sets or multiple empty grouping sets
6228+
explain (costs off)
6229+
select d.* from d left join (select 1 as x from b group by rollup(x)) s
6230+
on d.a = s.x;
6231+
QUERY PLAN
6232+
---------------------------------
6233+
Hash Left Join
6234+
Hash Cond: (d.a = (1))
6235+
-> Seq Scan on d
6236+
-> Hash
6237+
-> MixedAggregate
6238+
Hash Key: 1
6239+
Group Key: ()
6240+
-> Seq Scan on b
6241+
(8 rows)
6242+
6243+
explain (costs off)
6244+
select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s
6245+
on d.a = s.x;
6246+
QUERY PLAN
6247+
-----------------------------------------
6248+
Hash Left Join
6249+
Hash Cond: (d.a = (1))
6250+
-> Seq Scan on d
6251+
-> Hash
6252+
-> Append
6253+
-> Result
6254+
Replaces: Aggregate
6255+
-> Result
6256+
Replaces: Aggregate
6257+
(9 rows)
6258+
6259+
explain (costs off)
6260+
select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s
6261+
on d.a = s.x;
6262+
QUERY PLAN
6263+
-----------------------------------------
6264+
Hash Left Join
6265+
Hash Cond: (d.a = (1))
6266+
-> Seq Scan on d
6267+
-> Hash
6268+
-> Append
6269+
-> Result
6270+
Replaces: Aggregate
6271+
-> Result
6272+
Replaces: Aggregate
6273+
(9 rows)
6274+
61926275
-- similarly, but keying off a DISTINCT clause
61936276
explain (costs off)
61946277
select d.* from d left join (select distinct * from b) s

src/test/regress/sql/join.sql

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,6 +2232,24 @@ explain (costs off)
22322232
select d.* from d left join (select * from b group by b.id, b.c_id) s
22332233
on d.a = s.id and d.b = s.c_id;
22342234

2235+
-- check that join removal works for a left join when joining a subquery
2236+
-- that is guaranteed to be unique by GROUPING SETS
2237+
explain (costs off)
2238+
select d.* from d left join (select 1 as x from b group by ()) s
2239+
on d.a = s.x;
2240+
2241+
explain (costs off)
2242+
select d.* from d left join (select 1 as x from b group by grouping sets(())) s
2243+
on d.a = s.x;
2244+
2245+
explain (costs off)
2246+
select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s
2247+
on d.a = s.x;
2248+
2249+
explain (costs off)
2250+
select d.* from d left join (select 1 as x from b group by distinct grouping sets((), ())) s
2251+
on d.a = s.x;
2252+
22352253
-- similarly, but keying off a DISTINCT clause
22362254
explain (costs off)
22372255
select d.* from d left join (select distinct * from b) s
@@ -2245,6 +2263,20 @@ explain (costs off)
22452263
select d.* from d left join (select * from b group by b.id, b.c_id) s
22462264
on d.a = s.id;
22472265

2266+
-- join removal is not possible when the GROUP BY contains non-empty grouping
2267+
-- sets or multiple empty grouping sets
2268+
explain (costs off)
2269+
select d.* from d left join (select 1 as x from b group by rollup(x)) s
2270+
on d.a = s.x;
2271+
2272+
explain (costs off)
2273+
select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s
2274+
on d.a = s.x;
2275+
2276+
explain (costs off)
2277+
select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s
2278+
on d.a = s.x;
2279+
22482280
-- similarly, but keying off a DISTINCT clause
22492281
explain (costs off)
22502282
select d.* from d left join (select distinct * from b) s

0 commit comments

Comments
 (0)