Implement GROUP BY DISTINCT

author Tomas Vondra <tomas.vondra@postgresql.org>

Thu, 18 Mar 2021 16:45:38 +0000 (17:45 +0100)

committer Tomas Vondra <tomas.vondra@postgresql.org>

Thu, 18 Mar 2021 17:22:18 +0000 (18:22 +0100)
author Tomas Vondra <tomas.vondra@postgresql.org>
Thu, 18 Mar 2021 16:45:38 +0000 (17:45 +0100)
committer Tomas Vondra <tomas.vondra@postgresql.org>
Thu, 18 Mar 2021 17:22:18 +0000 (18:22 +0100)
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
 index bc0b3cc9fe13825e91cead6264562e31dcadf846..834b83b50982f9572d09ce7c90b6e7af0c4578b0 100644 (file)
 
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -1372,6 +1372,55 @@ GROUP BY GROUPING SETS (
  </programlisting>
     </para>
  
+   <para>
+    <indexterm zone="queries-grouping-sets">
+     <primary>ALL</primary>
+     <secondary>GROUP BY ALL</secondary>
+    </indexterm>
+    <indexterm zone="queries-grouping-sets">
+     <primary>DISTINCT</primary>
+     <secondary>GROUP BY DISTINCT</secondary>
+    </indexterm>
+    When specifying multiple grouping items together, the final set of grouping
+    sets might contain duplicates. For example:
+<programlisting>
+GROUP BY ROLLUP (a, b), ROLLUP (a, c)
+</programlisting>
+    is equivalent to
+<programlisting>
+GROUP BY GROUPING SETS (
+    (a, b, c),
+    (a, b),
+    (a, b),
+    (a, c),
+    (a),
+    (a),
+    (a, c),
+    (a),
+    ()
+)
+</programlisting>
+    If these duplicates are undesirable, they can be removed using the
+    <literal>DISTINCT</literal> clause directly on the <literal>GROUP BY</literal>.
+    Therefore:
+<programlisting>
+GROUP BY <emphasis>DISTINCT</emphasis> ROLLUP (a, b), ROLLUP (a, c)
+</programlisting>
+    is equivalent to
+<programlisting>
+GROUP BY GROUPING SETS (
+    (a, b, c),
+    (a, b),
+    (a, c),
+    (a),
+    ()
+)
+</programlisting>
+    This is not the same as using <literal>SELECT DISTINCT</literal> because the output
+    rows may still contain duplicates.  If any of the ungrouped columns contains NULL,
+    it will be indistinguishable from the NULL used when that same column is grouped.
+   </para>
+
    <note>
     <para>
      The construct <literal>(a, b)</literal> is normally recognized in expressions as
@@ -1560,8 +1609,13 @@ SELECT a "from", b + c AS sum FROM ...
    <sect2 id="queries-distinct">
     <title><literal>DISTINCT</literal></title>
  
+   <indexterm zone="queries-distinct">
+    <primary>ALL</primary>
+    <secondary>SELECT ALL</secondary>
+   </indexterm>
     <indexterm zone="queries-distinct">
      <primary>DISTINCT</primary>
+    <secondary>SELECT DISTINCT</secondary>
     </indexterm>
  
     <indexterm zone="queries-distinct">
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
 index ab9110559948184c063895ee27953d63e94244f3..9c5cf50ef0c4e62a4b569c623760cabb3512577c 100644 (file)
 
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
      [ * | <replaceable class="parameter">expression</replaceable> [ [ AS ] <replaceable class="parameter">output_name</replaceable> ] [, ...] ]
      [ FROM <replaceable class="parameter">from_item</replaceable> [, ...] ]
      [ WHERE <replaceable class="parameter">condition</replaceable> ]
-    [ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
+    [ GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
      [ HAVING <replaceable class="parameter">condition</replaceable> ]
      [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ]
      [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] <replaceable class="parameter">select</replaceable> ]
@@ -778,7 +778,7 @@ WHERE <replaceable class="parameter">condition</replaceable>
     <para>
      The optional <literal>GROUP BY</literal> clause has the general form
  <synopsis>
-GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
+GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...]
  </synopsis>
     </para>
  
@@ -802,7 +802,10 @@ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
      independent <replaceable>grouping sets</replaceable>.  The effect of this is
      equivalent to constructing a <literal>UNION ALL</literal> between
      subqueries with the individual grouping sets as their
-    <literal>GROUP BY</literal> clauses.  For further details on the handling
+    <literal>GROUP BY</literal> clauses.  The optional <literal>DISTINCT</literal>
+    clause removes duplicate sets before processing; it does <emphasis>not</emphasis>
+    transform the <literal>UNION ALL</literal> into a <literal>UNION DISTINCT</literal>.
+    For further details on the handling
      of grouping sets see <xref linkend="queries-grouping-sets"/>.
     </para>
  
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
 index 32eed988ab0763f9b185f6f59a50c144351cef7d..b7165404cdb625770cf9b053b1c05222bd18fb3f 100644 (file)
 
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -482,7 +482,7 @@ T351    Bracketed SQL comments (/*...*/ comments)           YES
  T431   Extended grouping capabilities          YES 
  T432   Nested and concatenated GROUPING SETS           YES 
  T433   Multiargument GROUPING function         YES 
-T434   GROUP BY DISTINCT           NO  
+T434   GROUP BY DISTINCT           YES 
  T441   ABS and MOD functions           YES 
  T461   Symmetric BETWEEN predicate         YES 
  T471   Result sets return value            NO  
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
 index da91cbd2b1e582af5a521d6d0ad435ad8de4c84b..bda379ba91de4b289241fc2bae23c404425bd736 100644 (file)
 
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3135,6 +3135,7 @@ _copyQuery(const Query *from)
     COPY_NODE_FIELD(onConflict);
     COPY_NODE_FIELD(returningList);
     COPY_NODE_FIELD(groupClause);
+   COPY_SCALAR_FIELD(groupDistinct);
     COPY_NODE_FIELD(groupingSets);
     COPY_NODE_FIELD(havingQual);
     COPY_NODE_FIELD(windowClause);
@@ -3221,6 +3222,7 @@ _copySelectStmt(const SelectStmt *from)
     COPY_NODE_FIELD(fromClause);
     COPY_NODE_FIELD(whereClause);
     COPY_NODE_FIELD(groupClause);
+   COPY_SCALAR_FIELD(groupDistinct);
     COPY_NODE_FIELD(havingClause);
     COPY_NODE_FIELD(windowClause);
     COPY_NODE_FIELD(valuesLists);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
 index c2d73626fcc228e7d154f7ce6204582106843949..bc5e9e52fe4267f6965d00f9ec679da1b2698337 100644 (file)
 
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -977,6 +977,7 @@ _equalQuery(const Query *a, const Query *b)
     COMPARE_NODE_FIELD(onConflict);
     COMPARE_NODE_FIELD(returningList);
     COMPARE_NODE_FIELD(groupClause);
+   COMPARE_SCALAR_FIELD(groupDistinct);
     COMPARE_NODE_FIELD(groupingSets);
     COMPARE_NODE_FIELD(havingQual);
     COMPARE_NODE_FIELD(windowClause);
@@ -1053,6 +1054,7 @@ _equalSelectStmt(const SelectStmt *a, const SelectStmt *b)
     COMPARE_NODE_FIELD(fromClause);
     COMPARE_NODE_FIELD(whereClause);
     COMPARE_NODE_FIELD(groupClause);
+   COMPARE_SCALAR_FIELD(groupDistinct);
     COMPARE_NODE_FIELD(havingClause);
     COMPARE_NODE_FIELD(windowClause);
     COMPARE_NODE_FIELD(valuesLists);
diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c
 index dbf6b30233aa9722863f5a7d1abe909bfd89146e..94fb236dafe6322dc1b636d4f4208572ea2958bc 100644 (file)
 
--- a/src/backend/nodes/list.c
+++ b/src/backend/nodes/list.c
@@ -1506,6 +1506,22 @@ list_sort(List *list, list_sort_comparator cmp)
         qsort(list->elements, len, sizeof(ListCell), (qsort_comparator) cmp);
  }
  
+/*
+ * list_sort comparator for sorting a list into ascending int order.
+ */
+int
+list_int_cmp(const ListCell *p1, const ListCell *p2)
+{
+   int         v1 = lfirst_int(p1);
+   int         v2 = lfirst_int(p2);
+
+   if (v1 < v2)
+       return -1;
+   if (v1 > v2)
+       return 1;
+   return 0;
+}
+
  /*
   * list_sort comparator for sorting a list into ascending OID order.
   */
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
 index 6493a03ff80c599f435dea6971e4e5b0088ad074..5054490c58f18df008ff7c36c7bb86325f488339 100644 (file)
 
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2771,6 +2771,7 @@ _outSelectStmt(StringInfo str, const SelectStmt *node)
     WRITE_NODE_FIELD(fromClause);
     WRITE_NODE_FIELD(whereClause);
     WRITE_NODE_FIELD(groupClause);
+   WRITE_BOOL_FIELD(groupDistinct);
     WRITE_NODE_FIELD(havingClause);
     WRITE_NODE_FIELD(windowClause);
     WRITE_NODE_FIELD(valuesLists);
@@ -2996,6 +2997,7 @@ _outQuery(StringInfo str, const Query *node)
     WRITE_NODE_FIELD(onConflict);
     WRITE_NODE_FIELD(returningList);
     WRITE_NODE_FIELD(groupClause);
+   WRITE_BOOL_FIELD(groupDistinct);
     WRITE_NODE_FIELD(groupingSets);
     WRITE_NODE_FIELD(havingQual);
     WRITE_NODE_FIELD(windowClause);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
 index c5e136e9c3c1805ae679bb4178f717939e302518..9b8f81c5231baa9e9b7466bc2f83b530c4d6e875 100644 (file)
 
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -271,6 +271,7 @@ _readQuery(void)
     READ_NODE_FIELD(onConflict);
     READ_NODE_FIELD(returningList);
     READ_NODE_FIELD(groupClause);
+   READ_BOOL_FIELD(groupDistinct);
     READ_NODE_FIELD(groupingSets);
     READ_NODE_FIELD(havingQual);
     READ_NODE_FIELD(windowClause);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
 index 424d25cbd514ce2f07b165c314c19cf8c934b5d4..28b40dd905b065a711f06b135ea67e71b299b107 100644 (file)
 
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2442,7 +2442,7 @@ preprocess_grouping_sets(PlannerInfo *root)
     ListCell   *lc_set;
     grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
  
-   parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+   parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
  
     gd->any_hashable = false;
     gd->unhashable_refs = NULL;
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
 index 0f3a70c49a871408a8c3ef8b4df4a0dacef6ecca..7149724953d1e5d86aee110adb52926bdaae0b5b 100644 (file)
 
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1264,6 +1264,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
                                             qry->sortClause,
                                             EXPR_KIND_GROUP_BY,
                                             false /* allow SQL92 rules */ );
+   qry->groupDistinct = stmt->groupDistinct;
  
     if (stmt->distinctClause == NIL)
     {
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
 index 652be0b96da9519035a8e1b9da07257dc1272a77..fd07e7107d8c8a73ea594b05dbd7c52da140e83c 100644 (file)
 
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -134,6 +134,13 @@ typedef struct SelectLimit
     LimitOption limitOption;
  } SelectLimit;
  
+/* Private struct for the result of group_clause production */
+typedef struct GroupClause
+{
+   bool    distinct;
+   List   *list;
+} GroupClause;
+
  /* ConstraintAttributeSpec yields an integer bitmask of these flags: */
  #define CAS_NOT_DEFERRABLE         0x01
  #define CAS_DEFERRABLE             0x02
@@ -250,6 +257,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
     PartitionBoundSpec  *partboundspec;
     RoleSpec            *rolespec;
     struct SelectLimit  *selectlimit;
+   SetQuantifier    setquantifier;
+   struct GroupClause  *groupclause;
  }
  
  %type <node>   stmt schema_stmt
@@ -405,7 +414,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                 target_list opt_target_list insert_column_list set_target_list
                 set_clause_list set_clause
                 def_list operator_def_list indirection opt_indirection
-               reloption_list group_clause TriggerFuncArgs opclass_item_list opclass_drop_list
+               reloption_list TriggerFuncArgs opclass_item_list opclass_drop_list
                 opclass_purpose opt_opfamily transaction_mode_list_or_empty
                 OptTableFuncElementList TableFuncElementList opt_type_modifiers
                 prep_type_clause
@@ -418,6 +427,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                 vacuum_relation_list opt_vacuum_relation_list
                 drop_option_list
  
+%type <groupclause> group_clause
  %type <list>   group_by_list
  %type <node>   group_by_item empty_grouping_set rollup_clause cube_clause
  %type <node>   grouping_sets_clause
@@ -443,7 +453,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
  %type <node>   for_locking_item
  %type <list>   for_locking_clause opt_for_locking_clause for_locking_items
  %type <list>   locked_rels_list
-%type <boolean>    all_or_distinct
+%type <setquantifier> set_quantifier
  
  %type <node>   join_qual
  %type <jtype>  join_type
@@ -11294,7 +11304,8 @@ simple_select:
                     n->intoClause = $4;
                     n->fromClause = $5;
                     n->whereClause = $6;
-                   n->groupClause = $7;
+                   n->groupClause = ($7)->list;
+                   n->groupDistinct = ($7)->distinct;
                     n->havingClause = $8;
                     n->windowClause = $9;
                     $$ = (Node *)n;
@@ -11309,7 +11320,8 @@ simple_select:
                     n->intoClause = $4;
                     n->fromClause = $5;
                     n->whereClause = $6;
-                   n->groupClause = $7;
+                   n->groupClause = ($7)->list;
+                   n->groupDistinct = ($7)->distinct;
                     n->havingClause = $8;
                     n->windowClause = $9;
                     $$ = (Node *)n;
@@ -11334,17 +11346,17 @@ simple_select:
                     n->fromClause = list_make1($2);
                     $$ = (Node *)n;
                 }
-           | select_clause UNION all_or_distinct select_clause
+           | select_clause UNION set_quantifier select_clause
                 {
-                   $$ = makeSetOp(SETOP_UNION, $3, $1, $4);
+                   $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
                 }
-           | select_clause INTERSECT all_or_distinct select_clause
+           | select_clause INTERSECT set_quantifier select_clause
                 {
-                   $$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4);
+                   $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
                 }
-           | select_clause EXCEPT all_or_distinct select_clause
+           | select_clause EXCEPT set_quantifier select_clause
                 {
-                   $$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4);
+                   $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
                 }
         ;
  
@@ -11542,10 +11554,10 @@ opt_table:    TABLE
             | /*EMPTY*/
         ;
  
-all_or_distinct:
-           ALL                                     { $$ = true; }
-           | DISTINCT                              { $$ = false; }
-           | /*EMPTY*/                             { $$ = false; }
+set_quantifier:
+           ALL                                     { $$ = SET_QUANTIFIER_ALL; }
+           | DISTINCT                              { $$ = SET_QUANTIFIER_DISTINCT; }
+           | /*EMPTY*/                             { $$ = SET_QUANTIFIER_DEFAULT; }
         ;
  
  /* We use (NIL) as a placeholder to indicate that all target expressions
@@ -11771,8 +11783,20 @@ first_or_next: FIRST_P                             { $$ = 0; }
   * GroupingSet node of some type.
   */
  group_clause:
-           GROUP_P BY group_by_list                { $$ = $3; }
-           | /*EMPTY*/                             { $$ = NIL; }
+           GROUP_P BY set_quantifier group_by_list
+               {
+                   GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+                   n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+                   n->list = $4;
+                   $$ = n;
+               }
+           | /*EMPTY*/
+               {
+                   GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+                   n->distinct = false;
+                   n->list = NIL;
+                   $$ = n;
+               }
         ;
  
  group_by_list:
@@ -15145,7 +15169,8 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
                     n->targetList = $2;
                     n->fromClause = $3;
                     n->whereClause = $4;
-                   n->groupClause = $5;
+                   n->groupClause = ($5)->list;
+                   n->groupDistinct = ($5)->distinct;
                     n->havingClause = $6;
                     n->windowClause = $7;
                     n->sortClause = $8;
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
 index fd08b9eeff0991172c863c3b64e43bcb69b9d69c..899327aaf4ec6d69a92ab704742a392f85b4bd35 100644 (file)
 
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1071,7 +1071,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
          * The limit of 4096 is arbitrary and exists simply to avoid resource
          * issues from pathological constructs.
          */
-       List       *gsets = expand_grouping_sets(qry->groupingSets, 4096);
+       List       *gsets = expand_grouping_sets(qry->groupingSets, qry->groupDistinct, 4096);
  
         if (!gsets)
             ereport(ERROR,
@@ -1735,6 +1735,33 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
     return (la > lb) ? 1 : (la == lb) ? 0 : -1;
  }
  
+/* list_sort comparator to sort sub-lists by length and contents */
+static int
+cmp_list_len_contents_asc(const ListCell *a, const ListCell *b)
+{
+   int     res = cmp_list_len_asc(a, b);
+
+   if (res == 0)
+   {
+       List           *la = (List *) lfirst(a);
+       List           *lb = (List *) lfirst(b);
+       ListCell       *lca;
+       ListCell       *lcb;
+
+       forboth(lca, la, lcb, lb)
+       {
+           int     va = intVal(lca);
+           int     vb = intVal(lcb);
+           if (va > vb)
+               return 1;
+           if (va < vb)
+               return -1;
+       }
+   }
+
+   return res;
+}
+
  /*
   * Expand a groupingSets clause to a flat list of grouping sets.
   * The returned list is sorted by length, shortest sets first.
@@ -1743,7 +1770,7 @@ cmp_list_len_asc(const ListCell *a, const ListCell *b)
   * some consistency checks.
   */
  List *
-expand_grouping_sets(List *groupingSets, int limit)
+expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit)
  {
     List       *expanded_groups = NIL;
     List       *result = NIL;
@@ -1801,8 +1828,31 @@ expand_grouping_sets(List *groupingSets, int limit)
         result = new_result;
     }
  
-   /* Now sort the lists by length */
-   list_sort(result, cmp_list_len_asc);
+   /* Now sort the lists by length and deduplicate if necessary */
+   if (!groupDistinct || list_length(result) < 2)
+       list_sort(result, cmp_list_len_asc);
+   else
+   {
+       ListCell   *cell;
+       List       *prev;
+
+       /* Sort each groupset individually */
+       foreach(cell, result)
+           list_sort(lfirst(cell), list_int_cmp);
+
+       /* Now sort the list of groupsets by length and contents */
+       list_sort(result, cmp_list_len_contents_asc);
+
+       /* Finally, remove duplicates */
+       prev = list_nth_node(List, result, 0);
+       for_each_from(cell, result, 1)
+       {
+           if (equal(lfirst(cell), prev))
+               foreach_delete_current(result, cell);
+           else
+               prev = lfirst(cell);
+       }
+   }
  
     return result;
  }
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
 index 879288c1394489dc08f8f786ff21b0d989fd279b..f0de2a25c96c2bf2692287515b05efc667c2705b 100644 (file)
 
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -5517,6 +5517,8 @@ get_basic_select_query(Query *query, deparse_context *context,
  
         appendContextKeyword(context, " GROUP BY ",
                              -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+       if (query->groupDistinct)
+           appendStringInfoString(buf, "DISTINCT ");
  
         save_exprkind = context->special_exprkind;
         context->special_exprkind = EXPR_KIND_GROUP_BY;
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
 index 236832a2ca779cd7089c69a061442d266fd92dc3..3a81d4f2670515f1174749bb525fe14c5d667076 100644 (file)
 
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -62,6 +62,14 @@ typedef enum SortByNulls
     SORTBY_NULLS_LAST
  } SortByNulls;
  
+/* Options for [ ALL | DISTINCT ] */
+typedef enum SetQuantifier
+{
+   SET_QUANTIFIER_DEFAULT,
+   SET_QUANTIFIER_ALL,
+   SET_QUANTIFIER_DISTINCT
+} SetQuantifier;
+
  /*
   * Grantable rights are encoded so that we can OR them together in a bitmask.
   * The present representation of AclItem limits us to 16 distinct rights,
@@ -146,6 +154,7 @@ typedef struct Query
     List       *returningList;  /* return-values list (of TargetEntry) */
  
     List       *groupClause;    /* a list of SortGroupClause's */
+   bool        groupDistinct;  /* is the group by clause distinct? */
  
     List       *groupingSets;   /* a list of GroupingSet's if present */
  
@@ -1629,6 +1638,7 @@ typedef struct SelectStmt
     List       *fromClause;     /* the FROM clause */
     Node       *whereClause;    /* WHERE qualification */
     List       *groupClause;    /* GROUP BY clauses */
+   bool        groupDistinct;  /* Is this GROUP BY DISTINCT? */
     Node       *havingClause;   /* HAVING conditional-expression */
     List       *windowClause;   /* WINDOW window_name AS (...), ... */
  
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
 index 404e03f132d6b1185cce257f0f5546764ed36550..30f98c4595ffcf0066c76f634d5d041765779916 100644 (file)
 
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -604,6 +604,7 @@ extern pg_nodiscard List *list_copy_deep(const List *oldlist);
  typedef int (*list_sort_comparator) (const ListCell *a, const ListCell *b);
  extern void list_sort(List *list, list_sort_comparator cmp);
  
+extern int list_int_cmp(const ListCell *p1, const ListCell *p2);
  extern int list_oid_cmp(const ListCell *p1, const ListCell *p2);
  
  #endif                         /* PG_LIST_H */
diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h
 index 0a2546c3ea977fc8d2c4837a1ce47755a2d9f2ca..4dea01752af3a5283039fa1cb141e50c81fccbe8 100644 (file)
 
--- a/src/include/parser/parse_agg.h
+++ b/src/include/parser/parse_agg.h
@@ -26,7 +26,7 @@ extern void transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
  
  extern void parseCheckAggregates(ParseState *pstate, Query *qry);
  
-extern List *expand_grouping_sets(List *groupingSets, int limit);
+extern List *expand_grouping_sets(List *groupingSets, bool groupDistinct, int limit);
  
  extern int get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes);
  
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
 index 7c844c6e09e890894388ced26a728edd32648760..4c467c1b15e69d33fa34d4f99559ba97bd9ae311 100644 (file)
 
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -1929,4 +1929,115 @@ set work_mem to default;
  
  drop table gs_group_1;
  drop table gs_hash_1;
+-- GROUP BY DISTINCT
+-- "normal" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by all rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   | 3
+ 1 |   |  
+ 1 |   |  
+ 1 |   |  
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   | 9
+ 7 |   |  
+ 7 |   |  
+ 7 |   |  
+   |   |  
+(25 rows)
+
+-- ...which is also the default
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   | 3
+ 1 |   |  
+ 1 |   |  
+ 1 |   |  
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   | 9
+ 7 |   |  
+ 7 |   |  
+ 7 |   |  
+   |   |  
+(25 rows)
+
+-- "group by distinct" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by distinct rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   |  
+ 4 |   | 6
+ 4 |   | 6
+ 4 |   |  
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   |  
+   |   |  
+(13 rows)
+
+-- ...which is not the same as "select distinct"
+select distinct a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 |  
+ 1 |   | 3
+ 1 |   |  
+ 4 |   | 6
+ 4 |   |  
+ 7 | 8 | 9
+ 7 | 8 |  
+ 7 |   | 9
+ 7 |   |  
+   |   |  
+(11 rows)
+
  -- end
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
 index 18ae803e9debdf87db0ebf42859a15662c0b4a07..394494470449a28fc3f2e475ea9ff6817ed18902 100644 (file)
 
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -529,4 +529,30 @@ set work_mem to default;
  drop table gs_group_1;
  drop table gs_hash_1;
  
+-- GROUP BY DISTINCT
+
+-- "normal" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by all rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- ...which is also the default
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- "group by distinct" behavior...
+select a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by distinct rollup(a, b), rollup(a, c)
+order by a, b, c;
+
+-- ...which is not the same as "select distinct"
+select distinct a, b, c
+from (values (1, 2, 3), (4, null, 6), (7, 8, 9)) as t (a, b, c)
+group by rollup(a, b), rollup(a, c)
+order by a, b, c;
+
  -- end
author	Tomas Vondra <tomas.vondra@postgresql.org>
	Thu, 18 Mar 2021 16:45:38 +0000 (17:45 +0100)
committer	Tomas Vondra <tomas.vondra@postgresql.org>
	Thu, 18 Mar 2021 17:22:18 +0000 (18:22 +0100)
doc/src/sgml/queries.sgml		patch \| blob \| blame \| history
doc/src/sgml/ref/select.sgml		patch \| blob \| blame \| history
src/backend/catalog/sql_features.txt		patch \| blob \| blame \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/list.c		patch \| blob \| blame \| history
src/backend/nodes/outfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/readfuncs.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/parser/analyze.c		patch \| blob \| blame \| history
src/backend/parser/gram.y		patch \| blob \| blame \| history
src/backend/parser/parse_agg.c		patch \| blob \| blame \| history
src/backend/utils/adt/ruleutils.c		patch \| blob \| blame \| history
src/include/nodes/parsenodes.h		patch \| blob \| blame \| history
src/include/nodes/pg_list.h		patch \| blob \| blame \| history
src/include/parser/parse_agg.h		patch \| blob \| blame \| history
src/test/regress/expected/groupingsets.out		patch \| blob \| blame \| history
src/test/regress/sql/groupingsets.sql		patch \| blob \| blame \| history