From 72b31ddb6e3806981b92e863019af53ef9fa949c Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 19 Dec 2023 12:21:02 +0800 Subject: [PATCH] [AQUMV] Support Group By in origin query. Support Clauses like: Group By column(s), GROUPING SETS, ROLLUP, CUBE in origin query if they could be computed from materialized views's target list. create incremental materialized view mv as select c1 as mc1, c2 as mc2, c3 as mc3 from t1 where c1 > 90; Origin query: select c1, c3, count(c2) from t1 where c1 > 90 group by c1, c3; Could be rewritten to: select mc1, mc3, count(mc2) from mv group by mc1, mc3; Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/optimizer/README.cbdb.aqumv | 2 +- src/backend/optimizer/plan/aqumv.c | 31 +- src/backend/optimizer/plan/planner.c | 2 +- src/include/optimizer/planmain.h | 5 + src/include/optimizer/planner.h | 2 - src/test/regress/expected/aqumv.out | 390 ++++++++++++++++++++++++ src/test/regress/sql/aqumv.sql | 57 ++++ 7 files changed, 476 insertions(+), 13 deletions(-) diff --git a/src/backend/optimizer/README.cbdb.aqumv b/src/backend/optimizer/README.cbdb.aqumv index 3b98267d653..792791e3690 100644 --- a/src/backend/optimizer/README.cbdb.aqumv +++ b/src/backend/optimizer/README.cbdb.aqumv @@ -225,7 +225,7 @@ Below are not supported now: Order by(for origin_query) Join Sublink - Group by + Group By/Grouping Sets/Rollup/Cube (on mv_query) Window Functions CTE Distinct On diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c index e248dab55d5..6279c09d2ef 100644 --- a/src/backend/optimizer/plan/aqumv.c +++ b/src/backend/optimizer/plan/aqumv.c @@ -40,7 +40,10 @@ #include "nodes/pathnodes.h" #include "nodes/pg_list.h" -RelOptInfo *answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel); +RelOptInfo *answer_query_using_materialized_views(PlannerInfo *root, + RelOptInfo *current_rel, + query_pathkeys_callback qp_callback, + void *qp_extra); typedef struct { @@ -84,7 +87,10 @@ static inline Var *copyVarFromTatgetList(List* tlist, int var_index); * This function modifies root(parse and etc.), current_rel in-place. */ RelOptInfo* -answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel) +answer_query_using_materialized_views(PlannerInfo *root, + RelOptInfo *current_rel, + query_pathkeys_callback qp_callback, + void *qp_extra) { Query *parse = root->parse; /* Query of origin SQL. */ Query *mvQuery; /* Query of view. */ @@ -109,12 +115,11 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel List *post_quals = NIL; aqumv_equivalent_transformation_context *context; + /* Group By without agg could be possible though IMMV doesn't support it yet. */ bool can_not_use_mv = (parse->commandType != CMD_SELECT) || (parse->rowMarks != NIL) || parse->hasWindowFuncs || parse->hasDistinctOn || - /* Group By without agg could be possible though IMMV doesn't support it yet. */ - (parse->groupClause != NIL) || (parse->havingQual != NULL) || parse->hasModifyingCTE || parse->sortClause || @@ -343,6 +348,8 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel * It's safe to set hasAggs here. */ mvQuery->hasAggs = parse->hasAggs; + mvQuery->groupClause = parse->groupClause; + mvQuery->groupingSets = parse->groupingSets; /* * AQUMV @@ -400,14 +407,10 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel mvQuery->jointree->quals = (Node *)post_quals; /* Could be NULL, but doesn'y matter for now. */ /* - * AQUMV * Build a plan of new SQL. * AQUMV is cost-based, let planner decide which is better. - * AQUMV_FIXME_MVP: - * no qp_callback function now. - * replcace one-by-one? */ - mv_final_rel = query_planner(subroot, NULL, NULL); + mv_final_rel = query_planner(subroot, qp_callback, qp_extra); /* AQUMV_FIXME_MVP * We don't use STD_FUZZ_FACTOR for cost comparisons like compare_path_costs_fuzzily here. @@ -419,6 +422,16 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel { root->parse = mvQuery; root->processed_tlist = subroot->processed_tlist; + /* + * Update pathkeys which may be changed by qp_callback. + * Set belows if corresponding feature is supported. + * sort_pathkeys + * distinct_pathkey + * window_pathkeys + */ + root->group_pathkeys = subroot->group_pathkeys; + root->query_pathkeys = subroot->query_pathkeys; + /* * AQUMV_FIXME_MVP * Use new query's ecs. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 98cbde9fb6d..a1b9cc79d6f 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1878,7 +1878,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ if (Gp_role == GP_ROLE_DISPATCH && enable_answer_query_using_materialized_views) - current_rel = answer_query_using_materialized_views(root, current_rel); + current_rel = answer_query_using_materialized_views(root, current_rel, standard_qp_callback, &qp_extra); /* * Convert the query's result tlist into PathTarget format. diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index ff17bf6e456..13a0c95266c 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -165,4 +165,9 @@ extern void cdb_extract_plan_dependencies(PlannerInfo *root, Plan *plan); extern void add_proc_oids_for_dump(Oid funcid); +extern RelOptInfo* answer_query_using_materialized_views(PlannerInfo *root, + RelOptInfo *current_rel, + query_pathkeys_callback qp_callback, + void *qp_extra); + #endif /* PLANMAIN_H */ diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 3d58720dfbe..29aac021f38 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -62,8 +62,6 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr); extern bool optimizer_init; -extern RelOptInfo* answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel); - extern void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); #endif /* PLANNER_H */ diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out index 37f99d7e925..1d7ba0d0c0e 100644 --- a/src/test/regress/expected/aqumv.out +++ b/src/test/regress/expected/aqumv.out @@ -985,6 +985,396 @@ select count(c2), count(*) from aqumv_t2 where c1 > 90; 10 | 11 (1 row) +abort; +-- +-- Test Group By clause of origin query. +-- GROUPING SETS +-- ROLLUP +-- CUBE +-- +begin; +create table aqumv_t3(c1 int, c2 int, c3 int) distributed by (c1); +insert into aqumv_t3 select i, i+1, i+2 from generate_series(1, 100) i; +insert into aqumv_t3 values (91, NULL, 95); +analyze aqumv_t3; +create incremental materialized view aqumv_mvt3_0 as + select c1 as mc1, c2 as mc2, c3 as mc3 + from aqumv_t3 where c1 > 90; +analyze aqumv_mvt3_0; +-- Group By +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c3, (count(c2)) + -> HashAggregate + Output: c1, c3, count(c2) + Group Key: aqumv_t3.c1, aqumv_t3.c3 + -> Seq Scan on public.aqumv_t3 + Output: c1, c2, c3 + Filter: (aqumv_t3.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; + c1 | c3 | count +-----+-----+------- + 94 | 96 | 1 + 93 | 95 | 1 + 99 | 101 | 1 + 97 | 99 | 1 + 92 | 94 | 1 + 98 | 100 | 1 + 95 | 97 | 1 + 91 | 93 | 1 + 91 | 95 | 0 + 100 | 102 | 1 + 96 | 98 | 1 +(11 rows) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc3, (count(mc2)) + -> HashAggregate + Output: mc1, mc3, count(mc2) + Group Key: aqumv_mvt3_0.mc1, aqumv_mvt3_0.mc3 + -> Seq Scan on public.aqumv_mvt3_0 + Output: mc1, mc2, mc3 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(9 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; + c1 | c3 | count +-----+-----+------- + 94 | 96 | 1 + 93 | 95 | 1 + 99 | 101 | 1 + 97 | 99 | 1 + 92 | 94 | 1 + 98 | 100 | 1 + 95 | 97 | 1 + 91 | 93 | 1 + 91 | 95 | 0 + 100 | 102 | 1 + 96 | 98 | 1 +(11 rows) + +-- GROUPING SETS +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate + Output: c1, c3, count(c2) + Hash Key: aqumv_t3.c1 + Hash Key: aqumv_t3.c3 + -> Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c3, c2 + -> Seq Scan on public.aqumv_t3 + Output: c1, c3, c2 + Filter: (aqumv_t3.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(11 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); + c1 | c3 | count +-----+-----+------- + 92 | | 1 + 93 | | 1 + 99 | | 1 + 94 | | 1 + 100 | | 1 + 97 | | 1 + 96 | | 1 + 98 | | 1 + 95 | | 1 + 91 | | 1 + | 101 | 1 + | 93 | 1 + | 99 | 1 + | 94 | 1 + | 100 | 1 + | 102 | 1 + | 96 | 1 + | 97 | 1 + | 98 | 1 + | 95 | 1 +(20 rows) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate + Output: mc1, mc3, count(mc2) + Hash Key: aqumv_mvt3_0.mc1 + Hash Key: aqumv_mvt3_0.mc3 + -> Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc3, mc2 + -> Seq Scan on public.aqumv_mvt3_0 + Output: mc1, mc3, mc2 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); + c1 | c3 | count +-----+-----+------- + 92 | | 1 + 93 | | 1 + 99 | | 1 + 94 | | 1 + 100 | | 1 + 96 | | 1 + 97 | | 1 + 98 | | 1 + 95 | | 1 + 91 | | 1 + | 93 | 1 + | 101 | 1 + | 99 | 1 + | 94 | 1 + | 100 | 1 + | 102 | 1 + | 97 | 1 + | 96 | 1 + | 98 | 1 + | 95 | 1 +(20 rows) + +-- ROLLUP +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate + Output: c1, c3, count(c2) + Group Key: aqumv_t3.c1, aqumv_t3.c3 + Group Key: aqumv_t3.c1 + Group Key: () + -> Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c3, c2 + Merge Key: c1, c3 + -> Sort + Output: c1, c3, c2 + Sort Key: aqumv_t3.c1, aqumv_t3.c3 + -> Seq Scan on public.aqumv_t3 + Output: c1, c3, c2 + Filter: (aqumv_t3.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(16 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); + c1 | c3 | count +-----+-----+------- + 91 | 93 | 1 + 91 | 95 | 0 + 91 | | 1 + 92 | 94 | 1 + 92 | | 1 + 93 | 95 | 1 + 93 | | 1 + 94 | 96 | 1 + 94 | | 1 + 95 | 97 | 1 + 95 | | 1 + 96 | 98 | 1 + 96 | | 1 + 97 | 99 | 1 + 97 | | 1 + 98 | 100 | 1 + 98 | | 1 + 99 | 101 | 1 + 99 | | 1 + 100 | 102 | 1 + 100 | | 1 + | | 10 +(22 rows) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); + QUERY PLAN +---------------------------------------------------------------------------------- + GroupAggregate + Output: mc1, mc3, count(mc2) + Group Key: aqumv_mvt3_0.mc1, aqumv_mvt3_0.mc3 + Group Key: aqumv_mvt3_0.mc1 + Group Key: () + -> Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc3, mc2 + Merge Key: mc1, mc3 + -> Sort + Output: mc1, mc3, mc2 + Sort Key: aqumv_mvt3_0.mc1, aqumv_mvt3_0.mc3 + -> Seq Scan on public.aqumv_mvt3_0 + Output: mc1, mc3, mc2 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(15 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); + c1 | c3 | count +-----+-----+------- + 91 | 93 | 1 + 91 | 95 | 0 + 91 | | 1 + 92 | 94 | 1 + 92 | | 1 + 93 | 95 | 1 + 93 | | 1 + 94 | 96 | 1 + 94 | | 1 + 95 | 97 | 1 + 95 | | 1 + 96 | 98 | 1 + 96 | | 1 + 97 | 99 | 1 + 97 | | 1 + 98 | 100 | 1 + 98 | | 1 + 99 | 101 | 1 + 99 | | 1 + 100 | 102 | 1 + 100 | | 1 + | | 10 +(22 rows) + +-- CUBE +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); + QUERY PLAN +----------------------------------------------------------------------------------- + MixedAggregate + Output: c1, c3, count(c2) + Hash Key: aqumv_t3.c3 + Group Key: aqumv_t3.c1, aqumv_t3.c3 + Group Key: aqumv_t3.c1 + Group Key: () + -> Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c3, c2 + Merge Key: c1, c3 + -> Sort + Output: c1, c3, c2 + Sort Key: aqumv_t3.c1, aqumv_t3.c3 + -> Seq Scan on public.aqumv_t3 + Output: c1, c3, c2 + Filter: (aqumv_t3.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(17 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); + c1 | c3 | count +-----+-----+------- + 91 | 93 | 1 + 91 | 95 | 0 + 91 | | 1 + 92 | 94 | 1 + 92 | | 1 + 93 | 95 | 1 + 93 | | 1 + 94 | 96 | 1 + 94 | | 1 + 95 | 97 | 1 + 95 | | 1 + 96 | 98 | 1 + 96 | | 1 + 97 | 99 | 1 + 97 | | 1 + 98 | 100 | 1 + 98 | | 1 + 99 | 101 | 1 + 99 | | 1 + 100 | 102 | 1 + 100 | | 1 + | | 10 + | 101 | 1 + | 93 | 1 + | 99 | 1 + | 94 | 1 + | 100 | 1 + | 102 | 1 + | 97 | 1 + | 96 | 1 + | 98 | 1 + | 95 | 1 +(32 rows) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); + QUERY PLAN +---------------------------------------------------------------------------------- + MixedAggregate + Output: mc1, mc3, count(mc2) + Hash Key: aqumv_mvt3_0.mc3 + Group Key: aqumv_mvt3_0.mc1, aqumv_mvt3_0.mc3 + Group Key: aqumv_mvt3_0.mc1 + Group Key: () + -> Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc3, mc2 + Merge Key: mc1, mc3 + -> Sort + Output: mc1, mc3, mc2 + Sort Key: aqumv_mvt3_0.mc1, aqumv_mvt3_0.mc3 + -> Seq Scan on public.aqumv_mvt3_0 + Output: mc1, mc3, mc2 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(16 rows) + +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); + c1 | c3 | count +-----+-----+------- + 91 | 93 | 1 + 91 | 95 | 0 + 91 | | 1 + 92 | 94 | 1 + 92 | | 1 + 93 | 95 | 1 + 93 | | 1 + 94 | 96 | 1 + 94 | | 1 + 95 | 97 | 1 + 95 | | 1 + 96 | 98 | 1 + 96 | | 1 + 97 | 99 | 1 + 97 | | 1 + 98 | 100 | 1 + 98 | | 1 + 99 | 101 | 1 + 99 | | 1 + 100 | 102 | 1 + 100 | | 1 + | | 10 + | 101 | 1 + | 93 | 1 + | 99 | 1 + | 94 | 1 + | 100 | 1 + | 102 | 1 + | 97 | 1 + | 96 | 1 + | 98 | 1 + | 95 | 1 +(32 rows) + abort; reset optimizer; reset enable_answer_query_using_materialized_views; diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql index 4d81dee5226..86a4de7ac9f 100644 --- a/src/test/regress/sql/aqumv.sql +++ b/src/test/regress/sql/aqumv.sql @@ -287,7 +287,64 @@ set local enable_answer_query_using_materialized_views = on; explain(costs off, verbose) select count(c2), count(*) from aqumv_t2 where c1 > 90; select count(c2), count(*) from aqumv_t2 where c1 > 90; +abort; + +-- +-- Test Group By clause of origin query. +-- GROUPING SETS +-- ROLLUP +-- CUBE +-- +begin; +create table aqumv_t3(c1 int, c2 int, c3 int) distributed by (c1); +insert into aqumv_t3 select i, i+1, i+2 from generate_series(1, 100) i; +insert into aqumv_t3 values (91, NULL, 95); +analyze aqumv_t3; + +create incremental materialized view aqumv_mvt3_0 as + select c1 as mc1, c2 as mc2, c3 as mc3 + from aqumv_t3 where c1 > 90; +analyze aqumv_mvt3_0; + +-- Group By +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by c1, c3; + +-- GROUPING SETS +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by grouping sets((c1), (c3)); +-- ROLLUP +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by rollup(c1, c3); + +-- CUBE +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); +select c1, c3, count(c2) from aqumv_t3 where c1 > 90 group by cube(c1, c3); abort; reset optimizer;