From 4899debf66176fba3199f28d2147a8d113c0cbbc Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 28 Oct 2016 17:03:09 +0200 Subject: [PATCH 8/9] WIP: allow using multiple statistics in clauselist_selectivity --- src/backend/optimizer/path/clausesel.c | 31 +++++++----- src/test/regress/expected/mv_statistics.out | 78 +++++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/mv_statistics.sql | 60 ++++++++++++++++++++++ 5 files changed, 159 insertions(+), 13 deletions(-) create mode 100644 src/test/regress/expected/mv_statistics.out create mode 100644 src/test/regress/sql/mv_statistics.sql diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index da5c340..c449c96 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -228,15 +228,16 @@ clauselist_selectivity(PlannerInfo *root, (count_mv_attnums(clauses, relid, STATS_TYPE_MCV | STATS_TYPE_HIST) >= 2)) { + Bitmapset *mvattnums; + MVStatisticInfo *mvstat; + /* collect attributes from the compatible conditions */ - Bitmapset *mvattnums = collect_mv_attnums(clauses, relid, - STATS_TYPE_MCV | STATS_TYPE_HIST); + mvattnums = collect_mv_attnums(clauses, relid, + STATS_TYPE_MCV | STATS_TYPE_HIST); /* and search for the statistic covering the most attributes */ - MVStatisticInfo *mvstat = choose_mv_statistics(stats, mvattnums, - STATS_TYPE_MCV | STATS_TYPE_HIST); - - if (mvstat != NULL) /* we have a matching stats */ + while ((mvstat = choose_mv_statistics(stats, mvattnums, + STATS_TYPE_MCV | STATS_TYPE_HIST))) { /* clauses compatible with multi-variate stats */ List *mvclauses = NIL; @@ -250,6 +251,10 @@ clauselist_selectivity(PlannerInfo *root, /* compute the multivariate stats */ s1 *= clauselist_mv_selectivity(root, mvclauses, mvstat); + + /* update the bitmap of attnums using the remaining clauses */ + mvattnums = collect_mv_attnums(clauses, relid, + 
STATS_TYPE_MCV | STATS_TYPE_HIST); } } @@ -264,9 +269,7 @@ clauselist_selectivity(PlannerInfo *root, mvattnums = collect_mv_attnums(clauses, relid, STATS_TYPE_FDEPS); /* and search for the statistic covering the most attributes */ - mvstat = choose_mv_statistics(stats, mvattnums, STATS_TYPE_FDEPS); - - if (mvstat != NULL) /* we have a matching stats */ + while ((mvstat = choose_mv_statistics(stats, mvattnums, STATS_TYPE_FDEPS))) { /* clauses compatible with multi-variate stats */ List *mvclauses = NIL; @@ -284,6 +287,9 @@ clauselist_selectivity(PlannerInfo *root, /* compute the multivariate stats (dependencies) */ s1 *= clauselist_mv_selectivity_deps(root, relid, mvclauses, mvstat, varRelid, jointype, sjinfo); + + /* update the bitmap of attnums using the remaining clauses */ + mvattnums = collect_mv_attnums(clauses, relid, STATS_TYPE_FDEPS); } } @@ -298,9 +304,7 @@ clauselist_selectivity(PlannerInfo *root, mvattnums = collect_mv_attnums(clauses, relid, STATS_TYPE_NDIST); /* and search for the statistic covering the most attributes */ - mvstat = choose_mv_statistics(stats, mvattnums, STATS_TYPE_NDIST); - - if (mvstat != NULL) /* we have a matching stats */ + while ((mvstat = choose_mv_statistics(stats, mvattnums, STATS_TYPE_NDIST))) { /* clauses compatible with multi-variate stats */ List *mvclauses = NIL; @@ -315,6 +319,9 @@ clauselist_selectivity(PlannerInfo *root, /* compute the multivariate stats (dependencies) */ s1 *= clauselist_mv_selectivity_ndist(root, relid, mvclauses, mvstat, varRelid, jointype, sjinfo); + + /* update the bitmap of attnums using the remaining clauses */ + mvattnums = collect_mv_attnums(clauses, relid, STATS_TYPE_NDIST); } } diff --git a/src/test/regress/expected/mv_statistics.out b/src/test/regress/expected/mv_statistics.out new file mode 100644 index 0000000..7eb6f2e --- /dev/null +++ b/src/test/regress/expected/mv_statistics.out @@ -0,0 +1,78 @@ +-- data type passed by value +CREATE TABLE multi_stats ( + a INT, + b INT, + c INT, + d INT, + e 
INT, + f INT, + g INT, + h INT +); +-- MCV list on (a,b) +CREATE STATISTICS m1 WITH (mcv) ON (a, b) FROM multi_stats; +-- histogram on (c,d) +CREATE STATISTICS m2 WITH (histogram) ON (c, d) FROM multi_stats; +-- functional dependencies on (e,f) +CREATE STATISTICS m3 WITH (dependencies) ON (e, f) FROM multi_stats; +-- ndistinct coefficients on (g,h) +CREATE STATISTICS m4 WITH (ndistinct) ON (g, h) FROM multi_stats; +-- perfectly correlated groups +INSERT INTO multi_stats +SELECT + i, i/2, -- MCV + i, i + j, -- histogram + k, k/2, -- dependencies + l/5, l/10 -- ndistinct +FROM ( + SELECT + mod(x, 13) AS i, + mod(x, 17) AS j, + mod(x, 11) AS k, + mod(x, 51) AS l + FROM generate_series(1,30000) AS s(x) +) foo; +ANALYZE multi_stats; +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (c >= 3) AND (d <= 10); + QUERY PLAN +---------------------------------------------------------------- + Seq Scan on multi_stats (cost=0.00..821.00 rows=413 width=32) + Filter: ((c >= 3) AND (d <= 10) AND (a = 8) AND (b = 4)) +(2 rows) + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (e = 10) AND (f = 5); + QUERY PLAN +---------------------------------------------------------------- + Seq Scan on multi_stats (cost=0.00..821.00 rows=210 width=32) + Filter: ((a = 8) AND (b = 4) AND (e = 10) AND (f = 5)) +(2 rows) + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (e = 10) AND (f = 5); + QUERY PLAN +---------------------------------------------------------------- + Seq Scan on multi_stats (cost=0.00..821.00 rows=210 width=32) + Filter: ((a = 8) AND (b = 4) AND (e = 10) AND (f = 5)) +(2 rows) + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (g = 2) AND (h = 1); + QUERY PLAN +---------------------------------------------------------------- + Seq Scan on multi_stats (cost=0.00..821.00 rows=210 width=32) + Filter: ((a = 8) AND (b = 4) AND (g = 2) AND (h = 1)) +(2 rows) + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND 
(b = 4) AND + (c >= 3) AND (d <= 10) AND + (e = 10) AND (f = 5); + QUERY PLAN +------------------------------------------------------------------------------------- + Seq Scan on multi_stats (cost=0.00..971.00 rows=37 width=32) + Filter: ((c >= 3) AND (d <= 10) AND (a = 8) AND (b = 4) AND (e = 10) AND (f = 5)) +(2 rows) + +DROP TABLE multi_stats; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 36dd618..bd4a294 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -118,4 +118,4 @@ test: event_trigger test: stats # run tests of multivariate stats -test: mv_ndistinct mv_dependencies mv_mcv mv_histogram +test: mv_ndistinct mv_dependencies mv_mcv mv_histogram mv_statistics diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 34f5467..54cc854 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -175,3 +175,4 @@ test: mv_ndistinct test: mv_dependencies test: mv_mcv test: mv_histogram +test: mv_statistics diff --git a/src/test/regress/sql/mv_statistics.sql b/src/test/regress/sql/mv_statistics.sql new file mode 100644 index 0000000..cd12ad0 --- /dev/null +++ b/src/test/regress/sql/mv_statistics.sql @@ -0,0 +1,60 @@ +-- data type passed by value +CREATE TABLE multi_stats ( + a INT, + b INT, + c INT, + d INT, + e INT, + f INT, + g INT, + h INT +); + +-- MCV list on (a,b) +CREATE STATISTICS m1 WITH (mcv) ON (a, b) FROM multi_stats; + +-- histogram on (c,d) +CREATE STATISTICS m2 WITH (histogram) ON (c, d) FROM multi_stats; + +-- functional dependencies on (e,f) +CREATE STATISTICS m3 WITH (dependencies) ON (e, f) FROM multi_stats; + +-- ndistinct coefficients on (g,h) +CREATE STATISTICS m4 WITH (ndistinct) ON (g, h) FROM multi_stats; + +-- perfectly correlated groups +INSERT INTO multi_stats +SELECT + i, i/2, -- MCV + i, i + j, -- histogram + k, k/2, -- dependencies + l/5, l/10 -- ndistinct +FROM ( + SELECT + mod(x, 13) AS i, + 
mod(x, 17) AS j, + mod(x, 11) AS k, + mod(x, 51) AS l + FROM generate_series(1,30000) AS s(x) +) foo; + +ANALYZE multi_stats; + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (c >= 3) AND (d <= 10); + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (e = 10) AND (f = 5); + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (e = 10) AND (f = 5); + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND (g = 2) AND (h = 1); + +EXPLAIN SELECT * FROM multi_stats + WHERE (a = 8) AND (b = 4) AND + (c >= 3) AND (d <= 10) AND + (e = 10) AND (f = 5); + +DROP TABLE multi_stats; -- 2.5.5