From 83696f24eceb2c9d7a71ffb74171c30bc0c3727a Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sun, 23 Oct 2016 17:38:35 +0200 Subject: [PATCH 6/9] PATCH: multivariate histograms - extends the pg_mv_statistic catalog (add 'hist' fields) - building the histograms during ANALYZE - simple estimation while planning the queries - pg_histogram data type (varlena-based) Includes regression tests mostly equal to those for functional dependencies / MCV lists. A new varlena-based data type for storing serialized histograms. --- doc/src/sgml/catalogs.sgml | 30 + doc/src/sgml/planstats.sgml | 125 ++ doc/src/sgml/ref/create_statistics.sgml | 35 + src/backend/catalog/system_views.sql | 4 +- src/backend/commands/statscmds.c | 11 +- src/backend/nodes/outfuncs.c | 2 + src/backend/optimizer/path/clausesel.c | 606 +++++++- src/backend/optimizer/util/plancat.c | 4 +- src/backend/utils/mvstats/Makefile | 2 +- src/backend/utils/mvstats/README.histogram | 299 ++++ src/backend/utils/mvstats/README.stats | 2 + src/backend/utils/mvstats/common.c | 32 +- src/backend/utils/mvstats/common.h | 8 +- src/backend/utils/mvstats/histogram.c | 2123 ++++++++++++++++++++++++++++ src/bin/psql/describe.c | 15 +- src/include/catalog/pg_cast.h | 3 + src/include/catalog/pg_mv_statistic.h | 22 +- src/include/catalog/pg_proc.h | 13 + src/include/catalog/pg_type.h | 4 + src/include/nodes/relation.h | 2 + src/include/utils/builtins.h | 4 + src/include/utils/mvstats.h | 125 +- src/test/regress/expected/mv_histogram.out | 198 +++ src/test/regress/expected/opr_sanity.out | 3 +- src/test/regress/expected/rules.out | 4 +- src/test/regress/expected/type_sanity.out | 3 +- src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/mv_histogram.sql | 167 +++ 29 files changed, 3801 insertions(+), 48 deletions(-) create mode 100644 src/backend/utils/mvstats/README.histogram create mode 100644 src/backend/utils/mvstats/histogram.c create mode 100644 
src/test/regress/expected/mv_histogram.out create mode 100644 src/test/regress/sql/mv_histogram.sql diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index bca03e9..be34e24 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -4307,6 +4307,17 @@ + hist_enabled + bool + + + If true, histogram will be computed for the combination of columns, + covered by the statistics. This does not mean the histogram is already + computed, though. + + + + ndist_built bool @@ -4337,6 +4348,16 @@ + hist_built + bool + + + If true, histogram is already computed and available for use during query + estimation. + + + + stakeys int2vector pg_attribute.attnum @@ -4374,6 +4395,15 @@ + + stahist + pg_histogram + + + Histogram, serialized as pg_histogram type. + + + diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml index 57f9441..2896b04 100644 --- a/doc/src/sgml/planstats.sgml +++ b/doc/src/sgml/planstats.sgml @@ -914,6 +914,131 @@ EXPLAIN ANALYZE SELECT * FROM t WHERE a <= 49 AND b > 49; + + Histograms + + + MCV lists, introduced in the previous section, work very well + for low-cardinality columns (i.e. columns with only very few distinct + values), and for columns with a few very frequent values (and possibly + many rare ones). Histograms, a generalization of per-column histograms + briefly described in , are meant + to address the other cases, i.e. high-cardinality columns, particularly + when there are no frequent values. + + + + Although the example data we've used so far is not a very good match, we + can try creating a histogram instead of the MCV list. 
With the + histogram in place, you may get a plan like this: + + +DROP STATISTICS s2; +CREATE STATISTICS s3 ON t (a,b) WITH (histogram); +ANALYZE t; +EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual time=0.035..2.967 rows=100 loops=1) + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 9900 + Planning time: 0.227 ms + Execution time: 3.189 ms +(5 rows) + + + Which seems quite accurate, however for other combinations of values the + results may be much worse, as illustrated by the following query + + + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual time=2.771..2.771 rows=0 loops=1) + Filter: ((a = 1) AND (b = 10)) + Rows Removed by Filter: 10000 + Planning time: 0.179 ms + Execution time: 2.812 ms +(5 rows) + + + This is due to histograms tracking ranges of values, not individual values. + That means it's only possible say whether a bucket may contain items + matching the conditions, but it's unclear how many such tuples there + actually are in the bucket. Moreover, for larger tables only a small subset + of rows gets sampled by ANALYZE, causing small variations in + the shape of buckets. + + + + To inspect details of the histogram, we can look into the + pg_mv_stats view + + +SELECT tablename, staname, attnums, histbytes, histinfo + FROM pg_mv_stats WHERE staname = 's3'; + tablename | staname | attnums | histbytes | histinfo +-----------+---------+---------+-----------+------------- + t | s3 | 1 2 | 1928 | nbuckets=64 +(1 row) + + + This shows the histogram has 64 buckets, but as we know there are 100 + distinct combinations of values in the two columns. This means there are + buckets containing multiple combinations, causing the inaccuracy. 
+ + + + Similarly to MCV lists, we can inspect histogram contents + using a function called pg_mv_histogram_buckets. + + +test=# SELECT * FROM pg_mv_histogram_buckets((SELECT oid FROM pg_mv_statistic WHERE staname = 's3'), 0); + index | minvals | maxvals | nullsonly | mininclusive | maxinclusive | frequency | density | bucket_volume +-------+---------+---------+-----------+--------------+--------------+-----------+----------+--------------- + 0 | {0,0} | {3,1} | {f,f} | {t,t} | {f,f} | 0.01 | 1.68 | 0.005952 + 1 | {50,0} | {51,3} | {f,f} | {t,t} | {f,f} | 0.01 | 1.12 | 0.008929 + 2 | {0,25} | {26,31} | {f,f} | {t,t} | {f,f} | 0.01 | 0.28 | 0.035714 +... + 61 | {60,0} | {99,12} | {f,f} | {t,t} | {t,f} | 0.02 | 0.124444 | 0.160714 + 62 | {34,35} | {37,49} | {f,f} | {t,t} | {t,t} | 0.02 | 0.96 | 0.020833 + 63 | {84,35} | {87,49} | {f,f} | {t,t} | {t,t} | 0.02 | 0.96 | 0.020833 +(64 rows) + + + Which confirms there are 64 buckets, with frequencies ranging between 1% + and 2%. The minvals and maxvals show the + bucket boundaries, nullsonly shows which columns contain + only null values (in the given bucket). + + + + Similarly to MCV lists, the planner applies all conditions to + the buckets, and sums the frequencies of the matching ones. For details, + see clauselist_mv_selectivity_histogram function in + clausesel.c. + + + + It's also possible to build MCV lists and a histogram, in which + case ANALYZE will build a MCV lists with the most + frequent values, and a histogram on the remaining part of the sample. + + +DROP STATISTICS s3; +CREATE STATISTICS s4 ON t (a,b) WITH (mcv, histogram); + + + In this case the MCV list and histogram are treated as a single + composed statistics. + + + + For additional information about multivariate histograms, see + src/backend/utils/mvstats/README.histogram. 
+ + + + diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml index e95d8d3..de419d2 100644 --- a/doc/src/sgml/ref/create_statistics.sgml +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -124,6 +124,15 @@ CREATE STATISTICS [ IF NOT EXISTS ] statistics_na + histogram (boolean) + + + Enables histogram for the statistics. + + + + + mcv (boolean) @@ -201,6 +210,32 @@ EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2); + + Create table t3 with two strongly correlated columns, and + a histogram on those two columns: + + +CREATE TABLE t3 ( + a float, + b float +); + +INSERT INTO t3 SELECT mod(i,1000), mod(i,1000) + 50 * (r - 0.5) FROM ( + SELECT i, random() r FROM generate_series(1,1000000) s(i) + ) foo; + +CREATE STATISTICS s3 WITH (histogram) ON (a, b) FROM t3; + +ANALYZE t3; + +-- small overlap +EXPLAIN ANALYZE SELECT * FROM t3 WHERE (a < 500) AND (b > 500); + +-- no overlap +EXPLAIN ANALYZE SELECT * FROM t3 WHERE (a < 400) AND (b > 600); + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index d4d9c24..2501455 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -190,7 +190,9 @@ CREATE VIEW pg_mv_stats AS length(s.standist::bytea) AS ndistbytes, length(S.stadeps::bytea) AS depsbytes, length(S.stamcv::bytea) AS mcvbytes, - pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo + pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo, + length(S.stahist::bytea) AS histbytes, + pg_mv_stats_histogram_info(S.stahist) AS histinfo FROM (pg_mv_statistic S JOIN pg_class C ON (C.oid = S.starelid)) LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace); diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index ef05745..2e91b0c 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -71,7 +71,8 @@ CreateStatistics(CreateStatsStmt *stmt) /* by default build nothing */ bool build_ndistinct = false, 
build_dependencies = false, - build_mcv = false; + build_mcv = false, + build_histogram = false; Assert(IsA(stmt, CreateStatsStmt)); @@ -172,6 +173,8 @@ CreateStatistics(CreateStatsStmt *stmt) build_dependencies = defGetBoolean(opt); else if (strcmp(opt->defname, "mcv") == 0) build_mcv = defGetBoolean(opt); + else if (strcmp(opt->defname, "histogram") == 0) + build_histogram = defGetBoolean(opt); else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -180,10 +183,10 @@ CreateStatistics(CreateStatsStmt *stmt) } /* Make sure there's at least one statistics type specified. */ - if (!(build_ndistinct || build_dependencies || build_mcv)) + if (!(build_ndistinct || build_dependencies || build_mcv || build_histogram)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("no statistics type (ndistinct, dependencies, mcv) requested"))); + errmsg("no statistics type (ndistinct, dependencies, mcv, histogram) requested"))); stakeys = buildint2vector(attnums, numcols); @@ -207,10 +210,12 @@ CreateStatistics(CreateStatsStmt *stmt) values[Anum_pg_mv_statistic_ndist_enabled - 1] = BoolGetDatum(build_ndistinct); values[Anum_pg_mv_statistic_deps_enabled - 1] = BoolGetDatum(build_dependencies); values[Anum_pg_mv_statistic_mcv_enabled - 1] = BoolGetDatum(build_mcv); + values[Anum_pg_mv_statistic_hist_enabled - 1] = BoolGetDatum(build_histogram); nulls[Anum_pg_mv_statistic_standist - 1] = true; nulls[Anum_pg_mv_statistic_stadeps - 1] = true; nulls[Anum_pg_mv_statistic_stamcv - 1] = true; + nulls[Anum_pg_mv_statistic_stahist - 1] = true; /* insert the tuple into pg_mv_statistic */ mvstatrel = heap_open(MvStatisticRelationId, RowExclusiveLock); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a9cc9ad..27dbe76 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2204,11 +2204,13 @@ _outMVStatisticInfo(StringInfo str, const MVStatisticInfo *node) WRITE_BOOL_FIELD(ndist_enabled); WRITE_BOOL_FIELD(deps_enabled); 
WRITE_BOOL_FIELD(mcv_enabled); + WRITE_BOOL_FIELD(hist_enabled); /* built/available statistics */ WRITE_BOOL_FIELD(ndist_built); WRITE_BOOL_FIELD(deps_built); WRITE_BOOL_FIELD(mcv_built); + WRITE_BOOL_FIELD(hist_built); } static void diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index abdbc5b..fddbcc4 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -49,6 +49,7 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause, #define STATS_TYPE_FDEPS 0x01 #define STATS_TYPE_MCV 0x02 +#define STATS_TYPE_HIST 0x04 static bool clause_is_mv_compatible(Node *clause, Index relid, Bitmapset **attnums, int type); @@ -77,12 +78,21 @@ static Selectivity clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses, MVStatisticInfo *mvstats, bool *fullmatch, Selectivity *lowsel); +static Selectivity clauselist_mv_selectivity_histogram(PlannerInfo *root, + List *clauses, MVStatisticInfo *mvstats); + static int update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses, int2vector *stakeys, MCVList mcvlist, int nmatches, char *matches, Selectivity *lowsel, bool *fullmatch, bool is_or); +static int update_match_bitmap_histogram(PlannerInfo *root, List *clauses, + int2vector *stakeys, + MVSerializedHistogram mvhist, + int nmatches, char *matches, + bool is_or); + static bool has_stats(List *stats, int type); static List *find_stats(PlannerInfo *root, Index relid); @@ -93,6 +103,7 @@ static bool stats_type_matches(MVStatisticInfo *stat, int type); #define UPDATE_RESULT(m,r,isor) \ (m) = (isor) ? 
(Max(m,r)) : (Min(m,r)) + /**************************************************************************** * ROUTINES TO COMPUTE SELECTIVITIES ****************************************************************************/ @@ -121,7 +132,7 @@ static bool stats_type_matches(MVStatisticInfo *stat, int type); * * First we try to reduce the list of clauses by applying (soft) functional * dependencies, and then we try to estimate the selectivity of the reduced - * list of clauses using the multivariate MCV list. + * list of clauses using the multivariate MCV list and histograms. * * Finally we remove the portion of clauses estimated using multivariate stats, * and process the rest of the clauses using the regular per-column stats. @@ -208,16 +219,17 @@ clauselist_selectivity(PlannerInfo *root, * If there are no such stats or not enough attributes, don't waste time * simply skip to estimation using the plain per-column stats. */ - if (has_stats(stats, STATS_TYPE_MCV) && - (count_mv_attnums(clauses, relid, STATS_TYPE_MCV) >= 2)) + if (has_stats(stats, STATS_TYPE_MCV | STATS_TYPE_HIST) && + (count_mv_attnums(clauses, relid, + STATS_TYPE_MCV | STATS_TYPE_HIST) >= 2)) { /* collect attributes from the compatible conditions */ Bitmapset *mvattnums = collect_mv_attnums(clauses, relid, - STATS_TYPE_MCV); + STATS_TYPE_MCV | STATS_TYPE_HIST); /* and search for the statistic covering the most attributes */ MVStatisticInfo *mvstat = choose_mv_statistics(stats, mvattnums, - STATS_TYPE_MCV); + STATS_TYPE_MCV | STATS_TYPE_HIST); if (mvstat != NULL) /* we have a matching stats */ { @@ -226,7 +238,7 @@ clauselist_selectivity(PlannerInfo *root, /* split the clauselist into regular and mv-clauses */ clauses = clauselist_mv_split(root, relid, clauses, &mvclauses, - mvstat, STATS_TYPE_MCV); + mvstat, STATS_TYPE_MCV | STATS_TYPE_HIST); /* we've chosen the histogram to match the clauses */ Assert(mvclauses != NIL); @@ -1178,6 +1190,8 @@ static Selectivity clauselist_mv_selectivity(PlannerInfo 
*root, List *clauses, MVStatisticInfo *mvstats) { bool fullmatch = false; + Selectivity s1 = 0.0, + s2 = 0.0; /* * Lowest frequency in the MCV list (may be used as an upper bound for @@ -1191,9 +1205,26 @@ clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStatisticInfo *mvs * order by selectivity (to optimize the MCV/histogram evaluation). */ - /* Evaluate the MCV selectivity */ - return clauselist_mv_selectivity_mcvlist(root, clauses, mvstats, - &fullmatch, &mcv_low); + /* Evaluate the MCV first. */ + s1 = clauselist_mv_selectivity_mcvlist(root, clauses, mvstats, + &fullmatch, &mcv_low); + + /* + * If we got a full equality match on the MCV list, we're done (and the + * estimate is pretty good). + */ + if (fullmatch && (s1 > 0.0)) + return s1; + + /* + * TODO if (fullmatch) without matching MCV item, use the mcv_low + * selectivity as upper bound + */ + + s2 = clauselist_mv_selectivity_histogram(root, clauses, mvstats); + + /* TODO clamp to <= 1.0 (or more strictly, when possible) */ + return s1 + s2; } /* @@ -1379,7 +1410,8 @@ choose_mv_statistics(List *stats, Bitmapset *attnums, int types) /* skip statistics not matching any of the requested types */ if (! 
((info->deps_built && (STATS_TYPE_FDEPS & types)) || - (info->mcv_built && (STATS_TYPE_MCV & types)))) + (info->mcv_built && (STATS_TYPE_MCV & types)) || + (info->hist_built && (STATS_TYPE_HIST & types)))) continue; /* count columns covered by the statistics */ @@ -1609,7 +1641,7 @@ mv_compatible_walker(Node *node, mv_compatible_context *context) case F_SCALARGTSEL: /* not compatible with functional dependencies */ - if (!(context->types & STATS_TYPE_MCV)) + if (!(context->types & (STATS_TYPE_MCV | STATS_TYPE_HIST))) return true; /* terminate */ break; @@ -1677,6 +1709,9 @@ stats_type_matches(MVStatisticInfo *stat, int type) if ((type & STATS_TYPE_MCV) && stat->mcv_built) return true; + if ((type & STATS_TYPE_HIST) && stat->hist_built) + return true; + return false; } @@ -1695,6 +1730,9 @@ has_stats(List *stats, int type) /* terminate if we've found at least one matching statistics */ if (stats_type_matches(stat, type)) return true; + + if ((type & STATS_TYPE_HIST) && stat->hist_built) + return true; } return false; @@ -1725,12 +1763,12 @@ find_stats(PlannerInfo *root, Index relid) * * The algorithm works like this: * - * 1) mark all items as 'match' - * 2) walk through all the clauses - * 3) for a particular clause, walk through all the items - * 4) skip items that are already 'no match' - * 5) check clause for items that still match - * 6) sum frequencies for items to get selectivity + * 1) mark all items as 'match' + * 2) walk through all the clauses + * 3) for a particular clause, walk through all the items + * 4) skip items that are already 'no match' + * 5) check clause for items that still match + * 6) sum frequencies for items to get selectivity * * The function also returns the frequency of the least frequent item * on the MCV list, which may be useful for clamping estimate from the @@ -2116,3 +2154,537 @@ update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses, return nmatches; } + +/* + * Estimate selectivity of clauses using a histogram. 
+ * + * If there's no histogram for the stats, the function returns 0.0. + * + * The general idea of this method is similar to how MCV lists are + * processed, except that this introduces the concept of a partial + * match (MCV only works with full match / mismatch). + * + * The algorithm works like this: + * + * 1) mark all buckets as 'full match' + * 2) walk through all the clauses + * 3) for a particular clause, walk through all the buckets + * 4) skip buckets that are already 'no match' + * 5) check clause for buckets that still match (at least partially) + * 6) sum frequencies for buckets to get selectivity + * + * Unlike MCV lists, histograms have a concept of a partial match. In + * that case we use 1/2 the bucket, to minimize the average error. The + * MV histograms are usually less detailed than the per-column ones, + * meaning the sum is often quite high (thanks to combining a lot of + * "partially hit" buckets). + * + * Maybe we could use per-bucket information with number of distinct + * values it contains (for each dimension), and then use that to correct + * the estimate (so with 10 distinct values, we'd use 1/10 of the bucket + * frequency). We might also scale the value depending on the actual + * ndistinct estimate (not just the values observed in the sample). + * + * Another option would be to multiply the selectivities, i.e. if we get + * 'partial match' for a bucket for multiple conditions, we might use + * 0.5^k (where k is the number of conditions), instead of 0.5. This + * probably does not minimize the average error, though. + * + * TODO: This might use a similar shortcut to MCV lists - count buckets + * marked as partial/full match, and terminate once this drop to 0. + * Not sure if it's really worth it - for MCV lists a situation like + * this is not uncommon, but for histograms it's not that clear. 
+ */ +static Selectivity +clauselist_mv_selectivity_histogram(PlannerInfo *root, List *clauses, + MVStatisticInfo *mvstats) +{ + int i; + Selectivity s = 0.0; + Selectivity u = 0.0; + + int nmatches = 0; + char *matches = NULL; + + MVSerializedHistogram mvhist = NULL; + + /* there's no histogram */ + if (!mvstats->hist_built) + return 0.0; + + /* There may be no histogram in the stats (check hist_built flag) */ + mvhist = load_mv_histogram(mvstats->mvoid); + + Assert(mvhist != NULL); + Assert(clauses != NIL); + Assert(list_length(clauses) >= 2); + + /* + * Bitmap of bucket matches (mismatch, partial, full). by default all + * buckets fully match (and we'll eliminate them). + */ + matches = palloc0(sizeof(char) * mvhist->nbuckets); + memset(matches, MVSTATS_MATCH_FULL, sizeof(char) * mvhist->nbuckets); + + nmatches = mvhist->nbuckets; + + /* build the match bitmap */ + update_match_bitmap_histogram(root, clauses, + mvstats->stakeys, mvhist, + nmatches, matches, false); + + /* now, walk through the buckets and sum the selectivities */ + for (i = 0; i < mvhist->nbuckets; i++) + { + /* + * Find out what part of the data is covered by the histogram, so that + * we can 'scale' the selectivity properly (e.g. when only 50% of the + * sample got into the histogram, and the rest is in a MCV list). + * + * TODO This might be handled by keeping a global "frequency" for the + * whole histogram, which might save us some time spent accessing the + * not-matching part of the histogram. Although it's likely in a + * cache, so it's very fast. 
+ */ + u += mvhist->buckets[i]->ntuples; + + if (matches[i] == MVSTATS_MATCH_FULL) + s += mvhist->buckets[i]->ntuples; + else if (matches[i] == MVSTATS_MATCH_PARTIAL) + s += 0.5 * mvhist->buckets[i]->ntuples; + } + +#ifdef DEBUG_MVHIST + debug_histogram_matches(mvhist, matches); +#endif + + /* release the allocated bitmap and deserialized histogram */ + pfree(matches); + pfree(mvhist); + + return s * u; +} + +/* cached result of bucket boundary comparison for a single dimension */ + +#define HIST_CACHE_NOT_FOUND 0x00 +#define HIST_CACHE_FALSE 0x01 +#define HIST_CACHE_TRUE 0x03 +#define HIST_CACHE_MASK 0x02 + +static char +bucket_contains_value(FmgrInfo ltproc, Datum constvalue, + Datum min_value, Datum max_value, + int min_index, int max_index, + bool min_include, bool max_include, + char *callcache) +{ + bool a, + b; + + char min_cached = callcache[min_index]; + char max_cached = callcache[max_index]; + + /* + * First some quick checks on equality - if any of the boundaries equals, + * we have a partial match (so no need to call the comparator). + */ + if (((min_value == constvalue) && (min_include)) || + ((max_value == constvalue) && (max_include))) + return MVSTATS_MATCH_PARTIAL; + + /* Keep the values 0/1 because of the XOR at the end. */ + a = ((min_cached & HIST_CACHE_MASK) >> 1); + b = ((max_cached & HIST_CACHE_MASK) >> 1); + + /* + * If result for the bucket lower bound not in cache, evaluate the + * function and store the result in the cache. + */ + if (!min_cached) + { + a = DatumGetBool(FunctionCall2Coll(<proc, + DEFAULT_COLLATION_OID, + constvalue, min_value)); + /* remember the result */ + callcache[min_index] = (a) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + /* And do the same for the upper bound. */ + if (!max_cached) + { + b = DatumGetBool(FunctionCall2Coll(<proc, + DEFAULT_COLLATION_OID, + constvalue, max_value)); + /* remember the result */ + callcache[max_index] = (b) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + return (a ^ b) ? 
MVSTATS_MATCH_PARTIAL : MVSTATS_MATCH_NONE; +} + +static char +bucket_is_smaller_than_value(FmgrInfo opproc, Datum constvalue, + Datum min_value, Datum max_value, + int min_index, int max_index, + bool min_include, bool max_include, + char *callcache, bool isgt) +{ + char min_cached = callcache[min_index]; + char max_cached = callcache[max_index]; + + /* Keep the values 0/1 because of the XOR at the end. */ + bool a = ((min_cached & HIST_CACHE_MASK) >> 1); + bool b = ((max_cached & HIST_CACHE_MASK) >> 1); + + if (!min_cached) + { + a = DatumGetBool(FunctionCall2Coll(&opproc, + DEFAULT_COLLATION_OID, + min_value, + constvalue)); + /* remember the result */ + callcache[min_index] = (a) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + if (!max_cached) + { + b = DatumGetBool(FunctionCall2Coll(&opproc, + DEFAULT_COLLATION_OID, + max_value, + constvalue)); + /* remember the result */ + callcache[max_index] = (b) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + /* + * Now, we need to combine both results into the final answer, and we need + * to be careful about the 'isgt' variable which kinda inverts the + * meaning. + * + * First, we handle the case when each boundary returns different results. + * In that case the outcome can only be 'partial' match. + */ + if (a != b) + return MVSTATS_MATCH_PARTIAL; + + /* + * When the results are the same, then it depends on the 'isgt' value. + * There are four options: + * + * isgt=false a=b=true => full match isgt=false a=b=false => empty + * isgt=true a=b=true => empty isgt=true a=b=false => full match + * + * We'll cheat a bit, because we know that (a=b) so we'll use just one of + * them. + */ + if (isgt) + return (!a) ? MVSTATS_MATCH_FULL : MVSTATS_MATCH_NONE; + else + return (a) ? MVSTATS_MATCH_FULL : MVSTATS_MATCH_NONE; +} + +/* + * Evaluate clauses using the histogram, and update the match bitmap. 
+ * + * The bitmap may be already partially set, so this is really a way to + * combine results of several clause lists - either when computing + * conditional probability P(A|B) or a combination of AND/OR clauses. + * + * Note: This is not a simple bitmap in the sense that there are more + * than two possible values for each item - no match, partial + * match and full match. So we need 2 bits per item. + * + * TODO: This works with 'bitmap' where each item is represented as a + * char, which is slightly wasteful. Instead, we could use a bitmap + * with 2 bits per item, reducing the size to ~1/4. By using values + * 0, 1 and 3 (instead of 0, 1 and 2), the operations (merging etc.) + * might be performed just like for simple bitmap by using & and |, + * which might be faster than min/max. + */ +static int +update_match_bitmap_histogram(PlannerInfo *root, List *clauses, + int2vector *stakeys, + MVSerializedHistogram mvhist, + int nmatches, char *matches, + bool is_or) +{ + int i; + ListCell *l; + + /* + * Used for caching function calls, only once per deduplicated value. + * + * We know may have up to (2 * nbuckets) values per dimension. It's + * probably overkill, but let's allocate that once for all clauses, to + * minimize overhead. + * + * Also, we only need two bits per value, but this allocates byte per + * value. Might be worth optimizing. 
+ * + * 0x00 - not yet called 0x01 - called, result is 'false' 0x03 - called, + * result is 'true' + */ + char *callcache = palloc(mvhist->nbuckets); + + Assert(mvhist != NULL); + Assert(mvhist->nbuckets > 0); + Assert(nmatches >= 0); + Assert(nmatches <= mvhist->nbuckets); + + Assert(clauses != NIL); + Assert(list_length(clauses) >= 1); + + /* loop through the clauses and do the estimation */ + foreach(l, clauses) + { + Node *clause = (Node *) lfirst(l); + + /* if it's a RestrictInfo, then extract the clause */ + if (IsA(clause, RestrictInfo)) + clause = (Node *) ((RestrictInfo *) clause)->clause; + + /* it's either OpClause, or NullTest */ + if (is_opclause(clause)) + { + OpExpr *expr = (OpExpr *) clause; + bool varonleft = true; + bool ok; + + FmgrInfo opproc; /* operator */ + + fmgr_info(get_opcode(expr->opno), &opproc); + + /* reset the cache (per clause) */ + memset(callcache, 0, mvhist->nbuckets); + + ok = (NumRelids(clause) == 1) && + (is_pseudo_constant_clause(lsecond(expr->args)) || + (varonleft = false, + is_pseudo_constant_clause(linitial(expr->args)))); + + if (ok) + { + FmgrInfo ltproc; + RegProcedure oprrest = get_oprrest(expr->opno); + + Var *var = (varonleft) ? linitial(expr->args) : lsecond(expr->args); + Const *cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args); + bool isgt = (!varonleft); + + TypeCacheEntry *typecache + = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR); + + /* lookup dimension for the attribute */ + int idx = mv_get_index(var->varattno, stakeys); + + fmgr_info(get_opcode(typecache->lt_opr), <proc); + + /* + * Check this for all buckets that still have "true" in the + * bitmap + * + * We already know the clauses use suitable operators (because + * that's how we filtered them). 
+ */ + for (i = 0; i < mvhist->nbuckets; i++) + { + char res = MVSTATS_MATCH_NONE; + + MVSerializedBucket bucket = mvhist->buckets[i]; + + /* histogram boundaries */ + Datum minval, + maxval; + bool mininclude, + maxinclude; + int minidx, + maxidx; + + /* + * For AND-lists, we can also mark NULL buckets as 'no + * match' (and then skip them). For OR-lists this is not + * possible. + */ + if ((!is_or) && bucket->nullsonly[idx]) + matches[i] = MVSTATS_MATCH_NONE; + + /* + * Skip buckets that were already eliminated - this is + * impotant considering how we update the info (we only + * lower the match). We can't really do anything about the + * MATCH_PARTIAL buckets. + */ + if ((!is_or) && (matches[i] == MVSTATS_MATCH_NONE)) + continue; + else if (is_or && (matches[i] == MVSTATS_MATCH_FULL)) + continue; + + /* lookup the values and cache of function calls */ + minidx = bucket->min[idx]; + maxidx = bucket->max[idx]; + + minval = mvhist->values[idx][bucket->min[idx]]; + maxval = mvhist->values[idx][bucket->max[idx]]; + + mininclude = bucket->min_inclusive[idx]; + maxinclude = bucket->max_inclusive[idx]; + + /* + * TODO Maybe it's possible to add here a similar + * optimization as for the MCV lists: + * + * (nmatches == 0) && AND-list => all eliminated (FALSE) + * (nmatches == N) && OR-list => all eliminated (TRUE) + * + * But it's more complex because of the partial matches. + */ + + /* + * If it's not a "<" or ">" or "=" operator, just ignore + * the clause. Otherwise note the relid and attnum for the + * variable. + * + * TODO I'm really unsure the handling of 'isgt' flag + * (that is, clauses with reverse order of + * variable/constant) is correct. I wouldn't be surprised + * if there was some mixup. Using the lt/gt operators + * instead of messing with the opproc could make it + * simpler. It would however be using a different operator + * than the query, although it's not any shadier than + * using the selectivity function as is done currently. 
+ */ + switch (oprrest) + { + case F_SCALARLTSEL: /* Var < Const */ + case F_SCALARGTSEL: /* Var > Const */ + + res = bucket_is_smaller_than_value(opproc, cst->constvalue, + minval, maxval, + minidx, maxidx, + mininclude, maxinclude, + callcache, isgt); + break; + + case F_EQSEL: + + /* + * We only check whether the value is within the + * bucket, using the lt operator, and we also + * check for equality with the boundaries. + */ + + res = bucket_contains_value(ltproc, cst->constvalue, + minval, maxval, + minidx, maxidx, + mininclude, maxinclude, + callcache); + break; + } + + UPDATE_RESULT(matches[i], res, is_or); + + } + } + } + else if (IsA(clause, NullTest)) + { + NullTest *expr = (NullTest *) clause; + Var *var = (Var *) (expr->arg); + + /* FIXME proper matching attribute to dimension */ + int idx = mv_get_index(var->varattno, stakeys); + + /* + * Walk through the buckets and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining buckets that might possibly match. 
+ */ + for (i = 0; i < mvhist->nbuckets; i++) + { + MVSerializedBucket bucket = mvhist->buckets[i]; + + /* + * Skip buckets that were already eliminated - this is + * impotant considering how we update the info (we only lower + * the match) + */ + if ((!is_or) && (matches[i] == MVSTATS_MATCH_NONE)) + continue; + else if (is_or && (matches[i] == MVSTATS_MATCH_FULL)) + continue; + + /* if the clause mismatches the bucket, set it as MATCH_NONE */ + if ((expr->nulltesttype == IS_NULL) + && (!bucket->nullsonly[idx])) + UPDATE_RESULT(matches[i], MVSTATS_MATCH_NONE, is_or); + + else if ((expr->nulltesttype == IS_NOT_NULL) && + (bucket->nullsonly[idx])) + UPDATE_RESULT(matches[i], MVSTATS_MATCH_NONE, is_or); + } + } + else if (or_clause(clause) || and_clause(clause)) + { + /* + * AND/OR clause, with all clauses compatible with the selected MV + * stat + */ + + int i; + BoolExpr *orclause = ((BoolExpr *) clause); + List *orclauses = orclause->args; + + /* match/mismatch bitmap for each bucket */ + int or_nmatches = 0; + char *or_matches = NULL; + + Assert(orclauses != NIL); + Assert(list_length(orclauses) >= 2); + + /* number of matching buckets */ + or_nmatches = mvhist->nbuckets; + + /* by default none of the buckets matches the clauses */ + or_matches = palloc0(sizeof(char) * or_nmatches); + + if (or_clause(clause)) + { + /* OR clauses assume nothing matches, initially */ + memset(or_matches, MVSTATS_MATCH_NONE, sizeof(char) * or_nmatches); + or_nmatches = 0; + } + else + { + /* AND clauses assume nothing matches, initially */ + memset(or_matches, MVSTATS_MATCH_FULL, sizeof(char) * or_nmatches); + } + + /* build the match bitmap for the OR-clauses */ + or_nmatches = update_match_bitmap_histogram(root, orclauses, + stakeys, mvhist, + or_nmatches, or_matches, or_clause(clause)); + + /* merge the bitmap into the existing one */ + for (i = 0; i < mvhist->nbuckets; i++) + { + /* + * Merge the result into the bitmap (Min for AND, Max for OR). 
+ * + * FIXME this does not decrease the number of matches + */ + UPDATE_RESULT(matches[i], or_matches[i], is_or); + } + + pfree(or_matches); + + } + else + elog(ERROR, "unknown clause type: %d", clause->type); + } + + /* free the call cache */ + pfree(callcache); + + return nmatches; +} diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9dd4e83..c804e13 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1287,7 +1287,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation) mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup); /* unavailable stats are not interesting for the planner */ - if (mvstat->deps_built || mvstat->ndist_built || mvstat->mcv_built) + if (mvstat->deps_built || mvstat->ndist_built || mvstat->mcv_built || mvstat->hist_built) { info = makeNode(MVStatisticInfo); @@ -1298,11 +1298,13 @@ get_relation_statistics(RelOptInfo *rel, Relation relation) info->ndist_enabled = mvstat->ndist_enabled; info->deps_enabled = mvstat->deps_enabled; info->mcv_enabled = mvstat->mcv_enabled; + info->hist_enabled = mvstat->hist_enabled; /* built/available statistics */ info->ndist_built = mvstat->ndist_built; info->deps_built = mvstat->deps_built; info->mcv_built = mvstat->mcv_built; + info->hist_built = mvstat->hist_built; /* stakeys */ adatum = SysCacheGetAttr(MVSTATOID, htup, diff --git a/src/backend/utils/mvstats/Makefile b/src/backend/utils/mvstats/Makefile index d5d47ba..d4b88e9 100644 --- a/src/backend/utils/mvstats/Makefile +++ b/src/backend/utils/mvstats/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/utils/mvstats top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = common.o dependencies.o mcv.o mvdist.o +OBJS = common.o dependencies.o histogram.o mcv.o mvdist.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/mvstats/README.histogram b/src/backend/utils/mvstats/README.histogram new file mode 100644 index 0000000..a182fa3 --- /dev/null +++ b/src/backend/utils/mvstats/README.histogram @@ -0,0 +1,299 @@ +Multivariate histograms +======================= + +Histograms on individual attributes consist of buckets represented by ranges, +covering the domain of the attribute. That is, each bucket is a [min,max] +interval, and contains all values in this range. The histogram is built in such +a way that all buckets have about the same frequency. + +Multivariate histograms are an extension into n-dimensional space - the buckets +are n-dimensional intervals (i.e. n-dimensional rectangles), covering the domain +of the combination of attributes. That is, each bucket has a vector of lower +and upper boundaries, denoted min[i] and max[i] (where i = 1..n). + +In addition to the boundaries, each bucket tracks additional info: + + * frequency (fraction of tuples in the bucket) + * whether the boundaries are inclusive or exclusive + * whether the dimension contains only NULL values + * number of distinct values in each dimension (for building only) + +It's possible that in the future we'll have multiple histogram types, with different +features. We do however expect all the types to share the same representation +(buckets as ranges) and only differ in how we build them. + +The current implementation builds non-overlapping buckets, that may not be true +for some histogram types and the code should not rely on this assumption. There +are interesting types of histograms (or algorithms) with overlapping buckets. + +When used on low-cardinality data, histograms usually perform considerably worse +than MCV lists (which are a good fit for this kind of data).
This is especially +true on label-like values, where ordering of the values is mostly unrelated to +meaning of the data, as proper ordering is crucial for histograms. + +On high-cardinality data the histograms are usually a better choice, because MCV +lists can't represent the distribution accurately enough. + + +Selectivity estimation +---------------------- + +The estimation is implemented in clauselist_mv_selectivity_histogram(), and +works very similarly to clauselist_mv_selectivity_mcvlist. + +The main difference is that while MCV lists support exact matches, histograms +often result in approximate matches - e.g. with equality we can only say if +the constant would be part of the bucket, but not whether it really is there +or what fraction of the bucket it corresponds to. In this case we rely on +some defaults just like in the per-column histograms. + +The current implementation uses histograms to estimate those types of clauses +(think of WHERE conditions): + + (a) equality clauses WHERE (a = 1) AND (b = 2) + (b) inequality clauses WHERE (a < 1) AND (b >= 2) + (c) NULL clauses WHERE (a IS NULL) AND (b IS NOT NULL) + (d) OR-clauses WHERE (a = 1) OR (b = 2) + +Similarly to MCV lists, it's possible to add support for additional types of +clauses, for example: + + (e) multi-var clauses WHERE (a > b) + +and so on. These are tasks for the future, not yet implemented. + + +When evaluating a clause on a bucket, we may get one of three results: + + (a) FULL_MATCH - The bucket definitely matches the clause. + + (b) PARTIAL_MATCH - The bucket matches the clause, but not necessarily all + the tuples it represents. + + (c) NO_MATCH - The bucket definitely does not match the clause. + +This may be illustrated using a range [1, 5], which is essentially a 1-D bucket.
+With clause + + WHERE (a < 10) => FULL_MATCH (all range values are below + 10, so the whole bucket matches) + + WHERE (a < 3) => PARTIAL_MATCH (there may be values matching + the clause, but we don't know how many) + + WHERE (a < 0) => NO_MATCH (the whole range is above 1, so + no values from the bucket can match) + +Some clauses may produce only some of those results - for example equality +clauses may never produce FULL_MATCH as we always hit only part of the bucket +(we can't match both boundaries at the same time). This results in less accurate +estimates compared to MCV lists, where we can hit a MCV items exactly (there's +no PARTIAL match in MCV). + +There are also clauses that may not produce any PARTIAL_MATCH results. A nice +example of that is 'IS [NOT] NULL' clause, which either matches the bucket +completely (FULL_MATCH) or not at all (NO_MATCH), thanks to how the NULL-buckets +are constructed. + +Computing the total selectivity estimate is trivial - simply sum selectivities +from all the FULL_MATCH and PARTIAL_MATCH buckets (but for buckets marked with +PARTIAL_MATCH, multiply the frequency by 0.5 to minimize the average error). + + +Building a histogram +--------------------- + +The algorithm of building a histogram in general is quite simple: + + (a) create an initial bucket (containing all sample rows) + + (b) create NULL buckets (by splitting the initial bucket) + + (c) repeat + + (1) choose bucket to split next + + (2) terminate if no bucket that might be split found, or if we've + reached the maximum number of buckets (16384) + + (3) choose dimension to partition the bucket by + + (4) partition the bucket by the selected dimension + +The main complexity is hidden in steps (c.1) and (c.3), i.e. how we choose the +bucket and dimension for the split, as discussed in the next section. + + +Partitioning criteria +--------------------- + +Similarly to one-dimensional histograms, we want to produce buckets with roughly +the same frequency. 
+ +We also need to produce "regular" buckets, because buckets with one dimension +much longer than the others are very likely to match a lot of conditions (which +increases error, even if the bucket frequency is very low). + +This is especially important when handling OR-clauses, because in that case each +clause may add buckets independently. With AND-clauses all the clauses have to +match each bucket, which makes this issue somewhat less concerning. + +To achieve this, we choose the largest bucket (containing the most sample rows), +but we only choose buckets that can actually be split (have at least 3 different +combinations of values). + +Then we choose the "longest" dimension of the bucket, which is computed by using +the distinct values in the sample as a measure. + +For details see functions select_bucket_to_partition() and partition_bucket(), +which also includes further discussion. + + +The current limit on number of buckets (16384) is mostly arbitrary, but chosen +so that it guarantees we don't exceed the number of distinct values indexable by +uint16 in any of the dimensions. In practice we could handle more buckets as we +index each dimension separately and the splits should use the dimensions evenly. + +Also, histograms this large (with 16k values in multiple dimensions) would be +quite expensive to build and process, so the 16k limit is rather reasonable. + +The actual number of buckets is also related to statistics target, because we +require MIN_BUCKET_ROWS (10) tuples per bucket before a split, so we can't have +more than (2 * 300 * target / 10) buckets. For the default target (100) this +evaluates to ~6k. + + +NULL handling (create_null_buckets) +----------------------------------- + +When building histograms on a single attribute, we first filter out NULL values. +In the multivariate case, we can't really do that because the rows may contain +a mix of NULL and non-NULL values in different columns (so we can't simply +filter all of them out).
+ +For this reason, the histograms are built in a way so that for each bucket, each +dimension contains only NULL or non-NULL values. Building the NULL-buckets +happens as the first step in the build, by the create_null_buckets() function. +The number of NULL buckets, as produced by this function, has a clear upper +boundary (2^N) where N is the number of dimensions (attributes the histogram is +built on). Or rather 2^K where K is the number of attributes that are not marked +as not-NULL. + +The buckets with NULL dimensions are then subject to the same build algorithm +(i.e. may be split into smaller buckets) just like any other bucket, but may +only be split by non-NULL dimension. + + +Serialization +------------- + +To store the histogram in pg_mv_statistic table, it is serialized into a more +efficient form. We also use the representation for estimation, i.e. we don't +fully deserialize the histogram. + +For example the boundary values are deduplicated to minimize the required space. +How much redundancy is there, actually? Let's assume there are no NULL values, +so we start with a single bucket - in that case we have 2*N boundaries. Each +time we split a bucket we introduce one new value (in the "middle" of one of +the dimensions), and keep boundaries for all the other dimensions. So after K +splits, we have up to + + 2*N + K + +unique boundary values (we may have fewer values, if the same value is used for +several splits). But after K splits we do have (K+1) buckets, so + + (K+1) * 2 * N + +boundary values. Using e.g. N=4 and K=999, we arrive to those numbers: + + 2*N + K = 1007 + (K+1) * 2 * N = 8000 + +which means a lot of redundancy. It's somewhat counter-intuitive that the number +of distinct values does not really depend on the number of dimensions (except +for the initial bucket, but that's negligible compared to the total).
+ +By deduplicating the values and replacing them with 16-bit indexes (uint16), we +reduce the required space to + + 1007 * 8 + 8000 * 2 ~= 24kB + +which is significantly less than 64kB required for the 'raw' histogram (assuming +the values are 8B). + +While the bytea compression (pglz) might achieve the same reduction of space, +the deduplicated representation is used to optimize the estimation by caching +results of function calls for already visited values. This significantly +reduces the number of calls to (often quite expensive) operators. + +Note: Of course, this reasoning only holds for histograms built by the algorithm +that simply splits the buckets in half. Other histograms types (e.g. containing +overlapping buckets) may behave differently and require different serialization. + +Serialized histograms are marked with 'magic' constant, to make it easier to +check the bytea value really is a serialized histogram. + + +varlena compression +------------------- + +This serialization may however disable automatic varlena compression, the array +of unique values is placed at the beginning of the serialized form. Which is +exactly the chunk used by pglz to check if the data is compressible, and it +will probably decide it's not very compressible. This is similar to the issue +we had with JSONB initially. + +Maybe storing buckets first would make it work, as the buckets may be better +compressible. + +On the other hand the serialization is actually a context-aware compression, +usually compressing to ~30% (or even less, with large data types). So the lack +of additional pglz compression may be acceptable. + + +Deserialization +--------------- + +The deserialization is not a perfect inverse of the serialization, as we keep +the deduplicated arrays. This reduces the amount of memory and also allows +optimizations during estimation (e.g. we can cache results for the distinct +values, saving expensive function calls). 
+ + +Inspecting the histogram +------------------------ + +Inspecting the regular (per-attribute) histograms is trivial, as it's enough +to select the columns from pg_stats - the data is encoded as anyarray, so we +simply get the text representation of the array. + +With multivariate histograms it's not that simple due to the possible mix of +data types in the histogram. It might be possible to produce similar array-like +text representation, but that'd unnecessarily complicate further processing +and analysis of the histogram. Instead, there's an SRF function that allows +access to lower/upper boundaries, frequencies etc. + + SELECT * FROM pg_mv_histogram_buckets(); + +It has two input parameters: + + oid - OID of the histogram (pg_mv_statistic.staoid) + otype - type of output + +and produces a table with these columns: + + - bucket ID (0...nbuckets-1) + - lower bucket boundaries (string array) + - upper bucket boundaries (string array) + - nulls only dimensions (boolean array) + - lower boundary inclusive (boolean array) + - upper boundary inclusive (boolean array) + - frequency (double precision) + +The 'otype' accepts three values, determining what will be returned in the +lower/upper boundary arrays: + + - 0 - values stored in the histogram, encoded as text + - 1 - indexes into the deduplicated arrays + - 2 - indexes into the deduplicated arrays, scaled to [0,1] diff --git a/src/backend/utils/mvstats/README.stats b/src/backend/utils/mvstats/README.stats index 8d3d268..9cc1c3e 100644 --- a/src/backend/utils/mvstats/README.stats +++ b/src/backend/utils/mvstats/README.stats @@ -18,6 +18,8 @@ Currently we only have two kinds of multivariate statistics (b) MCV lists (README.mcv) + (c) multivariate histograms (README.histogram) + Compatible clause types ----------------------- diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c index fc8eae2..82f4e4a 100644 --- a/src/backend/utils/mvstats/common.c +++
b/src/backend/utils/mvstats/common.c @@ -13,6 +13,7 @@ * *------------------------------------------------------------------------- */ +#include "postgres.h" #include "common.h" #include "utils/array.h" @@ -24,7 +25,7 @@ static List *list_mv_stats(Oid relid); static void update_mv_stats(Oid relid, MVNDistinct ndistinct, MVDependencies dependencies, - MCVList mcvlist, + MCVList mcvlist, MVHistogram histogram, int2vector *attrs, VacAttrStats **stats); /* @@ -57,7 +58,8 @@ build_mv_stats(Relation onerel, double totalrows, MVNDistinct ndistinct = NULL; MVDependencies deps = NULL; MCVList mcvlist = NULL; - int numrows_filtered = 0; + MVHistogram histogram = NULL; + int numrows_filtered = numrows; VacAttrStats **stats = NULL; int numatts = 0; @@ -102,8 +104,12 @@ build_mv_stats(Relation onerel, double totalrows, if (stat->mcv_enabled) mcvlist = build_mv_mcvlist(numrows, rows, attrs, stats, &numrows_filtered); - /* store the statistics in the catalog */ - update_mv_stats(stat->mvoid, ndistinct, deps, mcvlist, attrs, stats); + /* build a multivariate histogram on the columns */ + if ((numrows_filtered > 0) && (stat->hist_enabled)) + histogram = build_mv_histogram(numrows_filtered, rows, attrs, stats, numrows); + + /* store the histogram / MCV list in the catalog */ + update_mv_stats(stat->mvoid, ndistinct, deps, mcvlist, histogram, attrs, stats); } } @@ -187,6 +193,8 @@ list_mv_stats(Oid relid) info->deps_built = stats->deps_built; info->mcv_enabled = stats->mcv_enabled; info->mcv_built = stats->mcv_built; + info->hist_enabled = stats->hist_enabled; + info->hist_built = stats->hist_built; result = lappend(result, info); } @@ -255,7 +263,8 @@ find_mv_attnums(Oid mvoid, Oid *relid) */ static void update_mv_stats(Oid mvoid, - MVNDistinct ndistinct, MVDependencies dependencies, MCVList mcvlist, + MVNDistinct ndistinct, MVDependencies dependencies, + MCVList mcvlist, MVHistogram histogram, int2vector *attrs, VacAttrStats **stats) { HeapTuple stup, @@ -297,15 +306,26 @@ 
update_mv_stats(Oid mvoid, values[Anum_pg_mv_statistic_stamcv - 1] = PointerGetDatum(data); } + if (histogram != NULL) + { + bytea *data = serialize_mv_histogram(histogram, attrs, stats); + + nulls[Anum_pg_mv_statistic_stahist - 1] = (data == NULL); + values[Anum_pg_mv_statistic_stahist - 1] + = PointerGetDatum(data); + } + /* always replace the value (either by bytea or NULL) */ replaces[Anum_pg_mv_statistic_standist - 1] = true; replaces[Anum_pg_mv_statistic_stadeps - 1] = true; replaces[Anum_pg_mv_statistic_stamcv - 1] = true; + replaces[Anum_pg_mv_statistic_stahist - 1] = true; /* always change the availability flags */ nulls[Anum_pg_mv_statistic_ndist_built - 1] = false; nulls[Anum_pg_mv_statistic_deps_built - 1] = false; nulls[Anum_pg_mv_statistic_mcv_built - 1] = false; + nulls[Anum_pg_mv_statistic_hist_built - 1] = false; nulls[Anum_pg_mv_statistic_stakeys - 1] = false; @@ -313,12 +333,14 @@ update_mv_stats(Oid mvoid, replaces[Anum_pg_mv_statistic_ndist_built - 1] = true; replaces[Anum_pg_mv_statistic_deps_built - 1] = true; replaces[Anum_pg_mv_statistic_mcv_built - 1] = true; + replaces[Anum_pg_mv_statistic_hist_built - 1] = true; replaces[Anum_pg_mv_statistic_stakeys - 1] = true; values[Anum_pg_mv_statistic_ndist_built - 1] = BoolGetDatum(ndistinct != NULL); values[Anum_pg_mv_statistic_deps_built - 1] = BoolGetDatum(dependencies != NULL); values[Anum_pg_mv_statistic_mcv_built - 1] = BoolGetDatum(mcvlist != NULL); + values[Anum_pg_mv_statistic_hist_built - 1] = BoolGetDatum(histogram != NULL); values[Anum_pg_mv_statistic_stakeys - 1] = PointerGetDatum(attrs); diff --git a/src/backend/utils/mvstats/common.h b/src/backend/utils/mvstats/common.h index fe56f51..96c0317 100644 --- a/src/backend/utils/mvstats/common.h +++ b/src/backend/utils/mvstats/common.h @@ -77,7 +77,7 @@ MultiSortSupport multi_sort_init(int ndims); void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, int dim, VacAttrStats **vacattrstats); -int multi_sort_compare(const void *a, 
const void *b, void *arg); +int multi_sort_compare(const void *a, const void *b, void *arg); int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss); @@ -86,9 +86,9 @@ int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss); /* comparators, used when constructing multivariate stats */ -int compare_datums_simple(Datum a, Datum b, SortSupport ssup); -int compare_scalars_simple(const void *a, const void *b, void *arg); -int compare_scalars_partition(const void *a, const void *b, void *arg); +int compare_datums_simple(Datum a, Datum b, SortSupport ssup); +int compare_scalars_simple(const void *a, const void *b, void *arg); +int compare_scalars_partition(const void *a, const void *b, void *arg); void *bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, diff --git a/src/backend/utils/mvstats/histogram.c b/src/backend/utils/mvstats/histogram.c new file mode 100644 index 0000000..fc0c9c2 --- /dev/null +++ b/src/backend/utils/mvstats/histogram.c @@ -0,0 +1,2123 @@ +/*------------------------------------------------------------------------- + * + * histogram.c + * POSTGRES multivariate histograms + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/mvstats/histogram.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" + +#include "utils/bytea.h" +#include "utils/lsyscache.h" + +#include "common.h" +#include + + +static MVBucket create_initial_mv_bucket(int numrows, HeapTuple *rows, + int2vector *attrs, + VacAttrStats **stats); + +static MVBucket select_bucket_to_partition(int nbuckets, MVBucket *buckets); + +static MVBucket partition_bucket(MVBucket bucket, int2vector *attrs, + VacAttrStats **stats, + int 
*ndistvalues, Datum **distvalues); + +static MVBucket copy_mv_bucket(MVBucket bucket, uint32 ndimensions); + +static void update_bucket_ndistinct(MVBucket bucket, int2vector *attrs, + VacAttrStats **stats); + +static void update_dimension_ndistinct(MVBucket bucket, int dimension, + int2vector *attrs, + VacAttrStats **stats, + bool update_boundaries); + +static void create_null_buckets(MVHistogram histogram, int bucket_idx, + int2vector *attrs, VacAttrStats **stats); + +static Datum *build_ndistinct(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int i, int *nvals); + +/* + * Each serialized bucket needs to store (in this order): + * + * - number of tuples (float) + * - number of distinct (float) + * - min inclusive flags (ndim * sizeof(bool)) + * - max inclusive flags (ndim * sizeof(bool)) + * - null dimension flags (ndim * sizeof(bool)) + * - min boundary indexes (2 * ndim * sizeof(uint16)) + * - max boundary indexes (2 * ndim * sizeof(uint16)) + * + * So in total: + * + * ndim * (4 * sizeof(uint16) + 3 * sizeof(bool)) + (2 * sizeof(float)) + */ +#define BUCKET_SIZE(ndims) \ + (ndims * (4 * sizeof(uint16) + 3 * sizeof(bool)) + sizeof(float)) + +/* pointers into a flat serialized bucket of BUCKET_SIZE(n) bytes */ +#define BUCKET_NTUPLES(b) (*(float*)b) +#define BUCKET_MIN_INCL(b,n) ((bool*)(b + sizeof(float))) +#define BUCKET_MAX_INCL(b,n) (BUCKET_MIN_INCL(b,n) + n) +#define BUCKET_NULLS_ONLY(b,n) (BUCKET_MAX_INCL(b,n) + n) +#define BUCKET_MIN_INDEXES(b,n) ((uint16*)(BUCKET_NULLS_ONLY(b,n) + n)) +#define BUCKET_MAX_INDEXES(b,n) ((BUCKET_MIN_INDEXES(b,n) + n)) + +/* can't split bucket with less than 10 rows */ +#define MIN_BUCKET_ROWS 10 + +/* + * Data used while building the histogram. 
+ */ +typedef struct HistogramBuildData +{ + + float ndistinct; /* frequency of distinct values */ + + HeapTuple *rows; /* array of sample rows */ + uint32 numrows; /* number of sample rows (array size) */ + + /* + * Number of distinct values in each dimension. This is used when building + * the histogram (and is not serialized/deserialized). + */ + uint32 *ndistincts; + +} HistogramBuildData; + +typedef HistogramBuildData *HistogramBuild; + +/* + * builds a multivariate histogram + * + * The build algorithm is iterative - initially a single bucket containing all + * the sample rows is formed, and then repeatedly split into smaller buckets. + * In each step the largest bucket (in some sense) is chosen to be split next. + * + * The criteria for selecting the largest bucket (and the dimension for the + * split) needs to be elaborate enough to produce buckets of roughly the same + * size, and also regular shape (not very long in one dimension). + * + * The current algorithm works like this: + * + * build NULL-buckets (create_null_buckets) + * + * while [maximum number of buckets not reached] + * + * choose bucket to partition (largest bucket) + * if no bucket to partition + * terminate the algorithm + * + * choose bucket dimension to partition (largest dimension) + * split the bucket into two buckets + * + * See the discussion at select_bucket_to_partition and partition_bucket for + * more details about the algorithm.
+ */ +MVHistogram +build_mv_histogram(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int numrows_total) +{ + int i; + int numattrs = attrs->dim1; + + int *ndistvalues; + Datum **distvalues; + + MVHistogram histogram; + + HeapTuple *rows_copy = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple)); + + memcpy(rows_copy, rows, sizeof(HeapTuple) * numrows); + + Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS)); + + /* build histogram header */ + + histogram = (MVHistogram) palloc0(sizeof(MVHistogramData)); + + histogram->magic = MVSTAT_HIST_MAGIC; + histogram->type = MVSTAT_HIST_TYPE_BASIC; + + histogram->nbuckets = 1; + histogram->ndimensions = numattrs; + + /* create max buckets (better than repalloc for short-lived objects) */ + histogram->buckets + = (MVBucket *) palloc0(MVSTAT_HIST_MAX_BUCKETS * sizeof(MVBucket)); + + /* create the initial bucket, covering the whole sample set */ + histogram->buckets[0] + = create_initial_mv_bucket(numrows, rows_copy, attrs, stats); + + /* + * Collect info on distinct values in each dimension (used later to select + * dimension to partition). + */ + ndistvalues = (int *) palloc0(sizeof(int) * numattrs); + distvalues = (Datum **) palloc0(sizeof(Datum *) * numattrs); + + for (i = 0; i < numattrs; i++) + distvalues[i] = build_ndistinct(numrows, rows, attrs, stats, i, + &ndistvalues[i]); + + /* + * Split the initial bucket into buckets that don't mix NULL and non-NULL + * values in a single dimension. + */ + create_null_buckets(histogram, 0, attrs, stats); + + /* + * Do the actual histogram build - select a bucket and split it. 
+ */ + while (histogram->nbuckets < MVSTAT_HIST_MAX_BUCKETS) + { + MVBucket bucket = select_bucket_to_partition(histogram->nbuckets, + histogram->buckets); + + /* no buckets eligible for partitioning */ + if (bucket == NULL) + break; + + /* we modify the bucket in-place and add one new bucket */ + histogram->buckets[histogram->nbuckets++] + = partition_bucket(bucket, attrs, stats, ndistvalues, distvalues); + } + + /* finalize the histogram build - compute the frequencies etc. */ + for (i = 0; i < histogram->nbuckets; i++) + { + HistogramBuild build_data + = ((HistogramBuild) histogram->buckets[i]->build_data); + + /* + * The frequency has to be computed from the whole sample, in case + * some of the rows were used for MCV. + * + * XXX Perhaps this should simply compute frequency with respect to + * the local freuquency, and then factor-in the MCV later. + * + * FIXME The 'ntuples' sounds a bit inappropriate for frequency. + */ + histogram->buckets[i]->ntuples + = (build_data->numrows * 1.0) / numrows_total; + } + + return histogram; +} + +/* build array of distinct values for a single attribute */ +static Datum * +build_ndistinct(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int i, int *nvals) +{ + int j; + int nvalues, + ndistinct; + Datum *values, + *distvalues; + + SortSupportData ssup; + StdAnalyzeData *mystats = (StdAnalyzeData *) stats[i]->extra_data; + + /* initialize sort support, etc. 
*/ + memset(&ssup, 0, sizeof(ssup)); + ssup.ssup_cxt = CurrentMemoryContext; + + /* We always use the default collation for statistics */ + ssup.ssup_collation = DEFAULT_COLLATION_OID; + ssup.ssup_nulls_first = false; + + PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); + + nvalues = 0; + values = (Datum *) palloc0(sizeof(Datum) * numrows); + + /* collect values from the sample rows, ignore NULLs */ + for (j = 0; j < numrows; j++) + { + Datum value; + bool isnull; + + /* + * remember the index of the sample row, to make the partitioning + * simpler + */ + value = heap_getattr(rows[j], attrs->values[i], + stats[i]->tupDesc, &isnull); + + if (isnull) + continue; + + values[nvalues++] = value; + } + + /* if no non-NULL values were found, free the memory and terminate */ + if (nvalues == 0) + { + pfree(values); + return NULL; + } + + /* sort the array of values using the SortSupport */ + qsort_arg((void *) values, nvalues, sizeof(Datum), + compare_scalars_simple, (void *) &ssup); + + /* count the distinct values first, and allocate just enough memory */ + ndistinct = 1; + for (j = 1; j < nvalues; j++) + if (compare_scalars_simple(&values[j], &values[j - 1], &ssup) != 0) + ndistinct += 1; + + distvalues = (Datum *) palloc0(sizeof(Datum) * ndistinct); + + /* now collect distinct values into the array */ + distvalues[0] = values[0]; + ndistinct = 1; + + for (j = 1; j < nvalues; j++) + { + if (compare_scalars_simple(&values[j], &values[j - 1], &ssup) != 0) + { + distvalues[ndistinct] = values[j]; + ndistinct += 1; + } + } + + pfree(values); + + *nvals = ndistinct; + return distvalues; +} + +/* fetch the histogram (as a bytea) from the pg_mv_statistic catalog */ +MVSerializedHistogram +load_mv_histogram(Oid mvoid) +{ + bool isnull = false; + Datum histogram; + +#ifdef USE_ASSERT_CHECKING + Form_pg_mv_statistic mvstat; +#endif + + /* Prepare to scan pg_mv_statistic for entries having indrelid = this rel. 
*/ + HeapTuple htup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(mvoid)); + + if (!HeapTupleIsValid(htup)) + return NULL; + +#ifdef USE_ASSERT_CHECKING + mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup); + Assert(mvstat->hist_enabled && mvstat->hist_built); +#endif + + histogram = SysCacheGetAttr(MVSTATOID, htup, + Anum_pg_mv_statistic_stahist, &isnull); + + Assert(!isnull); + + ReleaseSysCache(htup); + + return deserialize_mv_histogram(DatumGetByteaP(histogram)); +} + +/* print some basic info about the histogram */ +Datum +pg_mv_stats_histogram_info(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_P(0); + char *result; + + MVSerializedHistogram hist = deserialize_mv_histogram(data); + + result = palloc0(128); + snprintf(result, 128, "nbuckets=%d", hist->nbuckets); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * Serialize the MV histogram into a bytea value. The basic algorithm is quite + * simple, and mostly mimincs the MCV serialization: + * + * (1) perform deduplication for each attribute (separately) + * + * (a) collect all (non-NULL) attribute values from all buckets + * (b) sort the data (using 'lt' from VacAttrStats) + * (c) remove duplicate values from the array + * + * (2) serialize the arrays into a bytea value + * + * (3) process all buckets + * + * (a) replace min/max values with indexes into the arrays + * + * Each attribute has to be processed separately, as we're mixing different + * datatypes, and we we need to use the right operators to compare/sort them. + * We're also mixing pass-by-value and pass-by-ref types, and so on. + * + * + * FIXME This probably leaks memory, or at least uses it inefficiently + * (many small palloc() calls instead of a large one). + * + * TODO Consider packing boolean flags (NULL) for each item into 'char' or + * a longer type (instead of using an array of bool items). 
 */
bytea *
serialize_mv_histogram(MVHistogram histogram, int2vector *attrs,
					   VacAttrStats **stats)
{
	int			i = 0,
				j = 0;
	Size		total_length = 0;

	bytea	   *output = NULL;
	char	   *data = NULL;

	DimensionInfo *info;
	SortSupport ssup;

	int			nbuckets = histogram->nbuckets;
	int			ndims = histogram->ndimensions;

	/* allocated for serialized bucket data */
	int			bucketsize = BUCKET_SIZE(ndims);
	char	   *bucket = palloc0(bucketsize);

	/* values per dimension (and number of non-NULL values) */
	Datum	  **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
	int		   *counts = (int *) palloc0(sizeof(int) * ndims);

	/* info about dimensions (for deserialize) */
	info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);

	/* sort support data */
	ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);

	/* collect and deduplicate values for each dimension separately */
	for (i = 0; i < ndims; i++)
	{
		int			count;
		StdAnalyzeData *tmp = (StdAnalyzeData *) stats[i]->extra_data;

		/* keep important info about the data type */
		info[i].typlen = stats[i]->attrtype->typlen;
		info[i].typbyval = stats[i]->attrtype->typbyval;

		/*
		 * Allocate space for all min/max values, including NULLs (we won't
		 * use them, but we don't know how many are there), and then collect
		 * all non-NULL values.
		 */
		values[i] = (Datum *) palloc0(sizeof(Datum) * nbuckets * 2);

		for (j = 0; j < histogram->nbuckets; j++)
		{
			/* skip buckets where this dimension is NULL-only */
			if (!histogram->buckets[j]->nullsonly[i])
			{
				values[i][counts[i]] = histogram->buckets[j]->min[i];
				counts[i] += 1;

				values[i][counts[i]] = histogram->buckets[j]->max[i];
				counts[i] += 1;
			}
		}

		/* there are just NULL values in this dimension */
		if (counts[i] == 0)
			continue;

		/* sort and deduplicate */
		ssup[i].ssup_cxt = CurrentMemoryContext;
		ssup[i].ssup_collation = DEFAULT_COLLATION_OID;
		ssup[i].ssup_nulls_first = false;

		PrepareSortSupportFromOrderingOp(tmp->ltopr, &ssup[i]);

		qsort_arg(values[i], counts[i], sizeof(Datum),
				  compare_scalars_simple, &ssup[i]);

		/*
		 * Walk through the array and eliminate duplicate values, but keep
		 * the ordering (so that we can do bsearch later). We know there's at
		 * least 1 item, so we can skip the first element.
		 */
		count = 1;				/* number of deduplicated items */
		for (j = 1; j < counts[i]; j++)
		{
			/* if it's different from the previous value, we need to keep it */
			if (compare_datums_simple(values[i][j - 1], values[i][j], &ssup[i]) != 0)
			{
				/* XXX: not needed if (count == j) */
				values[i][count] = values[i][j];
				count += 1;
			}
		}

		/* make sure we fit into uint16 (bucket boundaries are uint16 indexes) */
		Assert(count <= UINT16_MAX);

		/* keep info about the deduplicated count */
		info[i].nvalues = count;

		/* compute size of the serialized data */
		if (info[i].typlen > 0)
			/* byval or byref, but with fixed length (name, tid, ...) */
			info[i].nbytes = info[i].nvalues * info[i].typlen;
		else if (info[i].typlen == -1)
			/* varlena, so just use VARSIZE_ANY */
			for (j = 0; j < info[i].nvalues; j++)
				info[i].nbytes += VARSIZE_ANY(values[i][j]);
		else if (info[i].typlen == -2)
			/* cstring, so simply strlen */
			for (j = 0; j < info[i].nvalues; j++)
				info[i].nbytes += strlen(DatumGetPointer(values[i][j]));
		else
			elog(ERROR, "unknown data type typbyval=%d typlen=%d",
				 info[i].typbyval, info[i].typlen);
	}

	/*
	 * Now we finally know how much space we'll need for the serialized
	 * histogram, as it contains these fields:
	 *
	 * - length (4B) for varlena
	 * - magic (4B)
	 * - type (4B)
	 * - ndimensions (4B)
	 * - nbuckets (4B)
	 * - info (ndim * sizeof(DimensionInfo))
	 * - arrays of values for each dimension
	 * - serialized buckets (nbuckets * bucketsize)
	 *
	 * So the 'header' size is 20B + ndim * sizeof(DimensionInfo) and then
	 * we'll place the data (and buckets).
	 */
	total_length = (sizeof(int32) + offsetof(MVHistogramData, buckets)
					+ ndims * sizeof(DimensionInfo)
					+ nbuckets * bucketsize);

	/* account for the deduplicated data */
	for (i = 0; i < ndims; i++)
		total_length += info[i].nbytes;

	/* enforce arbitrary limit of 1MB */
	if (total_length > (1024 * 1024))
		elog(ERROR, "serialized histogram exceeds 1MB (%ld > %d)",
			 total_length, (1024 * 1024));

	/* allocate space for the serialized histogram list, set header */
	output = (bytea *) palloc0(total_length);
	SET_VARSIZE(output, total_length);

	/* we'll use 'data' to keep track of the place to write data */
	data = VARDATA(output);

	memcpy(data, histogram, offsetof(MVHistogramData, buckets));
	data += offsetof(MVHistogramData, buckets);

	memcpy(data, info, sizeof(DimensionInfo) * ndims);
	data += sizeof(DimensionInfo) * ndims;

	/* serialize the deduplicated values for all attributes */
	for (i = 0; i < ndims; i++)
	{
#ifdef USE_ASSERT_CHECKING
		char	   *tmp = data;
#endif
		for (j = 0; j < info[i].nvalues; j++)
		{
			Datum		v = values[i][j];

			if (info[i].typbyval)		/* passed by value */
			{
				memcpy(data, &v, info[i].typlen);
				data += info[i].typlen;
			}
			else if (info[i].typlen > 0)	/* passed by reference */
			{
				memcpy(data, DatumGetPointer(v), info[i].typlen);
				data += info[i].typlen;
			}
			else if (info[i].typlen == -1)	/* varlena */
			{
				memcpy(data, DatumGetPointer(v), VARSIZE_ANY(v));
				data += VARSIZE_ANY(values[i][j]);
			}
			else if (info[i].typlen == -2)	/* cstring */
			{
				/* include the terminating \0 byte */
				memcpy(data, DatumGetPointer(v), strlen(DatumGetPointer(v)) + 1);
				data += strlen(DatumGetPointer(v)) + 1;
			}
		}

		/* make sure we got exactly the amount of data we expected */
		Assert((data - tmp) == info[i].nbytes);
	}

	/* finally serialize the items, with uint16 indexes instead of the values */
	for (i = 0; i < nbuckets; i++)
	{
		/* don't write beyond the allocated space */
		Assert(data <= (char *) output + total_length - bucketsize);

		/* reset the values for each item */
		memset(bucket, 0, bucketsize);

		BUCKET_NTUPLES(bucket) = histogram->buckets[i]->ntuples;

		for (j = 0; j < ndims; j++)
		{
			/* do the lookup only for non-NULL values */
			if (!histogram->buckets[i]->nullsonly[j])
			{
				uint16		idx;
				Datum	   *v = NULL;

				/* min boundary */
				v = (Datum *) bsearch_arg(&histogram->buckets[i]->min[j],
										  values[j], info[j].nvalues, sizeof(Datum),
										  compare_scalars_simple, &ssup[j]);

				Assert(v != NULL);		/* serialization or deduplication
										 * error */

				/* compute index within the array */
				idx = (v - values[j]);

				Assert((idx >= 0) && (idx < info[j].nvalues));

				BUCKET_MIN_INDEXES(bucket, ndims)[j] = idx;

				/* max boundary */
				v = (Datum *) bsearch_arg(&histogram->buckets[i]->max[j],
										  values[j], info[j].nvalues, sizeof(Datum),
										  compare_scalars_simple, &ssup[j]);

				Assert(v != NULL);		/* serialization or deduplication
										 * error */

				/* compute index within the array */
				idx = (v - values[j]);

				Assert((idx >= 0) && (idx < info[j].nvalues));

				BUCKET_MAX_INDEXES(bucket, ndims)[j] = idx;
			}
		}

		/* copy flags (nulls, min/max inclusive) */
		memcpy(BUCKET_NULLS_ONLY(bucket, ndims),
			   histogram->buckets[i]->nullsonly, sizeof(bool) * ndims);

		memcpy(BUCKET_MIN_INCL(bucket, ndims),
			   histogram->buckets[i]->min_inclusive, sizeof(bool) * ndims);

		memcpy(BUCKET_MAX_INCL(bucket, ndims),
			   histogram->buckets[i]->max_inclusive, sizeof(bool) * ndims);

		/* copy the item into the array */
		memcpy(data, bucket, bucketsize);

		data += bucketsize;
	}

	/* at this point we expect to match the total_length exactly */
	Assert((data - (char *) output) == total_length);

	/* free the values/counts arrays here */
	pfree(counts);
	pfree(info);
	pfree(ssup);

	for (i = 0; i < ndims; i++)
		pfree(values[i]);

	pfree(values);

	return output;
}

/*
 * Returns histogram in a partially-serialized form (keeps the boundary values
 * deduplicated, so that it's possible to optimize the estimation part by
 * caching function call results between buckets etc.).
+ */ +MVSerializedHistogram +deserialize_mv_histogram(bytea *data) +{ + int i = 0, + j = 0; + + Size expected_size; + char *tmp = NULL; + + MVSerializedHistogram histogram; + DimensionInfo *info; + + int nbuckets; + int ndims; + int bucketsize; + + /* temporary deserialization buffer */ + int bufflen; + char *buff; + char *ptr; + + if (data == NULL) + return NULL; + + if (VARSIZE_ANY_EXHDR(data) < offsetof(MVSerializedHistogramData, buckets)) + elog(ERROR, "invalid histogram size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), offsetof(MVSerializedHistogramData, buckets)); + + /* read the histogram header */ + histogram + = (MVSerializedHistogram) palloc(sizeof(MVSerializedHistogramData)); + + /* initialize pointer to the data part (skip the varlena header) */ + tmp = VARDATA_ANY(data); + + /* get the header and perform basic sanity checks */ + memcpy(histogram, tmp, offsetof(MVSerializedHistogramData, buckets)); + tmp += offsetof(MVSerializedHistogramData, buckets); + + if (histogram->magic != MVSTAT_HIST_MAGIC) + elog(ERROR, "invalid histogram magic %d (expected %dd)", + histogram->magic, MVSTAT_HIST_MAGIC); + + if (histogram->type != MVSTAT_HIST_TYPE_BASIC) + elog(ERROR, "invalid histogram type %d (expected %dd)", + histogram->type, MVSTAT_HIST_TYPE_BASIC); + + nbuckets = histogram->nbuckets; + ndims = histogram->ndimensions; + bucketsize = BUCKET_SIZE(ndims); + + Assert((nbuckets > 0) && (nbuckets <= MVSTAT_HIST_MAX_BUCKETS)); + Assert((ndims >= 2) && (ndims <= MVSTATS_MAX_DIMENSIONS)); + + /* + * What size do we expect with those parameters (it's incomplete, as we + * yet have to count the array sizes (from DimensionInfo records). 
+ */ + expected_size = offsetof(MVSerializedHistogramData, buckets) + + ndims * sizeof(DimensionInfo) + + (nbuckets * bucketsize); + + /* check that we have at least the DimensionInfo records */ + if (VARSIZE_ANY_EXHDR(data) < expected_size) + elog(ERROR, "invalid histogram size %ld (expected %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + info = (DimensionInfo *) (tmp); + tmp += ndims * sizeof(DimensionInfo); + + /* account for the value arrays */ + for (i = 0; i < ndims; i++) + expected_size += info[i].nbytes; + + if (VARSIZE_ANY_EXHDR(data) != expected_size) + elog(ERROR, "invalid histogram size %ld (expected %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + /* looks OK - not corrupted or something */ + + /* a single buffer for all the values and counts */ + bufflen = (sizeof(int) + sizeof(Datum *)) * ndims; + + for (i = 0; i < ndims; i++) + /* don't allocate space for byval types, matching Datum */ + if (!(info[i].typbyval && (info[i].typlen == sizeof(Datum)))) + bufflen += (sizeof(Datum) * info[i].nvalues); + + /* also, include space for the result, tracking the buckets */ + bufflen += nbuckets * ( + sizeof(MVSerializedBucket) + /* bucket pointer */ + sizeof(MVSerializedBucketData)); /* bucket data */ + + buff = palloc0(bufflen); + ptr = buff; + + histogram->nvalues = (int *) ptr; + ptr += (sizeof(int) * ndims); + + histogram->values = (Datum **) ptr; + ptr += (sizeof(Datum *) * ndims); + + /* + * FIXME This uses pointers to the original data array (the types not + * passed by value), so when someone frees the memory, e.g. by doing + * something like this: + * + * bytea * data = ... fetch the data from catalog ... MCVList mcvlist = + * deserialize_mcv_list(data); pfree(data); + * + * then 'mcvlist' references the freed memory. This needs to copy the + * pieces. 
+ * + * TODO same as in MCV deserialization / consider moving to common.c + */ + for (i = 0; i < ndims; i++) + { + histogram->nvalues[i] = info[i].nvalues; + + if (info[i].typbyval) + { + /* passed by value / Datum - simply reuse the array */ + if (info[i].typlen == sizeof(Datum)) + { + histogram->values[i] = (Datum *) tmp; + tmp += info[i].nbytes; + } + else + { + histogram->values[i] = (Datum *) ptr; + ptr += (sizeof(Datum) * info[i].nvalues); + + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + memcpy(&histogram->values[i][j], tmp, info[i].typlen); + tmp += info[i].typlen; + } + } + } + else + { + /* all the other types need a chunk of the buffer */ + histogram->values[i] = (Datum *) ptr; + ptr += (sizeof(Datum) * info[i].nvalues); + + if (info[i].typlen > 0) + { + /* pased by reference, but fixed length (name, tid, ...) */ + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + histogram->values[i][j] = PointerGetDatum(tmp); + tmp += info[i].typlen; + } + } + else if (info[i].typlen == -1) + { + /* varlena */ + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + histogram->values[i][j] = PointerGetDatum(tmp); + tmp += VARSIZE_ANY(tmp); + } + } + else if (info[i].typlen == -2) + { + /* cstring */ + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + histogram->values[i][j] = PointerGetDatum(tmp); + tmp += (strlen(tmp) + 1); /* don't forget the \0 */ + } + } + } + } + + histogram->buckets = (MVSerializedBucket *) ptr; + ptr += (sizeof(MVSerializedBucket) * nbuckets); + + for (i = 0; i < nbuckets; i++) + { + MVSerializedBucket bucket = (MVSerializedBucket) ptr; + + ptr += sizeof(MVSerializedBucketData); + + bucket->ntuples = BUCKET_NTUPLES(tmp); + bucket->nullsonly = BUCKET_NULLS_ONLY(tmp, ndims); + bucket->min_inclusive = BUCKET_MIN_INCL(tmp, ndims); + bucket->max_inclusive = BUCKET_MAX_INCL(tmp, ndims); + + bucket->min = BUCKET_MIN_INDEXES(tmp, ndims); + 
bucket->max = BUCKET_MAX_INDEXES(tmp, ndims); + + histogram->buckets[i] = bucket; + + Assert(tmp <= (char *) data + VARSIZE_ANY(data)); + + tmp += bucketsize; + } + + /* at this point we expect to match the total_length exactly */ + Assert((tmp - VARDATA(data)) == expected_size); + + /* we should exhaust the output buffer exactly */ + Assert((ptr - buff) == bufflen); + + return histogram; +} + +/* + * Build the initial bucket, which will be then split into smaller ones. + */ +static MVBucket +create_initial_mv_bucket(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats) +{ + int i; + int numattrs = attrs->dim1; + HistogramBuild data = NULL; + + /* TODO allocate bucket as a single piece, including all the fields. */ + MVBucket bucket = (MVBucket) palloc0(sizeof(MVBucketData)); + + Assert(numrows > 0); + Assert(rows != NULL); + Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS)); + + /* allocate the per-dimension arrays */ + + /* flags for null-only dimensions */ + bucket->nullsonly = (bool *) palloc0(numattrs * sizeof(bool)); + + /* inclusiveness boundaries - lower/upper bounds */ + bucket->min_inclusive = (bool *) palloc0(numattrs * sizeof(bool)); + bucket->max_inclusive = (bool *) palloc0(numattrs * sizeof(bool)); + + /* lower/upper boundaries */ + bucket->min = (Datum *) palloc0(numattrs * sizeof(Datum)); + bucket->max = (Datum *) palloc0(numattrs * sizeof(Datum)); + + /* build-data */ + data = (HistogramBuild) palloc0(sizeof(HistogramBuildData)); + + /* number of distinct values (per dimension) */ + data->ndistincts = (uint32 *) palloc0(numattrs * sizeof(uint32)); + + /* all the sample rows fall into the initial bucket */ + data->numrows = numrows; + data->rows = rows; + + bucket->build_data = data; + + /* + * Update the number of ndistinct combinations in the bucket (which we use + * when selecting bucket to partition), and then number of distinct values + * for each partition (which we use when choosing which dimension to + * 
split). + */ + update_bucket_ndistinct(bucket, attrs, stats); + + /* Update ndistinct (and also set min/max) for all dimensions. */ + for (i = 0; i < numattrs; i++) + update_dimension_ndistinct(bucket, i, attrs, stats, true); + + return bucket; +} + +/* + * Choose the bucket to partition next. + * + * The current criteria is rather simple, chosen so that the algorithm produces + * buckets with about equal frequency and regular size. We select the bucket + * with the highest number of distinct values, and then split it by the longest + * dimension. + * + * The distinct values are uniformly mapped to [0,1] interval, and this is used + * to compute length of the value range. + * + * NOTE: This is not the same array used for deduplication, as this contains + * values for all the tuples from the sample, not just the boundary values. + * + * Returns either pointer to the bucket selected to be partitioned, or NULL if + * there are no buckets that may be split (e.g. if all buckets are too small + * or contain too few distinct values). + * + * + * Tricky example + * -------------- + * + * Consider this table: + * + * CREATE TABLE t AS SELECT i AS a, i AS b + * FROM generate_series(1,1000000) s(i); + * + * CREATE STATISTICS s1 ON t (a,b) WITH (histogram); + * + * ANALYZE t; + * + * It's a very specific (and perhaps artificial) example, because every bucket + * always has exactly the same number of distinct values in all dimensions, + * which makes the partitioning tricky. + * + * Then: + * + * SELECT * FROM t WHERE (a < 100) AND (b < 100); + * + * is estimated to return ~120 rows, while in reality it returns only 99. 
+ * + * QUERY PLAN + * ------------------------------------------------------------- + * Seq Scan on t (cost=0.00..19425.00 rows=117 width=8) + * (actual time=0.129..82.776 rows=99 loops=1) + * Filter: ((a < 100) AND (b < 100)) + * Rows Removed by Filter: 999901 + * Planning time: 1.286 ms + * Execution time: 82.984 ms + * (5 rows) + * + * So this estimate is reasonably close. Let's change the query to OR clause: + * + * SELECT * FROM t WHERE (a < 100) OR (b < 100); + * + * QUERY PLAN + * ------------------------------------------------------------- + * Seq Scan on t (cost=0.00..19425.00 rows=8100 width=8) + * (actual time=0.145..99.910 rows=99 loops=1) + * Filter: ((a < 100) OR (b < 100)) + * Rows Removed by Filter: 999901 + * Planning time: 1.578 ms + * Execution time: 100.132 ms + * (5 rows) + * + * That's clearly a much worse estimate. This happens because the histogram + * contains buckets like this: + * + * bucket 592 [3 30310] [30134 30593] => [0.000233] + * + * i.e. the length of "a" dimension is (30310-3)=30307, while the length of "b" + * is (30593-30134)=459. So the "b" dimension is much narrower than "a". + * Of course, there are also buckets where "b" is the wider dimension. + * + * This is partially mitigated by selecting the "longest" dimension but that + * only happens after we already selected the bucket. So if we never select the + * bucket, this optimization does not apply. + * + * The other reason why this particular example behaves so poorly is due to the + * way we actually split the selected bucket. We do attempt to divide the bucket + * into two parts containing about the same number of tuples, but that does not + * too well when most of the tuples is squashed on one side of the bucket. + * + * For example for columns with data on the diagonal (i.e. when a=b), we end up + * with a narrow bucket on the diagonal and a huge bucket overing the remaining + * part (with much lower density). 
+ * + * So perhaps we need two partitioning strategies - one aiming to split buckets + * with high frequency (number of sampled rows), the other aiming to split + * "large" buckets. And alternating between them, somehow. + * + * TODO Consider using similar lower boundary for row count as for simple + * histograms, i.e. 300 tuples per bucket. + */ +static MVBucket +select_bucket_to_partition(int nbuckets, MVBucket *buckets) +{ + int i; + int numrows = 0; + MVBucket bucket = NULL; + + for (i = 0; i < nbuckets; i++) + { + HistogramBuild data = (HistogramBuild) buckets[i]->build_data; + + /* if the number of rows is higher, use this bucket */ + if ((data->ndistinct > 2) && + (data->numrows > numrows) && + (data->numrows >= MIN_BUCKET_ROWS)) + { + bucket = buckets[i]; + numrows = data->numrows; + } + } + + /* may be NULL if there are not buckets with (ndistinct>1) */ + return bucket; +} + +/* + * A simple bucket partitioning implementation - we choose the longest bucket + * dimension, measured using the array of distinct values built at the very + * beginning of the build. + * + * We map all the distinct values to a [0,1] interval, uniformly distributed, + * and then use this to measure length. It's essentially a number of distinct + * values within the range, normalized to [0,1]. + * + * Then we choose a 'middle' value splitting the bucket into two parts with + * roughly the same frequency. + * + * This splits the bucket by tweaking the existing one, and returning the new + * bucket (essentially shrinking the existing one in-place and returning the + * other "half" as a new bucket). The caller is responsible for adding the new + * bucket into the list of buckets. + * + * There are multiple histogram options, centered around the partitioning + * criteria, specifying both how to choose a bucket and the dimension most in + * need of a split. 
 * For a nice summary and general overview, see "rK-Hist : an
 * R-Tree based histogram for multi-dimensional selectivity estimation" thesis
 * by J. A. Lopez, Concordia University, p.34-37 (and possibly p. 32-34 for
 * explanation of the terms).
 *
 * It requires care to prevent splitting only one dimension and not splitting
 * another one at all (which might happen easily in case of strongly dependent
 * columns - e.g. y=x). The current algorithm minimizes this, but may still
 * happen for perfectly dependent examples (when all the dimensions have equal
 * length, the first one will be selected).
 *
 * TODO Should probably consider statistics target for the columns (e.g.
 * to split dimensions with higher statistics target more frequently).
 */
static MVBucket
partition_bucket(MVBucket bucket, int2vector *attrs,
				 VacAttrStats **stats,
				 int *ndistvalues, Datum **distvalues)
{
	int			i;
	int			dimension;
	int			numattrs = attrs->dim1;

	Datum		split_value;
	MVBucket	new_bucket;
	HistogramBuild new_data;

	/* needed for sort, when looking for the split value */
	bool		isNull;
	int			nvalues = 0;
	HistogramBuild data = (HistogramBuild) bucket->build_data;
	StdAnalyzeData *mystats = NULL;
	ScalarItem *values = (ScalarItem *) palloc0(data->numrows * sizeof(ScalarItem));
	SortSupportData ssup;

	int			nrows = 1;		/* number of rows below current value */
	double		delta;

	/* needed when splitting the values */
	HeapTuple  *oldrows = data->rows;
	int			oldnrows = data->numrows;

	/*
	 * We can't split buckets with a single distinct value (this also
	 * disqualifies NULL-only dimensions). Also, there has to be multiple
	 * sample rows (otherwise, how could there be more distinct values).
	 */
	Assert(data->ndistinct > 1);
	Assert(data->numrows > 1);
	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));

	/* Look for the next dimension to split. */
	delta = 0.0;
	dimension = -1;

	for (i = 0; i < numattrs; i++)
	{
		Datum	   *a,
				   *b;

		mystats = (StdAnalyzeData *) stats[i]->extra_data;

		/* initialize sort support, etc. */
		memset(&ssup, 0, sizeof(ssup));
		ssup.ssup_cxt = CurrentMemoryContext;

		/* We always use the default collation for statistics */
		ssup.ssup_collation = DEFAULT_COLLATION_OID;
		ssup.ssup_nulls_first = false;

		PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);

		/* can't split NULL-only dimension */
		if (bucket->nullsonly[i])
			continue;

		/* can't split dimension with a single ndistinct value */
		if (data->ndistincts[i] <= 1)
			continue;

		/* search for min/max boundaries in the distinct list */
		a = (Datum *) bsearch_arg(&bucket->min[i],
								  distvalues[i], ndistvalues[i],
								  sizeof(Datum), compare_scalars_simple, &ssup);

		b = (Datum *) bsearch_arg(&bucket->max[i],
								  distvalues[i], ndistvalues[i],
								  sizeof(Datum), compare_scalars_simple, &ssup);

		/*
		 * if this dimension is 'larger' then partition by it
		 *
		 * (b - a) is the number of distinct values between the boundaries;
		 * dividing by the total ndistvalues[i] normalizes it to [0,1].
		 */
		if (((b - a) * 1.0 / ndistvalues[i]) > delta)
		{
			delta = ((b - a) * 1.0 / ndistvalues[i]);
			dimension = i;
		}
	}

	/*
	 * If we haven't found a dimension here, we've done something wrong in
	 * select_bucket_to_partition.
	 */
	Assert(dimension != -1);

	/*
	 * Walk through the selected dimension, collect and sort the values and
	 * then choose the value to use as the new boundary.
	 */
	mystats = (StdAnalyzeData *) stats[dimension]->extra_data;

	/* initialize sort support, etc. */
	memset(&ssup, 0, sizeof(ssup));
	ssup.ssup_cxt = CurrentMemoryContext;

	/* We always use the default collation for statistics */
	ssup.ssup_collation = DEFAULT_COLLATION_OID;
	ssup.ssup_nulls_first = false;

	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);

	for (i = 0; i < data->numrows; i++)
	{
		/*
		 * remember the index of the sample row, to make the partitioning
		 * simpler
		 */
		values[nvalues].value = heap_getattr(data->rows[i], attrs->values[dimension],
											 stats[dimension]->tupDesc, &isNull);
		values[nvalues].tupno = i;

		/* no NULL values allowed here (we never split null-only dimension) */
		Assert(!isNull);

		nvalues++;
	}

	/* sort the array of values */
	qsort_arg((void *) values, nvalues, sizeof(ScalarItem),
			  compare_scalars_partition, (void *) &ssup);

	/*
	 * We know there are data->ndistincts[dimension] distinct values in this
	 * dimension, and we want to split this into half, so walk through the
	 * array and stop once we see (ndistinct/2) values.
	 *
	 * We always choose the "next" value, i.e. (n/2+1)-th distinct value, and
	 * use it as an exclusive upper boundary (and inclusive lower boundary).
	 *
	 * TODO Maybe we should use "average" of the two middle distinct values
	 * (at least for even distinct counts), but that would require being able
	 * to do an average (which does not work for non-numeric types).
	 *
	 * TODO Another option is to look for a split that'd give about 50% tuples
	 * (not distinct values) in each partition. That might work better when
	 * there are a few very frequent values, and many rare ones.
	 *
	 * NOTE(review): the comparison (values[i].value != values[i - 1].value)
	 * compares raw Datum values, which only detects changes reliably for
	 * pass-by-value types — confirm against the per-type FIXMEs elsewhere in
	 * this file.
	 */
	delta = fabs(data->numrows);
	split_value = values[0].value;

	for (i = 1; i < data->numrows; i++)
	{
		if (values[i].value != values[i - 1].value)
		{
			/* are we closer to splitting the bucket in half? */
			if (fabs(i - data->numrows / 2.0) < delta)
			{
				/* let's assume we'll use this value for the split */
				split_value = values[i].value;
				delta = fabs(i - data->numrows / 2.0);
				nrows = i;
			}
		}
	}

	Assert(nrows > 0);
	Assert(nrows < data->numrows);

	/*
	 * create the new bucket as an (incomplete) copy of the one being
	 * partitioned.
	 */
	new_bucket = copy_mv_bucket(bucket, numattrs);
	new_data = (HistogramBuild) new_bucket->build_data;

	/*
	 * Do the actual split of the chosen dimension, using the split value as
	 * the upper bound for the existing bucket, and lower bound for the new
	 * one.
	 */
	bucket->max[dimension] = split_value;
	new_bucket->min[dimension] = split_value;

	/*
	 * We also treat only one side of the new boundary as inclusive, in the
	 * bucket where it happens to be the upper boundary. We never set the
	 * min_inclusive[] to false anywhere, but we set it to true anyway.
	 */
	bucket->max_inclusive[dimension] = false;
	new_bucket->min_inclusive[dimension] = true;

	/*
	 * Redistribute the sample tuples using the 'ScalarItem->tupno' index. We
	 * know 'nrows' rows should remain in the original bucket and the rest
	 * goes to the new one.
	 */

	data->rows = (HeapTuple *) palloc0(nrows * sizeof(HeapTuple));
	new_data->rows = (HeapTuple *) palloc0((oldnrows - nrows) * sizeof(HeapTuple));

	data->numrows = nrows;
	new_data->numrows = (oldnrows - nrows);

	/*
	 * The first nrows should go to the first bucket, the rest should go to
	 * the new one. Use the tupno field to get the actual HeapTuple row from
	 * the original array of sample rows.
	 */
	for (i = 0; i < nrows; i++)
		memcpy(&data->rows[i], &oldrows[values[i].tupno], sizeof(HeapTuple));

	for (i = nrows; i < oldnrows; i++)
		memcpy(&new_data->rows[i - nrows], &oldrows[values[i].tupno], sizeof(HeapTuple));

	/* update ndistinct values for the buckets (total and per dimension) */
	update_bucket_ndistinct(bucket, attrs, stats);
	update_bucket_ndistinct(new_bucket, attrs, stats);

	/*
	 * TODO We don't need to do this for the dimension we used for split,
	 * because we know how many distinct values went to each partition.
	 */
	for (i = 0; i < numattrs; i++)
	{
		update_dimension_ndistinct(bucket, i, attrs, stats, false);
		update_dimension_ndistinct(new_bucket, i, attrs, stats, false);
	}

	pfree(oldrows);
	pfree(values);

	return new_bucket;
}

/*
 * Copy a histogram bucket. The copy does not include the build-time data, i.e.
 * sampled rows etc.
 */
static MVBucket
copy_mv_bucket(MVBucket bucket, uint32 ndimensions)
{
	/* TODO allocate as a single piece (including all the fields) */
	MVBucket	new_bucket = (MVBucket) palloc0(sizeof(MVBucketData));
	HistogramBuild data = (HistogramBuild) palloc0(sizeof(HistogramBuildData));

	/*
	 * Copy only the attributes that will stay the same after the split, and
	 * we'll recompute the rest after the split.
	 */

	/* allocate the per-dimension arrays */
	new_bucket->nullsonly = (bool *) palloc0(ndimensions * sizeof(bool));

	/* inclusiveness boundaries - lower/upper bounds */
	new_bucket->min_inclusive = (bool *) palloc0(ndimensions * sizeof(bool));
	new_bucket->max_inclusive = (bool *) palloc0(ndimensions * sizeof(bool));

	/* lower/upper boundaries */
	new_bucket->min = (Datum *) palloc0(ndimensions * sizeof(Datum));
	new_bucket->max = (Datum *) palloc0(ndimensions * sizeof(Datum));

	/* copy data */
	memcpy(new_bucket->nullsonly, bucket->nullsonly, ndimensions * sizeof(bool));

	memcpy(new_bucket->min_inclusive, bucket->min_inclusive, ndimensions * sizeof(bool));
	memcpy(new_bucket->min, bucket->min, ndimensions * sizeof(Datum));

	memcpy(new_bucket->max_inclusive, bucket->max_inclusive, ndimensions * sizeof(bool));
	memcpy(new_bucket->max, bucket->max, ndimensions * sizeof(Datum));

	/* allocate and copy the interesting part of the build data */
	data->ndistincts = (uint32 *) palloc0(ndimensions * sizeof(uint32));

	new_bucket->build_data = data;

	return new_bucket;
}

/*
 * Counts the number of distinct (multi-column) value combinations in the
 * bucket, storing the result in build_data->ndistinct.
 *
 * NOTE(review): the original comment claimed a memcmp-based comparator
 * limited to pass-by-value types, but the code sorts with
 * multi_sort_compare() built via multi_sort_add_dimension() — confirm
 * whether the pass-by-value restriction still applies here.
 */
static void
update_bucket_ndistinct(MVBucket bucket, int2vector *attrs, VacAttrStats **stats)
{
	int			i,
				j;
	int			numattrs = attrs->dim1;

	HistogramBuild data = (HistogramBuild) bucket->build_data;
	int			numrows = data->numrows;

	MultiSortSupport mss = multi_sort_init(numattrs);

	/*
	 * We could collect this while walking through all the attributes above
	 * (this way we have to call heap_getattr twice).
	 */
	SortItem   *items = (SortItem *) palloc0(numrows * sizeof(SortItem));
	Datum	   *values = (Datum *) palloc0(numrows * sizeof(Datum) * numattrs);
	bool	   *isnull = (bool *) palloc0(numrows * sizeof(bool) * numattrs);

	/* point each item's values/isnull into the shared flat arrays */
	for (i = 0; i < numrows; i++)
	{
		items[i].values = &values[i * numattrs];
		items[i].isnull = &isnull[i * numattrs];
	}

	/* prepare the sort functions for all dimensions */
	for (i = 0; i < numattrs; i++)
		multi_sort_add_dimension(mss, i, i, stats);

	/* collect the values */
	for (i = 0; i < numrows; i++)
		for (j = 0; j < numattrs; j++)
			items[i].values[j]
				= heap_getattr(data->rows[i], attrs->values[j],
							   stats[j]->tupDesc, &items[i].isnull[j]);

	qsort_arg((void *) items, numrows, sizeof(SortItem),
			  multi_sort_compare, mss);

	/* count groups of equal consecutive items in the sorted array */
	data->ndistinct = 1;

	for (i = 1; i < numrows; i++)
		if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
			data->ndistinct += 1;

	pfree(items);
	pfree(values);
	pfree(isnull);
}

/*
 * Count distinct values per bucket dimension.
 */
static void
update_dimension_ndistinct(MVBucket bucket, int dimension, int2vector *attrs,
						   VacAttrStats **stats, bool update_boundaries)
{
	int			j;
	int			nvalues = 0;
	bool		isNull;
	HistogramBuild data = (HistogramBuild) bucket->build_data;
	Datum	   *values = (Datum *) palloc0(data->numrows * sizeof(Datum));
	SortSupportData ssup;

	StdAnalyzeData *mystats = (StdAnalyzeData *) stats[dimension]->extra_data;

	/* we may already know this is a NULL-only dimension */
	if (bucket->nullsonly[dimension])
		data->ndistincts[dimension] = 1;

	memset(&ssup, 0, sizeof(ssup));
	ssup.ssup_cxt = CurrentMemoryContext;

	/* We always use the default collation for statistics */
	ssup.ssup_collation = DEFAULT_COLLATION_OID;
	ssup.ssup_nulls_first = false;

	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);

	/* collect the non-NULL values of this dimension from the sample rows */
	for (j = 0; j < data->numrows; j++)
	{
		values[nvalues] = heap_getattr(data->rows[j], attrs->values[dimension],
									   stats[dimension]->tupDesc, &isNull);

		/* ignore NULL values */
		if (!isNull)
			nvalues++;
	}

	/* there's always at least 1 distinct value (may be NULL) */
	data->ndistincts[dimension] = 1;

	/*
	 * if there are only NULL values in the column, mark it so and continue
	 * with the next one
	 */
	if (nvalues == 0)
	{
		pfree(values);
		bucket->nullsonly[dimension] = true;
		return;
	}

	/* sort the array (pass-by-value datums) */
	qsort_arg((void *) values, nvalues, sizeof(Datum),
			  compare_scalars_simple, (void *) &ssup);

	/*
	 * Update min/max boundaries to the smallest bounding box. Generally, this
	 * needs to be done only when constructing the initial bucket.
	 */
	if (update_boundaries)
	{
		/* store the min/max values */
		bucket->min[dimension] = values[0];
		bucket->min_inclusive[dimension] = true;

		bucket->max[dimension] = values[nvalues - 1];
		bucket->max_inclusive[dimension] = true;
	}

	/*
	 * Walk through the array and count distinct values by comparing
	 * succeeding values.
	 *
	 * FIXME This only works for pass-by-value types (i.e. not VARCHARs etc.).
	 * Although thanks to the deduplication it might work even for those types
	 * (equal values will get the same item in the deduplicated array).
	 */
	for (j = 1; j < nvalues; j++)
	{
		if (values[j] != values[j - 1])
			data->ndistincts[dimension] += 1;
	}

	pfree(values);
}

/*
 * A properly built histogram must not contain buckets mixing NULL and non-NULL
 * values in a single dimension. Each dimension may either be marked as 'nulls
 * only', and thus containing only NULL values, or it must not contain any NULL
 * values.
 *
 * Therefore, if the sample contains NULL values in any of the columns, it's
 * necessary to build those NULL-buckets. This is done in an iterative way
 * using this algorithm, operating on a single bucket:
 *
 *     (1) Check that all dimensions are well-formed (not mixing NULL and
 *         non-NULL values).
 *
 *     (2) If all dimensions are well-formed, terminate.
 *
 *     (3) If the dimension contains only NULL values, but is not marked as
 *         NULL-only, mark it as NULL-only and run the algorithm again (on
 *         this bucket).
 *
 *     (4) If the dimension mixes NULL and non-NULL values, split the bucket
 *         into two parts - one with NULL values, one with non-NULL values
 *         (replacing the current one). Then run the algorithm on both buckets.
 *
 * This is executed in a recursive manner, but the number of executions should
 * be quite low - limited by the number of NULL-buckets. Also, in each branch
 * the number of nested calls is limited by the number of dimensions
 * (attributes) of the histogram.
 *
 * At the end, there should be buckets with no mixed dimensions. The number of
 * buckets produced by this algorithm is rather limited - with N dimensions,
 * there may be only 2^N such buckets (each dimension may be either NULL or
 * non-NULL). So with 8 dimensions (current value of MVSTATS_MAX_DIMENSIONS)
 * there may be only 256 such buckets.
+ * + * After this, a 'regular' bucket-split algorithm shall run, further optimizing + * the histogram. + */ +static void +create_null_buckets(MVHistogram histogram, int bucket_idx, + int2vector *attrs, VacAttrStats **stats) +{ + int i, + j; + int null_dim = -1; + int null_count = 0; + bool null_found = false; + MVBucket bucket, + null_bucket; + int null_idx, + curr_idx; + HistogramBuild data, + null_data; + + /* remember original values from the bucket */ + int numrows; + HeapTuple *oldrows = NULL; + + Assert(bucket_idx < histogram->nbuckets); + Assert(histogram->ndimensions == attrs->dim1); + + bucket = histogram->buckets[bucket_idx]; + data = (HistogramBuild) bucket->build_data; + + numrows = data->numrows; + oldrows = data->rows; + + /* + * Walk through all rows / dimensions, and stop once we find NULL in a + * dimension not yet marked as NULL-only. + */ + for (i = 0; i < data->numrows; i++) + { + /* + * FIXME We don't need to start from the first attribute here - we can + * start from the last known dimension. + */ + for (j = 0; j < histogram->ndimensions; j++) + { + /* Is this a NULL-only dimension? If yes, skip. */ + if (bucket->nullsonly[j]) + continue; + + /* found a NULL in that dimension? */ + if (heap_attisnull(data->rows[i], attrs->values[j])) + { + null_found = true; + null_dim = j; + break; + } + } + + /* terminate if we found attribute with NULL values */ + if (null_found) + break; + } + + /* no regular dimension contains NULL values => we're done */ + if (!null_found) + return; + + /* walk through the rows again, count NULL values in 'null_dim' */ + for (i = 0; i < data->numrows; i++) + { + if (heap_attisnull(data->rows[i], attrs->values[null_dim])) + null_count += 1; + } + + Assert(null_count <= data->numrows); + + /* + * If (null_count == numrows) the dimension already is NULL-only, but is + * not yet marked like that. It's enough to mark it and repeat the process + * recursively (until we run out of dimensions). 
+ */ + if (null_count == data->numrows) + { + bucket->nullsonly[null_dim] = true; + create_null_buckets(histogram, bucket_idx, attrs, stats); + return; + } + + /* + * We have to split the bucket into two - one with NULL values in the + * dimension, one with non-NULL values. We don't need to sort the data or + * anything, but otherwise it's similar to what partition_bucket() does. + */ + + /* create bucket with NULL-only dimension 'dim' */ + null_bucket = copy_mv_bucket(bucket, histogram->ndimensions); + null_data = (HistogramBuild) null_bucket->build_data; + + /* remember the current array info */ + oldrows = data->rows; + numrows = data->numrows; + + /* we'll keep non-NULL values in the current bucket */ + data->numrows = (numrows - null_count); + data->rows + = (HeapTuple *) palloc0(data->numrows * sizeof(HeapTuple)); + + /* and the NULL values will go to the new one */ + null_data->numrows = null_count; + null_data->rows + = (HeapTuple *) palloc0(null_data->numrows * sizeof(HeapTuple)); + + /* mark the dimension as NULL-only (in the new bucket) */ + null_bucket->nullsonly[null_dim] = true; + + /* walk through the sample rows and distribute them accordingly */ + null_idx = 0; + curr_idx = 0; + for (i = 0; i < numrows; i++) + { + if (heap_attisnull(oldrows[i], attrs->values[null_dim])) + /* NULL => copy to the new bucket */ + memcpy(&null_data->rows[null_idx++], &oldrows[i], + sizeof(HeapTuple)); + else + memcpy(&data->rows[curr_idx++], &oldrows[i], + sizeof(HeapTuple)); + } + + /* update ndistinct values for the buckets (total and per dimension) */ + update_bucket_ndistinct(bucket, attrs, stats); + update_bucket_ndistinct(null_bucket, attrs, stats); + + /* + * TODO We don't need to do this for the dimension we used for split, + * because we know how many distinct values went to each bucket (NULL is + * not a value, so NULL buckets get 0, and the other bucket got all the + * distinct values). 
+ */ + for (i = 0; i < histogram->ndimensions; i++) + { + update_dimension_ndistinct(bucket, i, attrs, stats, false); + update_dimension_ndistinct(null_bucket, i, attrs, stats, false); + } + + pfree(oldrows); + + /* add the NULL bucket to the histogram */ + histogram->buckets[histogram->nbuckets++] = null_bucket; + + /* + * And now run the function recursively on both buckets (the new one + * first, because the call may change number of buckets, and it's used as + * an index). + */ + create_null_buckets(histogram, (histogram->nbuckets - 1), attrs, stats); + create_null_buckets(histogram, bucket_idx, attrs, stats); +} + +/* + * SRF with details about buckets of a histogram: + * + * - bucket ID (0...nbuckets) + * - min values (string array) + * - max values (string array) + * - nulls only (boolean array) + * - min inclusive flags (boolean array) + * - max inclusive flags (boolean array) + * - frequency (double precision) + * + * The input is the OID of the statistics, and there are no rows returned if the + * statistics contains no histogram (or if there's no statistics for the OID). + * + * The second parameter (type) determines what values will be returned + * in the (minvals,maxvals). There are three possible values: + * + * 0 (actual values) + * ----------------- + * - prints actual values + * - using the output function of the data type (as string) + * - handy for investigating the histogram + * + * 1 (distinct index) + * ------------------ + * - prints index of the distinct value (into the serialized array) + * - makes it easier to spot neighbor buckets, etc. 
+ * - handy for plotting the histogram + * + * 2 (normalized distinct index) + * ----------------------------- + * - prints index of the distinct value, but normalized into [0,1] + * - similar to 1, but shows how 'long' the bucket range is + * - handy for plotting the histogram + * + * When plotting the histogram, be careful as the (1) and (2) options skew the + * lengths by distributing the distinct values uniformly. For data types + * without a clear meaning of 'distance' (e.g. strings) that is not a big deal, + * but for numbers it may be confusing. + */ +PG_FUNCTION_INFO_V1(pg_mv_histogram_buckets); + +#define OUTPUT_FORMAT_RAW 0 +#define OUTPUT_FORMAT_INDEXES 1 +#define OUTPUT_FORMAT_DISTINCT 2 + +Datum +pg_mv_histogram_buckets(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + int call_cntr; + int max_calls; + TupleDesc tupdesc; + AttInMetadata *attinmeta; + + Oid mvoid = PG_GETARG_OID(0); + int otype = PG_GETARG_INT32(1); + + if ((otype < 0) || (otype > 2)) + elog(ERROR, "invalid output type specified"); + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + MVSerializedHistogram histogram; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + histogram = load_mv_histogram(mvoid); + + funcctx->user_fctx = histogram; + + /* total number of tuples to be returned */ + funcctx->max_calls = 0; + if (funcctx->user_fctx != NULL) + funcctx->max_calls = histogram->nbuckets; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce 
tuples from raw + * C strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->attinmeta = attinmeta; + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + call_cntr = funcctx->call_cntr; + max_calls = funcctx->max_calls; + attinmeta = funcctx->attinmeta; + + if (call_cntr < max_calls) /* do when there is more left to send */ + { + char **values; + HeapTuple tuple; + Datum result; + int2vector *stakeys; + Oid relid; + double bucket_volume = 1.0; + StringInfo bufs; + + char *format; + int i; + + Oid *outfuncs; + FmgrInfo *fmgrinfo; + + MVSerializedHistogram histogram; + MVSerializedBucket bucket; + + histogram = (MVSerializedHistogram) funcctx->user_fctx; + + Assert(call_cntr < histogram->nbuckets); + + bucket = histogram->buckets[call_cntr]; + + stakeys = find_mv_attnums(mvoid, &relid); + + /* + * The scalar values will be formatted directly, using snprintf. + * + * The 'array' values will be formatted through StringInfo. + */ + values = (char **) palloc0(9 * sizeof(char *)); + bufs = (StringInfo) palloc0(9 * sizeof(StringInfoData)); + + values[0] = (char *) palloc(64 * sizeof(char)); + + initStringInfo(&bufs[1]); /* lower boundaries */ + initStringInfo(&bufs[2]); /* upper boundaries */ + initStringInfo(&bufs[3]); /* nulls-only */ + initStringInfo(&bufs[4]); /* lower inclusive */ + initStringInfo(&bufs[5]); /* upper inclusive */ + + values[6] = (char *) palloc(64 * sizeof(char)); + values[7] = (char *) palloc(64 * sizeof(char)); + values[8] = (char *) palloc(64 * sizeof(char)); + + /* we need to do this only when printing the actual values */ + outfuncs = (Oid *) palloc0(sizeof(Oid) * histogram->ndimensions); + fmgrinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * histogram->ndimensions); + + /* + * lookup output functions for all histogram dimensions + * + * XXX This might be one in the first call and stored in user_fctx. 
+ */ + for (i = 0; i < histogram->ndimensions; i++) + { + bool isvarlena; + + getTypeOutputInfo(get_atttype(relid, stakeys->values[i]), + &outfuncs[i], &isvarlena); + + fmgr_info(outfuncs[i], &fmgrinfo[i]); + } + + snprintf(values[0], 64, "%d", call_cntr); /* bucket ID */ + + /* + * for the arrays of lower/upper boundaries, formated according to + * otype + */ + for (i = 0; i < histogram->ndimensions; i++) + { + Datum *vals = histogram->values[i]; + + uint16 minidx = bucket->min[i]; + uint16 maxidx = bucket->max[i]; + + /* + * compute bucket volume, using distinct values as a measure + * + * XXX Not really sure what to do for NULL dimensions here, so + * let's simply count them as '1'. + */ + bucket_volume + *= (double) (maxidx - minidx + 1) / (histogram->nvalues[i] - 1); + + if (i == 0) + format = "{%s"; /* fist dimension */ + else if (i < (histogram->ndimensions - 1)) + format = ", %s"; /* medium dimensions */ + else + format = ", %s}"; /* last dimension */ + + appendStringInfo(&bufs[3], format, bucket->nullsonly[i] ? "t" : "f"); + appendStringInfo(&bufs[4], format, bucket->min_inclusive[i] ? "t" : "f"); + appendStringInfo(&bufs[5], format, bucket->max_inclusive[i] ? 
"t" : "f"); + + /* + * for NULL-only dimension, simply put there the NULL and + * continue + */ + if (bucket->nullsonly[i]) + { + if (i == 0) + format = "{%s"; + else if (i < (histogram->ndimensions - 1)) + format = ", %s"; + else + format = ", %s}"; + + appendStringInfo(&bufs[1], format, "NULL"); + appendStringInfo(&bufs[2], format, "NULL"); + + continue; + } + + /* otherwise we really need to format the value */ + switch (otype) + { + case OUTPUT_FORMAT_RAW: /* actual boundary values */ + + if (i == 0) + format = "{%s"; + else if (i < (histogram->ndimensions - 1)) + format = ", %s"; + else + format = ", %s}"; + + appendStringInfo(&bufs[1], format, + FunctionCall1(&fmgrinfo[i], vals[minidx])); + + appendStringInfo(&bufs[2], format, + FunctionCall1(&fmgrinfo[i], vals[maxidx])); + + break; + + case OUTPUT_FORMAT_INDEXES: /* indexes into deduplicated + * arrays */ + + if (i == 0) + format = "{%d"; + else if (i < (histogram->ndimensions - 1)) + format = ", %d"; + else + format = ", %d}"; + + appendStringInfo(&bufs[1], format, minidx); + + appendStringInfo(&bufs[2], format, maxidx); + + break; + + case OUTPUT_FORMAT_DISTINCT: /* distinct arrays as measure */ + + if (i == 0) + format = "{%f"; + else if (i < (histogram->ndimensions - 1)) + format = ", %f"; + else + format = ", %f}"; + + appendStringInfo(&bufs[1], format, + (minidx * 1.0 / (histogram->nvalues[i] - 1))); + + appendStringInfo(&bufs[2], format, + (maxidx * 1.0 / (histogram->nvalues[i] - 1))); + + break; + + default: + elog(ERROR, "unknown output type: %d", otype); + } + } + + values[1] = bufs[1].data; + values[2] = bufs[2].data; + values[3] = bufs[3].data; + values[4] = bufs[4].data; + values[5] = bufs[5].data; + + snprintf(values[6], 64, "%f", bucket->ntuples); /* frequency */ + snprintf(values[7], 64, "%f", bucket->ntuples / bucket_volume); /* density */ + snprintf(values[8], 64, "%f", bucket_volume); /* volume (as a + * fraction) */ + + /* build a tuple */ + tuple = BuildTupleFromCStrings(attinmeta, 
values); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* clean up (this is not really necessary) */ + pfree(values[0]); + pfree(values[6]); + pfree(values[7]); + pfree(values[8]); + + resetStringInfo(&bufs[1]); + resetStringInfo(&bufs[2]); + resetStringInfo(&bufs[3]); + resetStringInfo(&bufs[4]); + resetStringInfo(&bufs[5]); + + pfree(bufs); + pfree(values); + + SRF_RETURN_NEXT(funcctx, result); + } + else /* do when there is no more left */ + { + SRF_RETURN_DONE(funcctx); + } +} + +/* + * pg_histogram_in - input routine for type pg_histogram. + * + * pg_histogram is real enough to be a table column, but it has no operations + * of its own, and disallows input too + * + * XXX This is inspired by what pg_node_tree does. + */ +Datum +pg_histogram_in(PG_FUNCTION_ARGS) +{ + /* + * pg_node_list stores the data in binary form and parsing text input is + * not needed, so disallow this. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_histogram"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_histogram - output routine for type PG_HISTOGRAM. + * + * histograms are serialized into a bytea value, so we simply call byteaout() + * to serialize the value into text. But it'd be nice to serialize that into + * a meaningful representation (e.g. for inspection by people). + * + * FIXME not implemented yet, returning dummy value + */ +Datum +pg_histogram_out(PG_FUNCTION_ARGS) +{ + return byteaout(fcinfo); +} + +/* + * pg_histogram_recv - binary input routine for type pg_histogram. + */ +Datum +pg_histogram_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_histogram"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_histogram_send - binary output routine for type pg_histogram. + * + * XXX Histograms are serialized into a bytea value, so let's just send that. 
+ */ +Datum +pg_histogram_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +#ifdef DEBUG_MVHIST +/* + * prints debugging info about matched histogram buckets (full/partial) + * + * XXX Currently works only for INT data type. + */ +void +debug_histogram_matches(MVSerializedHistogram mvhist, char *matches) +{ + int i, + j; + + float ffull = 0, + fpartial = 0; + int nfull = 0, + npartial = 0; + + StringInfoData buf; + + initStringInfo(&buf); + + for (i = 0; i < mvhist->nbuckets; i++) + { + MVSerializedBucket bucket = mvhist->buckets[i]; + + if (!matches[i]) + continue; + + /* increment the counters */ + nfull += (matches[i] == MVSTATS_MATCH_FULL) ? 1 : 0; + npartial += (matches[i] == MVSTATS_MATCH_PARTIAL) ? 1 : 0; + + /* and also update the frequencies */ + ffull += (matches[i] == MVSTATS_MATCH_FULL) ? bucket->ntuples : 0; + fpartial += (matches[i] == MVSTATS_MATCH_PARTIAL) ? bucket->ntuples : 0; + + resetStringInfo(&buf); + + /* build ranges for all the dimentions */ + for (j = 0; j < mvhist->ndimensions; j++) + { + appendStringInfo(&buf, '[%d %d]', + DatumGetInt32(mvhist->values[j][bucket->min[j]]), + DatumGetInt32(mvhist->values[j][bucket->max[j]])); + } + + elog(WARNING, "bucket %d %s => %d [%f]", i, buf.data, matches[i], bucket->ntuples); + } + + elog(WARNING, "full=%f partial=%f (%f)", ffull, fpartial, (ffull + 0.5 * fpartial)); +} +#endif diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index db74d93..cf73aec 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2298,8 +2298,8 @@ describeOneTableDetails(const char *schemaname, { printfPQExpBuffer(&buf, "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n" - " ndist_enabled, deps_enabled, mcv_enabled,\n" - " ndist_built, deps_built, mcv_built,\n" + " ndist_enabled, deps_enabled, mcv_enabled, hist_enabled,\n" + " ndist_built, deps_built, mcv_built, hist_built,\n" " (SELECT string_agg(attname::text,', ')\n" " FROM ((SELECT unnest(stakeys) AS attnum) s\n" " 
JOIN pg_attribute a ON (starelid = a.attrelid and a.attnum = s.attnum))) AS attnums\n" @@ -2342,8 +2342,17 @@ describeOneTableDetails(const char *schemaname, first = false; } + if (!strcmp(PQgetvalue(result, i, 6), "t")) + { + if (!first) + appendPQExpBuffer(&buf, ", histogram"); + else + appendPQExpBuffer(&buf, "(histogram"); + first = false; + } + appendPQExpBuffer(&buf, ") ON (%s)", - PQgetvalue(result, i, 9)); + PQgetvalue(result, i, 12)); printTableAddFooter(&cont, buf.data); } diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h index 80d8ea2..f62ba50 100644 --- a/src/include/catalog/pg_cast.h +++ b/src/include/catalog/pg_cast.h @@ -266,6 +266,9 @@ DATA(insert ( 3358 25 0 i i )); DATA(insert ( 441 17 0 i b )); DATA(insert ( 441 25 0 i i )); +/* pg_histogram can be coerced to, but not from, bytea */ +DATA(insert ( 774 17 0 i b )); + /* * Datetime category diff --git a/src/include/catalog/pg_mv_statistic.h b/src/include/catalog/pg_mv_statistic.h index 34049d6..d30d3cd9 100644 --- a/src/include/catalog/pg_mv_statistic.h +++ b/src/include/catalog/pg_mv_statistic.h @@ -40,11 +40,13 @@ CATALOG(pg_mv_statistic,3381) bool ndist_enabled; /* build ndist coefficient? */ bool deps_enabled; /* analyze dependencies? */ bool mcv_enabled; /* build MCV list? */ + bool hist_enabled; /* build histogram? 
*/ /* statistics that are available (if requested) */ bool ndist_built; /* ndistinct coeff built */ bool deps_built; /* dependencies were built */ bool mcv_built; /* MCV list was built */ + bool hist_built; /* histogram was built */ /* * variable-length fields start here, but we allow direct access to @@ -56,6 +58,7 @@ CATALOG(pg_mv_statistic,3381) pg_ndistinct standist; /* ndistinct coeff (serialized) */ pg_dependencies stadeps; /* dependencies (serialized) */ pg_mcv_list stamcv; /* MCV list (serialized) */ + pg_histogram stahist; /* MV histogram (serialized) */ #endif } FormData_pg_mv_statistic; @@ -71,7 +74,7 @@ typedef FormData_pg_mv_statistic *Form_pg_mv_statistic; * compiler constants for pg_mv_statistic * ---------------- */ -#define Natts_pg_mv_statistic 14 +#define Natts_pg_mv_statistic 17 #define Anum_pg_mv_statistic_starelid 1 #define Anum_pg_mv_statistic_staname 2 #define Anum_pg_mv_statistic_stanamespace 3 @@ -79,12 +82,15 @@ typedef FormData_pg_mv_statistic *Form_pg_mv_statistic; #define Anum_pg_mv_statistic_ndist_enabled 5 #define Anum_pg_mv_statistic_deps_enabled 6 #define Anum_pg_mv_statistic_mcv_enabled 7 -#define Anum_pg_mv_statistic_ndist_built 8 -#define Anum_pg_mv_statistic_deps_built 9 -#define Anum_pg_mv_statistic_mcv_built 10 -#define Anum_pg_mv_statistic_stakeys 11 -#define Anum_pg_mv_statistic_standist 12 -#define Anum_pg_mv_statistic_stadeps 13 -#define Anum_pg_mv_statistic_stamcv 14 +#define Anum_pg_mv_statistic_hist_enabled 8 +#define Anum_pg_mv_statistic_ndist_built 9 +#define Anum_pg_mv_statistic_deps_built 10 +#define Anum_pg_mv_statistic_mcv_built 11 +#define Anum_pg_mv_statistic_hist_built 12 +#define Anum_pg_mv_statistic_stakeys 13 +#define Anum_pg_mv_statistic_standist 14 +#define Anum_pg_mv_statistic_stadeps 15 +#define Anum_pg_mv_statistic_stamcv 16 +#define Anum_pg_mv_statistic_stahist 17 #endif /* PG_MV_STATISTIC_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 7cf1e5a..653bf1a 100644 --- 
a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2730,6 +2730,10 @@ DATA(insert OID = 3376 ( pg_mv_stats_mcvlist_info PGNSP PGUID 12 1 0 0 0 f f f DESCR("multi-variate statistics: MCV list info"); DATA(insert OID = 3373 ( pg_mv_mcv_items PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 1 0 2249 "26" "{26,23,1009,1000,701}" "{i,o,o,o,o}" "{oid,index,values,nulls,frequency}" _null_ _null_ pg_mv_mcv_items _null_ _null_ _null_ )); DESCR("details about MCV list items"); +DATA(insert OID = 3375 ( pg_mv_stats_histogram_info PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "774" _null_ _null_ _null_ _null_ _null_ pg_mv_stats_histogram_info _null_ _null_ _null_ )); +DESCR("multi-variate statistics: histogram info"); +DATA(insert OID = 3374 ( pg_mv_histogram_buckets PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 2 0 2249 "26 23" "{26,23,23,1009,1009,1000,1000,1000,701,701,701}" "{i,i,o,o,o,o,o,o,o,o,o}" "{oid,otype,index,minvals,maxvals,nullsonly,mininclusive,maxinclusive,frequency,density,bucket_volume}" _null_ _null_ pg_mv_histogram_buckets _null_ _null_ _null_ )); +DESCR("details about histogram buckets"); DATA(insert OID = 3354 ( pg_ndistinct_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3353 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ )); DESCR("I/O"); @@ -2758,6 +2762,15 @@ DESCR("I/O"); DATA(insert OID = 445 ( pg_mcv_list_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "441" _null_ _null_ _null_ _null_ _null_ pg_mcv_list_send _null_ _null_ _null_ )); DESCR("I/O"); +DATA(insert OID = 775 ( pg_histogram_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 774 "2275" _null_ _null_ _null_ _null_ _null_ pg_histogram_in _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 776 ( pg_histogram_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "774" _null_ _null_ _null_ _null_ _null_ pg_histogram_out _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 777 ( pg_histogram_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 774 
"2281" _null_ _null_ _null_ _null_ _null_ pg_histogram_recv _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 778 ( pg_histogram_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "774" _null_ _null_ _null_ _null_ _null_ pg_histogram_send _null_ _null_ _null_ )); +DESCR("I/O"); + DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ )); DESCR("statistics: number of scans done for table/index"); DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ )); diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index fbac135..7133862 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -376,6 +376,10 @@ DATA(insert OID = 441 ( pg_mcv_list PGNSP PGUID -1 f b S f t \054 0 0 0 pg_mcv_ DESCR("multivariate MCV list"); #define PGMCVLISTOID 441 +DATA(insert OID = 774 ( pg_histogram PGNSP PGUID -1 f b S f t \054 0 0 0 pg_histogram_in pg_histogram_out pg_histogram_recv pg_histogram_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); +DESCR("multivariate histogram"); +#define PGHISTOGRAMOID 774 + DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("internal type for passing CollectedCommand"); #define PGDDLCOMMANDOID 32 diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d912827..f99f547 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -684,11 +684,13 @@ typedef struct MVStatisticInfo bool ndist_enabled; /* ndistinct coefficient enabled */ bool deps_enabled; /* functional dependencies enabled */ bool mcv_enabled; /* MCV list enabled */ + bool 
hist_enabled; /* histogram enabled */ /* built/available statistics */ bool ndist_built; /* ndistinct coefficient built */ bool deps_built; /* functional dependencies built */ bool mcv_built; /* MCV list built */ + bool hist_built; /* histogram built */ /* columns in the statistics (attnums) */ int2vector *stakeys; /* attnums of the columns covered */ diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 9ed080a..1c7925b 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -81,6 +81,10 @@ extern Datum pg_mcv_list_in(PG_FUNCTION_ARGS); extern Datum pg_mcv_list_out(PG_FUNCTION_ARGS); extern Datum pg_mcv_list_recv(PG_FUNCTION_ARGS); extern Datum pg_mcv_list_send(PG_FUNCTION_ARGS); +extern Datum pg_histogram_in(PG_FUNCTION_ARGS); +extern Datum pg_histogram_out(PG_FUNCTION_ARGS); +extern Datum pg_histogram_recv(PG_FUNCTION_ARGS); +extern Datum pg_histogram_send(PG_FUNCTION_ARGS); /* regexp.c */ extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive, diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h index 0c4f621..5d8c024 100644 --- a/src/include/utils/mvstats.h +++ b/src/include/utils/mvstats.h @@ -18,7 +18,7 @@ #include "commands/vacuum.h" /* - * Degree of how much MCV item matches a clause. + * Degree of how much MCV item / histogram bucket matches a clause. * This is then considered when computing the selectivity. 
*/ #define MVSTATS_MATCH_NONE 0 /* no match at all */ @@ -114,19 +114,133 @@ bool dependency_implies_attribute(MVDependency dependency, AttrNumber attnum, bool dependency_is_fully_matched(MVDependency dependency, Bitmapset *attnums, int16 *attmap); +/* used to flag stats serialized to bytea */ +#define MVSTAT_HIST_MAGIC 0x7F8C5670 /* marks serialized bytea */ +#define MVSTAT_HIST_TYPE_BASIC 1 /* basic histogram type */ + +/* max buckets in a histogram (mostly arbitrary number */ +#define MVSTAT_HIST_MAX_BUCKETS 16384 + +/* + * Multivariate histograms + */ +typedef struct MVBucketData +{ + + /* Frequencies of this bucket. */ + float ntuples; /* frequency of tuples tuples */ + + /* + * Information about dimensions being NULL-only. Not yet used. + */ + bool *nullsonly; + + /* lower boundaries - values and information about the inequalities */ + Datum *min; + bool *min_inclusive; + + /* upper boundaries - values and information about the inequalities */ + Datum *max; + bool *max_inclusive; + + /* used when building the histogram (not serialized/deserialized) */ + void *build_data; + +} MVBucketData; + +typedef MVBucketData *MVBucket; + + +typedef struct MVHistogramData +{ + + uint32 magic; /* magic constant marker */ + uint32 type; /* type of histogram (BASIC) */ + uint32 nbuckets; /* number of buckets (buckets array) */ + uint32 ndimensions; /* number of dimensions */ + + MVBucket *buckets; /* array of buckets */ + +} MVHistogramData; + +typedef MVHistogramData *MVHistogram; + +/* + * Histogram in a partially serialized form, with deduplicated boundary + * values etc. + * + * TODO add more detailed description here + */ + +typedef struct MVSerializedBucketData +{ + + /* Frequencies of this bucket. */ + float ntuples; /* frequency of tuples tuples */ + + /* + * Information about dimensions being NULL-only. Not yet used. 
+ */ + bool *nullsonly; + + /* lower boundaries - values and information about the inequalities */ + uint16 *min; + bool *min_inclusive; + + /* + * indexes of upper boundaries - values and information about the + * inequalities (exclusive vs. inclusive) + */ + uint16 *max; + bool *max_inclusive; + +} MVSerializedBucketData; + +typedef MVSerializedBucketData *MVSerializedBucket; + +typedef struct MVSerializedHistogramData +{ + + uint32 magic; /* magic constant marker */ + uint32 type; /* type of histogram (BASIC) */ + uint32 nbuckets; /* number of buckets (buckets array) */ + uint32 ndimensions; /* number of dimensions */ + + /* + * keep this the same with MVHistogramData, because of deserialization + * (same offset) + */ + MVSerializedBucket *buckets; /* array of buckets */ + + /* + * serialized boundary values, one array per dimension, deduplicated (the + * min/max indexes point into these arrays) + */ + int *nvalues; + Datum **values; + +} MVSerializedHistogramData; + +typedef MVSerializedHistogramData *MVSerializedHistogram; + + MVNDistinct load_mv_ndistinct(Oid mvoid); MVDependencies load_mv_dependencies(Oid mvoid); MCVList load_mv_mcvlist(Oid mvoid); +MVSerializedHistogram load_mv_histogram(Oid mvoid); bytea *serialize_mv_ndistinct(MVNDistinct ndistinct); bytea *serialize_mv_dependencies(MVDependencies dependencies); bytea *serialize_mv_mcvlist(MCVList mcvlist, int2vector *attrs, VacAttrStats **stats); +bytea *serialize_mv_histogram(MVHistogram histogram, int2vector *attrs, + VacAttrStats **stats); /* deserialization of stats (serialization is private to analyze) */ MVNDistinct deserialize_mv_ndistinct(bytea *data); MVDependencies deserialize_mv_dependencies(bytea *data); MCVList deserialize_mv_mcvlist(bytea *data); +MVSerializedHistogram deserialize_mv_histogram(bytea * data); /* * Returns index of the attribute number within the vector (i.e. 
a @@ -139,6 +253,8 @@ int2vector *find_mv_attnums(Oid mvoid, Oid *relid); /* functions for inspecting the statistics */ extern Datum pg_mv_stats_mcvlist_info(PG_FUNCTION_ARGS); extern Datum pg_mv_mcvlist_items(PG_FUNCTION_ARGS); +extern Datum pg_mv_stats_histogram_info(PG_FUNCTION_ARGS); +extern Datum pg_mv_histogram_buckets(PG_FUNCTION_ARGS); MVNDistinct build_mv_ndistinct(double totalrows, int numrows, HeapTuple *rows, @@ -151,8 +267,15 @@ MVDependencies build_mv_dependencies(int numrows, HeapTuple *rows, MCVList build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs, VacAttrStats **stats, int *numrows_filtered); +MVHistogram build_mv_histogram(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int numrows_total); + void build_mv_stats(Relation onerel, double totalrows, int numrows, HeapTuple *rows, int natts, VacAttrStats **vacattrstats); +#ifdef DEBUG_MVHIST +extern void debug_histogram_matches(MVSerializedHistogram mvhist, char *matches); +#endif + #endif diff --git a/src/test/regress/expected/mv_histogram.out b/src/test/regress/expected/mv_histogram.out new file mode 100644 index 0000000..16410ce --- /dev/null +++ b/src/test/regress/expected/mv_histogram.out @@ -0,0 +1,198 @@ +-- data type passed by value +CREATE TABLE mv_histogram ( + a INT, + b INT, + c INT +); +-- unknown column +CREATE STATISTICS s7 WITH (histogram) ON (unknown_column) FROM mv_histogram; +ERROR: column "unknown_column" referenced in statistics does not exist +-- single column +CREATE STATISTICS s7 WITH (histogram) ON (a) FROM mv_histogram; +ERROR: statistics require at least 2 columns +-- single column, duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a) FROM mv_histogram; +ERROR: duplicate column name in statistics definition +-- two columns, one duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a, b) FROM mv_histogram; +ERROR: duplicate column name in statistics definition +-- unknown option +CREATE STATISTICS s7 WITH (unknown_option) ON 
(a, b, c) FROM mv_histogram; +ERROR: unrecognized STATISTICS option "unknown_option" +-- correct command +CREATE STATISTICS s7 WITH (histogram) ON (a, b, c) FROM mv_histogram; +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = 10 AND b = 5; + QUERY PLAN +-------------------------------------------- + Bitmap Heap Scan on mv_histogram + Recheck Cond: ((a = 10) AND (b = 5)) + -> Bitmap Index Scan on hist_idx + Index Cond: ((a = 10) AND (b = 5)) +(4 rows) + +DROP TABLE mv_histogram; +-- varlena type (text) +CREATE TABLE mv_histogram ( + a 
TEXT, + b TEXT, + c TEXT +); +CREATE STATISTICS s8 WITH (histogram) ON (a, b, c) FROM mv_histogram; +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = '10' AND b = '5'; + QUERY PLAN +------------------------------------------------------------ + Bitmap Heap Scan on mv_histogram + Recheck Cond: ((a = '10'::text) AND (b = '5'::text)) + -> Bitmap Index Scan on hist_idx + Index Cond: ((a = '10'::text) AND (b = '5'::text)) +(4 rows) + +TRUNCATE mv_histogram; +-- check explain (expect bitmap index scan, not plain index scan) with NULLs 
+INSERT INTO mv_histogram + SELECT + (CASE WHEN i/100 = 0 THEN NULL ELSE i/100 END), + (CASE WHEN i/200 = 0 THEN NULL ELSE i/200 END), + (CASE WHEN i/400 = 0 THEN NULL ELSE i/400 END) + FROM generate_series(1,30000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a IS NULL AND b IS NULL; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mv_histogram + Recheck Cond: ((a IS NULL) AND (b IS NULL)) + -> Bitmap Index Scan on hist_idx + Index Cond: ((a IS NULL) AND (b IS NULL)) +(4 rows) + +DROP TABLE mv_histogram; +-- NULL values (mix of int and text columns) +CREATE TABLE mv_histogram ( + a INT, + b TEXT, + c INT, + d TEXT +); +CREATE STATISTICS s9 WITH (histogram) ON (a, b, c, d) FROM mv_histogram; +INSERT INTO mv_histogram + SELECT + mod(i, 100), + (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END), + mod(i, 400), + (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END) + FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +DROP TABLE mv_histogram; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 9969c10..a9d8163 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -820,11 +820,12 @@ WHERE c.castmethod = 'b' AND pg_ndistinct | bytea | 0 | i pg_dependencies | bytea | 0 | i pg_mcv_list | bytea | 0 | i + pg_histogram | bytea | 0 | i cidr | inet | 0 | i xml | text | 0 | a xml | character varying | 0 | a xml | character | 0 | a -(10 rows) +(11 rows) -- **************** pg_conversion **************** -- Look for illegal values in 
pg_conversion fields. diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 2e3c40e..27e903c 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1383,7 +1383,9 @@ pg_mv_stats| SELECT n.nspname AS schemaname, length((s.standist)::bytea) AS ndistbytes, length((s.stadeps)::bytea) AS depsbytes, length((s.stamcv)::bytea) AS mcvbytes, - pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo + pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo, + length((s.stahist)::bytea) AS histbytes, + pg_mv_stats_histogram_info(s.stahist) AS histinfo FROM ((pg_mv_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))); diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index dde15b9..4d3c4d7 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -73,8 +73,9 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' 3353 | pg_ndistinct 3358 | pg_dependencies 441 | pg_mcv_list + 774 | pg_histogram 210 | smgr -(5 rows) +(6 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index d805840..36dd618 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -118,4 +118,4 @@ test: event_trigger test: stats # run tests of multivariate stats -test: mv_ndistinct mv_dependencies mv_mcv +test: mv_ndistinct mv_dependencies mv_mcv mv_histogram diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 72c6acd..34f5467 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -174,3 +174,4 @@ test: stats test: mv_ndistinct test: mv_dependencies test: mv_mcv +test: mv_histogram diff --git 
a/src/test/regress/sql/mv_histogram.sql b/src/test/regress/sql/mv_histogram.sql new file mode 100644 index 0000000..55197cb --- /dev/null +++ b/src/test/regress/sql/mv_histogram.sql @@ -0,0 +1,167 @@ +-- data type passed by value +CREATE TABLE mv_histogram ( + a INT, + b INT, + c INT +); + +-- unknown column +CREATE STATISTICS s7 WITH (histogram) ON (unknown_column) FROM mv_histogram; + +-- single column +CREATE STATISTICS s7 WITH (histogram) ON (a) FROM mv_histogram; + +-- single column, duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a) FROM mv_histogram; + +-- two columns, one duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a, b) FROM mv_histogram; + +-- unknown option +CREATE STATISTICS s7 WITH (unknown_option) ON (a, b, c) FROM mv_histogram; + +-- correct command +CREATE STATISTICS s7 WITH (histogram) ON (a, b, c) FROM mv_histogram; + +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic 
WHERE starelid = 'mv_histogram'::regclass; + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = 10 AND b = 5; + +DROP TABLE mv_histogram; + +-- varlena type (text) +CREATE TABLE mv_histogram ( + a TEXT, + b TEXT, + c TEXT +); + +CREATE STATISTICS s8 WITH (histogram) ON (a, b, c) FROM mv_histogram; + +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = '10' AND b = '5'; + +TRUNCATE mv_histogram; + +-- check explain (expect bitmap index scan, not plain index scan) with NULLs +INSERT INTO mv_histogram + SELECT + (CASE WHEN i/100 = 0 THEN NULL ELSE i/100 END), + (CASE WHEN i/200 = 0 THEN NULL ELSE i/200 END), + (CASE WHEN i/400 = 0 THEN NULL ELSE i/400 END) + FROM generate_series(1,30000) s(i); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 
'mv_histogram'::regclass; + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a IS NULL AND b IS NULL; + +DROP TABLE mv_histogram; + +-- NULL values (mix of int and text columns) +CREATE TABLE mv_histogram ( + a INT, + b TEXT, + c INT, + d TEXT +); + +CREATE STATISTICS s9 WITH (histogram) ON (a, b, c, d) FROM mv_histogram; + +INSERT INTO mv_histogram + SELECT + mod(i, 100), + (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END), + mod(i, 400), + (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END) + FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +DROP TABLE mv_histogram; -- 2.5.5