diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index cc24c8a..5dcda42 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -94,22 +94,24 @@ * to justify complicating matters. *---------- */ #include "postgres.h" #include #include #include +#include "access/brin.h" #include "access/gin.h" #include "access/htup_details.h" +#include "access/reloptions.h" #include "access/sysattr.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_type.h" #include "executor/executor.h" @@ -7703,62 +7705,262 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * BRIN has search behavior completely different from other index types */ void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages) { IndexOptInfo *index = path->indexinfo; List *indexQuals = path->indexquals; - List *indexOrderBys = path->indexorderbys; double numPages = index->pages; double numTuples = index->tuples; + RelOptInfo *baserel = index->rel; List *qinfos; Cost spc_seq_page_cost; Cost spc_random_page_cost; double qual_op_cost; double qual_arg_cost; + double qualSelectivity; + BlockNumber pagesPerRange; + double rangeProportion; + double numRanges; + double rangeSelectivity; + double selec; + Relation indexRel; + VariableStatData vardata; /* Do preliminary analysis of indexquals */ qinfos = deconstruct_indexquals(path); - /* fetch estimated page cost for tablespace containing index */ + /* Fetch estimated page cost for tablespace containing index */ get_tablespace_page_costs(index->reltablespace, &spc_random_page_cost, &spc_seq_page_cost); /* * BRIN indexes are always read in full; use that as 
startup cost. * - * XXX maybe only include revmap pages here? + * XXX We should consider the revmap at seqpage cost, and regular pages at + * random page cost. */ *indexStartupCost = spc_seq_page_cost * numPages * loop_count; /* * To read a BRIN index there might be a bit of back and forth over * regular pages, as revmap might point to them out of sequential order; * calculate this as reading the whole index in random order. */ *indexTotalCost = spc_random_page_cost * numPages * loop_count; - *indexSelectivity = - clauselist_selectivity(root, indexQuals, - path->indexinfo->rel->relid, - JOIN_INNER, NULL); - *indexCorrelation = 1; + /* + * Compute index correlation + * + * Because we can use all index quals equally when scanning, we can use + * the largest correlation (in absolute value) among columns used by the + * query. Start at zero, the worst possible case. If we cannot find + * any correlation statistics, we will keep it at 0. + */ + *indexCorrelation = 0; + + { + RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root); + ListCell *cell; + + Assert(rte->rtekind == RTE_RELATION); + + foreach(cell, qinfos) + { + IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(cell); + AttrNumber colnum = index->indexkeys[qinfo->indexcol]; + + if (colnum != 0) + { + /* Simple variable -- look to stats for the underlying table */ + if (get_relation_stats_hook && + (*get_relation_stats_hook) (root, rte, colnum, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. + * If it did supply a tuple, it'd better have supplied + * a freefunc. 
+ */ + if (HeapTupleIsValid(vardata.statsTuple) && + !vardata.freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(rte->relid), + Int16GetDatum(colnum), + /* XXX no inh */ + BoolGetDatum(false)); + vardata.freefunc = ReleaseSysCache; + } + } + else + { + /* Expression --- maybe there are stats for the index itself, stored under the 1-based index column number */ + if (get_index_stats_hook && + (*get_index_stats_hook) (root, index->indexoid, qinfo->indexcol + 1, &vardata)) + { + /* + * The hook took control of acquiring a stats tuple. If it did + * supply a tuple, it'd better have supplied a freefunc. + */ + if (HeapTupleIsValid(vardata.statsTuple) && + !vardata.freefunc) + elog(ERROR, "no function provided to release variable stats with"); + } + else + { + vardata.statsTuple = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(index->indexoid), + Int16GetDatum(qinfo->indexcol + 1), + BoolGetDatum(false)); + vardata.freefunc = ReleaseSysCache; + } + } + + if (HeapTupleIsValid(vardata.statsTuple)) + { + float4 *numbers; + int nnumbers; + + /* XXX is InvalidOID reqop fine?? 
*/ + if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0, + STATISTIC_KIND_CORRELATION, + InvalidOid, + NULL, + NULL, NULL, + &numbers, &nnumbers)) + { + double varCorrelation; + + Assert(nnumbers == 1); + varCorrelation = Abs(numbers[0]); + + if (varCorrelation > *indexCorrelation) + *indexCorrelation = varCorrelation; + + free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers); + } + } + + ReleaseVariableStats(vardata); + } + } + + qualSelectivity = clauselist_selectivity(root, indexQuals, + baserel->relid, + JOIN_INNER, NULL); + + indexRel = index_open(index->indexoid, AccessShareLock); + /* Treat an empty heap as one page so the divisions below stay finite */ + pagesPerRange = Min(BrinGetPagesPerRange(indexRel), Max(baserel->pages, 1)); + Assert(pagesPerRange > 0); + rangeProportion = (double) pagesPerRange / Max(baserel->pages, 1); + numRanges = 1.0 + (double) baserel->pages / pagesPerRange; + index_close(indexRel, AccessShareLock); /* - * Add on index qual eval costs, much as in genericcostestimate. + * Index selectivity is important for the planner to calculate the cost of + * the bitmap heap scan. Unfortunately, we don't have a robust way to + * estimate selectivity of BRIN. It can depend on many things. This is a + * long rationale about the incomplete calculation we have at the moment. + * + * Our starting point is that BRIN can never be more selective than btree, + * so its selectivity value cannot be lower than that of the btree. We are + * using a product of logical and physical selectivities to achieve this: + * + * (1 + logical_selectivity) * (1 + physical_selectivity) - 1 + * + * The logical selectivity (qualSelectivity) is calculated using + * the indexable expressions of the WHERE clause. This is the same value + * btree is using. The physical selectivity (rangeSelectivity) is + * calculated using the range proportion and the maximum correlation. + * The range proportion is a comparable value with selectivity. 
It is + * the selectivity of the smallest unit of the index. The final + * selectivity can never be less than that. 
+ * + * There is no very good reason behind the equation above. It is just + * some equation that never results in any value less than the two + * variables in it. It can, however, result in values greater than 1, + * while the variables are not. In this case, we will use the value 1 + * as selectivity. It is fair, because we expect the index not to help + * in many cases where correlation is not very good. We could use + * an empirical formula instead of this made-up equation, but it is not + * easy to find one that would work with different datasets. + * + * Using the inverse of the correlation by subtracting it from 1 is not + * really a comparable value with the selectivity. It is just a value + * between 0 and 1. On the other hand, it is the only value related to + * the BRIN quality we have available right now. We are using the + * arithmetic mean of it with the range proportion to normalize. + * This part of the physical selectivity is likely to be more effective + * than the range proportion in many circumstances as there would be many + * ranges on big tables. + * + * Using the inverse of the correlation of a column as selectivity of the + * index is wrong in many ways. First of all, it cannot be applied to all + * BRIN operator classes. It makes sense for the main built-in operator + * class "minmax", and makes a little sense for the other one "inclusion". + * It probably wouldn't make any sense for a completely different + * implementation, if there would be any. Maybe, we should push down this + * function to the operator class, but there is not enough reason to do it + * right now. + * + * Second, correlation is not dependent on any indexed expression. It + * probably doesn't make any sense for the complicated operators. It + * would probably affect basic comparison operators differently than + * equality operator. The effect would even differ by count of those + * expressions. 
For example, x IN (10, 20, 30) would be affected by + * correlation more than x = 15, even when their selectivities are the + * same. + * + * Last but not least, the correlation is a single value for the whole + * range. The indexed table can partly be very well correlated, but the + * correlation value can still be very low. For example, if a perfectly + * correlated table is copied 4 times using "INSERT INTO t SELECT * FROM t", + * the correlation would be 0.25, although the index would be almost + * as good as the version on the initial table. Or the expression can + * match the better correlated part of the table. It is not hard + * to imagine more scenarios where the correlation is a bad value to use + * as the selectivity. We should probably improve this by collecting more + * statistics, one day. + * + * Another problem in here is that the caller assumes the selectivity by + * tuples. It might have been better, if we had a way to return it as + * some number of pages. On the other hand, even though we know about the + * index, it is not easier for us to estimate the number of matching pages + * than it is for the caller. We are likely to make enough error by + * relying on the correlation, anyway. We are at least not making things + * worse in here trying to scale the estimation for the pages. + */ + rangeSelectivity = (rangeProportion + (1.0 - *indexCorrelation)) / 2.0; + selec = (1.0 + qualSelectivity) * (1.0 + rangeSelectivity) - 1.0; + CLAMP_PROBABILITY(selec); + + *indexSelectivity = selec; + + /* + * Add index qual arg costs, much as in genericcostestimate. 
*/ qual_arg_cost = other_operands_eval_cost(root, qinfos) + orderby_operands_eval_cost(root, path); - qual_op_cost = cpu_operator_cost * - (list_length(indexQuals) + list_length(indexOrderBys)); - *indexStartupCost += qual_arg_cost; *indexTotalCost += qual_arg_cost; - *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); *indexPages = index->pages; - /* XXX what about pages_per_range? */ + /* + * Add index qual op costs. Unlike other indexes, we are not processing + * tuples but ranges. + */ + qual_op_cost = cpu_operator_cost * list_length(indexQuals); + *indexTotalCost += numRanges * qual_op_cost; + + /* + * Add CPU index tuple costs, much as in genericcostestimate. + */ + *indexTotalCost += selec * numTuples * cpu_index_tuple_cost; } diff --git a/src/test/regress/expected/brin.out b/src/test/regress/expected/brin.out index f0008dd3..135e155 100644 --- a/src/test/regress/expected/brin.out +++ b/src/test/regress/expected/brin.out @@ -62,20 +62,21 @@ INSERT INTO brintest (inetcol, cidrcol, int4rangecol) SELECT cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous, 'empty'::int4range FROM tenk1 ORDER BY thousand, tenthous LIMIT 25; CREATE INDEX brinidx ON brintest USING brin ( byteacol, charcol, namecol, int8col, int2col, int4col, + (int4col * 2), textcol, oidcol, tidcol, float4col, float8col, macaddrcol, inetcol inet_inclusion_ops, inetcol inet_minmax_ops, cidrcol inet_inclusion_ops, cidrcol inet_minmax_ops, @@ -87,21 +88,21 @@ CREATE INDEX brinidx ON brintest USING brin ( intervalcol, timetzcol, bitcol, varbitcol, numericcol, uuidcol, int4rangecol, lsncol, boxcol ) with (pages_per_range = 1); -CREATE TABLE brinopers (colname name, typ text, +CREATE TABLE brinopers (colname text, typ text, op text[], value text[], matches int[], check (cardinality(op) = cardinality(value)), check (cardinality(op) = cardinality(matches))); INSERT INTO brinopers VALUES ('byteacol', 'bytea', '{>, >=, =, <=, <}', '{AAAAAA, AAAAAA, 
BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA, ZZZZZZ, ZZZZZZ}', '{100, 100, 1, 100, 100}'), ('charcol', '"char"', '{>, >=, =, <=, <}', @@ -128,20 +129,24 @@ INSERT INTO brinopers VALUES '{0, 0, 800, 1999, 1999}', '{100, 100, 1, 100, 100}'), ('int4col', 'int4', '{>, >=, =, <=, <}', '{0, 0, 800, 1999, 1999}', '{100, 100, 1, 100, 100}'), ('int4col', 'int8', '{>, >=, =, <=, <}', '{0, 0, 800, 1999, 1428427143}', '{100, 100, 1, 100, 100}'), + ('(int4col * 2)', 'int8', + '{>, >=, =, <=, <}', + '{0, 0, 800, 1999, 1428427143}', + '{100, 100, 1, 47, 100}'), ('int8col', 'int2', '{>, >=}', '{0, 0}', '{100, 100}'), ('int8col', 'int4', '{>, >=}', '{0, 0}', '{100, 100}'), ('int8col', 'int8', '{>, >=, =, <=, <}', @@ -291,23 +296,23 @@ DECLARE idx_ctids tid[]; ss_ctids tid[]; count int; plan_ok bool; plan_line text; BEGIN FOR r IN SELECT colname, oper, typ, value[ordinality], matches[ordinality] FROM brinopers, unnest(op) WITH ORDINALITY AS oper LOOP -- prepare the condition IF r.value IS NULL THEN - cond := format('%I %s %L', r.colname, r.oper, r.value); + cond := format('%s %s %L', r.colname, r.oper, r.value); ELSE - cond := format('%I %s %L::%s', r.colname, r.oper, r.value, r.typ); + cond := format('%s %s %L::%s', r.colname, r.oper, r.value, r.typ); END IF; -- run the query using the brin index SET enable_seqscan = 0; SET enable_bitmapscan = 1; plan_ok := false; FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest WHERE %s $y$, cond) LOOP IF plan_line LIKE '%Bitmap Heap Scan on brintest%' THEN plan_ok := true; diff --git a/src/test/regress/sql/brin.sql b/src/test/regress/sql/brin.sql index 5bf5387..62c0018 100644 --- a/src/test/regress/sql/brin.sql +++ b/src/test/regress/sql/brin.sql @@ -65,20 +65,21 @@ INSERT INTO brintest (inetcol, cidrcol, int4rangecol) SELECT 'empty'::int4range FROM tenk1 ORDER BY thousand, tenthous LIMIT 25; CREATE INDEX brinidx ON brintest USING brin ( byteacol, charcol, namecol, int8col, int2col, int4col, + (int4col * 2), 
textcol, oidcol, tidcol, float4col, float8col, macaddrcol, inetcol inet_inclusion_ops, inetcol inet_minmax_ops, cidrcol inet_inclusion_ops, cidrcol inet_minmax_ops, @@ -91,21 +92,21 @@ CREATE INDEX brinidx ON brintest USING brin ( timetzcol, bitcol, varbitcol, numericcol, uuidcol, int4rangecol, lsncol, boxcol ) with (pages_per_range = 1); -CREATE TABLE brinopers (colname name, typ text, +CREATE TABLE brinopers (colname text, typ text, op text[], value text[], matches int[], check (cardinality(op) = cardinality(value)), check (cardinality(op) = cardinality(matches))); INSERT INTO brinopers VALUES ('byteacol', 'bytea', '{>, >=, =, <=, <}', '{AAAAAA, AAAAAA, BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA, ZZZZZZ, ZZZZZZ}', '{100, 100, 1, 100, 100}'), ('charcol', '"char"', @@ -133,20 +134,24 @@ INSERT INTO brinopers VALUES '{0, 0, 800, 1999, 1999}', '{100, 100, 1, 100, 100}'), ('int4col', 'int4', '{>, >=, =, <=, <}', '{0, 0, 800, 1999, 1999}', '{100, 100, 1, 100, 100}'), ('int4col', 'int8', '{>, >=, =, <=, <}', '{0, 0, 800, 1999, 1428427143}', '{100, 100, 1, 100, 100}'), + ('(int4col * 2)', 'int8', + '{>, >=, =, <=, <}', + '{0, 0, 800, 1999, 1428427143}', + '{100, 100, 1, 47, 100}'), ('int8col', 'int2', '{>, >=}', '{0, 0}', '{100, 100}'), ('int8col', 'int4', '{>, >=}', '{0, 0}', '{100, 100}'), ('int8col', 'int8', '{>, >=, =, <=, <}', @@ -297,23 +302,23 @@ DECLARE idx_ctids tid[]; ss_ctids tid[]; count int; plan_ok bool; plan_line text; BEGIN FOR r IN SELECT colname, oper, typ, value[ordinality], matches[ordinality] FROM brinopers, unnest(op) WITH ORDINALITY AS oper LOOP -- prepare the condition IF r.value IS NULL THEN - cond := format('%I %s %L', r.colname, r.oper, r.value); + cond := format('%s %s %L', r.colname, r.oper, r.value); ELSE - cond := format('%I %s %L::%s', r.colname, r.oper, r.value, r.typ); + cond := format('%s %s %L::%s', r.colname, r.oper, r.value, r.typ); END IF; -- run the query using the brin index SET enable_seqscan = 0; SET enable_bitmapscan = 1; 
plan_ok := false; FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest WHERE %s $y$, cond) LOOP IF plan_line LIKE '%Bitmap Heap Scan on brintest%' THEN plan_ok := true;