diff --git a/doc/src/sgml/ref/postgres-ref.sgml b/doc/src/sgml/ref/postgres-ref.sgml index fc11aa1..380b8b8 100644 --- a/doc/src/sgml/ref/postgres-ref.sgml +++ b/doc/src/sgml/ref/postgres-ref.sgml @@ -405,14 +405,14 @@ PostgreSQL documentation - { s | i | o | b | t | n | m | h } + { s | i | o | b | c | t | n | m | h } Forbids the use of particular scan and join methods: s and i disable sequential and index scans respectively, - o, b and t - disable index-only scans, bitmap index scans, and TID scans + o, b, c and t + disable index-only scans, bitmap index scans, bitmap count scans, and TID scans respectively, while n, m, and h disable nested-loop, merge and hash joins respectively. diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index ea19ba6..0056786 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -101,7 +101,7 @@ static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); -static void show_tidbitmap_info(BitmapHeapScanState *planstate, +static void show_tidbitmap_info(long exact_pages, long lossy_pages, ExplainState *es); static void show_instrumentation_count(const char *qlabel, int which, PlanState *planstate, ExplainState *es); @@ -790,6 +790,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used) case T_IndexScan: case T_IndexOnlyScan: case T_BitmapHeapScan: + case T_BitmapCount: case T_TidScan: case T_SubqueryScan: case T_FunctionScan: @@ -933,6 +934,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_BitmapHeapScan: pname = sname = "Bitmap Heap Scan"; break; + case T_BitmapCount: + pname = sname = "Bitmap Count"; + break; case T_TidScan: pname = sname = "Tid Scan"; break; @@ -1123,6 +1127,7 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_SeqScan: case T_SampleScan: case T_BitmapHeapScan: + case T_BitmapCount: case T_TidScan: case T_SubqueryScan: case T_FunctionScan: @@ -1380,7 +1385,28 @@ ExplainNode(PlanState *planstate, List *ancestors, show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); if (es->analyze) - show_tidbitmap_info((BitmapHeapScanState *) planstate, es); + show_tidbitmap_info(((BitmapHeapScanState *) planstate)->exact_pages, + ((BitmapHeapScanState *) planstate)->lossy_pages, + es); + break; + case T_BitmapCount: + show_scan_qual(((BitmapCount *) plan)->bitmapqualorig, + "Recheck Cond", planstate, ancestors, es); + if (((BitmapCount *) plan)->bitmapqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + if (es->analyze) + { + ExplainPropertyLong("Heap Fetches", + ((BitmapCountState *) planstate)->bcs_HeapFetches, es); + show_tidbitmap_info(((BitmapCountState *) planstate)->exact_pages, + ((BitmapCountState *) planstate)->lossy_pages, + es); + } break; case T_SampleScan: show_tablesample(((SampleScan *) plan)->tablesample, @@ -2327,23 +2353,23 @@ show_hash_info(HashState *hashstate, ExplainState *es) - * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node + * If it's EXPLAIN ANALYZE, show exact/lossy pages for a bitmap heap or bitmap count scan node */ static void -show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es) +show_tidbitmap_info(long exact_pages, long lossy_pages, ExplainState *es) { if (es->format != EXPLAIN_FORMAT_TEXT)
{ - ExplainPropertyLong("Exact Heap Blocks", planstate->exact_pages, es); - ExplainPropertyLong("Lossy Heap Blocks", planstate->lossy_pages, es); + ExplainPropertyLong("Exact Heap Blocks", exact_pages, es); + ExplainPropertyLong("Lossy Heap Blocks", lossy_pages, es); } else { - if (planstate->exact_pages > 0 || planstate->lossy_pages > 0) + if (exact_pages > 0 || lossy_pages > 0) { appendStringInfoSpaces(es->str, es->indent * 2); appendStringInfoString(es->str, "Heap Blocks:"); - if (planstate->exact_pages > 0) - appendStringInfo(es->str, " exact=%ld", planstate->exact_pages); - if (planstate->lossy_pages > 0) - appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages); + if (exact_pages > 0) + appendStringInfo(es->str, " exact=%ld", exact_pages); + if (lossy_pages > 0) + appendStringInfo(es->str, " lossy=%ld", lossy_pages); appendStringInfoChar(es->str, '\n'); } } @@ -2622,6 +2648,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) case T_IndexScan: case T_IndexOnlyScan: case T_BitmapHeapScan: + case T_BitmapCount: case T_TidScan: case T_ForeignScan: case T_CustomScan: diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index d1c1324..7336ff2 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -18,7 +18,7 @@ OBJS = execAmi.o execCurrent.o execExpr.o execExprInterp.o \ execReplication.o execScan.o execSRF.o execTuples.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ nodeBitmapAnd.o nodeBitmapOr.o \ - nodeBitmapHeapscan.o nodeBitmapIndexscan.o \ + nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeBitmapCount.o \ nodeCustom.o nodeFunctionscan.o nodeGather.o \ nodeHash.o nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ nodeLimit.o nodeLockRows.o nodeGatherMerge.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 5d59f95..e29d555 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -19,6 +19,7 @@ #include "executor/nodeAppend.h" #include "executor/nodeBitmapAnd.h" +#include "executor/nodeBitmapCount.h" #include "executor/nodeBitmapHeapscan.h" #include "executor/nodeBitmapIndexscan.h" #include "executor/nodeBitmapOr.h" #include "executor/nodeCtescan.h" @@ -187,6 +188,10 @@ ExecReScan(PlanState *node) ExecReScanBitmapHeapScan((BitmapHeapScanState *) node); break; + case T_BitmapCountState: + ExecReScanBitmapCount((BitmapCountState *) node); + break; + case T_TidScanState: ExecReScanTidScan((TidScanState *) node); break; diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 5a84742..b06d0c9 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -707,6 +707,13 @@ ExecInitExprRec(Expr *node, PlanState *parent, ExprState *state, aggstate->aggs = lcons(astate, aggstate->aggs); aggstate->numaggs++; } + else if (parent && IsA(parent, BitmapCountState)) + { + /* + * A BitmapCountState is a valid parent node for a count(*) + * expression; it needs no additional initialization.
+ */ + } else { /* planner messed up */ diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 80c77ad..a01a7d4 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -82,6 +82,7 @@ #include "executor/nodeAppend.h" #include "executor/nodeBitmapAnd.h" +#include "executor/nodeBitmapCount.h" #include "executor/nodeBitmapHeapscan.h" #include "executor/nodeBitmapIndexscan.h" #include "executor/nodeBitmapOr.h" #include "executor/nodeCtescan.h" @@ -226,6 +227,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_BitmapCount: + result = (PlanState *) ExecInitBitmapCount((BitmapCount *) node, + estate, eflags); + break; + case T_TidScan: result = (PlanState *) ExecInitTidScan((TidScan *) node, estate, eflags); @@ -459,6 +465,10 @@ ExecProcNode(PlanState *node) result = ExecBitmapHeapScan((BitmapHeapScanState *) node); break; + case T_BitmapCountState: + result = ExecBitmapCount((BitmapCountState *) node); + break; + case T_TidScanState: result = ExecTidScan((TidScanState *) node); break; @@ -727,6 +737,10 @@ ExecEndNode(PlanState *node) ExecEndBitmapHeapScan((BitmapHeapScanState *) node); break; + case T_BitmapCountState: + ExecEndBitmapCount((BitmapCountState *) node); + break; + case T_TidScanState: ExecEndTidScan((TidScanState *) node); break; diff --git a/src/backend/executor/nodeBitmapCount.c b/src/backend/executor/nodeBitmapCount.c new file mode 100644 index 0000000..542615a --- /dev/null +++ b/src/backend/executor/nodeBitmapCount.c @@ -0,0 +1,397 @@ +/*------------------------------------------------------------------------- + * + * nodeBitmapCount.c + * Routines to support bitmap count scans of relations + * + * NOTE: it is critical that this plan type only be used with MVCC-compliant + * snapshots (ie, regular snapshots, not SnapshotAny or one of the other + * special snapshots). The reason is that since index and heap scans are + * decoupled, there can be no assurance that the index tuple prompting a + * visit to a particular heap TID still exists when the visit is made. + * Therefore the tuple might not exist anymore either (which is OK because + * heap_fetch will cope) --- but worse, the tuple slot could have been + * re-used for a newer tuple. With an MVCC snapshot the newer tuple is + * certain to fail the time qual and so it will not be mistakenly counted, + * but with anything else we might count a tuple that doesn't meet the + * required index qual conditions. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeBitmapCount.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * ExecBitmapCount counts tuples in a relation using bitmap info + * BitmapCountNext workhorse for above + * ExecInitBitmapCount creates and initializes state info. + * ExecReScanBitmapCount prepares to rescan the plan. + * ExecEndBitmapCount releases all storage.
+ */ +#include "postgres.h" + +#include <math.h> + +#include "access/relscan.h" +#include "access/transam.h" +#include "access/visibilitymap.h" +#include "executor/execdebug.h" +#include "executor/nodeBitmapCount.h" +#include "executor/nodeBitmapHeapscan.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/predicate.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/spccache.h" +#include "utils/snapmgr.h" +#include "utils/tqual.h" + + +static TupleTableSlot *BitmapCountNext(BitmapCountState *node); +static int count_heap_page(BitmapCountState *node, TBMIterateResult *tbmres); + + +/* ---------------------------------------------------------------- + * BitmapCountNext + * + * Perform the bitmap count scan. + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +BitmapCountNext(BitmapCountState *node) +{ + HeapScanDesc scan = node->ss.ss_currentScanDesc; + TIDBitmap *tbm; + TBMIterator *tbmiterator; + TBMIterateResult *tbmres; + int64 count = 0; + + if (node->bcs_finished) + { + return ExecClearTuple(node->ss.ss_ScanTupleSlot); + } + + /* Perform the underlying index scan */ + tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node)); + if (!tbm || !IsA(tbm, TIDBitmap)) + elog(ERROR, "unrecognized result from subplan"); + + /* Count the tuples */ + tbmiterator = tbm_begin_iterate(tbm); + for (tbmres = tbm_iterate(tbmiterator); tbmres != NULL; + tbmres = tbm_iterate(tbmiterator)) + { + /* + * Ignore any claimed entries past what we think is the end of the + * relation. (This is probably not necessary given that we got at + * least AccessShareLock on the table before performing any of the + * indexscans, but let's be safe.) + */ + if (tbmres->blockno >= scan->rs_nblocks) + { + continue; + } + + /* ntuples < 0 implies recheck */ + Assert(tbmres->ntuples >= 0 || tbmres->recheck); + + if (tbmres->recheck) + node->lossy_pages++; + else + node->exact_pages++; + + if (tbmres->recheck + || !VM_ALL_VISIBLE(node->ss.ss_currentRelation, + tbmres->blockno, + &node->bcs_VMBuffer)) + { + node->bcs_HeapFetches++; + count += count_heap_page(node, tbmres); + } + else + { + /* All the tuples on this page are visible, count them all */ + count += tbmres->ntuples; + } + } + + /* Free the resources */ + tbm_end_iterate(tbmiterator); + tbm_free(tbm); + + /* The scan is finished. */ + node->bcs_finished = true; + node->ss.ps.ps_ExprContext->ecxt_aggvalues[0] = Int64GetDatum(count); + node->ss.ps.ps_ExprContext->ecxt_aggnulls[0] = false; + return ExecProject(node->ss.ps.ps_ProjInfo); +} + +/* + * count_heap_page + * Fetch the heap page, recheck and count the tuples.
+ */ +static int +count_heap_page(BitmapCountState *node, TBMIterateResult *tbmres) +{ + int count = 0; + Page dp; + ItemId lp; + HeapScanDesc scan = node->ss.ss_currentScanDesc; + OffsetNumber targoffset; + TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; + ExprContext *econtext = node->ss.ps.ps_ExprContext; + + bitgetpage(scan, tbmres); + for (scan->rs_cindex = 0; scan->rs_cindex < scan->rs_ntuples; scan->rs_cindex++) + { + targoffset = scan->rs_vistuples[scan->rs_cindex]; + dp = (Page) BufferGetPage(scan->rs_cbuf); + lp = PageGetItemId(dp, targoffset); + Assert(ItemIdIsNormal(lp)); + + scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); + scan->rs_ctup.t_len = ItemIdGetLength(lp); + scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; + ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, + scan->rs_vistuples[scan->rs_cindex]); + + pgstat_count_heap_fetch(scan->rs_rd); + + /* + * Set up the result slot to point to this tuple. Note that the slot + * acquires a pin on the buffer. + */ + ExecStoreTuple(&scan->rs_ctup, + slot, + scan->rs_cbuf, + false); + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + + if (!ExecQual(node->bitmapqualorig, econtext)) + { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + ExecClearTuple(slot); + continue; + } + ExecClearTuple(slot); + /* OK to count this tuple */ + count++; + } + return count; +} + +/* + * BitmapCountRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +BitmapCountRecheck(BitmapCountState *node, TupleTableSlot *slot) +{ + ExprContext *econtext; + + /* + * extract necessary information from index scan node + */ + econtext = node->ss.ps.ps_ExprContext; + + /* Does the tuple meet the original qual conditions? */ + econtext->ecxt_scantuple = slot; + + ResetExprContext(econtext); + + return ExecQual(node->bitmapqualorig, econtext); +} + +/* ---------------------------------------------------------------- + * ExecBitmapCount(node) + * ---------------------------------------------------------------- + */ +TupleTableSlot * +ExecBitmapCount(BitmapCountState *node) +{ + return ExecScan(&node->ss, + (ExecScanAccessMtd) BitmapCountNext, + (ExecScanRecheckMtd) BitmapCountRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanBitmapCount(node) + * ---------------------------------------------------------------- + */ +void +ExecReScanBitmapCount(BitmapCountState *node) +{ + PlanState *outerPlan = outerPlanState(node); + + /* rescan to release any page pin */ + heap_rescan(node->ss.ss_currentScanDesc, NULL); + + node->bcs_finished = false; + + ExecScanReScan(&node->ss); + + /* + * if chgParam of subnode is not null then plan will be re-scanned by + * first ExecProcNode. + */ + if (outerPlan->chgParam == NULL) + ExecReScan(outerPlan); +} + +/* ---------------------------------------------------------------- + * ExecEndBitmapCount + * ---------------------------------------------------------------- + */ +void +ExecEndBitmapCount(BitmapCountState *node) +{ + Relation relation; + HeapScanDesc scanDesc; + + /* + * extract information from the node + */ + relation = node->ss.ss_currentRelation; + scanDesc = node->ss.ss_currentScanDesc; + + /* Release VM buffer pin, if any.
*/ + if (node->bcs_VMBuffer != InvalidBuffer) + { + ReleaseBuffer(node->bcs_VMBuffer); + node->bcs_VMBuffer = InvalidBuffer; + } + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clear out tuple table slots + */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close down subplans + */ + ExecEndNode(outerPlanState(node)); + + /* + * close heap scan + */ + heap_endscan(scanDesc); + + /* + * close the heap relation. + */ + ExecCloseScanRelation(relation); +} + +/* ---------------------------------------------------------------- + * ExecInitBitmapCount + * + * Initializes the scan's state information. + * ---------------------------------------------------------------- + */ +BitmapCountState * +ExecInitBitmapCount(BitmapCount *node, EState *estate, int eflags) +{ + BitmapCountState *scanstate; + Relation currentRelation; + ExprContext *econtext; + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + /* + * Assert caller didn't ask for an unsafe snapshot --- see comments at + * head of file. + */ + Assert(IsMVCCSnapshot(estate->es_snapshot)); + + /* + * create state structure + */ + scanstate = makeNode(BitmapCountState); + scanstate->ss.ps.plan = (Plan *) node; + scanstate->ss.ps.state = estate; + + scanstate->exact_pages = 0; + scanstate->lossy_pages = 0; + scanstate->bcs_VMBuffer = InvalidBuffer; + scanstate->bcs_HeapFetches = 0; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &scanstate->ss.ps); + + /* + * initialize child expressions + */ + scanstate->ss.ps.qual = + ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate); + scanstate->bitmapqualorig = + ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate); + + econtext = scanstate->ss.ps.ps_ExprContext; + econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum)); + econtext->ecxt_aggnulls = (bool *) palloc(sizeof(bool)); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &scanstate->ss.ps); + ExecInitScanTupleSlot(estate, &scanstate->ss); + + /* + * open the base relation and acquire appropriate lock on it. + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); + + scanstate->ss.ss_currentRelation = currentRelation; + + /* + * Even though we aren't going to do a conventional seqscan, it is useful + * to create a HeapScanDesc --- most of the fields in it are usable. + */ + scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation, + estate->es_snapshot, + 0, + NULL); + + /* + * get the scan type from the relation descriptor. + */ + ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation)); + + /* + * Initialize result tuple type and projection info. + */ + ExecAssignResultTypeFromTL(&scanstate->ss.ps); + ExecAssignScanProjectionInfo(&scanstate->ss); + + /* + * initialize child nodes + * + * We do this last because the child nodes will open indexscans on our + * relation's indexes, and we want to be sure we have acquired a lock on + * the relation first. + */ + outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); + + /* + * all done. 
+ */ + return scanstate; +} diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 19eb175..e8d2f28 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -52,7 +52,6 @@ static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node); -static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres); static inline void BitmapDoneInitializingSharedState( ParallelBitmapHeapState *pstate); static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, @@ -345,7 +344,7 @@ BitmapHeapNext(BitmapHeapScanState *node) * builds an array indicating which tuples on the page are both potentially * interesting according to the bitmap, and visible according to the snapshot. */ -static void +void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) { BlockNumber page = tbmres->blockno; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 1c88d60..42d38eb 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -555,6 +555,27 @@ _copyBitmapHeapScan(const BitmapHeapScan *from) } /* + * _copyBitmapCount + */ +static BitmapCount * +_copyBitmapCount(const BitmapCount *from) +{ + BitmapCount *newnode = makeNode(BitmapCount); + + /* + * copy node superclass fields + */ + CopyScanFields((const Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_NODE_FIELD(bitmapqualorig); + + return newnode; +} + +/* * _copyTidScan */ static TidScan * @@ -4688,6 +4709,9 @@ copyObjectImpl(const void *from) case T_BitmapHeapScan: retval = _copyBitmapHeapScan(from); break; + case T_BitmapCount: + retval = _copyBitmapCount(from); + break; case T_TidScan: retval = _copyTidScan(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index bbb63a4..1791e08 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -570,6 +570,16 @@ _outBitmapHeapScan(StringInfo str, const BitmapHeapScan *node) } static void +_outBitmapCount(StringInfo str, const BitmapCount *node) +{ + WRITE_NODE_TYPE("BITMAPCOUNT"); + + _outScanInfo(str, (const Scan *) node); + + WRITE_NODE_FIELD(bitmapqualorig); +} + +static void _outTidScan(StringInfo str, const TidScan *node) { WRITE_NODE_TYPE("TIDSCAN"); @@ -1739,6 +1749,16 @@ _outBitmapHeapPath(StringInfo str, const BitmapHeapPath *node) } static void +_outBitmapCountPath(StringInfo str, const BitmapCountPath *node) +{ + WRITE_NODE_TYPE("BITMAPCOUNTPATH"); + + _outPathInfo(str, (const Path *) node); + + WRITE_NODE_FIELD(bitmapqual); +} + +static void _outBitmapAndPath(StringInfo str, const BitmapAndPath *node) { WRITE_NODE_TYPE("BITMAPANDPATH"); @@ -3602,6 +3622,9 @@ outNode(StringInfo str, const void *obj) case T_BitmapHeapScan: _outBitmapHeapScan(str, obj); break; + case T_BitmapCount: + _outBitmapCount(str, obj); + break; case T_TidScan: _outTidScan(str, obj); break; @@ -3842,6 +3865,9 @@ outNode(StringInfo str, const void *obj) case T_BitmapAndPath: _outBitmapAndPath(str, obj); break; + case T_BitmapCountPath: + _outBitmapCountPath(str, obj); + break; case T_BitmapOrPath: _outBitmapOrPath(str, obj); break; diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index eab8f68..0d8e2db 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -39,6 +39,7 @@ #include "postgres.h" #include <limits.h> +#include <math.h> #include "access/htup_details.h" #include "nodes/bitmapset.h" @@ -236,6 +237,7 @@ static void tbm_lossify(TIDBitmap *tbm);
static int tbm_comparator(const void *left, const void *right); static int tbm_shared_comparator(const void *left, const void *right, void *arg); +static int tbm_estimate_maxentries(long maxbytes); /* * Simple inline murmur hash implementation for the exact width required, for @@ -281,7 +283,6 @@ TIDBitmap * tbm_create(long maxbytes, dsa_area *dsa) { TIDBitmap *tbm; - long nbuckets; /* Create the TIDBitmap struct and zero all its fields */ tbm = makeNode(TIDBitmap); @@ -289,17 +290,7 @@ tbm_create(long maxbytes, dsa_area *dsa) tbm->mcxt = CurrentMemoryContext; tbm->status = TBM_EMPTY; - /* - * Estimate number of hashtable entries we can have within maxbytes. This - * estimates the hash cost as sizeof(PagetableEntry), which is good enough - * for our purpose. Also count an extra Pointer per entry for the arrays - * created during iteration readout. - */ - nbuckets = maxbytes / - (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer)); - nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */ - nbuckets = Max(nbuckets, 16); /* sanity limit */ - tbm->maxentries = (int) nbuckets; + tbm->maxentries = tbm_estimate_maxentries(maxbytes); tbm->lossify_start = 0; tbm->dsa = dsa; tbm->dsapagetable = InvalidDsaPointer; @@ -699,6 +690,37 @@ tbm_is_empty(const TIDBitmap *tbm) } /* + * tbm_estimate_maxentries + * Estimate number of hashtable entries we can have within maxbytes. This + * estimates the hash cost as sizeof(PagetableEntry), which is good enough + * for our purpose. Also count an extra Pointer per entry for the arrays + * created during iteration readout. + */ +static int +tbm_estimate_maxentries(long maxbytes) +{ + long nbuckets = maxbytes / + (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer)); + + nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */ + nbuckets = Max(nbuckets, 16); /* sanity limit */ + return (int) nbuckets; +} + +/* + * tbm_estimate_lossy_pages + * Estimate the number of lossy pages a bitmap limited to 'maxbytes' bytes + * would contain after 'total_pages' pages have been added to it. + */ +double +tbm_estimate_lossy_pages(long maxbytes, double total_pages) +{ + int maxentries = tbm_estimate_maxentries(maxbytes); + /* tbm_lossify() frees entries down to about maxentries / 2 once the limit is hit */ + return total_pages < maxentries ? 0. : (total_pages - maxentries / 2); +} + +/* * tbm_begin_iterate - prepare to iterate through a TIDBitmap * * The TBMIterator struct is created in the caller's memory context.
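As a sanity check on the two estimation functions added above, the following standalone sketch (not part of the patch) reproduces the same arithmetic; ENTRY_SIZE and PTR_SIZE are assumed stand-ins for sizeof(PagetableEntry) and sizeof(Pointer) on a typical 64-bit build. With work_mem at 4MB it predicts about 74898 entries before the bitmap starts lossifying, after which roughly half of them are kept exact:

#include <stdio.h>

#define ENTRY_SIZE 40L			/* assumed sizeof(PagetableEntry) */
#define PTR_SIZE 8L				/* assumed sizeof(Pointer) */

/* Mirrors tbm_estimate_maxentries() above. */
static int
sketch_maxentries(long maxbytes)
{
	long		nbuckets = maxbytes / (ENTRY_SIZE + 2 * PTR_SIZE);

	if (nbuckets > 2147483646L)	/* INT_MAX - 1 safety limit */
		nbuckets = 2147483646L;
	if (nbuckets < 16)			/* sanity limit */
		nbuckets = 16;
	return (int) nbuckets;
}

/* Mirrors tbm_estimate_lossy_pages() above. */
static double
sketch_lossy_pages(long maxbytes, double total_pages)
{
	int			maxentries = sketch_maxentries(maxbytes);

	/* pages beyond roughly maxentries / 2 are expected to be lossy */
	return total_pages < maxentries ? 0. : total_pages - maxentries / 2;
}

int
main(void)
{
	long		maxbytes = 4L * 1024 * 1024;	/* work_mem = 4MB */

	printf("maxentries = %d\n", sketch_maxentries(maxbytes));
	printf("lossy pages out of 100000 = %.0f\n",
		   sketch_lossy_pages(maxbytes, 100000.));
	return 0;
}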
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index a1e1a87..f9aa98d 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -3192,6 +3192,9 @@ print_path(PlannerInfo *root, Path *path, int indent) case T_BitmapOrPath: ptype = "BitmapOrPath"; break; + case T_BitmapCountPath: + ptype = "BitmapCountPath"; + break; case T_TidPath: ptype = "TidScan"; break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 92de2b7..968d241 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -119,6 +119,7 @@ bool enable_seqscan = true; bool enable_indexscan = true; bool enable_indexonlyscan = true; bool enable_bitmapscan = true; +bool enable_bitmapcount = true; bool enable_tidscan = true; bool enable_sort = true; bool enable_hashagg = true; @@ -1022,6 +1023,145 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, } /* + * estimate_exact_pages_fraction + * Given the bitmap index scan tree, estimate the fraction of pages + * that will not require a recheck because the tid bitmap did not lossify them + */ +static double +estimate_exact_pages_fraction(Path *bitmapqual) +{ + RelOptInfo *parent = bitmapqual->parent; + List *children; + ListCell *l; + double result; + if (IsA(bitmapqual, IndexPath)) + { + /* + * For a single scan, the number of heap pages that need to be fetched + * is the same as the Mackert and Lohman formula for the case T <= b + * (ie, no re-reads needed). + */ + double index_selectivity = ((IndexPath *) bitmapqual)->indexselectivity; + double tuples_fetched = clamp_row_est(index_selectivity * parent->tuples); + double T = (parent->pages > 1) ? (double) parent->pages : 1.0; + double pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + return 1. - tbm_estimate_lossy_pages(work_mem * 1024L, pages_fetched) / pages_fetched; + } + else if (IsA(bitmapqual, BitmapAndPath)) + { + children = ((BitmapAndPath *) bitmapqual)->bitmapquals; + } + else if (IsA(bitmapqual, BitmapOrPath)) + { + children = ((BitmapOrPath *) bitmapqual)->bitmapquals; + } + else + { + elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); + return 0.; /* keep compiler quiet */ + } + + /* + * If a page is lossy in one of the bitmaps, that page in their union or intersection + * will be lossy too. + */ + result = 1.; + foreach(l, children) + { + Path *child = (Path *) lfirst(l); + result *= estimate_exact_pages_fraction(child); + } + return result; +} + +/* + * cost_bitmap_count + * Determines and returns the cost of counting tuples using a bitmap + * index-then-count plan.
* + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + * 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths + */ +void +cost_bitmap_count(Path *path, PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info, + Path *bitmapqual) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + Cost indexTotalCost; + QualCost qpqual_cost; + Cost cpu_run_cost; + double tuples_fetched; + double pages_fetched; + double lossy_pages = 0; + double lossy_tuples = 0; + double spc_seq_page_cost, + spc_random_page_cost; + + /* Should only be applied to base relations */ + Assert(IsA(baserel, RelOptInfo)); + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_RELATION); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + if (!enable_bitmapcount) + startup_cost += disable_cost; + + pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual, + 1. /* loop_count */ , &indexTotalCost, + &tuples_fetched); + + startup_cost += indexTotalCost; + lossy_pages = pages_fetched * (1. - estimate_exact_pages_fraction(bitmapqual)); + + /* Not-all-visible pages must be fetched and rechecked too; treat them as lossy */ + lossy_pages += (pages_fetched - lossy_pages) * (1. - baserel->allvisfrac); + + lossy_tuples = lossy_pages * tuples_fetched / pages_fetched; + + /* Fetch estimated page costs for tablespace containing table. */ + get_tablespace_page_costs(baserel->reltablespace, + &spc_random_page_cost, + &spc_seq_page_cost); + + /* + * The bitmap count executor is not meant to fetch large numbers of heap + * pages, so add an extra penalty for page fetches. + */ + run_cost += lossy_pages * 2. * spc_random_page_cost; + + /* + * Estimate CPU costs per tuple. We have to pay for rechecking indexquals + * for tuples from lossy bitmap pages, and pay for counting all the + * tuples.
+ */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_run_cost = cpu_tuple_cost * tuples_fetched; + cpu_run_cost += qpqual_cost.per_tuple * lossy_tuples; + + /* Parallelism is not supported */ + Assert(path->parallel_workers == 0); + + run_cost += cpu_run_cost; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* * cost_bitmap_tree_node * Extract cost and selectivity from a bitmap tree node (index/and/or) */ diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index a5d19f9..df381b9 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -117,7 +117,7 @@ static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel, static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, List *clauses, List *other_clauses); static List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, - List *clauses, List *other_clauses); + List *clauses, List *other_clauses, bool for_bitmap_count); static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths); static int path_usage_comparator(const void *a, const void *b); @@ -192,6 +192,427 @@ static Const *string_to_const(const char *str, Oid datatype); /* + * get_bitmap_quals() + * Creates indexqual and collects equivalence classes for a bitmap index scan tree + */ +static void +get_bitmap_quals(PlannerInfo *root, Path *bitmapqual, + List **indexqual, + List **indexECs) +{ + if (IsA(bitmapqual, BitmapAndPath)) + { + BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; + List *subindexquals = NIL; + List *subindexECs = NIL; + ListCell *l; + + /* + * There may well be redundant quals among the subplans, since a + * top-level WHERE qual might have gotten used to form several + * different index quals. We don't try exceedingly hard to eliminate + * redundancies, but we do eliminate obvious duplicates by using + * list_concat_unique. + */ + foreach(l, apath->bitmapquals) + { + List *subindexqual; + List *subindexEC; + + get_bitmap_quals(root, (Path *) lfirst(l), + &subindexqual, + &subindexEC); + subindexquals = list_concat_unique(subindexquals, subindexqual); + /* Duplicates in indexECs aren't worth getting rid of */ + subindexECs = list_concat(subindexECs, subindexEC); + } + *indexqual = subindexquals; + *indexECs = subindexECs; + } + else if (IsA(bitmapqual, BitmapOrPath)) + { + BitmapOrPath *opath = (BitmapOrPath *) bitmapqual; + List *subindexquals = NIL; + bool const_true_subindexqual = false; + ListCell *l; + + /* + * Here, we only detect qual-free subplans. A qual-free subplan would + * cause us to generate "... OR true ..." which we may as well reduce + * to just "true". We do not try to eliminate redundant subclauses + * because (a) it's not as likely as in the AND case, and (b) we might + * well be working with hundreds or even thousands of OR conditions, + * perhaps from a long IN list. The performance of list_append_unique + * would be unacceptable. 
+ */ + foreach(l, opath->bitmapquals) + { + List *subindexqual; + List *subindexEC; + + get_bitmap_quals(root, (Path *) lfirst(l), + &subindexqual, + &subindexEC); + if (subindexqual == NIL) + const_true_subindexqual = true; + else if (!const_true_subindexqual) + subindexquals = lappend(subindexquals, + make_ands_explicit(subindexqual)); + } + + /* + * If there were constant-TRUE subquals, the OR reduces to constant + * TRUE. Also, avoid generating one-element ORs, which could happen + * due to redundancy elimination or ScalarArrayOpExpr quals. + */ + if (const_true_subindexqual) + *indexqual = NIL; + else if (list_length(subindexquals) <= 1) + *indexqual = subindexquals; + else + *indexqual = list_make1(make_orclause(subindexquals)); + *indexECs = NIL; + } + else if (IsA(bitmapqual, IndexPath)) + { + IndexPath *ipath = (IndexPath *) bitmapqual; + List *subindexECs; + ListCell *l; + + *indexqual = get_actual_clauses(ipath->indexquals); + foreach(l, ipath->indexinfo->indpred) + { + Expr *pred = (Expr *) lfirst(l); + + /* + * We know that the index predicate must have been implied by the + * query condition as a whole, but it may or may not be implied by + * the conditions that got pushed into the bitmapqual. Avoid + * generating redundant conditions. + */ + if (!predicate_implied_by(list_make1(pred), ipath->indexclauses)) + { + *indexqual = lappend(*indexqual, pred); + } + } + subindexECs = NIL; + foreach(l, ipath->indexquals) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + + if (rinfo->parent_ec) + subindexECs = lappend(subindexECs, rinfo->parent_ec); + } + *indexECs = subindexECs; + } + else + { + elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); + } +} + +/* + * path_clauses_comparator() + * qsort comparator that sorts paths by the number of clauses their quals use, descending + */ +static int +path_clauses_comparator(const void *a, const void *b) +{ + PathClauseUsage *pa = *(PathClauseUsage *const *) a; + PathClauseUsage *pb = *(PathClauseUsage *const *) b; + int na = bms_num_members(pa->clauseids); + int nb = bms_num_members(pb->clauseids); + + if (na > nb) + return -1; + if (nb > na) + return 1; + return 0; +} + +/* + * choose_bitmap_count_and + * Given a nonempty list of bitmap paths, AND them into one path + * suitable for a bitmap count scan. The result is either a single + * one of the inputs, or a BitmapAndPath combining multiple inputs. + * + * For a bitmap count scan, we can only use an index path that checks all + * the restriction clauses and does not require subsequent filtering of + * the results. With this in mind, we try to build an AND path that + * includes as many restriction clauses as possible. + */ +static Path * +choose_bitmap_count_and(PlannerInfo *root, RelOptInfo *rel, List *paths) +{ + int npaths = list_length(paths); + PathClauseUsage **pathinfoarray; + PathClauseUsage *pathinfo; + List *clauselist; + List *bestpaths = NIL; + int i; + ListCell *l; + Bitmapset *clauseids; + List *quals; + + Assert(npaths > 0); /* else caller error */ + if (npaths == 1) + return (Path *) linitial(paths); /* easy case */ + + /* + * Extract clause usage info and detect any paths that use exactly the + * same set of clauses; keep only the cheapest-to-scan of any such groups. + * The surviving paths are put into an array for qsort'ing.
+ */ + pathinfoarray = (PathClauseUsage **) + palloc(npaths * sizeof(PathClauseUsage *)); + clauselist = NIL; + npaths = 0; + foreach(l, paths) + { + Path *ipath = (Path *) lfirst(l); + + pathinfo = classify_index_clause_usage(ipath, &clauselist); + for (i = 0; i < npaths; i++) + { + if (bms_equal(pathinfo->clauseids, pathinfoarray[i]->clauseids)) + break; + } + if (i < npaths) + { + /* duplicate clauseids, keep the cheaper one */ + Cost ncost; + Cost ocost; + Selectivity nselec; + Selectivity oselec; + + cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec); + cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec); + if (ncost < ocost) + pathinfoarray[i] = pathinfo; + } + else + { + /* not duplicate clauseids, add to array */ + pathinfoarray[npaths++] = pathinfo; + } + } + + /* If only one surviving path, we're done */ + if (npaths == 1) + return pathinfoarray[0]->path; + + /* Sort the surviving paths by the number of clauses */ + qsort(pathinfoarray, npaths, sizeof(PathClauseUsage *), + path_clauses_comparator); + + /* + * Start with the path that includes the most restriction clauses, and + * append all other paths that do not overlap. + */ + clauseids = NULL; + bestpaths = NIL; + quals = NIL; + + for (i = 0; i < npaths; i++) + { + pathinfo = pathinfoarray[i]; + if (bms_overlap(pathinfo->clauseids, clauseids)) + continue; + if (pathinfo->preds) + { + bool redundant = false; + + /* we check each predicate clause separately */ + foreach(l, pathinfo->preds) + { + Node *np = (Node *) lfirst(l); + + if (predicate_implied_by(list_make1(np), quals)) + { + redundant = true; + break; /* out of inner foreach loop */ + } + } + if (redundant) + continue; + } + clauseids = bms_union(clauseids, pathinfo->clauseids); + bestpaths = lappend(bestpaths, pathinfo->path); + quals = list_concat(quals, list_copy(pathinfo->quals)); + quals = list_concat(quals, list_copy(pathinfo->preds)); + } + + if (list_length(bestpaths) == 1) + return (Path *) linitial(bestpaths); /* no need for AND */ + return (Path *) create_bitmap_and_path(root, rel, bestpaths); +} + +/* + * consider_bitmap_count_path() + * Try to create a path that calculates the given COUNT aggregate + * by performing a bitmap index scan (or a combination of such), + * and then counting the tuples the resulting bitmap represents. + */ +BitmapCountPath * +consider_bitmap_count_path(PlannerInfo *root, RelOptInfo *rel) +{ + List *indexpaths = NIL; + List *bitindexpaths = NIL; + IndexClauseSet rclauseset; + ListCell *lc; + Path *bitmapqual; + ListCell *l; + List *indexquals; + List *indexECs; + + /* Skip the whole mess if no indexes */ + if (rel->indexlist == NIL) + return NULL; + + /* Examine each index in turn */ + foreach(lc, rel->indexlist) + { + List *index_clauses; + List *clause_columns; + Relids outer_relids; + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + IndexPath *ipath; + + /* Protect limited-size array in IndexClauseSets */ + Assert(index->ncolumns <= INDEX_MAX_KEYS); + + /* + * Ignore partial indexes that do not match the query. + * (generate_bitmap_or_paths() might be able to do something with + * them, but that's of no concern here.) + */ + if (index->indpred != NIL && !index->predOK) + continue; + + /* We need bitmap index scans here */ + if (!index->amhasgetbitmap) + { + continue; + } + + /* + * Identify the restriction clauses that can match the index.
+ */ + MemSet(&rclauseset, 0, sizeof(rclauseset)); + match_restriction_clauses_to_index(rel, index, &rclauseset); + + index_clauses = NIL; + clause_columns = NIL; + outer_relids = bms_copy(rel->lateral_relids); + for (int indexcol = 0; indexcol < index->ncolumns; indexcol++) + { + ListCell *lc; + + foreach(lc, rclauseset.indexclauses[indexcol]) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + Assert(IsA(rinfo, RestrictInfo)); + index_clauses = lappend(index_clauses, rinfo); + clause_columns = lappend_int(clause_columns, indexcol); + outer_relids = bms_add_members(outer_relids, rinfo->clause_relids); + } + + /* + * If no clauses match the first index column, check for + * amoptionalkey restriction. We can't generate a scan over an + * index with amoptionalkey = false unless there's at least one + * index clause. (When working on columns after the first, this + * test cannot fail. It is always okay for columns after the + * first to not have any clauses.) + */ + if (index_clauses == NIL && !index->amoptionalkey) + { + break; + } + } + if (index_clauses == NIL) + { + continue; + } + /* We do not want the index's rel itself listed in outer_relids */ + outer_relids = bms_del_member(outer_relids, rel->relid); + /* Enforce convention that outer_relids is exactly NULL if empty */ + if (bms_is_empty(outer_relids)) + outer_relids = NULL; + + ipath = create_index_path(root, index, + index_clauses, + clause_columns, + NIL /* orderbyclauses */ , + NIL /* orderbyclausecols */ , + NIL /* useful_pathkeys */ , + NoMovementScanDirection, + false /* index-only scan */ , + outer_relids, + 0 /* loop count */ , + false /* partial path */ ); + + bitindexpaths = lappend(bitindexpaths, ipath); + } + + /* + * Generate BitmapOrPaths for any suitable OR-clauses present in the + * restriction list. Add these to bitindexpaths. + */ + indexpaths = generate_bitmap_or_paths(root, rel, + rel->baserestrictinfo, NIL, + true /* for_bitmap_count */ ); + bitindexpaths = list_concat(bitindexpaths, indexpaths); + + /* + * If we found anything usable, generate a BitmapCountPath for the most + * promising combination of restriction bitmap index paths. Note there + * will be only one such path no matter how many indexes exist. This + * should be sufficient since there's basically only one figure of merit + * (total cost) for such a path. + */ + if (bitindexpaths == NIL) + { + return NULL; + } + + bitmapqual = choose_bitmap_count_and(root, rel, bitindexpaths); + + /* + * Check that all restriction clauses are satisfied by this index scan + */ + get_bitmap_quals(root, bitmapqual, &indexquals, &indexECs); + + foreach(l, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; + + Assert(IsA(rinfo, RestrictInfo)); + if (rinfo->pseudoconstant) + { + /* + * Bitmap count plans do not work with gating Result nodes. Give + * up. + */ + return NULL; + } + if (list_member(indexquals, clause)) + continue; /* simple duplicate */ + if (rinfo->parent_ec && list_member_ptr(indexECs, rinfo->parent_ec)) + continue; /* derived from same EquivalenceClass */ + if (!contain_mutable_functions(clause) && + predicate_implied_by(list_make1(clause), indexquals)) + continue; /* provably implied by indexquals */ + return NULL; + } + + return create_bitmap_count_path(root, rel, bitmapqual, + rel->lateral_relids); +} + +/* * create_index_paths() * Generate all interesting index paths for the given relation. * Candidate paths are added to the rel's pathlist (using add_path).
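For orientation, the bitmap count cost model in costsize.c above rests on two pieces: the Mackert-Lohman approximation for the heap pages a single index scan touches (the T <= b case, pages = 2*T*t / (2*T + t)), and the rule that BitmapAnd/BitmapOr nodes multiply the per-child exact-page fractions, since a page stays exact only if every child bitmap kept it exact. The standalone sketch below illustrates the shape of that calculation with hypothetical inputs; it is not patch code:

#include <stdio.h>

/* Mackert-Lohman estimate for the no-re-read case (T <= b). */
static double
sketch_pages_fetched(double tuples_fetched, double rel_pages)
{
	double		T = rel_pages > 1. ? rel_pages : 1.;

	return (2. * T * tuples_fetched) / (2. * T + tuples_fetched);
}

int
main(void)
{
	double		rel_pages = 10000.;	/* hypothetical relation size */
	double		tuples = 5000.;		/* hypothetical tuples selected */
	double		pages = sketch_pages_fetched(tuples, rel_pages);

	/* per-child exact fractions multiply for BitmapAnd/BitmapOr nodes */
	double		child_a = 0.9;		/* hypothetical exact fraction */
	double		child_b = 0.8;		/* hypothetical exact fraction */

	printf("pages fetched = %.0f\n", pages);	/* prints 4000 */
	printf("combined exact fraction = %.2f\n", child_a * child_b);	/* 0.72 */
	return 0;
}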
@@ -312,7 +733,8 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) * restriction list. Add these to bitindexpaths. */ indexpaths = generate_bitmap_or_paths(root, rel, - rel->baserestrictinfo, NIL); + rel->baserestrictinfo, NIL, + false /* for_bitmap_count */ ); bitindexpaths = list_concat(bitindexpaths, indexpaths); /* @@ -320,7 +742,8 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) * the joinclause list. Add these to bitjoinpaths. */ indexpaths = generate_bitmap_or_paths(root, rel, - joinorclauses, rel->baserestrictinfo); + joinorclauses, rel->baserestrictinfo, + false /* for_bitmap_count */ ); bitjoinpaths = list_concat(bitjoinpaths, indexpaths); /* @@ -1260,10 +1683,12 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, * other_clauses is a list of additional clauses that can be assumed true * for the purpose of generating indexquals, but are not to be searched for * ORs. (See build_paths_for_OR() for motivation.) + * 'for_bitmap_count' selects which AND-combination heuristic to use: + * the one for bitmap heap scans or the one for bitmap count scans */ static List * generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, - List *clauses, List *other_clauses) + List *clauses, List *other_clauses, bool for_bitmap_count) { List *result = NIL; List *all_clauses; @@ -1309,7 +1734,8 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, indlist = list_concat(indlist, generate_bitmap_or_paths(root, rel, andargs, - all_clauses)); + all_clauses, + for_bitmap_count)); } else { @@ -1338,7 +1764,14 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, * OK, pick the most promising AND combination, and add it to * pathlist. */ - bitmapqual = choose_bitmap_and(root, rel, indlist); + if (for_bitmap_count) + { + bitmapqual = choose_bitmap_count_and(root, rel, indlist); + } + else + { + bitmapqual = choose_bitmap_and(root, rel, indlist); + } pathlist = lappend(pathlist, bitmapqual); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index d357479..6e4617d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -123,6 +123,10 @@ static Scan *create_indexscan_plan(PlannerInfo *root, IndexPath *best_path, static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root, BitmapHeapPath *best_path, List *tlist, List *scan_clauses); +static BitmapCount *create_bitmap_count_plan(PlannerInfo *root, + BitmapCountPath *best_path, + List *tlist, + List *scan_clauses); static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, List **qual, List **indexqual, List **indexECs); static void bitmap_subplan_mark_shared(Plan *plan); @@ -183,6 +187,10 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist, Plan *lefttree, List *bitmapqualorig, Index scanrelid); +static BitmapCount *make_bitmap_count(List *qptlist, + Plan *lefttree, + List *bitmapqualorig, + Index scanrelid); static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid, List *tidquals); static SubqueryScan *make_subqueryscan(List *qptlist, @@ -359,6 +367,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) case T_IndexScan: case T_IndexOnlyScan: case T_BitmapHeapScan: + case T_BitmapCount: case T_TidScan: case T_SubqueryScan: case T_FunctionScan: @@ -626,6 +635,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) scan_clauses); break; + case T_BitmapCount: + plan = (Plan *) create_bitmap_count_plan(root, + (BitmapCountPath *) best_path, + tlist, +
scan_clauses); + break; + case T_TidScan: plan = (Plan *) create_tidscan_plan(root, (TidPath *) best_path, @@ -2752,6 +2768,108 @@ create_bitmap_scan_plan(PlannerInfo *root, } /* + * create_bitmap_count_plan + * Returns a bitmap count plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static BitmapCount * +create_bitmap_count_plan(PlannerInfo *root, + BitmapCountPath *best_path, + List *tlist, + List *scan_clauses) +{ + Index baserelid = best_path->path.parent->relid; + Plan *bitmapqualplan; + List *bitmapqualorig; + List *indexquals; + List *indexECs; + List *qpqual; + ListCell *l; + BitmapCount *scan_plan; + + /* it should be a base rel... */ + Assert(baserelid > 0); + Assert(best_path->path.parent->rtekind == RTE_RELATION); + + /* Process the bitmapqual tree into a Plan tree and qual lists */ + bitmapqualplan = create_bitmap_subplan(root, best_path->bitmapqual, + &bitmapqualorig, &indexquals, + &indexECs); + + /* + * The qpqual list must contain all restrictions not automatically handled + * by the index, other than pseudoconstant clauses which will be handled + * by a separate gating plan node. All the predicates in the indexquals + * will be checked (either by the index itself, or by + * nodeBitmapCount.c), but if there are any "special" operators + * involved then they must be added to qpqual. The upshot is that qpqual + * must contain scan_clauses minus whatever appears in indexquals. + * + * This loop is similar to the comparable code in create_indexscan_plan(), + * but with some differences because it has to compare the scan clauses to + * stripped (no RestrictInfos) indexquals. See comments there for more + * info. + * + * In normal cases simple equal() checks will be enough to spot duplicate + * clauses, so we try that first. We next see if the scan clause is + * redundant with any top-level indexqual by virtue of being generated + * from the same EC. After that, try predicate_implied_by(). + * + * Unlike create_indexscan_plan(), the predicate_implied_by() test here is + * useful for getting rid of qpquals that are implied by index predicates, + * because the predicate conditions are included in the "indexquals" + * returned by create_bitmap_subplan(). Bitmap scans have to do it that + * way because predicate conditions need to be rechecked if the scan + * becomes lossy, so they have to be included in bitmapqualorig. + */ + qpqual = NIL; + foreach(l, scan_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; + + Assert(IsA(rinfo, RestrictInfo)); + if (rinfo->pseudoconstant) + continue; /* we may drop pseudoconstants here */ + if (list_member(indexquals, clause)) + continue; /* simple duplicate */ + if (rinfo->parent_ec && list_member_ptr(indexECs, rinfo->parent_ec)) + continue; /* derived from same EquivalenceClass */ + if (!contain_mutable_functions(clause) && + predicate_implied_by(list_make1(clause), indexquals)) + continue; /* provably implied by indexquals */ + qpqual = lappend(qpqual, rinfo); + } + + /* + * No point using bitmap count if we have to recheck some quals for every + * tuple. Such a path should not have been generated. + */ + Assert(!qpqual); + + /* + * We have to replace any outer-relation variables with nestloop params in + * the qpqual and bitmapqualorig expressions. (This was already done for + * expressions attached to plan nodes in the bitmapqualplan tree.)
+ */ + if (best_path->path.param_info) + { + bitmapqualorig = (List *) + replace_nestloop_params(root, (Node *) bitmapqualorig); + } + + /* Finally ready to build the plan node */ + scan_plan = make_bitmap_count(tlist, + bitmapqualplan, + bitmapqualorig, + baserelid); + + copy_generic_path_info(&scan_plan->scan.plan, &best_path->path); + + return scan_plan; +} + +/* * Given a bitmapqual tree, generate the Plan tree that implements it * * As byproducts, we also return in *qual and *indexqual the qual lists @@ -4836,7 +4955,7 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples) /* * bitmap_subplan_mark_shared - * Set isshared flag in bitmap subplan so that it will be created in + * Set isshared flag in bitmap subplan so that it will be created in * shared memory. */ static void @@ -5002,6 +5121,24 @@ make_bitmap_heapscan(List *qptlist, return node; } +static BitmapCount * +make_bitmap_count(List *qptlist, + Plan *lefttree, + List *bitmapqualorig, + Index scanrelid) +{ + BitmapCount *node = makeNode(BitmapCount); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->lefttree = lefttree; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->bitmapqualorig = bitmapqualorig; + + return node; +} + static TidScan * make_tidscan(List *qptlist, List *qpqual, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index f99257b..3e412f3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1423,6 +1423,162 @@ inheritance_planner(PlannerInfo *root) SS_assign_special_param(root))); } +/* + * find_count_aggs_walker + * Walks the expression tree 'node' and searches for COUNT aggregates + * that might be computable by a bitmap count scan, returning them + * in the 'context' list. + */ +static bool +find_count_aggs_walker(Node *node, List **context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *aggref = (Aggref *) node; + TargetEntry *curTarget; + + Assert(aggref->agglevelsup == 0); + + /* + * We might implement the optimization when a FILTER clause is present + * by adding the filter to the quals of the generated scan. For + * now, just punt. + */ + if (aggref->aggfilter != NULL) + return true; + + /* This optimization is only supported for count(*). */ + if (aggref->aggfnoid != 2803) + { + return true; + } + Assert(aggref->args == NIL); + + curTarget = aggref->args ? (TargetEntry *) linitial(aggref->args) : NULL; + + if (curTarget && contain_mutable_functions((Node *) curTarget->expr)) + return true; /* not potentially indexable */ + + if (curTarget && type_is_rowtype(exprType((Node *) curTarget->expr))) + return true; /* IS NOT NULL would have weird semantics */ + + *context = lappend(*context, aggref); + + /* + * We need not recurse into the argument, since it can't contain any + * aggregates. + */ + return false; + } + Assert(!IsA(node, SubLink)); + return expression_tree_walker(node, find_count_aggs_walker, + (void *) context); +} + +/* + * preprocess_count_aggregates + * + * Checks to see whether the query contains COUNT aggregate functions that might + * be optimizable via bitmap index scans. If it does, create a bitmap count path + * and add it to the (UPPERREL_GROUP_AGG, NULL) upperrel. + * + * This must be called by grouping_planner() after query_planner(), because the + * simple rel and rte arrays must be initialized already.
* + */ +static void +preprocess_count_aggregates(PlannerInfo *root, List *tlist) +{ + List *agg_list = NIL; + Query *parse = root->parse; + FromExpr *jtnode; + RangeTblRef *rtr; + RangeTblEntry *rte; + RelOptInfo *rel; + BitmapCountPath *path; + + if (find_count_aggs_walker((Node *) tlist, &agg_list)) + return; /* found an unsupported aggregate */ + if (list_length(agg_list) != 1 || list_length(tlist) != 1) + { + /* + * The current implementation only handles a single count(*) in the targetlist. + */ + return; + } + + Assert(!parse->setOperations); /* shouldn't get here if a setop */ + Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ + + /* + * Reject unoptimizable cases. + * + * We don't handle GROUP BY or windowing, because our current + * implementations of grouping require looking at all the rows anyway, and + * so there's not much point in optimizing COUNT. + */ + if (parse->groupClause || list_length(parse->groupingSets) > 1 || + parse->hasWindowFuncs) + return; + + /* + * Reject if query contains any CTEs; there's no way to build an indexscan + * on one so we couldn't succeed here. (If the CTEs are unreferenced, + * that's not true, but it doesn't seem worth expending cycles to check.) + */ + if (parse->cteList) + return; + + /* + * We also restrict the query to reference exactly one table, since join + * conditions can't be handled reasonably. (We could perhaps handle a + * query containing cartesian-product joins, but it hardly seems worth the + * trouble.) However, the single table could be buried in several levels + * of FromExpr due to subqueries. Note the "single" table could be an + * inheritance parent, too, including the case of a UNION ALL subquery + * that's been flattened to an appendrel. + */ + jtnode = parse->jointree; + while (IsA(jtnode, FromExpr)) + { + if (list_length(jtnode->fromlist) != 1) + return; + jtnode = linitial(jtnode->fromlist); + } + if (!IsA(jtnode, RangeTblRef)) + return; + + rtr = (RangeTblRef *) jtnode; + rte = planner_rt_fetch(rtr->rtindex, root); + + if (rte->rtekind == RTE_RELATION + && rte->tablesample == NULL + && rte->relkind == RELKIND_RELATION) + { + /* ordinary relation, ok */ + } + else + return; + + Assert(root->simple_rel_array); + rel = root->simple_rel_array[rtr->rtindex]; + + Assert(rel); + + path = consider_bitmap_count_path(root, rel); + if (path) + { + Path *finalPath = apply_projection_to_path(root, rel, (Path *) path, + create_pathtarget(root, tlist)); + RelOptInfo *parentRel = fetch_upper_rel(root, + UPPERREL_GROUP_AGG, NULL); + + add_path(parentRel, finalPath); + } +} + /*-------------------- * grouping_planner * Perform planning steps related to grouping, aggregation, etc. @@ -1684,6 +1839,12 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel = query_planner(root, tlist, standard_qp_callback, &qp_extra); + /* Preprocess COUNT aggregates, if any */ + if (parse->hasAggs) + { + preprocess_count_aggregates(root, tlist); + } + /* * Convert the query's result tlist into PathTarget format.
* diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 4e3f6ee..f504e1e 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -523,6 +523,19 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->bitmapqualorig, rtoffset); } break; + case T_BitmapCount: + { + BitmapCount *splan = (BitmapCount *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, rtoffset); + splan->bitmapqualorig = + fix_scan_list(root, splan->bitmapqualorig, rtoffset); + } + break; case T_TidScan: { TidScan *splan = (TidScan *) plan; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index db0e5b3..a7a5947 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2363,6 +2363,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_BitmapCount: + finalize_primnode((Node *) ((BitmapCount *) plan)->bitmapqualorig, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_TidScan: finalize_primnode((Node *) ((TidScan *) plan)->tidquals, &context); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 999ebce..6f345e4 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1093,6 +1093,39 @@ create_bitmap_heap_path(PlannerInfo *root, } /* + * create_bitmap_count_path + * Creates a path node for a bitmap count. + * + * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes. + * 'required_outer' is the set of outer relids for a parameterized path. + */ +BitmapCountPath * +create_bitmap_count_path(PlannerInfo *root, + RelOptInfo *rel, + Path *bitmapqual, + Relids required_outer) +{ + BitmapCountPath *pathnode = makeNode(BitmapCountPath); + + pathnode->path.pathtype = T_BitmapCount; + pathnode->path.parent = rel; + pathnode->path.pathtarget = rel->reltarget; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.pathkeys = NIL; /* always unordered */ + + pathnode->bitmapqual = bitmapqual; + + cost_bitmap_count(&pathnode->path, root, rel, + pathnode->path.param_info, + bitmapqual); + return pathnode; +} + +/* * create_bitmap_and_path * Creates a path node representing a BitmapAnd. 
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 3055b48..2aae616 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3237,6 +3237,9 @@ set_plan_disabling_options(const char *arg, GucContext context, GucSource source
 			case 'b':			/* bitmapscan */
 				tmp = "enable_bitmapscan";
 				break;
+			case 'c':			/* bitmapcount */
+				tmp = "enable_bitmapcount";
+				break;
 			case 't':			/* tidscan */
 				tmp = "enable_tidscan";
 				break;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8b5f064..0a325d2 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -841,6 +841,15 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 	{
+		{"enable_bitmapcount", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the planner's use of bitmap count plans."),
+			NULL
+		},
+		&enable_bitmapcount,
+		true,
+		NULL, NULL, NULL
+	},
+	{
 		{"enable_tidscan", PGC_USERSET, QUERY_TUNING_METHOD,
 			gettext_noop("Enables the planner's use of TID scan plans."),
 			NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 8a93bdc..01710d0 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -281,6 +281,7 @@
 # - Planner Method Configuration -
 
+#enable_bitmapcount = on
 #enable_bitmapscan = on
 #enable_hashagg = on
 #enable_hashjoin = on
 #enable_indexscan = on
diff --git a/src/include/executor/nodeBitmapCount.h b/src/include/executor/nodeBitmapCount.h
new file mode 100644
index 0000000..e32ef56
--- /dev/null
+++ b/src/include/executor/nodeBitmapCount.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeBitmapCount.h
+ *	  prototypes for nodeBitmapCount.c
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/executor/nodeBitmapCount.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NODEBITMAPCOUNT_H
+#define NODEBITMAPCOUNT_H
+
+#include "nodes/execnodes.h"
+
+extern BitmapCountState *ExecInitBitmapCount(BitmapCount *node,
+					 EState *estate, int eflags);
+extern TupleTableSlot *ExecBitmapCount(BitmapCountState *node);
+extern void ExecEndBitmapCount(BitmapCountState *node);
+extern void ExecReScanBitmapCount(BitmapCountState *node);
+
+#endif							/* NODEBITMAPCOUNT_H */
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index 465c58e..8ed4317 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -28,4 +28,5 @@ extern void ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
 					  shm_toc *toc);
 
+extern void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
 #endif							/* NODEBITMAPHEAPSCAN_H */
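With the wiring above, the new plan type can be suppressed either per session through the GUC or instance-wide through the existing plan-disabling switch, for example (a sketch):

-- per session:
SET enable_bitmapcount = off;
-- for all sessions, at postmaster start:
--   postgres -fc ...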
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 11a6850..b428e6d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1305,6 +1305,27 @@ typedef struct BitmapHeapScanState
 	ParallelBitmapHeapState *pstate;
 } BitmapHeapScanState;
 
+/*
+ * BitmapCountState information
+ *
+ *		bitmapqualorig	   execution state for bitmapqualorig expressions
+ *		exact_pages		   total number of exact pages retrieved
+ *		lossy_pages		   total number of lossy pages retrieved
+ *		bcs_finished	   true when the bitmap count scan is finished
+ *		bcs_VMBuffer	   buffer for visibility map testing
+ *		bcs_HeapFetches    number of pages fetched from the heap
+ */
+typedef struct BitmapCountState
+{
+	ScanState	ss;				/* its first field is NodeTag */
+	ExprState  *bitmapqualorig;
+	long		exact_pages;
+	long		lossy_pages;
+	bool		bcs_finished;
+	Buffer		bcs_VMBuffer;
+	long		bcs_HeapFetches;
+} BitmapCountState;
+
 /* ----------------
  *	 TidScanState information
  *
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 963ce45..b72c054 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -57,6 +57,7 @@ typedef enum NodeTag
 	T_IndexOnlyScan,
 	T_BitmapIndexScan,
 	T_BitmapHeapScan,
+	T_BitmapCount,
 	T_TidScan,
 	T_SubqueryScan,
 	T_FunctionScan,
@@ -108,6 +109,7 @@ typedef enum NodeTag
 	T_IndexOnlyScanState,
 	T_BitmapIndexScanState,
 	T_BitmapHeapScanState,
+	T_BitmapCountState,
 	T_TidScanState,
 	T_SubqueryScanState,
 	T_FunctionScanState,
@@ -220,6 +222,7 @@ typedef enum NodeTag
 	T_BitmapHeapPath,
 	T_BitmapAndPath,
 	T_BitmapOrPath,
+	T_BitmapCountPath,
 	T_TidPath,
 	T_SubqueryScanPath,
 	T_ForeignPath,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 6e531b6..320904e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -451,6 +451,17 @@ typedef struct BitmapHeapScan
 } BitmapHeapScan;
 
 /* ----------------
+ *		bitmap count node
+ *		For now, this carries the same data as the bitmap heap scan node.
+ * ----------------
+ */
+typedef struct BitmapCount
+{
+	Scan		scan;
+	List	   *bitmapqualorig;
+} BitmapCount;
+
+/* ----------------
 *		tid scan node
 *
 * tidquals is an implicitly OR'ed list of qual expressions of the form
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 8930edf..6bd4194 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1027,6 +1027,16 @@ typedef struct BitmapHeapPath
 } BitmapHeapPath;
 
 /*
+ * BitmapCountPath is similar to BitmapHeapPath, but instead of performing
+ * a heap scan, it merely counts the matching tuples and returns the count.
+ */
+typedef struct BitmapCountPath
+{
+	Path		path;
+	Path	   *bitmapqual;		/* IndexPath, BitmapAndPath, BitmapOrPath */
+} BitmapCountPath;
+
+/*
 * BitmapAndPath represents a BitmapAnd plan node; it can only appear as
 * part of the substructure of a BitmapHeapPath.
The Path structure is * a bit more heavyweight than we really need for this, but for simplicity diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index 87f4bb7..939ec6e 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -62,6 +62,8 @@ extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b); extern bool tbm_is_empty(const TIDBitmap *tbm); +extern double tbm_estimate_lossy_pages(long maxbytes, double total_pages); + extern TBMIterator *tbm_begin_iterate(TIDBitmap *tbm); extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm); extern TBMIterateResult *tbm_iterate(TBMIterator *iterator); diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index d9a9b12..e985137 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -59,6 +59,7 @@ extern bool enable_seqscan; extern bool enable_indexscan; extern bool enable_indexonlyscan; extern bool enable_bitmapscan; +extern bool enable_bitmapcount; extern bool enable_tidscan; extern bool enable_sort; extern bool enable_hashagg; @@ -81,6 +82,9 @@ extern void cost_index(IndexPath *path, PlannerInfo *root, extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info, Path *bitmapqual, double loop_count); +extern void cost_bitmap_count(Path *path, PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info, + Path *bitmapqual); extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root); extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root); extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec); @@ -190,7 +194,7 @@ extern void set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, - Path *bitmapqual, int loop_count, Cost *cost, double *tuple); + Path *bitmapqual, int loop_count, Cost *cost, double *tuple); /* * prototypes for clausesel.c diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index c72c7e0..d246573 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -49,6 +49,10 @@ extern IndexPath *create_index_path(PlannerInfo *root, Relids required_outer, double loop_count, bool partial_path); +extern BitmapCountPath *create_bitmap_count_path(PlannerInfo *root, + RelOptInfo *rel, + Path *bitmapqual, + Relids required_outer); extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 25fe78c..195caea 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -57,7 +57,7 @@ extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel); extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages); extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, - Path *bitmapqual); + Path *bitmapqual); #ifdef OPTIMIZER_DEBUG extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); @@ -67,6 +67,8 @@ extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); * indxpath.c * routines to generate index paths */ +extern BitmapCountPath *consider_bitmap_count_path(PlannerInfo *root, + RelOptInfo *rel); extern void 
create_index_paths(PlannerInfo *root, RelOptInfo *rel); extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, diff --git a/src/test/regress/expected/bitmapcount.out b/src/test/regress/expected/bitmapcount.out new file mode 100644 index 0000000..bc866a4 --- /dev/null +++ b/src/test/regress/expected/bitmapcount.out @@ -0,0 +1,169 @@ +-- Test bitmap count +set enable_bitmapcount to on; +set enable_bitmapscan to off; +set enable_seqscan to off; +set enable_indexscan to off; +set enable_indexonlyscan to off; +-- Basic bitmap count scan +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + QUERY PLAN +--------------------------------------------------------------------------- + Bitmap Count on tenk1 + Recheck Cond: (((thousand = 42) OR (thousand = 99)) AND (hundred = 42)) + -> BitmapAnd + -> BitmapOr + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 99) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) +(10 rows) + +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + count +------- + 10 +(1 row) + +explain (costs off) +select count(*) from tenk1 + where ( hundred = 42 and thousand = 42 ) or thousand = 99; + QUERY PLAN +--------------------------------------------------------------------------- + Bitmap Count on tenk1 + Recheck Cond: (((thousand = 42) AND (hundred = 42)) OR (thousand = 99)) + -> BitmapOr + -> BitmapAnd + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 99) +(10 rows) + +select count(*) from tenk1 + where ( hundred = 42 and thousand = 42 ) or thousand = 99; + count +------- + 20 +(1 row) + +explain (costs off) +select count(*) from tenk1 + where (hundred = 42 or thousand = 42) or thousand = 99; + QUERY PLAN +------------------------------------------------------------------------ + Bitmap Count on tenk1 + Recheck Cond: ((hundred = 42) OR (thousand = 42) OR (thousand = 99)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 99) +(9 rows) + +select count(*) from tenk1 + where (hundred = 42 or thousand = 42) or thousand = 99; + count +------- + 110 +(1 row) + +explain (costs off) +select count(*) from tenk1 + where hundred = 42 and thousand = 42 and unique1 = 10; + QUERY PLAN +------------------------------------------------------------------------- + Bitmap Count on tenk1 + Recheck Cond: ((thousand = 42) AND (hundred = 42) AND (unique1 = 10)) + -> BitmapAnd + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 = 10) +(9 rows) + +select count(*) from tenk1 + where hundred = 42 and thousand = 42 and unique1 = 10; + count +------- + 0 +(1 row) + +-- Pseudoconstant qual +explain (costs off) +select count(*) from tenk1 + where hundred = 42 and (thousand = 42 and thousand = 99); + QUERY PLAN +------------------------------------------------------------------- + Aggregate + -> Result + One-Time Filter: false + -> Bitmap Heap Scan on tenk1 + 
Recheck Cond: ((thousand = 42) AND (hundred = 42)) + -> BitmapAnd + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = 42) + -> Bitmap Index Scan on tenk1_hundred + Index Cond: (hundred = 42) +(10 rows) + +-- Test lossy bitmap & visibility map usage +CREATE TABLE bmcounttest (a int, b int, t text) WITH (autovacuum_enabled = false); +INSERT INTO bmcounttest + SELECT (r%53), (r%59), 'foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo' + FROM generate_series(1,70000) r; +CREATE INDEX i_bmtest_a ON bmcounttest(a); +CREATE INDEX i_bmtest_b ON bmcounttest(b); +-- Test bitmap-and and bitmap-or. +SELECT count(*) FROM bmcounttest WHERE a = 1 AND b = 1; + count +------- + 23 +(1 row) + +SELECT count(*) FROM bmcounttest WHERE a = 1 OR b = 1; + count +------- + 2485 +(1 row) + +-- Lower work_mem to trigger use of lossy bitmaps +set work_mem = 64; +SELECT count(*) FROM bmcounttest WHERE a = 1 AND b = 1; + count +------- + 23 +(1 row) + +SELECT count(*) FROM bmcounttest WHERE a = 1 OR b = 1; + count +------- + 2485 +(1 row) + +-- Populate visibility map +vacuum analyze bmcounttest; +SELECT count(*) FROM bmcounttest WHERE a = 1 AND b = 1; + count +------- + 23 +(1 row) + +SELECT count(*) FROM bmcounttest WHERE a = 1 OR b = 1; + count +------- + 2485 +(1 row) + +-- clean up +DROP TABLE bmcounttest; diff --git a/src/test/regress/expected/create_am.out b/src/test/regress/expected/create_am.out index 1b464aa..b27ea51 100644 --- a/src/test/regress/expected/create_am.out +++ b/src/test/regress/expected/create_am.out @@ -41,6 +41,7 @@ DROP INDEX grect2ind; SET enable_seqscan = OFF; SET enable_indexscan = ON; SET enable_bitmapscan = OFF; +SET enable_bitmapcount = OFF; EXPLAIN (COSTS OFF) SELECT * FROM fast_emp4000 WHERE home_base @ '(200,200),(2000,1000)'::box diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 26cd059..e2b5676 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -92,6 +92,7 @@ CREATE INDEX sp_radix_ind ON radix_text_tbl USING spgist (t); SET enable_seqscan = ON; SET enable_indexscan = OFF; SET enable_bitmapscan = OFF; +SET enable_bitmapcount = OFF; SELECT * FROM fast_emp4000 WHERE home_base @ '(200,200),(2000,1000)'::box ORDER BY (home_base[0])[0]; diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index a811265..0124304 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -16,6 +16,7 @@ SET enable_seqscan TO on; SET enable_indexscan TO on; -- for the moment, we don't want index-only scans here SET enable_indexonlyscan TO off; +SET enable_bitmapcount TO off; -- wait to let any prior tests finish dumping out stats; -- else our messages might get lost due to contention SELECT pg_sleep_for('2 seconds'); diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 568b783..05010ec 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -72,6 +72,7 @@ select count(*) >= 0 as ok from pg_prepared_xacts; select name, setting from pg_settings where name like 'enable%'; name | setting ----------------------+--------- + enable_bitmapcount | on enable_bitmapscan | on enable_gathermerge | on enable_hashagg | on @@ -84,7 +85,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(12 rows) +(13 
rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index b2fc9e2..e170be9 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -120,6 +120,7 @@ create index wowidx on test_tsvector using gist (a); SET enable_seqscan=OFF; SET enable_indexscan=ON; SET enable_bitmapscan=OFF; +SET enable_bitmapcount=OFF; explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; QUERY PLAN ------------------------------------------------------- diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 9f95b01..145e829 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -79,7 +79,7 @@ ignore: random # ---------- # Another group of parallel tests # ---------- -test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace prepared_xacts delete +test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates bitmapcount transactions random portals arrays btree_index hash_index update namespace prepared_xacts delete # ---------- # Another group of parallel tests diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index e026b7c..58086c3 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -91,6 +91,7 @@ test: union test: case test: join test: aggregates +test: bitmapcount test: transactions ignore: random test: random diff --git a/src/test/regress/sql/bitmapcount.sql b/src/test/regress/sql/bitmapcount.sql new file mode 100644 index 0000000..20ee9c5 --- /dev/null +++ b/src/test/regress/sql/bitmapcount.sql @@ -0,0 +1,66 @@ +-- Test bitmap count + +set enable_bitmapcount to on; +set enable_bitmapscan to off; +set enable_seqscan to off; +set enable_indexscan to off; +set enable_indexonlyscan to off; + +-- Basic bitmap count scan +EXPLAIN (COSTS OFF) +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); +SELECT count(*) FROM tenk1 + WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); + +explain (costs off) +select count(*) from tenk1 + where ( hundred = 42 and thousand = 42 ) or thousand = 99; +select count(*) from tenk1 + where ( hundred = 42 and thousand = 42 ) or thousand = 99; + +explain (costs off) +select count(*) from tenk1 + where (hundred = 42 or thousand = 42) or thousand = 99; +select count(*) from tenk1 + where (hundred = 42 or thousand = 42) or thousand = 99; + +explain (costs off) +select count(*) from tenk1 + where hundred = 42 and thousand = 42 and unique1 = 10; +select count(*) from tenk1 + where hundred = 42 and thousand = 42 and unique1 = 10; + +-- Pseudoconstant qual +explain (costs off) +select count(*) from tenk1 + where hundred = 42 and (thousand = 42 and thousand = 99); + +-- Test lossy bitmap & visibility map usage +CREATE TABLE bmcounttest (a int, b int, t text) WITH (autovacuum_enabled = false); + +INSERT INTO bmcounttest + SELECT (r%53), (r%59), 'foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo' + FROM generate_series(1,70000) r; + +CREATE INDEX i_bmtest_a ON bmcounttest(a); +CREATE INDEX i_bmtest_b ON bmcounttest(b); + +-- Test 
bitmap-and and bitmap-or. +SELECT count(*) FROM bmcounttest WHERE a = 1 AND b = 1; +SELECT count(*) FROM bmcounttest WHERE a = 1 OR b = 1; + +-- Lower work_mem to trigger use of lossy bitmaps +set work_mem = 64; + +SELECT count(*) FROM bmcounttest WHERE a = 1 AND b = 1; +SELECT count(*) FROM bmcounttest WHERE a = 1 OR b = 1; + +-- Populate visibility map +vacuum analyze bmcounttest; + +SELECT count(*) FROM bmcounttest WHERE a = 1 AND b = 1; +SELECT count(*) FROM bmcounttest WHERE a = 1 OR b = 1; + +-- clean up +DROP TABLE bmcounttest; diff --git a/src/test/regress/sql/create_am.sql b/src/test/regress/sql/create_am.sql index 2f116d9..86eddea 100644 --- a/src/test/regress/sql/create_am.sql +++ b/src/test/regress/sql/create_am.sql @@ -44,6 +44,7 @@ DROP INDEX grect2ind; SET enable_seqscan = OFF; SET enable_indexscan = ON; SET enable_bitmapscan = OFF; +SET enable_bitmapcount = OFF; EXPLAIN (COSTS OFF) SELECT * FROM fast_emp4000 diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 1648072..dcf09f3 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -133,6 +133,7 @@ CREATE INDEX sp_radix_ind ON radix_text_tbl USING spgist (t); SET enable_seqscan = ON; SET enable_indexscan = OFF; SET enable_bitmapscan = OFF; +SET enable_bitmapcount = OFF; SELECT * FROM fast_emp4000 WHERE home_base @ '(200,200),(2000,1000)'::box diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index b3e2efa..e5c81bf 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -13,6 +13,7 @@ SET enable_seqscan TO on; SET enable_indexscan TO on; -- for the moment, we don't want index-only scans here SET enable_indexonlyscan TO off; +SET enable_bitmapcount TO off; -- wait to let any prior tests finish dumping out stats; -- else our messages might get lost due to contention diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index e4b21f8..5de07fc 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -57,6 +57,7 @@ create index wowidx on test_tsvector using gist (a); SET enable_seqscan=OFF; SET enable_indexscan=ON; SET enable_bitmapscan=OFF; +SET enable_bitmapcount=OFF; explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
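Beyond the plan shapes checked above, the instrumentation added to BitmapCountState (heap fetches plus exact and lossy page counts) is visible in EXPLAIN (ANALYZE) output, which is the most direct way to confirm that the lossy-bitmap and visibility-map code paths are exercised. For instance, before the final DROP one could run (a sketch; the counters vary with work_mem and table contents, so no expected output is given):

set enable_bitmapcount to on;
set enable_bitmapscan to off;
set enable_seqscan to off;
set work_mem = 64;
EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF)
SELECT count(*) FROM bmcounttest WHERE a = 1 OR b = 1;
-- look for the "Heap Fetches" line and the exact/lossy page counts
reset work_mem;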