From 57235546ea9a02c4d68496df1c584b93d5a093d9 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Mon, 8 Jun 2020 20:45:08 +0200 Subject: [PATCH v37 5/6] Btree implementation of skipping Implementation of Index Skip Scan for Btree. To make it suitable both when there is a small number of distinct values and when there is a significant number of distinct values, the following approach is taken: instead of searching from the root for every value, we first search for it on the current page, and only if it is not found there continue searching from the root. Author: Jesper Pedersen, Dmitry Dolgov Reviewed-by: Thomas Munro, David Rowley, Floris Van Nee, Kyotaro Horiguchi, Tomas Vondra, Peter Geoghegan --- src/backend/access/nbtree/nbtree.c | 13 + src/backend/access/nbtree/nbtsearch.c | 506 ++++++++++++- src/include/access/nbtree.h | 7 + src/test/regress/expected/join.out | 3 + src/test/regress/expected/select_distinct.out | 666 ++++++++++++++++++ src/test/regress/expected/sysviews.out | 2 +- src/test/regress/sql/join.sql | 5 + src/test/regress/sql/select_distinct.sql | 286 ++++++++ 8 files changed, 1486 insertions(+), 2 deletions(-) diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index c822b49a71..8156660585 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -136,6 +136,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = btbulkdelete; amroutine->amvacuumcleanup = btvacuumcleanup; amroutine->amcanreturn = btcanreturn; + amroutine->amskip = btskip; amroutine->amcostestimate = btcostestimate; amroutine->amoptions = btoptions; amroutine->amproperty = btproperty; @@ -386,6 +387,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys) */ so->currTuples = so->markTuples = NULL; + so->skipScanKey = NULL; + scan->xs_itupdesc = RelationGetDescr(rel); scan->opaque = so; @@ -453,6 +456,16 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, 
_bt_preprocess_array_keys(scan); } +/* + * btskip() -- skip to the beginning of the next key prefix, via _bt_skip() + */ +bool +btskip(IndexScanDesc scan, ScanDirection direction, + ScanDirection indexdir, int prefix) +{ + return _bt_skip(scan, direction, indexdir, prefix); +} + /* * btendscan() -- close down a scan */ diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 8f6575fdf1..10704eb3d5 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -45,7 +45,11 @@ static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, static Buffer _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot); static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); static inline void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir); - +static inline void _bt_update_skip_scankeys(IndexScanDesc scan, + Relation indexRel); +static inline bool _bt_scankey_within_page(IndexScanDesc scan, + BTScanInsert key, + Buffer buf); /* * _bt_drop_lock_and_maybe_pin() @@ -1486,6 +1490,452 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) return true; } +/* + * _bt_skip() -- Skip items that have the same prefix as the most recently + * fetched index tuple. + * + * The current position is set so that a subsequent call to _bt_next will + * fetch the first tuple that differs in the leading 'prefix' keys. 
+ * + * There are four different kinds of skipping (depending on dir and + * indexdir) that are important to distinguish, especially in the presence + * of an index condition: + * + * * Advancing forward and reading forward + * simple scan + * + * * Advancing forward and reading backward + * scan inside a cursor fetching backward, when skipping is necessary + * right from the start + * + * * Advancing backward and reading forward + * scan with order by desc inside a cursor fetching forward, when + * skipping is necessary right from the start + * + * * Advancing backward and reading backward + * simple scan with order by desc + * + * The current page is searched for the next unique value. If none is found + * we will do a scan from the root in order to find the next page with + * a unique value. Returns true with xs_itup/xs_heaptid set, else false. + */ +bool +_bt_skip(IndexScanDesc scan, ScanDirection dir, + ScanDirection indexdir, int prefix) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTStack stack; + Buffer buf; + OffsetNumber offnum; + BTScanPosItem *currItem; + Relation indexRel = scan->indexRelation; + bool scanstart = !BTScanPosIsValid(so->currPos); + + /* We want to return tuples, and we need a starting point */ + Assert(scan->xs_want_itup); + Assert(scan->xs_itup); + + if (so->numKilled > 0) + _bt_killitems(scan); + + /* If skipScanKey is NULL then we initialize it with _bt_mkscankey */ + if (so->skipScanKey == NULL) + { + so->skipScanKey = _bt_mkscankey(indexRel, scan->xs_itup); + so->skipScanKey->keysz = prefix; + so->skipScanKey->scantid = NULL; + } + so->skipScanKey->nextkey = ScanDirectionIsForward(dir); + _bt_update_skip_scankeys(scan, indexRel); + + /* Check if the next unique key can be found within the current page. + * Since we do not lock the current page between jumps, it's possible + * that it was split since the last time we saw it. This is fine in + * case of scanning forward, since pages split to the right and we are + * still on the left most page. 
In case of scanning backwards it's + * possible to lose some pages and we need to remember the previous + * page, and then follow the right link from the current page until we + * find the original one. + * + * Since the whole idea of checking the current page is to protect + * ourselves and make more performant statistic mismatch case when + * there are too many distinct values for jumping, it's not clear if + * the complexity of this solution in case of backward scan is + * justified, so for now just avoid it. + */ + if (BufferIsValid(so->currPos.buf) && ScanDirectionIsForward(dir)) + { + _bt_lockbuf(indexRel, so->currPos.buf, BT_READ); + + if (_bt_scankey_within_page(scan, so->skipScanKey, so->currPos.buf)) + { + bool keyFound = false; + + offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, so->currPos.buf); + + /* Lock the page for SERIALIZABLE transactions */ + PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(so->currPos.buf), + scan->xs_snapshot); + + /* We know in which direction to look */ + _bt_initialize_more_data(so, dir); + + /* Now read the data */ + keyFound = _bt_readpage(scan, dir, offnum); + + _bt_unlockbuf(indexRel, so->currPos.buf); + ReleaseBuffer(so->currPos.buf); + so->currPos.buf = InvalidBuffer; + + if (keyFound) + { + /* set IndexTuple */ + currItem = &so->currPos.items[so->currPos.itemIndex]; + scan->xs_heaptid = currItem->heapTid; + scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); + return true; + } + } + else + _bt_unlockbuf(indexRel, so->currPos.buf); + } + + if (BufferIsValid(so->currPos.buf)) + { + ReleaseBuffer(so->currPos.buf); + so->currPos.buf = InvalidBuffer; + } + + /* + * We haven't found scan key within the current page, so let's scan from + * the root. 
Use _bt_search and _bt_binsrch to get the buffer and offset + * number + */ + stack = _bt_search(scan->indexRelation, so->skipScanKey, + &buf, BT_READ, scan->xs_snapshot); + _bt_freestack(stack); + so->currPos.buf = buf; + offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf); + + /* Lock the page for SERIALIZABLE transactions */ + PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(buf), + scan->xs_snapshot); + + /* We know in which direction to look */ + _bt_initialize_more_data(so, dir); + + /* + * Simplest case is when both directions are forward, when we are already + * at the next distinct key at the beginning of the series (so everything + * else would be done in _bt_readpage) + * + * The case when both directions are backwards is also simple, but we need + * to go one step back, since we need a last element from the previous + * series. + */ + if ((ScanDirectionIsBackward(dir) && ScanDirectionIsBackward(indexdir)) || + (ScanDirectionIsForward(dir) && ScanDirectionIsBackward(indexdir) && scanstart)) + offnum = OffsetNumberPrev(offnum); + + /* + * Advance backward but read forward. At this moment we are at the next + * distinct key at the beginning of the series. In case if scan just + * started, we can read forward without doing anything else. Otherwise + * find previous distinct key and the beginning of its series and read + * forward from there. To do so, go back one step, perform binary search + * to find the first item in the series and let _bt_readpage do everything + * else. 
+ */ + else if (ScanDirectionIsBackward(dir) && ScanDirectionIsForward(indexdir) && !scanstart) + { + /* Reading forward means we expect to see more data on the right */ + so->currPos.moreRight = true; + + offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf); + + /* One step back to find a previous value */ + _bt_readpage(scan, dir, offnum); + + _bt_unlockbuf(indexRel, so->currPos.buf); + if (_bt_next(scan, dir)) + { + _bt_lockbuf(indexRel, so->currPos.buf, BT_READ); + _bt_update_skip_scankeys(scan, indexRel); + + /* + * And now find the last item from the sequence for the + * current value, with the intention to do OffsetNumberNext. As a + * result we end up on a first element from the sequence. NOTE(review): 'buf' used below may no longer equal so->currPos.buf once _bt_next has run; confirm. + */ + if (_bt_scankey_within_page(scan, so->skipScanKey, so->currPos.buf)) + offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf); + else + { + if (BufferIsValid(so->currPos.buf)) + { + /* Before leaving current page, deal with any killed items */ + if (so->numKilled > 0) + _bt_killitems(scan); + + _bt_unlockbuf(indexRel, so->currPos.buf); + ReleaseBuffer(so->currPos.buf); + so->currPos.buf = InvalidBuffer; + } + + stack = _bt_search(scan->indexRelation, so->skipScanKey, + &buf, BT_READ, scan->xs_snapshot); + _bt_freestack(stack); + so->currPos.buf = buf; + offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf); + } + } + else + { + pfree(so->skipScanKey); + so->skipScanKey = NULL; + return false; + } + } + + /* + * Advance forward but read backward. At this moment we are at the next + * distinct key at the beginning of the series. In case if scan just + * started, we can go one step back and read forward without doing + * anything else. Otherwise find the next distinct key and the beginning + * of its series, go one step back and read backward from there. 
+ * + * An interesting situation can happen if one of the distinct keys does not pass + * a corresponding index condition at all. 
In this case reading backward + * can lead to a previous distinct key being found, creating a loop. To + * avoid that check the value to be returned, and jump one more time if + * it's the same as at the beginning. Note that we do not check visibility + * here, and dead tuples could also lead to the same situation. This has to + * be checked on the caller side. + */ + else if (ScanDirectionIsForward(dir) && ScanDirectionIsBackward(indexdir) && !scanstart) + { + IndexTuple startItup = CopyIndexTuple(scan->xs_itup); + bool nextFound = false; + + /* Reading backwards means we expect to see more data on the left */ + so->currPos.moreLeft = true; + + while (!nextFound) + { + IndexTuple itup; + OffsetNumber jumpOffset; + CHECK_FOR_INTERRUPTS(); + + /* + * Find a next index tuple to update scan key. It could be at + * the end, so check for max offset + */ + if (!_bt_readpage(scan, ForwardScanDirection, offnum)) + { + /* + * There's no actually-matching data on this page. Try to + * advance to the next page. Return false if there's no + * matching data at all. 
+ */ + _bt_unlockbuf(indexRel, so->currPos.buf); + if (!_bt_steppage(scan, dir)) + { + pfree(so->skipScanKey); + so->skipScanKey = NULL; + return false; + } + _bt_lockbuf(indexRel, so->currPos.buf, BT_READ); + } + + currItem = &so->currPos.items[so->currPos.firstItem]; + itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); + + scan->xs_itup = itup; + + _bt_update_skip_scankeys(scan, indexRel); + if (BufferIsValid(so->currPos.buf)) + { + /* Before leaving current page, deal with any killed items */ + if (so->numKilled > 0) + _bt_killitems(scan); + + _bt_unlockbuf(indexRel, so->currPos.buf); + ReleaseBuffer(so->currPos.buf); + so->currPos.buf = InvalidBuffer; + } + + stack = _bt_search(scan->indexRelation, so->skipScanKey, + &buf, BT_READ, scan->xs_snapshot); + _bt_freestack(stack); + so->currPos.buf = buf; + + /* + * We need to remember the original offset after the jump, + * since in case of looping this would be the next starting + * point + */ + jumpOffset = offnum = _bt_binsrch(scan->indexRelation, + so->skipScanKey, buf); + offnum = OffsetNumberPrev(offnum); + + if (!_bt_readpage(scan, indexdir, offnum)) + { + /* + * There's no actually-matching data on this page. Try to + * advance to the next page. Return false if there's no + * matching data at all. + */ + _bt_unlockbuf(indexRel, so->currPos.buf); + if (!_bt_steppage(scan, indexdir)) + { + pfree(so->skipScanKey); + so->skipScanKey = NULL; + return false; + } + _bt_lockbuf(indexRel, so->currPos.buf, BT_READ); + } + + currItem = &so->currPos.items[so->currPos.lastItem]; + itup = CopyIndexTuple((IndexTuple) + (so->currTuples + currItem->tupleOffset)); + + /* + * To check if we returned the same tuple, try to find a + * startItup on the current page. For that we need to update + * scankey to match the whole tuple and set nextkey to return + * an exact tuple, not the next one. 
If the tuple we found in + * this way is equal to what we wanted to return, it means we + * are in the loop, return offnum to the original position and + * jump further + * + * Note that to compare tids we need to keep the leaf pinned, + * otherwise there is a danger of vacuum cleaning up relevant + * tuples. + */ + scan->xs_itup = startItup; + _bt_update_skip_scankeys(scan, indexRel); + + so->skipScanKey->keysz = IndexRelationGetNumberOfKeyAttributes(indexRel); + so->skipScanKey->nextkey = false; + + if (_bt_scankey_within_page(scan, so->skipScanKey, so->currPos.buf)) + { + OffsetNumber maxoff, startOffset; + IndexTuple verifiedItup; + Page page = BufferGetPage(so->currPos.buf); + startOffset = _bt_binsrch(scan->indexRelation, + so->skipScanKey, + so->currPos.buf); + + maxoff = PageGetMaxOffsetNumber(page); + + /* Now read the data */ + if (_bt_readpage(scan, ForwardScanDirection, startOffset)) + { + ItemPointer resultTids, verifyTids; + int nresult = 1, + nverify = 1; + + currItem = &so->currPos.items[so->currPos.itemIndex]; + verifiedItup = (IndexTuple) (so->currTuples + currItem->tupleOffset); + + /* + * We need to keep in mind that tuples we deal with + * could be also posting tuples and represent a list of + * tids. + */ + if (BTreeTupleIsPosting(verifiedItup)) + { + nverify = BTreeTupleGetNPosting(verifiedItup); + verifyTids = BTreeTupleGetPosting(verifiedItup); + for (int i = 1; i < nverify; i++) + verifyTids[i] = *BTreeTupleGetPostingN(verifiedItup, i); + } + else + verifyTids = &verifiedItup->t_tid; + + if (BTreeTupleIsPosting(itup)) + { + nresult = BTreeTupleGetNPosting(itup); + resultTids = BTreeTupleGetPosting(itup); + for (int i = 1; i < nresult; i++) + resultTids[i] = *BTreeTupleGetPostingN(itup, i); + } + else + resultTids = &itup->t_tid; + + /* One not equal means they're not equal. NOTE(review): any differing (i, j) pair sets nextFound; confirm this is intended for posting lists. 
*/ + for(int i = 0; i < nverify; i++) + { + for(int j = 0; j < nresult; j++) + { + if (!ItemPointerEquals(&resultTids[j], &verifyTids[i])) + { + nextFound = true; + break; + } + } + } + + if (!nextFound) + offnum = jumpOffset; + } + + if ((offnum > maxoff) && (so->currPos.nextPage == P_NONE)) + { + _bt_unlockbuf(indexRel, so->currPos.buf); + + BTScanPosUnpinIfPinned(so->currPos); + BTScanPosInvalidate(so->currPos); + + pfree(so->skipScanKey); + so->skipScanKey = NULL; + return false; + } + } + else + /* + * If startItup could not be found within the current page, + * assume we found something new + */ + nextFound = true; + + /* Return original scankey options */ + so->skipScanKey->keysz = prefix; + so->skipScanKey->nextkey = ScanDirectionIsForward(dir); + } + } + + /* Now read the data */ + if (!_bt_readpage(scan, indexdir, offnum)) + { + /* + * There's no actually-matching data on this page. Try to advance to + * the next page. Return false if there's no matching data at all. + */ + _bt_unlockbuf(indexRel, so->currPos.buf); + if (!_bt_steppage(scan, dir)) + { + pfree(so->skipScanKey); + so->skipScanKey = NULL; + return false; + } + } + else + /* Drop the lock, and maybe the pin, on the current page */ + _bt_unlockbuf(indexRel, so->currPos.buf); + + /* And set IndexTuple */ + currItem = &so->currPos.items[so->currPos.itemIndex]; + scan->xs_heaptid = currItem->heapTid; + scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); + + so->currPos.moreLeft = true; + so->currPos.moreRight = true; + + return true; +} + /* * _bt_readpage() -- Load data from current index page into so->currPos * @@ -2482,3 +2932,57 @@ _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir) so->numKilled = 0; /* just paranoia */ so->markItemIndex = -1; /* ditto */ } + +/* + * _bt_update_skip_scankeys() -- set up new values for the existing scankeys + * based on the current index tuple + */ +static inline void +_bt_update_skip_scankeys(IndexScanDesc scan, Relation 
indexRel) +{ + TupleDesc itupdesc; + int indnkeyatts, + i; + BTScanOpaque so = (BTScanOpaque) scan->opaque; + ScanKey scankeys = so->skipScanKey->scankeys; + + itupdesc = RelationGetDescr(indexRel); + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRel); + for (i = 0; i < indnkeyatts; i++) /* all key columns, not only the prefix */ + { + Datum datum; + bool null; + int flags; + + datum = index_getattr(scan->xs_itup, i + 1, itupdesc, &null); + flags = (null ? SK_ISNULL : 0) | + (indexRel->rd_indoption[i] << SK_BT_INDOPTION_SHIFT); + scankeys[i].sk_flags = flags; + scankeys[i].sk_argument = datum; + } +} + +/* + * _bt_scankey_within_page() -- check if the provided scankey could be found + * within a page, specified by the buffer. + * + * Scankey nextkey will tell us if we need to find a current key or the next + * one, which affects whether or not it's ok to be equal to the last item on the page. + */ +static inline bool +_bt_scankey_within_page(IndexScanDesc scan, BTScanInsert key, Buffer buf) +{ + OffsetNumber low, high; + Page page = BufferGetPage(buf); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + int high_compare = key->nextkey ? 
0 : 1; + + low = P_FIRSTDATAKEY(opaque); + high = PageGetMaxOffsetNumber(page); + + if (unlikely(high < low)) + return false; /* no data items on the page */ + + return (_bt_compare(scan->indexRelation, key, page, low) > 0 && + _bt_compare(scan->indexRelation, key, page, high) < high_compare); +} diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 65d9698b89..5cffc9b5ae 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -938,6 +938,9 @@ typedef struct BTScanOpaqueData */ int markItemIndex; /* itemIndex, or -1 if not valid */ + /* Work space for _bt_skip */ + BTScanInsert skipScanKey; /* used to control skipping */ + /* keep these last in struct for efficiency */ BTScanPosData currPos; /* current position data */ BTScanPosData markPos; /* marked position, if any */ @@ -1099,6 +1102,8 @@ extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate); extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum); extern bool _bt_first(IndexScanDesc scan, ScanDirection dir); extern bool _bt_next(IndexScanDesc scan, ScanDirection dir); +extern bool _bt_skip(IndexScanDesc scan, ScanDirection dir, + ScanDirection indexdir, int prefix); extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost, Snapshot snapshot); @@ -1123,6 +1128,8 @@ extern void _bt_end_vacuum_callback(int code, Datum arg); extern Size BTreeShmemSize(void); extern void BTreeShmemInit(void); extern bytea *btoptions(Datum reloptions, bool validate); +extern bool btskip(IndexScanDesc scan, ScanDirection dir, + ScanDirection indexdir, int prefix); extern bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index a46b1573bd..a47e9faed2 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4429,6 +4429,8 @@ select d.* from d left join (select * from b 
group by b.id, b.c_id) s -> Seq Scan on d (8 rows) +-- disable index skip scan to prevent it interfering with the plan +set enable_indexskipscan to off; -- similarly, but keying off a DISTINCT clause explain (costs off) select d.* from d left join (select distinct * from b) s @@ -4446,6 +4448,7 @@ select d.* from d left join (select distinct * from b) s -> Seq Scan on d (9 rows) +set enable_indexskipscan to on; -- check join removal works when uniqueness of the join condition is enforced -- by a UNION explain (costs off) diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 11c6f50fbf..ea8c9bee49 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -306,3 +306,669 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +-- index only skip scan +CREATE TABLE distinct_a (a int, b int, c int); +INSERT INTO distinct_a ( + SELECT five, tenthous, 10 FROM + generate_series(1, 5) five, + generate_series(1, 10000) tenthous +); +CREATE INDEX ON distinct_a (a, b); +CREATE INDEX ON distinct_a ((a + 1)); +ANALYZE distinct_a; +SELECT DISTINCT a FROM distinct_a; + a +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT DISTINCT a FROM distinct_a WHERE a = 1; + a +--- + 1 +(1 row) + +SELECT DISTINCT a FROM distinct_a ORDER BY a DESC; + a +--- + 5 + 4 + 3 + 2 + 1 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT DISTINCT a FROM distinct_a; + QUERY PLAN +-------------------------------------------------------- + Index Only Scan using distinct_a_a_b_idx on distinct_a + Skip scan: true +(2 rows) + +-- test index skip scan with a condition on a non unique field +SELECT DISTINCT ON (a) a, b FROM distinct_a WHERE b = 2; + a | b +---+--- + 1 | 2 + 2 | 2 + 3 | 2 + 4 | 2 + 5 | 2 +(5 rows) + +-- test index skip scan backwards +SELECT DISTINCT ON (a) a, b FROM distinct_a ORDER BY a DESC, b DESC; + a | b +---+------- + 5 | 10000 + 4 | 10000 + 3 | 10000 + 2 | 10000 + 1 | 10000 +(5 rows) + 
+-- test index skip scan for expressions +EXPLAIN (COSTS OFF) +SELECT DISTINCT (a + 1) FROM distinct_a; + QUERY PLAN +---------------------------------------------------- + Index Scan using distinct_a_expr_idx on distinct_a + Skip scan: true +(2 rows) + +SELECT DISTINCT (a + 1) FROM distinct_a; + ?column? +---------- + 2 + 3 + 4 + 5 + 6 +(5 rows) + +-- check colums order +CREATE INDEX distinct_a_b_a on distinct_a (b, a); +SELECT DISTINCT a FROM distinct_a WHERE b = 2; + a +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2; + a | b +---+--- + 1 | 2 + 2 | 2 + 3 | 2 + 4 | 2 + 5 | 2 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT DISTINCT a FROM distinct_a WHERE b = 2; + QUERY PLAN +---------------------------------------------------- + Index Only Scan using distinct_a_b_a on distinct_a + Skip scan: true + Index Cond: (b = 2) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2; + QUERY PLAN +---------------------------------------------------- + Index Only Scan using distinct_a_b_a on distinct_a + Skip scan: true + Index Cond: (b = 2) +(3 rows) + +DROP INDEX distinct_a_b_a; +-- test opposite scan/index directions inside a cursor +-- forward/backward +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a, b; +FETCH FROM c; + a | b +---+--- + 1 | 1 +(1 row) + +FETCH BACKWARD FROM c; + a | b +---+--- +(0 rows) + +FETCH 6 FROM c; + a | b +---+--- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +FETCH BACKWARD 6 FROM c; + a | b +---+--- + 5 | 1 + 4 | 1 + 3 | 1 + 2 | 1 + 1 | 1 +(5 rows) + +FETCH 6 FROM c; + a | b +---+--- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +FETCH BACKWARD 6 FROM c; + a | b +---+--- + 5 | 1 + 4 | 1 + 3 | 1 + 2 | 1 + 1 | 1 +(5 rows) + +END; +-- backward/forward +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a DESC, b DESC; +FETCH FROM c; + a | b +---+------- + 5 | 10000 +(1 row) + 
+FETCH BACKWARD FROM c; + a | b +---+--- +(0 rows) + +FETCH 6 FROM c; + a | b +---+------- + 5 | 10000 + 4 | 10000 + 3 | 10000 + 2 | 10000 + 1 | 10000 +(5 rows) + +FETCH BACKWARD 6 FROM c; + a | b +---+------- + 1 | 10000 + 2 | 10000 + 3 | 10000 + 4 | 10000 + 5 | 10000 +(5 rows) + +FETCH 6 FROM c; + a | b +---+------- + 5 | 10000 + 4 | 10000 + 3 | 10000 + 2 | 10000 + 1 | 10000 +(5 rows) + +FETCH BACKWARD 6 FROM c; + a | b +---+------- + 1 | 10000 + 2 | 10000 + 3 | 10000 + 4 | 10000 + 5 | 10000 +(5 rows) + +END; +-- test missing values and skipping from the end +CREATE TABLE distinct_abc(a int, b int, c int); +CREATE INDEX ON distinct_abc(a, b, c); +INSERT INTO distinct_abc + VALUES (1, 1, 1), + (1, 1, 2), + (1, 2, 2), + (1, 2, 3), + (2, 2, 1), + (2, 2, 3), + (3, 1, 1), + (3, 1, 2), + (3, 2, 2), + (3, 2, 3); +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2; + QUERY PLAN +-------------------------------------------------------------- + Index Only Scan using distinct_abc_a_b_c_idx on distinct_abc + Skip scan: true + Index Cond: (c = 2) +(3 rows) + +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2; +FETCH ALL FROM c; + a | b | c +---+---+--- + 1 | 1 | 2 + 3 | 1 | 2 +(2 rows) + +FETCH BACKWARD ALL FROM c; + a | b | c +---+---+--- + 3 | 1 | 2 + 1 | 1 | 2 +(2 rows) + +END; +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2 +ORDER BY a DESC, b DESC; + QUERY PLAN +----------------------------------------------------------------------- + Index Only Scan Backward using distinct_abc_a_b_c_idx on distinct_abc + Skip scan: true + Index Cond: (c = 2) +(3 rows) + +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2 +ORDER BY a DESC, b DESC; +FETCH ALL FROM c; + a | b | c +---+---+--- + 3 | 2 | 2 + 1 | 2 | 2 +(2 rows) + +FETCH BACKWARD ALL FROM c; + a | b | c +---+---+--- + 1 | 2 | 2 + 3 | 2 | 2 +(2 rows) + +END; +DROP TABLE 
distinct_abc; +-- index skip scan +SELECT DISTINCT ON (a) a, b, c +FROM distinct_a ORDER BY a; + a | b | c +---+---+---- + 1 | 1 | 10 + 2 | 1 | 10 + 3 | 1 | 10 + 4 | 1 | 10 + 5 | 1 | 10 +(5 rows) + +SELECT DISTINCT ON (a) a, b, c +FROM distinct_a WHERE a = 1 ORDER BY a; + a | b | c +---+---+---- + 1 | 1 | 10 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a, b, c +FROM distinct_a ORDER BY a; + QUERY PLAN +--------------------------------------------------- + Index Scan using distinct_a_a_b_idx on distinct_a + Skip scan: true +(2 rows) + +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a, b, c +FROM distinct_a WHERE a = 1 ORDER BY a; + QUERY PLAN +--------------------------------------------------- + Index Scan using distinct_a_a_b_idx on distinct_a + Skip scan: true + Index Cond: (a = 1) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT DISTINCT * +FROM distinct_a; + QUERY PLAN +------------------------------ + HashAggregate + Group Key: a, b, c + -> Seq Scan on distinct_a +(3 rows) + +-- check colums order +SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10; + a +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10; + QUERY PLAN +--------------------------------------------------------- + Unique + -> Index Scan using distinct_a_a_b_idx on distinct_a + Index Cond: (b = 2) + Filter: (c = 10) +(4 rows) + +-- check projection case +SELECT DISTINCT a, a FROM distinct_a WHERE b = 2; + a | a +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +SELECT DISTINCT a, 1 FROM distinct_a WHERE b = 2; + a | ?column? 
+---+---------- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +-- test cursor forward/backward movements +BEGIN; +DECLARE c SCROLL CURSOR FOR SELECT DISTINCT a FROM distinct_a; +FETCH FROM c; + a +--- + 1 +(1 row) + +FETCH BACKWARD FROM c; + a +--- +(0 rows) + +FETCH 6 FROM c; + a +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +FETCH BACKWARD 6 FROM c; + a +--- + 5 + 4 + 3 + 2 + 1 +(5 rows) + +FETCH 6 FROM c; + a +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +FETCH BACKWARD 6 FROM c; + a +--- + 5 + 4 + 3 + 2 + 1 +(5 rows) + +END; +DROP TABLE distinct_a; +-- test tuples visibility +CREATE TABLE distinct_visibility (a int, b int); +INSERT INTO distinct_visibility (select a, b from generate_series(1,5) a, generate_series(1, 10000) b); +CREATE INDEX ON distinct_visibility (a, b); +ANALYZE distinct_visibility; +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b; + a | b +---+--- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +DELETE FROM distinct_visibility WHERE a = 2 and b = 1; +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b; + a | b +---+--- + 1 | 1 + 2 | 2 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC; + a | b +---+------- + 5 | 10000 + 4 | 10000 + 3 | 10000 + 2 | 10000 + 1 | 10000 +(5 rows) + +DELETE FROM distinct_visibility WHERE a = 2 and b = 10000; +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC; + a | b +---+------- + 5 | 10000 + 4 | 10000 + 3 | 10000 + 2 | 9999 + 1 | 10000 +(5 rows) + +DROP TABLE distinct_visibility; +-- test page boundaries +CREATE TABLE distinct_boundaries AS + SELECT a, b::int2 b, (b % 2)::int2 c FROM + generate_series(1, 5) a, + generate_series(1,366) b; +CREATE INDEX ON distinct_boundaries (a, b, c); +ANALYZE distinct_boundaries; +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a, b, c from distinct_boundaries +WHERE b >= 1 and c = 0 ORDER BY a, b; + QUERY PLAN 
+---------------------------------------------------------------------------- + Index Only Scan using distinct_boundaries_a_b_c_idx on distinct_boundaries + Skip scan: true + Index Cond: ((b >= 1) AND (c = 0)) +(3 rows) + +SELECT DISTINCT ON (a) a, b, c from distinct_boundaries +WHERE b >= 1 and c = 0 ORDER BY a, b; + a | b | c +---+---+--- + 1 | 2 | 0 + 2 | 2 | 0 + 3 | 2 | 0 + 4 | 2 | 0 + 5 | 2 | 0 +(5 rows) + +DROP TABLE distinct_boundaries; +-- test tuple killing +-- DESC ordering +CREATE TABLE distinct_killed AS + SELECT a, b, b % 2 AS c, 10 AS d + FROM generate_series(1, 5) a, + generate_series(1,1000) b; +CREATE INDEX ON distinct_killed (a, b, c, d); +DELETE FROM distinct_killed where a = 3; +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c,d + FROM distinct_killed ORDER BY a DESC, b DESC; + FETCH FORWARD ALL FROM c; + a | b | c | d +---+------+---+---- + 5 | 1000 | 0 | 10 + 4 | 1000 | 0 | 10 + 2 | 1000 | 0 | 10 + 1 | 1000 | 0 | 10 +(4 rows) + + FETCH BACKWARD ALL FROM c; + a | b | c | d +---+------+---+---- + 1 | 1000 | 0 | 10 + 2 | 1000 | 0 | 10 + 4 | 1000 | 0 | 10 + 5 | 1000 | 0 | 10 +(4 rows) + +COMMIT; +DROP TABLE distinct_killed; +-- regular ordering +CREATE TABLE distinct_killed AS + SELECT a, b, b % 2 AS c, 10 AS d + FROM generate_series(1, 5) a, + generate_series(1,1000) b; +CREATE INDEX ON distinct_killed (a, b, c, d); +DELETE FROM distinct_killed where a = 3; +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c,d + FROM distinct_killed ORDER BY a, b; + FETCH FORWARD ALL FROM c; + a | b | c | d +---+---+---+---- + 1 | 1 | 1 | 10 + 2 | 1 | 1 | 10 + 4 | 1 | 1 | 10 + 5 | 1 | 1 | 10 +(4 rows) + + FETCH BACKWARD ALL FROM c; + a | b | c | d +---+---+---+---- + 5 | 1 | 1 | 10 + 4 | 1 | 1 | 10 + 2 | 1 | 1 | 10 + 1 | 1 | 1 | 10 +(4 rows) + +COMMIT; +DROP TABLE distinct_killed; +-- partial delete +CREATE TABLE distinct_killed AS + SELECT a, b, b % 2 AS c, 10 AS d + FROM generate_series(1, 5) a, + generate_series(1,1000) b; 
+CREATE INDEX ON distinct_killed (a, b, c, d); +DELETE FROM distinct_killed WHERE a = 3 AND b <= 999; +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c,d + FROM distinct_killed ORDER BY a DESC, b DESC; + FETCH FORWARD ALL FROM c; + a | b | c | d +---+------+---+---- + 5 | 1000 | 0 | 10 + 4 | 1000 | 0 | 10 + 3 | 1000 | 0 | 10 + 2 | 1000 | 0 | 10 + 1 | 1000 | 0 | 10 +(5 rows) + + FETCH BACKWARD ALL FROM c; + a | b | c | d +---+------+---+---- + 1 | 1000 | 0 | 10 + 2 | 1000 | 0 | 10 + 3 | 1000 | 0 | 10 + 4 | 1000 | 0 | 10 + 5 | 1000 | 0 | 10 +(5 rows) + +COMMIT; +DROP TABLE distinct_killed; +-- test posting lists +CREATE TABLE distinct_posting (a int, b int, c int); +CREATE INDEX ON distinct_posting (a, b, c); +INSERT INTO distinct_posting + VALUES (1, 1, 1), + (1, 1, 2), + (1, 2, 2), + (1, 2, 3), + (2, 2, 1), + (2, 2, 3), + (3, 1, 1), + (3, 1, 2), + (3, 2, 2), + (3, 2, 3); +INSERT INTO distinct_posting ( + SELECT 1 as a, 1 as b, 1 AS c + FROM generate_series(1,1000) i +); +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c FROM distinct_posting WHERE c = 2 + ORDER BY a DESC, b DESC; + FETCH ALL FROM c; + a | b | c +---+---+--- + 3 | 2 | 2 + 1 | 2 | 2 +(2 rows) + + FETCH BACKWARD ALL FROM c; + a | b | c +---+---+--- + 1 | 2 | 2 + 3 | 2 | 2 +(2 rows) + +COMMIT; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 5cbc2c49cf..a3465bb77e 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -107,7 +107,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(21 rows) +(19 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. 
We can't test their contents in any great detail diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 1403e0ffe7..13e35fda88 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1519,11 +1519,16 @@ explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; +-- disable index skip scan to prevent it interfering with the plan +set enable_indexskipscan to off; + -- similarly, but keying off a DISTINCT clause explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; +set enable_indexskipscan to on; + -- check join removal works when uniqueness of the join condition is enforced -- by a UNION explain (costs off) diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 33102744eb..f732b1c658 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -135,3 +135,289 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; + +-- index only skip scan +CREATE TABLE distinct_a (a int, b int, c int); +INSERT INTO distinct_a ( + SELECT five, tenthous, 10 FROM + generate_series(1, 5) five, + generate_series(1, 10000) tenthous +); +CREATE INDEX ON distinct_a (a, b); +CREATE INDEX ON distinct_a ((a + 1)); +ANALYZE distinct_a; + +SELECT DISTINCT a FROM distinct_a; +SELECT DISTINCT a FROM distinct_a WHERE a = 1; +SELECT DISTINCT a FROM distinct_a ORDER BY a DESC; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT a FROM distinct_a; + +-- test index skip scan with a condition on a non unique field +SELECT DISTINCT ON (a) a, b FROM distinct_a WHERE b = 2; + +-- test index skip scan backwards +SELECT DISTINCT ON (a) a, b FROM distinct_a ORDER BY a DESC, b DESC; + +-- test index skip scan for expressions +EXPLAIN (COSTS OFF) +SELECT DISTINCT (a + 1) FROM 
distinct_a; +SELECT DISTINCT (a + 1) FROM distinct_a; + +-- check colums order +CREATE INDEX distinct_a_b_a on distinct_a (b, a); + +SELECT DISTINCT a FROM distinct_a WHERE b = 2; +SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT a FROM distinct_a WHERE b = 2; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2; + +DROP INDEX distinct_a_b_a; + +-- test opposite scan/index directions inside a cursor +-- forward/backward +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a, b; + +FETCH FROM c; +FETCH BACKWARD FROM c; + +FETCH 6 FROM c; +FETCH BACKWARD 6 FROM c; + +FETCH 6 FROM c; +FETCH BACKWARD 6 FROM c; + +END; + +-- backward/forward +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a DESC, b DESC; + +FETCH FROM c; +FETCH BACKWARD FROM c; + +FETCH 6 FROM c; +FETCH BACKWARD 6 FROM c; + +FETCH 6 FROM c; +FETCH BACKWARD 6 FROM c; + +END; + +-- test missing values and skipping from the end +CREATE TABLE distinct_abc(a int, b int, c int); +CREATE INDEX ON distinct_abc(a, b, c); +INSERT INTO distinct_abc + VALUES (1, 1, 1), + (1, 1, 2), + (1, 2, 2), + (1, 2, 3), + (2, 2, 1), + (2, 2, 3), + (3, 1, 1), + (3, 1, 2), + (3, 2, 2), + (3, 2, 3); + +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2; + +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2; + +FETCH ALL FROM c; +FETCH BACKWARD ALL FROM c; + +END; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2 +ORDER BY a DESC, b DESC; + +BEGIN; +DECLARE c SCROLL CURSOR FOR +SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2 +ORDER BY a DESC, b DESC; + +FETCH ALL FROM c; +FETCH BACKWARD ALL FROM c; + +END; + +DROP TABLE distinct_abc; + +-- index skip scan +SELECT DISTINCT ON (a) a, b, c +FROM distinct_a ORDER BY a; +SELECT DISTINCT ON (a) a, b, c +FROM 
distinct_a WHERE a = 1 ORDER BY a; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a, b, c +FROM distinct_a ORDER BY a; +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a, b, c +FROM distinct_a WHERE a = 1 ORDER BY a; +EXPLAIN (COSTS OFF) +SELECT DISTINCT * +FROM distinct_a; + +-- check colums order +SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10; + +-- check projection case +SELECT DISTINCT a, a FROM distinct_a WHERE b = 2; +SELECT DISTINCT a, 1 FROM distinct_a WHERE b = 2; + +-- test cursor forward/backward movements +BEGIN; +DECLARE c SCROLL CURSOR FOR SELECT DISTINCT a FROM distinct_a; + +FETCH FROM c; +FETCH BACKWARD FROM c; + +FETCH 6 FROM c; +FETCH BACKWARD 6 FROM c; + +FETCH 6 FROM c; +FETCH BACKWARD 6 FROM c; + +END; + +DROP TABLE distinct_a; + +-- test tuples visibility +CREATE TABLE distinct_visibility (a int, b int); +INSERT INTO distinct_visibility (select a, b from generate_series(1,5) a, generate_series(1, 10000) b); +CREATE INDEX ON distinct_visibility (a, b); +ANALYZE distinct_visibility; + +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b; +DELETE FROM distinct_visibility WHERE a = 2 and b = 1; +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b; + +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC; +DELETE FROM distinct_visibility WHERE a = 2 and b = 10000; +SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC; +DROP TABLE distinct_visibility; + +-- test page boundaries +CREATE TABLE distinct_boundaries AS + SELECT a, b::int2 b, (b % 2)::int2 c FROM + generate_series(1, 5) a, + generate_series(1,366) b; + +CREATE INDEX ON distinct_boundaries (a, b, c); +ANALYZE distinct_boundaries; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (a) a, b, c from distinct_boundaries +WHERE b >= 1 and c = 0 ORDER BY a, b; + +SELECT DISTINCT ON (a) a, b, c from distinct_boundaries +WHERE b >= 1 and c = 
0 ORDER BY a, b; + +DROP TABLE distinct_boundaries; + +-- test tuple killing + +-- DESC ordering +CREATE TABLE distinct_killed AS + SELECT a, b, b % 2 AS c, 10 AS d + FROM generate_series(1, 5) a, + generate_series(1,1000) b; + +CREATE INDEX ON distinct_killed (a, b, c, d); + +DELETE FROM distinct_killed where a = 3; + +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c,d + FROM distinct_killed ORDER BY a DESC, b DESC; + FETCH FORWARD ALL FROM c; + FETCH BACKWARD ALL FROM c; +COMMIT; + +DROP TABLE distinct_killed; + +-- regular ordering +CREATE TABLE distinct_killed AS + SELECT a, b, b % 2 AS c, 10 AS d + FROM generate_series(1, 5) a, + generate_series(1,1000) b; + +CREATE INDEX ON distinct_killed (a, b, c, d); + +DELETE FROM distinct_killed where a = 3; + +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c,d + FROM distinct_killed ORDER BY a, b; + FETCH FORWARD ALL FROM c; + FETCH BACKWARD ALL FROM c; +COMMIT; + +DROP TABLE distinct_killed; + +-- partial delete +CREATE TABLE distinct_killed AS + SELECT a, b, b % 2 AS c, 10 AS d + FROM generate_series(1, 5) a, + generate_series(1,1000) b; + +CREATE INDEX ON distinct_killed (a, b, c, d); + +DELETE FROM distinct_killed WHERE a = 3 AND b <= 999; + +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c,d + FROM distinct_killed ORDER BY a DESC, b DESC; + FETCH FORWARD ALL FROM c; + FETCH BACKWARD ALL FROM c; +COMMIT; + +DROP TABLE distinct_killed; + +-- test posting lists +CREATE TABLE distinct_posting (a int, b int, c int); +CREATE INDEX ON distinct_posting (a, b, c); +INSERT INTO distinct_posting + VALUES (1, 1, 1), + (1, 1, 2), + (1, 2, 2), + (1, 2, 3), + (2, 2, 1), + (2, 2, 3), + (3, 1, 1), + (3, 1, 2), + (3, 2, 2), + (3, 2, 3); + +INSERT INTO distinct_posting ( + SELECT 1 as a, 1 as b, 1 AS c + FROM generate_series(1,1000) i +); + +BEGIN; + DECLARE c SCROLL CURSOR FOR + SELECT DISTINCT ON (a) a,b,c FROM distinct_posting WHERE c = 2 + ORDER BY a DESC, b DESC; + FETCH 
ALL FROM c; + + FETCH BACKWARD ALL FROM c; +COMMIT; -- 2.21.0