diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml index 8bd0bad..ed9625e 100644 --- a/doc/src/sgml/btree.sgml +++ b/doc/src/sgml/btree.sgml @@ -200,6 +200,20 @@ planner relies on them for optimization purposes. + + FIXME!!! + To implement distance-ordered (nearest-neighbor) search, we only need + to define a distance operator (usually called <->) with a corresponding + operator family for distance comparison in the index's operator class. + These operators must satisfy the following assumptions for all non-null + values A,B,C of the datatype: + + A <-> B = B <-> A symmetric law + if A = B, then A <-> C = B <-> C distance equivalence + if (A <= B and B <= C) or (A >= B and B >= C), + then A <-> B <= A <-> C monotonicity + + diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index df7d16f..9015557 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -175,6 +175,17 @@ CREATE INDEX test1_id_index ON test1 (id); + B-tree indexes are also capable of optimizing nearest-neighbor + searches, such as + date '2017-05-05' LIMIT 10; +]]> + + which finds the ten events closest to a given target date. The ability + to do this is again dependent on the particular operator class being used. + + + index hash diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index 9446f8b..93094bc 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -1242,7 +1242,8 @@ SELECT sum(x) OVER (ORDER BY x RANGE BETWEEN 5 PRECEDING AND 10 FOLLOWING) Ordering Operators - Some index access methods (currently, only GiST and SP-GiST) support the concept of + Some index access methods (currently, only B-tree, GiST and SP-GiST) + support the concept of ordering operators. What we have been discussing so far are search operators. 
A search operator is one for which the index can be searched to find all rows satisfying diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 3680e69..3f7e1b1 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -659,3 +659,20 @@ routines must treat it accordingly. The actual key stored in the item is irrelevant, and need not be stored at all. This arrangement corresponds to the fact that an L&Y non-leaf page has one more pointer than key. + +Nearest-neighbor search +----------------------- + +There is a special scan strategy for nearest-neighbor (kNN) search +that is used in queries with ORDER BY distance clauses like this: +SELECT * FROM tab WHERE col > const1 ORDER BY col <-> const2 LIMIT k. +But, unlike GiST, B-tree supports only one ordering operator, and only on the +first index column. + +At the beginning of a kNN scan, we need to determine which strategy we +will use --- a special bidirectional scan or an ordinary unidirectional one. +If the point from which we measure the distance falls into the scan range, +we use a bidirectional scan starting from this point; otherwise we use a simple +unidirectional scan in the appropriate direction. The algorithm of a bidirectional +scan is very simple: at each step we advance the scan in the direction +that has the nearest point. 
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 55c7833..9270a85 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -25,6 +25,9 @@ #include "commands/vacuum.h" #include "miscadmin.h" #include "nodes/execnodes.h" +#include "nodes/primnodes.h" +#include "nodes/relation.h" +#include "optimizer/paths.h" #include "pgstat.h" #include "postmaster/autovacuum.h" #include "storage/condition_variable.h" @@ -33,6 +36,7 @@ #include "storage/lmgr.h" #include "storage/smgr.h" #include "utils/builtins.h" +#include "utils/datum.h" #include "utils/index_selfuncs.h" #include "utils/memutils.h" @@ -79,6 +83,7 @@ typedef enum typedef struct BTParallelScanDescData { BlockNumber btps_scanPage; /* latest or next page to be scanned */ + BlockNumber btps_knnScanPage; /* secondary latest or next page to be scanned */ BTPS_State btps_pageStatus; /* indicates whether next page is * available for scan. see above for * possible states of parallel scan. 
*/ @@ -97,6 +102,9 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, static void btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno); +static bool btmatchorderby(IndexOptInfo *index, List *pathkeys, + List **orderby_clauses_p, List **clause_columns_p); + /* * Btree handler function: return IndexAmRoutine with access method parameters @@ -107,7 +115,7 @@ bthandler(PG_FUNCTION_ARGS) { IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); - amroutine->amstrategies = BTMaxStrategyNumber; + amroutine->amstrategies = BtreeMaxStrategyNumber; amroutine->amsupport = BTNProcs; amroutine->amcanorder = true; amroutine->amcanbackward = true; @@ -143,7 +151,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amestimateparallelscan = btestimateparallelscan; amroutine->aminitparallelscan = btinitparallelscan; amroutine->amparallelrescan = btparallelrescan; - amroutine->ammatchorderby = NULL; + amroutine->ammatchorderby = btmatchorderby; PG_RETURN_POINTER(amroutine); } @@ -215,23 +223,30 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) { BTScanOpaque so = (BTScanOpaque) scan->opaque; BTScanState state = &so->state; + ScanDirection arraydir = + scan->numberOfOrderBys > 0 ? ForwardScanDirection : dir; bool res; /* btree indexes are never lossy */ scan->xs_recheck = false; + scan->xs_recheckorderby = false; + + if (so->scanDirection != NoMovementScanDirection) + dir = so->scanDirection; /* * If we have any array keys, initialize them during first call for a * scan. We can't do this in btrescan because we don't know the scan * direction at that time. 
*/ - if (so->numArrayKeys && !BTScanPosIsValid(state->currPos)) + if (so->numArrayKeys && !BTScanPosIsValid(state->currPos) && + (!so->knnState || !BTScanPosIsValid(so->knnState->currPos))) { /* punt if we have any unsatisfiable array keys */ if (so->numArrayKeys < 0) return false; - _bt_start_array_keys(scan, dir); + _bt_start_array_keys(scan, arraydir); } /* This loop handles advancing to the next array elements, if any */ @@ -242,7 +257,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) * the appropriate direction. If we haven't done so yet, we call * _bt_first() to get the first item in the scan. */ - if (!BTScanPosIsValid(state->currPos)) + if (!BTScanPosIsValid(state->currPos) && + (!so->knnState || !BTScanPosIsValid(so->knnState->currPos))) res = _bt_first(scan, dir); else { @@ -277,7 +293,7 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) if (res) break; /* ... otherwise see if we have more array keys to deal with */ - } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir)); + } while (so->numArrayKeys && _bt_advance_array_keys(scan, arraydir)); return res; } @@ -350,9 +366,6 @@ btbeginscan(Relation rel, int nkeys, int norderbys) IndexScanDesc scan; BTScanOpaque so; - /* no order by operators allowed */ - Assert(norderbys == 0); - /* get the scan */ scan = RelationGetIndexScan(rel, nkeys, norderbys); @@ -379,6 +392,9 @@ btbeginscan(Relation rel, int nkeys, int norderbys) * scan->xs_itupdesc whether we'll need it or not, since that's so cheap. */ so->state.currTuples = so->state.markTuples = NULL; + so->knnState = NULL; + so->distanceTypeByVal = true; + so->scanDirection = NoMovementScanDirection; scan->xs_itupdesc = RelationGetDescr(rel); @@ -408,6 +424,8 @@ _bt_release_current_position(BTScanState state, Relation indexRelation, static void _bt_release_scan_state(IndexScanDesc scan, BTScanState state, bool free) { + BTScanOpaque so = (BTScanOpaque) scan->opaque; + /* No need to invalidate positions, if the RAM is about to be freed. 
*/ _bt_release_current_position(state, scan->indexRelation, !free); @@ -424,6 +442,17 @@ _bt_release_scan_state(IndexScanDesc scan, BTScanState state, bool free) } else BTScanPosInvalidate(state->markPos); + + if (!so->distanceTypeByVal) + { + if (DatumGetPointer(state->currDistance)) + pfree(DatumGetPointer(state->currDistance)); + state->currDistance = PointerGetDatum(NULL); + + if (DatumGetPointer(state->markDistance)) + pfree(DatumGetPointer(state->markDistance)); + state->markDistance = PointerGetDatum(NULL); + } } /* @@ -438,6 +467,13 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, _bt_release_scan_state(scan, state, false); + if (so->knnState) + { + _bt_release_scan_state(scan, so->knnState, true); + pfree(so->knnState); + so->knnState = NULL; + } + so->arrayKeyCount = 0; /* FIXME in _bt_release_scan_state */ /* @@ -469,6 +505,14 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, scan->numberOfKeys * sizeof(ScanKeyData)); so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */ + if (orderbys && scan->numberOfOrderBys > 0) + memmove(scan->orderByData, + orderbys, + scan->numberOfOrderBys * sizeof(ScanKeyData)); + + so->scanDirection = NoMovementScanDirection; + so->distanceTypeByVal = true; + /* If any keys are SK_SEARCHARRAY type, set up array-key info */ _bt_preprocess_array_keys(scan); } @@ -483,6 +527,12 @@ btendscan(IndexScanDesc scan) _bt_release_scan_state(scan, &so->state, true); + if (so->knnState) + { + _bt_release_scan_state(scan, so->knnState, true); + pfree(so->knnState); + } + /* Release storage */ if (so->keyData != NULL) pfree(so->keyData); @@ -494,7 +544,7 @@ btendscan(IndexScanDesc scan) } static void -_bt_mark_current_position(BTScanState state) +_bt_mark_current_position(BTScanOpaque so, BTScanState state) { /* There may be an old mark with a pin (but no lock). 
*/ BTScanPosUnpinIfPinned(state->markPos); @@ -512,6 +562,21 @@ _bt_mark_current_position(BTScanState state) BTScanPosInvalidate(state->markPos); state->markItemIndex = -1; } + + if (so->knnState) + { + if (!so->distanceTypeByVal) + pfree(DatumGetPointer(state->markDistance)); + + state->markIsNull = !BTScanPosIsValid(state->currPos) || + state->currIsNull; + + state->markDistance = + state->markIsNull ? PointerGetDatum(NULL) + : datumCopy(state->currDistance, + so->distanceTypeByVal, + so->distanceTypeLen); + } } /* @@ -522,7 +587,13 @@ btmarkpos(IndexScanDesc scan) { BTScanOpaque so = (BTScanOpaque) scan->opaque; - _bt_mark_current_position(&so->state); + _bt_mark_current_position(so, &so->state); + + if (so->knnState) + { + _bt_mark_current_position(so, so->knnState); + so->markRightIsNearest = so->currRightIsNearest; + } /* Also record the current positions of any array keys */ if (so->numArrayKeys) @@ -532,6 +603,8 @@ btmarkpos(IndexScanDesc scan) static void _bt_restore_marked_position(IndexScanDesc scan, BTScanState state) { + BTScanOpaque so = (BTScanOpaque) scan->opaque; + if (state->markItemIndex >= 0) { /* @@ -567,6 +640,19 @@ _bt_restore_marked_position(IndexScanDesc scan, BTScanState state) state->markPos.nextTupleOffset); } } + + if (so->knnState) + { + if (!so->distanceTypeByVal) + pfree(DatumGetPointer(state->currDistance)); + + state->currIsNull = state->markIsNull; + state->currDistance = + state->markIsNull ? 
PointerGetDatum(NULL) + : datumCopy(state->markDistance, + so->distanceTypeByVal, + so->distanceTypeLen); + } } /* @@ -588,6 +674,7 @@ btinitparallelscan(void *target) SpinLockInit(&bt_target->btps_mutex); bt_target->btps_scanPage = InvalidBlockNumber; + bt_target->btps_knnScanPage = InvalidBlockNumber; bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED; bt_target->btps_arrayKeyCount = 0; ConditionVariableInit(&bt_target->btps_cv); @@ -614,6 +701,7 @@ btparallelrescan(IndexScanDesc scan) */ SpinLockAcquire(&btscan->btps_mutex); btscan->btps_scanPage = InvalidBlockNumber; + btscan->btps_knnScanPage = InvalidBlockNumber; btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED; btscan->btps_arrayKeyCount = 0; SpinLockRelease(&btscan->btps_mutex); @@ -638,7 +726,7 @@ btparallelrescan(IndexScanDesc scan) * Callers should ignore the value of pageno if the return value is false. */ bool -_bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno) +_bt_parallel_seize(IndexScanDesc scan, BTScanState state, BlockNumber *pageno) { BTScanOpaque so = (BTScanOpaque) scan->opaque; BTPS_State pageStatus; @@ -646,12 +734,17 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno) bool status = true; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; + BlockNumber *scanPage; *pageno = P_NONE; btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan, parallel_scan->ps_offset); + scanPage = state == &so->state + ? &btscan->btps_scanPage + : &btscan->btps_knnScanPage; + while (1) { SpinLockAcquire(&btscan->btps_mutex); @@ -677,7 +770,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno) * of advancing it to a new page! */ btscan->btps_pageStatus = BTPARALLEL_ADVANCING; - *pageno = btscan->btps_scanPage; + *pageno = *scanPage; exit_loop = true; } SpinLockRelease(&btscan->btps_mutex); @@ -696,19 +789,42 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno) * can now begin advancing the scan. 
*/ void -_bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page) +_bt_parallel_release(IndexScanDesc scan, BTScanState state, + BlockNumber scan_page) { + BTScanOpaque so = (BTScanOpaque) scan->opaque; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; + BlockNumber *scanPage; + BlockNumber *otherScanPage; + bool status_changed = false; + bool knnScan = so->knnState != NULL; btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan, parallel_scan->ps_offset); + if (!state || state == &so->state) + { + scanPage = &btscan->btps_scanPage; + otherScanPage = &btscan->btps_knnScanPage; + } + else + { + scanPage = &btscan->btps_knnScanPage; + otherScanPage = &btscan->btps_scanPage; + } SpinLockAcquire(&btscan->btps_mutex); - btscan->btps_scanPage = scan_page; - btscan->btps_pageStatus = BTPARALLEL_IDLE; + *scanPage = scan_page; + /* switch to idle state only if both KNN pages are initialized */ + if (!knnScan || *otherScanPage != InvalidBlockNumber) + { + btscan->btps_pageStatus = BTPARALLEL_IDLE; + status_changed = true; + } SpinLockRelease(&btscan->btps_mutex); - ConditionVariableSignal(&btscan->btps_cv); + + if (status_changed) + ConditionVariableSignal(&btscan->btps_cv); } /* @@ -719,12 +835,15 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page) * advance to the next page. 
*/ void -_bt_parallel_done(IndexScanDesc scan) +_bt_parallel_done(IndexScanDesc scan, BTScanState state) { BTScanOpaque so = (BTScanOpaque) scan->opaque; ParallelIndexScanDesc parallel_scan = scan->parallel_scan; BTParallelScanDesc btscan; + BlockNumber *scanPage; + BlockNumber *otherScanPage; bool status_changed = false; + bool knnScan = so->knnState != NULL; /* Do nothing, for non-parallel scans */ if (parallel_scan == NULL) @@ -733,18 +852,41 @@ _bt_parallel_done(IndexScanDesc scan) btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan, parallel_scan->ps_offset); + if (!state || state == &so->state) + { + scanPage = &btscan->btps_scanPage; + otherScanPage = &btscan->btps_knnScanPage; + } + else + { + scanPage = &btscan->btps_knnScanPage; + otherScanPage = &btscan->btps_scanPage; + } + /* * Mark the parallel scan as done for this combination of scan keys, * unless some other process already did so. See also * _bt_advance_array_keys. */ SpinLockAcquire(&btscan->btps_mutex); - if (so->arrayKeyCount >= btscan->btps_arrayKeyCount && - btscan->btps_pageStatus != BTPARALLEL_DONE) + + Assert(btscan->btps_pageStatus == BTPARALLEL_ADVANCING); + + if (so->arrayKeyCount >= btscan->btps_arrayKeyCount) { - btscan->btps_pageStatus = BTPARALLEL_DONE; + *scanPage = P_NONE; status_changed = true; + + /* switch to "done" state only if both KNN scans are done */ + if (!knnScan || *otherScanPage == P_NONE) + btscan->btps_pageStatus = BTPARALLEL_DONE; + /* else switch to "idle" state only if both KNN scans are initialized */ + else if (*otherScanPage != InvalidBlockNumber) + btscan->btps_pageStatus = BTPARALLEL_IDLE; + else + status_changed = false; } + SpinLockRelease(&btscan->btps_mutex); /* wake up all the workers associated with this parallel scan */ @@ -774,6 +916,7 @@ _bt_parallel_advance_array_keys(IndexScanDesc scan) if (btscan->btps_pageStatus == BTPARALLEL_DONE) { btscan->btps_scanPage = InvalidBlockNumber; + btscan->btps_knnScanPage = InvalidBlockNumber; 
btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED; btscan->btps_arrayKeyCount++; } @@ -859,6 +1002,12 @@ btrestrpos(IndexScanDesc scan) _bt_restore_array_keys(scan); _bt_restore_marked_position(scan, &so->state); + + if (so->knnState) + { + _bt_restore_marked_position(scan, so->knnState); + so->currRightIsNearest = so->markRightIsNearest; + } } /* @@ -1394,3 +1543,30 @@ btcanreturn(Relation index, int attno) { return true; } + +/* + * btmatchorderby() -- Check whether KNN-search strategy is applicable to + * the given ORDER BY distance operator. + */ +static bool +btmatchorderby(IndexOptInfo *index, List *pathkeys, + List **orderby_clauses_p, List **clause_columns_p) +{ + Expr *expr; + /* ORDER BY distance to the first index column is only supported */ + int indexcol = 0; + + if (list_length(pathkeys) != 1) + return false; /* only one ORDER BY clause is supported */ + + expr = match_orderbyop_pathkey(index, castNode(PathKey, linitial(pathkeys)), + &indexcol); + + if (!expr) + return false; + + *orderby_clauses_p = lappend(*orderby_clauses_p, expr); + *clause_columns_p = lappend_int(*clause_columns_p, 0); + + return true; +} diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 2b63e0c..6b58dd52 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -32,12 +32,14 @@ static void _bt_saveitem(BTScanState state, int itemIndex, static bool _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir); static bool _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, ScanDirection dir); -static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, - ScanDirection dir); +static bool _bt_parallel_readpage(IndexScanDesc scan, BTScanState state, + BlockNumber blkno, ScanDirection dir); static Buffer _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot); static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); static void 
_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp); static inline void _bt_initialize_more_data(BTScanState state, ScanDirection dir); +static BTScanState _bt_alloc_knn_scan(IndexScanDesc scan); +static bool _bt_start_knn_scan(IndexScanDesc scan, bool left, bool right); /* @@ -598,6 +600,156 @@ _bt_load_first_page(IndexScanDesc scan, BTScanState state, ScanDirection dir, } /* + * _bt_calc_current_dist() -- Calculate distance from the current item + * of the scan state to the target order-by ScanKey argument. + */ +static void +_bt_calc_current_dist(IndexScanDesc scan, BTScanState state) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanPosItem *currItem = &state->currPos.items[state->currPos.itemIndex]; + IndexTuple itup = (IndexTuple) (state->currTuples + currItem->tupleOffset); + ScanKey scankey = &scan->orderByData[0]; + Datum value; + + value = index_getattr(itup, 1, scan->xs_itupdesc, &state->currIsNull); + + if (state->currIsNull) + return; /* NULL distance */ + + value = FunctionCall2Coll(&scankey->sk_func, + scankey->sk_collation, + value, + scankey->sk_argument); + + /* free previous distance value for by-ref types */ + if (!so->distanceTypeByVal && DatumGetPointer(state->currDistance)) + pfree(DatumGetPointer(state->currDistance)); + + state->currDistance = value; +} + +/* + * _bt_compare_current_dist() -- Compare current distances of the left and right scan states. + * + * NULL distances are considered to be greater than any non-NULL distances. + * + * Returns true if right distance is lesser than left, otherwise false. 
+ */ +static bool +_bt_compare_current_dist(BTScanOpaque so, BTScanState rstate, BTScanState lstate) +{ + if (lstate->currIsNull) + return true; /* non-NULL < NULL */ + + if (rstate->currIsNull) + return false; /* NULL > non-NULL */ + + return DatumGetBool(FunctionCall2Coll(&so->distanceCmpProc, + InvalidOid, /* XXX collation for distance comparison */ + rstate->currDistance, + lstate->currDistance)); +} + +/* + * _bt_alloc_knn_scan() -- Allocate additional backward scan state for KNN. + */ +static BTScanState +_bt_alloc_knn_scan(IndexScanDesc scan) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState lstate = (BTScanState) palloc(sizeof(BTScanStateData)); + + _bt_allocate_tuple_workspaces(lstate); + + if (!scan->xs_want_itup) + { + /* We need to request index tuples for distance comparison. */ + scan->xs_want_itup = true; + _bt_allocate_tuple_workspaces(&so->state); + } + + BTScanPosInvalidate(lstate->currPos); + lstate->currPos.moreLeft = false; + lstate->currPos.moreRight = false; + BTScanPosInvalidate(lstate->markPos); + lstate->markItemIndex = -1; + lstate->killedItems = NULL; + lstate->numKilled = 0; + lstate->currDistance = PointerGetDatum(NULL); + lstate->markDistance = PointerGetDatum(NULL); + + return so->knnState = lstate; +} + +static bool +_bt_start_knn_scan(IndexScanDesc scan, bool left, bool right) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState rstate; /* right (forward) main scan state */ + BTScanState lstate; /* additional left (backward) KNN scan state */ + + if (!left && !right) + return false; /* empty result */ + + rstate = &so->state; + lstate = so->knnState; + + if (left && right) + { + /* + * We have found items in both scan directions, + * determine nearest item to return. + */ + _bt_calc_current_dist(scan, rstate); + _bt_calc_current_dist(scan, lstate); + so->currRightIsNearest = _bt_compare_current_dist(so, rstate, lstate); + + /* Reset right flag if the left item is nearer. 
*/ + right = so->currRightIsNearest; + } + + /* Return current item of the selected scan direction. */ + return _bt_return_current_item(scan, right ? rstate : lstate); +} + +/* + * _bt_init_knn_scan() -- Init additional scan state for KNN search. + * + * Caller must pin and read-lock scan->state.currPos.buf buffer. + * + * If empty result was found returned false. + * Otherwise prepared current item, and returned true. + */ +static bool +_bt_init_knn_scan(IndexScanDesc scan, OffsetNumber offnum) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState rstate = &so->state; /* right (forward) main scan state */ + BTScanState lstate; /* additional left (backward) KNN scan state */ + Buffer buf = rstate->currPos.buf; + bool left, + right; + + lstate = _bt_alloc_knn_scan(scan); + + /* Bump pin and lock count before BTScanPosData copying. */ + IncrBufferRefCount(buf); + LockBuffer(buf, BT_READ); + + memcpy(&lstate->currPos, &rstate->currPos, sizeof(BTScanPosData)); + lstate->currPos.moreLeft = true; + lstate->currPos.moreRight = false; + + /* Load first pages from the both scans. */ + right = _bt_load_first_page(scan, rstate, ForwardScanDirection, offnum); + left = _bt_load_first_page(scan, lstate, BackwardScanDirection, + OffsetNumberPrev(offnum)); + + return _bt_start_knn_scan(scan, left, right); +} + +/* * _bt_first() -- Find the first item in a scan. * * We need to be clever about the direction of scan, the search @@ -655,6 +807,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!so->qual_ok) return false; + strat_total = BTEqualStrategyNumber; + + if (scan->numberOfOrderBys > 0) + { + if (_bt_process_orderings(scan, startKeys, &keysCount, notnullkeys)) + /* use bidirectional KNN scan */ + strat_total = BtreeKNNSearchStrategyNumber; + + /* use selected KNN scan direction */ + if (so->scanDirection != NoMovementScanDirection) + dir = so->scanDirection; + } + /* * For parallel scans, get the starting page from shared state. 
If the * scan has not started, proceed to find out first leaf page in the usual @@ -663,19 +828,50 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) */ if (scan->parallel_scan != NULL) { - status = _bt_parallel_seize(scan, &blkno); + status = _bt_parallel_seize(scan, &so->state, &blkno); if (!status) return false; - else if (blkno == P_NONE) - { - _bt_parallel_done(scan); - return false; - } else if (blkno != InvalidBlockNumber) { - if (!_bt_parallel_readpage(scan, blkno, dir)) - return false; - goto readcomplete; + bool knn = strat_total == BtreeKNNSearchStrategyNumber; + bool right; + bool left; + + if (knn) + _bt_alloc_knn_scan(scan); + + if (blkno == P_NONE) + { + _bt_parallel_done(scan, &so->state); + right = false; + } + else + right = _bt_parallel_readpage(scan, &so->state, blkno, + knn ? ForwardScanDirection : dir); + + if (!knn) + return right && _bt_return_current_item(scan, &so->state); + + /* seize additional backward KNN scan */ + left = _bt_parallel_seize(scan, so->knnState, &blkno); + + if (left) + { + if (blkno == P_NONE) + { + _bt_parallel_done(scan, so->knnState); + left = false; + } + else + { + /* backward scan should be already initialized */ + Assert(blkno != InvalidBlockNumber); + left = _bt_parallel_readpage(scan, so->knnState, blkno, + BackwardScanDirection); + } + } + + return _bt_start_knn_scan(scan, left, right); } } @@ -725,8 +921,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * storing their addresses into the local startKeys[] array. 
*---------- */ - strat_total = BTEqualStrategyNumber; - if (so->numberOfKeys > 0) + + if (so->numberOfKeys > 0 && + /* startKeys for KNN search already have been initialized */ + strat_total != BtreeKNNSearchStrategyNumber) { AttrNumber curattr; ScanKey chosen; @@ -866,7 +1064,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!match) { /* No match, so mark (parallel) scan finished */ - _bt_parallel_done(scan); + _bt_parallel_done(scan, NULL); } return match; @@ -901,7 +1099,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) Assert(subkey->sk_flags & SK_ROW_MEMBER); if (subkey->sk_flags & SK_ISNULL) { - _bt_parallel_done(scan); + _bt_parallel_done(scan, NULL); return false; } memcpy(scankeys + i, subkey, sizeof(ScanKeyData)); @@ -1081,6 +1279,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) break; case BTGreaterEqualStrategyNumber: + case BtreeKNNSearchStrategyNumber: /* * Find first item >= scankey. (This is only used for forward @@ -1128,7 +1327,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * mark parallel scan as done, so that all the workers can finish * their scan */ - _bt_parallel_done(scan); + _bt_parallel_done(scan, NULL); BTScanPosInvalidate(*currPos); return false; @@ -1167,17 +1366,21 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) Assert(!BTScanPosIsValid(*currPos)); currPos->buf = buf; + if (strat_total == BtreeKNNSearchStrategyNumber) + return _bt_init_knn_scan(scan, offnum); + if (!_bt_load_first_page(scan, &so->state, dir, offnum)) - return false; + return false; /* empty result */ -readcomplete: /* OK, currPos->itemIndex says what to return */ return _bt_return_current_item(scan, &so->state); } /* - * Advance to next tuple on current page; or if there's no more, - * try to step to the next page with data. + * _bt_next_item() -- Advance to next tuple on current page; + * or if there's no more, try to step to the next page with data. 
+ * + * If there are no more matching records in the given direction */ static bool _bt_next_item(IndexScanDesc scan, BTScanState state, ScanDirection dir) @@ -1197,6 +1400,51 @@ _bt_next_item(IndexScanDesc scan, BTScanState state, ScanDirection dir) } /* + * _bt_next_nearest() -- Return next nearest item from bidirectional KNN scan. + */ +static bool +_bt_next_nearest(IndexScanDesc scan) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanState rstate = &so->state; + BTScanState lstate = so->knnState; + bool right = BTScanPosIsValid(rstate->currPos); + bool left = BTScanPosIsValid(lstate->currPos); + bool advanceRight; + + if (right && left) + advanceRight = so->currRightIsNearest; + else if (right) + advanceRight = true; + else if (left) + advanceRight = false; + else + return false; /* end of the scan */ + + if (advanceRight) + right = _bt_next_item(scan, rstate, ForwardScanDirection); + else + left = _bt_next_item(scan, lstate, BackwardScanDirection); + + if (!left && !right) + return false; /* end of the scan */ + + if (left && right) + { + /* + * If there are items in both scans we must recalculate distance + * in the advanced scan. + */ + _bt_calc_current_dist(scan, advanceRight ? rstate : lstate); + so->currRightIsNearest = _bt_compare_current_dist(so, rstate, lstate); + right = so->currRightIsNearest; + } + + /* return nearest item */ + return _bt_return_current_item(scan, right ? rstate : lstate); +} + +/* * _bt_next() -- Get the next item in a scan. 
* * On entry, so->currPos describes the current page, which may be pinned @@ -1215,6 +1463,10 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + if (so->knnState) + /* return next neareset item from KNN scan */ + return _bt_next_nearest(scan); + if (!_bt_next_item(scan, &so->state, dir)) return false; @@ -1267,9 +1519,9 @@ _bt_readpage(IndexScanDesc scan, BTScanState state, ScanDirection dir, if (scan->parallel_scan) { if (ScanDirectionIsForward(dir)) - _bt_parallel_release(scan, opaque->btpo_next); + _bt_parallel_release(scan, state, opaque->btpo_next); else - _bt_parallel_release(scan, BufferGetBlockNumber(pos->buf)); + _bt_parallel_release(scan, state, BufferGetBlockNumber(pos->buf)); } minoff = P_FIRSTDATAKEY(opaque); @@ -1443,7 +1695,7 @@ _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir) * Seize the scan to get the next block number; if the scan has * ended already, bail out. */ - status = _bt_parallel_seize(scan, &blkno); + status = _bt_parallel_seize(scan, state, &blkno); if (!status) { /* release the previous buffer, if pinned */ @@ -1475,13 +1727,19 @@ _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir) * Seize the scan to get the current block number; if the scan has * ended already, bail out. 
*/ - status = _bt_parallel_seize(scan, &blkno); + status = _bt_parallel_seize(scan, state, &blkno); BTScanPosUnpinIfPinned(*currPos); if (!status) { BTScanPosInvalidate(*currPos); return false; } + if (blkno == P_NONE) + { + _bt_parallel_done(scan, state); + BTScanPosInvalidate(*currPos); + return false; + } } else { @@ -1531,7 +1789,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, */ if (blkno == P_NONE || !currPos->moreRight) { - _bt_parallel_done(scan); + _bt_parallel_done(scan, state); BTScanPosInvalidate(*currPos); return false; } @@ -1554,14 +1812,14 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, else if (scan->parallel_scan != NULL) { /* allow next page be processed by parallel worker */ - _bt_parallel_release(scan, opaque->btpo_next); + _bt_parallel_release(scan, state, opaque->btpo_next); } /* nope, keep going */ if (scan->parallel_scan != NULL) { _bt_relbuf(rel, currPos->buf); - status = _bt_parallel_seize(scan, &blkno); + status = _bt_parallel_seize(scan, state, &blkno); if (!status) { BTScanPosInvalidate(*currPos); @@ -1620,7 +1878,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, if (!currPos->moreLeft) { _bt_relbuf(rel, currPos->buf); - _bt_parallel_done(scan); + _bt_parallel_done(scan, state); BTScanPosInvalidate(*currPos); return false; } @@ -1631,7 +1889,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, /* if we're physically at end of index, return failure */ if (currPos->buf == InvalidBuffer) { - _bt_parallel_done(scan); + _bt_parallel_done(scan, state); BTScanPosInvalidate(*currPos); return false; } @@ -1655,7 +1913,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, else if (scan->parallel_scan != NULL) { /* allow next page be processed by parallel worker */ - _bt_parallel_release(scan, BufferGetBlockNumber(currPos->buf)); + _bt_parallel_release(scan, state, BufferGetBlockNumber(currPos->buf)); } 
/* @@ -1667,7 +1925,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, if (scan->parallel_scan != NULL) { _bt_relbuf(rel, currPos->buf); - status = _bt_parallel_seize(scan, &blkno); + status = _bt_parallel_seize(scan, state, &blkno); if (!status) { BTScanPosInvalidate(*currPos); @@ -1688,17 +1946,16 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, * indicate success. */ static bool -_bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir) +_bt_parallel_readpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno, + ScanDirection dir) { - BTScanOpaque so = (BTScanOpaque) scan->opaque; + _bt_initialize_more_data(state, dir); - _bt_initialize_more_data(&so->state, dir); - - if (!_bt_readnextpage(scan, &so->state, blkno, dir)) + if (!_bt_readnextpage(scan, state, blkno, dir)) return false; /* Drop the lock, and maybe the pin, on the current page */ - _bt_drop_lock_and_maybe_pin(scan, &so->state.currPos); + _bt_drop_lock_and_maybe_pin(scan, &state->currPos); return true; } diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 9bf453c..cd81490 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -20,16 +20,21 @@ #include "access/nbtree.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "catalog/pg_amop.h" #include "miscadmin.h" #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/syscache.h" typedef struct BTSortArrayContext { FmgrInfo flinfo; + FmgrInfo distflinfo; /* distance operator procedure */ + FmgrInfo distcmpflinfo; /* comparison procedure for two distances */ + ScanKey distkey; /* ORDER BY distance key, or NULL if none */ Oid collation; bool reverse; } BTSortArrayContext; @@ -49,6 +54,9 @@ static void _bt_mark_scankey_required(ScanKey skey); static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc, ScanDirection dir, bool *continuescan); +static void _bt_get_distance_cmp_proc(ScanKey distkey, Oid
opfamily, Oid leftargtype, + FmgrInfo *finfo, int16 *typlen, bool *typbyval); + /* @@ -445,6 +453,7 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey, { Relation rel = scan->indexRelation; Oid elemtype; + Oid opfamily; RegProcedure cmp_proc; BTSortArrayContext cxt; int last_non_dup; @@ -462,6 +471,53 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey, if (elemtype == InvalidOid) elemtype = rel->rd_opcintype[skey->sk_attno - 1]; + opfamily = rel->rd_opfamily[skey->sk_attno - 1]; + + if (scan->numberOfOrderBys <= 0) + { + cxt.distkey = NULL; + cxt.reverse = reverse; + } + else + { + /* Init procedures for distance calculation and comparison. */ + ScanKey distkey = &scan->orderByData[0]; + ScanKeyData distkey2; + Oid disttype = distkey->sk_subtype; + Oid distopr; + RegProcedure distproc; + + if (!OidIsValid(disttype)) + disttype = rel->rd_opcintype[skey->sk_attno - 1]; + + /* Lookup distance operator in index column's operator family. */ + distopr = get_opfamily_member(opfamily, + elemtype, + disttype, + distkey->sk_strategy); + + if (!OidIsValid(distopr)) + elog(ERROR, "missing operator (%u,%u) for strategy %d in opfamily %u", + elemtype, disttype, BtreeKNNSearchStrategyNumber, opfamily); + + distproc = get_opcode(distopr); + + if (!RegProcedureIsValid(distproc)) + elog(ERROR, "missing code for operator %u", distopr); + + fmgr_info(distproc, &cxt.distflinfo); + + distkey2 = *distkey; + fmgr_info_copy(&distkey2.sk_func, &cxt.distflinfo, CurrentMemoryContext); + distkey2.sk_subtype = disttype; + + _bt_get_distance_cmp_proc(&distkey2, opfamily, elemtype, + &cxt.distcmpflinfo, NULL, NULL); + + cxt.distkey = distkey; + cxt.reverse = false; /* supported only ascending ordering */ + } + /* * Look up the appropriate comparison function in the opfamily. * @@ -470,19 +526,17 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey, * non-cross-type support functions for any datatype that it supports at * all. 
*/ - cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1], + cmp_proc = get_opfamily_proc(opfamily, elemtype, elemtype, BTORDER_PROC); if (!RegProcedureIsValid(cmp_proc)) elog(ERROR, "missing support function %d(%u,%u) in opfamily %u", - BTORDER_PROC, elemtype, elemtype, - rel->rd_opfamily[skey->sk_attno - 1]); + BTORDER_PROC, elemtype, elemtype, opfamily); /* Sort the array elements */ fmgr_info(cmp_proc, &cxt.flinfo); cxt.collation = skey->sk_collation; - cxt.reverse = reverse; qsort_arg((void *) elems, nelems, sizeof(Datum), _bt_compare_array_elements, (void *) &cxt); @@ -514,6 +568,23 @@ _bt_compare_array_elements(const void *a, const void *b, void *arg) BTSortArrayContext *cxt = (BTSortArrayContext *) arg; int32 compare; + if (cxt->distkey) + { + Datum dista = FunctionCall2Coll(&cxt->distflinfo, + cxt->collation, + da, + cxt->distkey->sk_argument); + Datum distb = FunctionCall2Coll(&cxt->distflinfo, + cxt->collation, + db, + cxt->distkey->sk_argument); + bool cmp = DatumGetBool(FunctionCall2Coll(&cxt->distcmpflinfo, + cxt->collation, + dista, + distb)); + return cmp ? 
-1 : 1; + } + compare = DatumGetInt32(FunctionCall2Coll(&cxt->flinfo, cxt->collation, da, db)); @@ -2075,6 +2146,39 @@ btproperty(Oid index_oid, int attno, *res = true; return true; + case AMPROP_DISTANCE_ORDERABLE: + { + Oid opclass, + opfamily, + opcindtype; + + /* answer only for columns, not AM or whole index */ + if (attno == 0) + return false; + + opclass = get_index_column_opclass(index_oid, attno); + + if (!OidIsValid(opclass)) + { + *res = false; /* non-key attribute */ + return true; + } + + if (!get_opclass_opfamily_and_input_type(opclass, + &opfamily, &opcindtype)) + { + *isnull = true; + return true; + } + + *res = SearchSysCacheExists(AMOPSTRATEGY, + ObjectIdGetDatum(opfamily), + ObjectIdGetDatum(opcindtype), + ObjectIdGetDatum(opcindtype), + Int16GetDatum(BtreeKNNSearchStrategyNumber)); + return true; + } + default: return false; /* punt to generic code */ } @@ -2223,3 +2327,271 @@ _bt_allocate_tuple_workspaces(BTScanState state) state->currTuples = (char *) palloc(BLCKSZ * 2); state->markTuples = state->currTuples + BLCKSZ; } + +/* + * _bt_compare_row_key_with_ordering_key() -- Test the ordering key argument + * against the first member of a row-comparison scankey. + * + * Returns false if the argument of that member is NULL. Otherwise stores the + * outcome of applying the strategy of that member in *result and returns true. + */ +static bool +_bt_compare_row_key_with_ordering_key(ScanKey row, ScanKey ord, bool *result) +{ + ScanKey subkey = (ScanKey) DatumGetPointer(row->sk_argument); + int32 cmpresult; + + Assert(subkey->sk_attno == 1); + Assert(subkey->sk_flags & SK_ROW_MEMBER); + + if (subkey->sk_flags & SK_ISNULL) + return false; + + /* Perform the test --- three-way comparison not bool operator */ + cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func, + subkey->sk_collation, + ord->sk_argument, + subkey->sk_argument)); + + if (subkey->sk_flags & SK_BT_DESC) + cmpresult = (cmpresult < 0) ? 1 : -cmpresult; /* plain negation overflows for INT_MIN */ + + /* + * Only the first row member is compared here, so cmpresult is the + * ordering of the ORDER BY argument relative to the argument of that + * member; the strategy of the member is applied to it below.
+ */ + switch (subkey->sk_strategy) + { + /* EQ and NE cases aren't allowed here */ + case BTLessStrategyNumber: + *result = cmpresult < 0; + break; + case BTLessEqualStrategyNumber: + *result = cmpresult <= 0; + break; + case BTGreaterEqualStrategyNumber: + *result = cmpresult >= 0; + break; + case BTGreaterStrategyNumber: + *result = cmpresult > 0; + break; + default: + elog(ERROR, "unrecognized RowCompareType: %d", + (int) subkey->sk_strategy); + *result = false; /* keep compiler quiet */ + } + + return true; +} + +/* _bt_select_knn_search_strategy() -- Determine which KNN scan strategy to use: + * bidirectional or unidirectional. We check here whether the + * ordering scankey argument falls into the scan range: if it does, + * we must use bidirectional scan, otherwise we use unidirectional. + * + * Returns BtreeKNNSearchStrategyNumber for bidirectional scan, else the + * unmatched scankey strategy (BTEqualStrategyNumber for "=" or NULL rows). + */ +static StrategyNumber +_bt_select_knn_search_strategy(IndexScanDesc scan, ScanKey ord) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + ScanKey cond; + + for (cond = so->keyData; cond < so->keyData + so->numberOfKeys; cond++) + { + bool result; + + if (cond->sk_attno != 1) + break; /* only interested in the first index attribute */ + + if (cond->sk_strategy == BTEqualStrategyNumber) + /* always use simple unidirectional scan for equals operators */ + return BTEqualStrategyNumber; + + if (cond->sk_flags & SK_ROW_HEADER) + { + if (!_bt_compare_row_key_with_ordering_key(cond, ord, &result)) + return BTEqualStrategyNumber; /* ROW(first_index_attr, ...) IS NULL */ + } + else + { + if (!_bt_compare_scankey_args(scan, cond, ord, cond, &result)) + elog(ERROR, "could not compare ordering key"); + } + + if (!result) + /* + * Ordering scankey argument is out of scan range, + * use unidirectional scan.
+ */ + return cond->sk_strategy; + } + + return BtreeKNNSearchStrategyNumber; /* use bidirectional scan */ +} + +static Oid +_bt_get_sortfamily_for_opfamily_op(Oid opfamily, Oid lefttype, Oid righttype, + StrategyNumber strategy) +{ + HeapTuple tp; + Form_pg_amop amop_tup; + Oid sortfamily; + + tp = SearchSysCache4(AMOPSTRATEGY, + ObjectIdGetDatum(opfamily), + ObjectIdGetDatum(lefttype), + ObjectIdGetDatum(righttype), + Int16GetDatum(strategy)); + if (!HeapTupleIsValid(tp)) + return InvalidOid; /* no such operator entry in the family */ + amop_tup = (Form_pg_amop) GETSTRUCT(tp); + sortfamily = amop_tup->amopsortfamily; + ReleaseSysCache(tp); + + return sortfamily; +} + +/* + * _bt_get_distance_cmp_proc() -- Init procedure for comparison of distances + * between "leftargtype" and "distkey". + */ +static void +_bt_get_distance_cmp_proc(ScanKey distkey, Oid opfamily, Oid leftargtype, + FmgrInfo *finfo, int16 *typlen, bool *typbyval) +{ + RegProcedure opcode; + Oid sortfamily; + Oid opno; + Oid distanceType; + + distanceType = get_func_rettype(distkey->sk_func.fn_oid); + + sortfamily = _bt_get_sortfamily_for_opfamily_op(opfamily, leftargtype, + distkey->sk_subtype, + distkey->sk_strategy); + + if (!OidIsValid(sortfamily)) + elog(ERROR, "could not find sort family for btree ordering operator"); + + opno = get_opfamily_member(sortfamily, + distanceType, + distanceType, + BTLessEqualStrategyNumber); /* "<=" on the distance type */ + + if (!OidIsValid(opno)) + elog(ERROR, "could not find operator for btree distance comparison"); + + opcode = get_opcode(opno); + + if (!RegProcedureIsValid(opcode)) + elog(ERROR, + "could not find procedure for btree distance comparison operator"); + + fmgr_info(opcode, finfo); + + if (typlen) /* distance type info is an optional output */ + get_typlenbyval(distanceType, typlen, typbyval); +} + +/* + * _bt_init_distance_comparison() -- Init distance typlen/typbyval and its + * comparison procedure.
+ */ +static void +_bt_init_distance_comparison(IndexScanDesc scan, ScanKey ord) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + Relation rel = scan->indexRelation; + + _bt_get_distance_cmp_proc(ord, + rel->rd_opfamily[ord->sk_attno - 1], + rel->rd_opcintype[ord->sk_attno - 1], + &so->distanceCmpProc, + &so->distanceTypeLen, + &so->distanceTypeByVal); + + if (!so->distanceTypeByVal) + { + so->state.currDistance = PointerGetDatum(NULL); + so->state.markDistance = PointerGetDatum(NULL); + } +} + +/* + * _bt_process_orderings() -- Process ORDER BY distance scankeys and + * select corresponding KNN strategy. + * + * If bidirectional scan is selected, one scankey is initialized in bufKeys, + * appended to startKeys/keysCount, and true is returned. Otherwise false is + * returned; so->scanDirection is set, except in the BTEqualStrategyNumber + * case, where the default direction is kept. + */ +bool +_bt_process_orderings(IndexScanDesc scan, ScanKey *startKeys, int *keysCount, + ScanKeyData bufKeys[]) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + ScanKey ord = scan->orderByData; + + if (scan->numberOfOrderBys > 1 || ord->sk_attno != 1) + /* it should not happen, see btmatchorderby() */ + elog(ERROR, "only one btree ordering operator " + "for the first index column is supported"); + + Assert(ord->sk_strategy == BtreeKNNSearchStrategyNumber); + + switch (_bt_select_knn_search_strategy(scan, ord)) + { + case BTLessStrategyNumber: + case BTLessEqualStrategyNumber: + /* + * Ordering key argument is greater than all values in scan range. + * Select backward scan direction. + */ + so->scanDirection = BackwardScanDirection; + return false; + + case BTEqualStrategyNumber: + /* Use default unidirectional scan direction. */ + return false; + + case BTGreaterEqualStrategyNumber: + case BTGreaterStrategyNumber: + /* + * Ordering key argument is less than all values in scan range. + * Select forward scan direction.
+ */ + so->scanDirection = ForwardScanDirection; + return false; + + case BtreeKNNSearchStrategyNumber: + /* + * Ordering key argument falls into scan range, + * use bidirectional scan. + */ + break; + } + + _bt_init_distance_comparison(scan, ord); + + /* Init btree search key with ordering key argument. */ + ScanKeyEntryInitialize(&bufKeys[0], + (scan->indexRelation->rd_indoption[ord->sk_attno - 1] << + SK_BT_INDOPTION_SHIFT) | + SK_ORDER_BY | + SK_SEARCHNULL /* only for invalid procedure oid, see + * assert in ScanKeyEntryInitialize */, + ord->sk_attno, + BtreeKNNSearchStrategyNumber, + ord->sk_subtype, + ord->sk_collation, + InvalidOid, + ord->sk_argument); + + startKeys[(*keysCount)++] = &bufKeys[0]; + + return true; +} diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c index f24091c..be4e843 100644 --- a/src/backend/access/nbtree/nbtvalidate.c +++ b/src/backend/access/nbtree/nbtvalidate.c @@ -22,9 +22,17 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" #include "utils/regproc.h" #include "utils/syscache.h" +#define BTRequiredOperatorSet \ + ((1 << BTLessStrategyNumber) | \ + (1 << BTLessEqualStrategyNumber) | \ + (1 << BTEqualStrategyNumber) | \ + (1 << BTGreaterEqualStrategyNumber) | \ + (1 << BTGreaterStrategyNumber)) + /* * Validator for a btree opclass.
@@ -132,10 +140,11 @@ btvalidate(Oid opclassoid) { HeapTuple oprtup = &oprlist->members[i]->tuple; Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup); + Oid op_rettype; /* Check that only allowed strategy numbers exist */ if (oprform->amopstrategy < 1 || - oprform->amopstrategy > BTMaxStrategyNumber) + oprform->amopstrategy > BtreeMaxStrategyNumber) { ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -146,20 +155,29 @@ btvalidate(Oid opclassoid) result = false; } - /* btree doesn't support ORDER BY operators */ - if (oprform->amoppurpose != AMOP_SEARCH || - OidIsValid(oprform->amopsortfamily)) + /* btree supports ORDER BY operators */ + if (oprform->amoppurpose != AMOP_SEARCH) { - ereport(INFO, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s", - opfamilyname, "btree", - format_operator(oprform->amopopr)))); - result = false; + /* ... and operator result must match the claimed btree opfamily */ + op_rettype = get_op_rettype(oprform->amopopr); + if (!opfamily_can_sort_type(oprform->amopsortfamily, op_rettype)) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s", + opfamilyname, "btree", + format_operator(oprform->amopopr)))); + result = false; + } + } + else + { + /* Search operators must always return bool */ + op_rettype = BOOLOID; } /* Check operator signature --- same for all btree strategies */ - if (!check_amop_signature(oprform->amopopr, BOOLOID, + if (!check_amop_signature(oprform->amopopr, op_rettype, oprform->amoplefttype, oprform->amoprighttype)) { @@ -214,12 +232,8 @@ btvalidate(Oid opclassoid) * or support functions for this datatype pair. The only things * considered optional are the sortsupport and in_range functions. 
*/ - if (thisgroup->operatorset != - ((1 << BTLessStrategyNumber) | - (1 << BTLessEqualStrategyNumber) | - (1 << BTEqualStrategyNumber) | - (1 << BTGreaterEqualStrategyNumber) | - (1 << BTGreaterStrategyNumber))) + if ((thisgroup->operatorset & BTRequiredOperatorSet) != + BTRequiredOperatorSet) { ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index e043664..7c9b97b 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -988,6 +988,10 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, * if we are only trying to build bitmap indexscans, nor if we have to * assume the scan is unordered. */ + useful_pathkeys = NIL; + orderbyclauses = NIL; + orderbyclausecols = NIL; + pathkeys_possibly_useful = (scantype != ST_BITMAPSCAN && !found_lower_saop_clause && has_useful_pathkeys(root, rel)); @@ -998,10 +1002,10 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, ForwardScanDirection); useful_pathkeys = truncate_useless_pathkeys(root, rel, index_pathkeys); - orderbyclauses = NIL; - orderbyclausecols = NIL; } - else if (index->ammatchorderby && pathkeys_possibly_useful) + + if (useful_pathkeys == NIL && + index->ammatchorderby && pathkeys_possibly_useful) { /* see if we can generate ordering operators for query_pathkeys */ match_pathkeys_to_index(index, root->query_pathkeys, @@ -1012,12 +1016,6 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, else useful_pathkeys = NIL; } - else - { - useful_pathkeys = NIL; - orderbyclauses = NIL; - orderbyclausecols = NIL; - } /* * 3. Check if an index-only scan is possible. 
If we're not building diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 388b311..9c8a384 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -460,6 +460,12 @@ typedef struct BTScanStateData /* keep these last in struct for efficiency */ BTScanPosData currPos; /* current position data */ BTScanPosData markPos; /* marked position, if any */ + + /* KNN-search fields: */ + Datum currDistance; /* current distance */ + Datum markDistance; /* marked distance */ + bool currIsNull; /* current item is NULL */ + bool markIsNull; /* marked item is NULL */ } BTScanStateData, *BTScanState; typedef struct BTScanOpaqueData @@ -478,7 +484,17 @@ typedef struct BTScanOpaqueData BTArrayKeyInfo *arrayKeys; /* info about each equality-type array key */ MemoryContext arrayContext; /* scan-lifespan context for array data */ - BTScanStateData state; + BTScanStateData state; /* main scan state */ + + /* KNN-search fields: */ + BTScanState knnState; /* optional scan state for KNN search */ + ScanDirection scanDirection; /* selected scan direction for + * unidirectional KNN scan */ + FmgrInfo distanceCmpProc; /* distance comparison procedure */ + int16 distanceTypeLen; /* distance typlen */ + bool distanceTypeByVal; /* distance typebyval */ + bool currRightIsNearest; /* current right item is nearest */ + bool markRightIsNearest; /* marked right item is nearest */ } BTScanOpaqueData; typedef BTScanOpaqueData *BTScanOpaque; @@ -524,11 +540,12 @@ extern bool btcanreturn(Relation index, int attno); /* * prototypes for internal functions in nbtree.c */ -extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno); -extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page); -extern void _bt_parallel_done(IndexScanDesc scan); +extern bool _bt_parallel_seize(IndexScanDesc scan, BTScanState state, BlockNumber *pageno); +extern void _bt_parallel_release(IndexScanDesc scan, BTScanState state, BlockNumber scan_page); 
+extern void _bt_parallel_done(IndexScanDesc scan, BTScanState state); extern void _bt_parallel_advance_array_keys(IndexScanDesc scan); + /* * prototypes for functions in nbtinsert.c */ @@ -609,6 +626,8 @@ extern bool btproperty(Oid index_oid, int attno, extern IndexTuple _bt_nonkey_truncate(Relation rel, IndexTuple itup); extern bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum); extern void _bt_allocate_tuple_workspaces(BTScanState state); +extern bool _bt_process_orderings(IndexScanDesc scan, + ScanKey *startKeys, int *keysCount, ScanKeyData bufKeys[]); /* * prototypes for functions in nbtvalidate.c diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h index 0db11a1..f44b41a 100644 --- a/src/include/access/stratnum.h +++ b/src/include/access/stratnum.h @@ -32,7 +32,10 @@ typedef uint16 StrategyNumber; #define BTGreaterEqualStrategyNumber 4 #define BTGreaterStrategyNumber 5 -#define BTMaxStrategyNumber 5 +#define BTMaxStrategyNumber 5 /* number of canonical B-tree strategies */ + +#define BtreeKNNSearchStrategyNumber 6 /* for <-> (distance) */ +#define BtreeMaxStrategyNumber 6 /* number of extended B-tree strategies */ /* diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index 6faa9d7..c75ef39 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -347,10 +347,10 @@ ROLLBACK; CREATE OPERATOR FAMILY alt_opf4 USING btree; ALTER OPERATOR FAMILY alt_opf4 USING invalid_index_method ADD OPERATOR 1 < (int4, int2); -- invalid indexing_method ERROR: access method "invalid_index_method" does not exist -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 6 < (int4, int2); -- operator number should be between 1 and 5 -ERROR: invalid operator number 6, must be between 1 and 5 -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 5 -ERROR: invalid operator number 0, 
must be between 1 and 5 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 7 < (int4, int2); -- operator number should be between 1 and 6 +ERROR: invalid operator number 7, must be between 1 and 6 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 6 +ERROR: invalid operator number 0, must be between 1 and 6 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types ERROR: operator argument types must be specified in ALTER OPERATOR FAMILY ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- function number should be between 1 and 5 @@ -397,11 +397,12 @@ DROP OPERATOR FAMILY alt_opf8 USING btree; CREATE OPERATOR FAMILY alt_opf9 USING gist; ALTER OPERATOR FAMILY alt_opf9 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; DROP OPERATOR FAMILY alt_opf9 USING gist; --- Should fail. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +-- Should work. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +BEGIN TRANSACTION; CREATE OPERATOR FAMILY alt_opf10 USING btree; ALTER OPERATOR FAMILY alt_opf10 USING btree ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; -ERROR: access method "btree" does not support ordering operators DROP OPERATOR FAMILY alt_opf10 USING btree; +ROLLBACK; -- Should work. Textbook case of ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR ORDER BY CREATE OPERATOR FAMILY alt_opf11 USING gist; ALTER OPERATOR FAMILY alt_opf11 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index 84fd900..73e6e206 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -295,8 +295,8 @@ ROLLBACK; -- Should fail. Invalid values for ALTER OPERATOR FAMILY .. 
ADD / DROP CREATE OPERATOR FAMILY alt_opf4 USING btree; ALTER OPERATOR FAMILY alt_opf4 USING invalid_index_method ADD OPERATOR 1 < (int4, int2); -- invalid indexing_method -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 6 < (int4, int2); -- operator number should be between 1 and 5 -ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 5 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 7 < (int4, int2); -- operator number should be between 1 and 6 +ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 6 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- function number should be between 1 and 5 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 6 btint42cmp(int4, int2); -- function number should be between 1 and 5 @@ -340,10 +340,12 @@ CREATE OPERATOR FAMILY alt_opf9 USING gist; ALTER OPERATOR FAMILY alt_opf9 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; DROP OPERATOR FAMILY alt_opf9 USING gist; --- Should fail. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +-- Should work. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY +BEGIN TRANSACTION; CREATE OPERATOR FAMILY alt_opf10 USING btree; ALTER OPERATOR FAMILY alt_opf10 USING btree ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops; DROP OPERATOR FAMILY alt_opf10 USING btree; +ROLLBACK; -- Should work. Textbook case of ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR ORDER BY CREATE OPERATOR FAMILY alt_opf11 USING gist;