diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml
index 8bd0bad..ed9625e 100644
--- a/doc/src/sgml/btree.sgml
+++ b/doc/src/sgml/btree.sgml
@@ -200,6 +200,20 @@
planner relies on them for optimization purposes.
+
+ FIXME!!!
+ To implement the distance ordered (nearest-neighbor) search, we only need
+ to define a distance operator (usually it called <->) with a correpsonding
+ operator family for distance comparison in the index's operator class.
+ These operators must satisfy the following assumptions for all non-null
+ values A,B,C of the datatype:
+
+ A <-> B = B <-> A symmetric law
+ if A = B, then A <-> C = B <-> C distance equivalence
+ if (A <= B and B <= C) or (A >= B and B >= C),
+ then A <-> B <= A <-> C monotonicity
+
+
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml
index df7d16f..9015557 100644
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -175,6 +175,17 @@ CREATE INDEX test1_id_index ON test1 (id);
+ B-tree indexes are also capable of optimizing nearest-neighbor>
+ searches, such as
+ date '2017-05-05' LIMIT 10;
+]]>
+
+ which finds the ten events closest to a given target date. The ability
+ to do this is again dependent on the particular operator class being used.
+
+
+
index
hash
diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml
index 9446f8b..93094bc 100644
--- a/doc/src/sgml/xindex.sgml
+++ b/doc/src/sgml/xindex.sgml
@@ -1242,7 +1242,8 @@ SELECT sum(x) OVER (ORDER BY x RANGE BETWEEN 5 PRECEDING AND 10 FOLLOWING)
Ordering Operators
- Some index access methods (currently, only GiST and SP-GiST) support the concept of
+ Some index access methods (currently, only B-tree, GiST and SP-GiST)
+ support the concept of
ordering operators. What we have been discussing so far
are search operators. A search operator is one for which
the index can be searched to find all rows satisfying
diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README
index 3680e69..3f7e1b1 100644
--- a/src/backend/access/nbtree/README
+++ b/src/backend/access/nbtree/README
@@ -659,3 +659,20 @@ routines must treat it accordingly. The actual key stored in the
item is irrelevant, and need not be stored at all. This arrangement
corresponds to the fact that an L&Y non-leaf page has one more pointer
than key.
+
+Nearest-neighbor search
+-----------------------
+
+There is a special scan strategy for nearest-neighbor (kNN) search,
+that is used in queries with ORDER BY distance clauses like this:
+SELECT * FROM tab WHERE col > const1 ORDER BY col <-> const2 LIMIT k.
+But, unlike GiST, B-tree supports only a one ordering operator on the
+first index column.
+
+At the beginning of kNN scan, we need to determine which strategy we
+will use --- a special bidirectional or a ordinary unidirectional.
+If the point from which we measure the distance falls into the scan range,
+we use bidirectional scan starting from this point, else we use simple
+unidirectional scan in the right direction. Algorithm of a bidirectional
+scan is very simple: at each step we advancing scan in that direction,
+which has the nearest point.
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 55c7833..9270a85 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -25,6 +25,9 @@
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
+#include "nodes/primnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/paths.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/condition_variable.h"
@@ -33,6 +36,7 @@
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
+#include "utils/datum.h"
#include "utils/index_selfuncs.h"
#include "utils/memutils.h"
@@ -79,6 +83,7 @@ typedef enum
typedef struct BTParallelScanDescData
{
BlockNumber btps_scanPage; /* latest or next page to be scanned */
+ BlockNumber btps_knnScanPage; /* secondary latest or next page to be scanned */
BTPS_State btps_pageStatus; /* indicates whether next page is
* available for scan. see above for
* possible states of parallel scan. */
@@ -97,6 +102,9 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
BlockNumber orig_blkno);
+static bool btmatchorderby(IndexOptInfo *index, List *pathkeys,
+ List **orderby_clauses_p, List **clause_columns_p);
+
/*
* Btree handler function: return IndexAmRoutine with access method parameters
@@ -107,7 +115,7 @@ bthandler(PG_FUNCTION_ARGS)
{
IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
- amroutine->amstrategies = BTMaxStrategyNumber;
+ amroutine->amstrategies = BtreeMaxStrategyNumber;
amroutine->amsupport = BTNProcs;
amroutine->amcanorder = true;
amroutine->amcanbackward = true;
@@ -143,7 +151,7 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->amestimateparallelscan = btestimateparallelscan;
amroutine->aminitparallelscan = btinitparallelscan;
amroutine->amparallelrescan = btparallelrescan;
- amroutine->ammatchorderby = NULL;
+ amroutine->ammatchorderby = btmatchorderby;
PG_RETURN_POINTER(amroutine);
}
@@ -215,23 +223,30 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
BTScanState state = &so->state;
+ ScanDirection arraydir =
+ scan->numberOfOrderBys > 0 ? ForwardScanDirection : dir;
bool res;
/* btree indexes are never lossy */
scan->xs_recheck = false;
+ scan->xs_recheckorderby = false;
+
+ if (so->scanDirection != NoMovementScanDirection)
+ dir = so->scanDirection;
/*
* If we have any array keys, initialize them during first call for a
* scan. We can't do this in btrescan because we don't know the scan
* direction at that time.
*/
- if (so->numArrayKeys && !BTScanPosIsValid(state->currPos))
+ if (so->numArrayKeys && !BTScanPosIsValid(state->currPos) &&
+ (!so->knnState || !BTScanPosIsValid(so->knnState->currPos)))
{
/* punt if we have any unsatisfiable array keys */
if (so->numArrayKeys < 0)
return false;
- _bt_start_array_keys(scan, dir);
+ _bt_start_array_keys(scan, arraydir);
}
/* This loop handles advancing to the next array elements, if any */
@@ -242,7 +257,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
* the appropriate direction. If we haven't done so yet, we call
* _bt_first() to get the first item in the scan.
*/
- if (!BTScanPosIsValid(state->currPos))
+ if (!BTScanPosIsValid(state->currPos) &&
+ (!so->knnState || !BTScanPosIsValid(so->knnState->currPos)))
res = _bt_first(scan, dir);
else
{
@@ -277,7 +293,7 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
if (res)
break;
/* ... otherwise see if we have more array keys to deal with */
- } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));
+ } while (so->numArrayKeys && _bt_advance_array_keys(scan, arraydir));
return res;
}
@@ -350,9 +366,6 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
IndexScanDesc scan;
BTScanOpaque so;
- /* no order by operators allowed */
- Assert(norderbys == 0);
-
/* get the scan */
scan = RelationGetIndexScan(rel, nkeys, norderbys);
@@ -379,6 +392,9 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
* scan->xs_itupdesc whether we'll need it or not, since that's so cheap.
*/
so->state.currTuples = so->state.markTuples = NULL;
+ so->knnState = NULL;
+ so->distanceTypeByVal = true;
+ so->scanDirection = NoMovementScanDirection;
scan->xs_itupdesc = RelationGetDescr(rel);
@@ -408,6 +424,8 @@ _bt_release_current_position(BTScanState state, Relation indexRelation,
static void
_bt_release_scan_state(IndexScanDesc scan, BTScanState state, bool free)
{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
/* No need to invalidate positions, if the RAM is about to be freed. */
_bt_release_current_position(state, scan->indexRelation, !free);
@@ -424,6 +442,17 @@ _bt_release_scan_state(IndexScanDesc scan, BTScanState state, bool free)
}
else
BTScanPosInvalidate(state->markPos);
+
+ if (!so->distanceTypeByVal)
+ {
+ if (DatumGetPointer(state->currDistance))
+ pfree(DatumGetPointer(state->currDistance));
+ state->currDistance = PointerGetDatum(NULL);
+
+ if (DatumGetPointer(state->markDistance))
+ pfree(DatumGetPointer(state->markDistance));
+ state->markDistance = PointerGetDatum(NULL);
+ }
}
/*
@@ -438,6 +467,13 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
_bt_release_scan_state(scan, state, false);
+ if (so->knnState)
+ {
+ _bt_release_scan_state(scan, so->knnState, true);
+ pfree(so->knnState);
+ so->knnState = NULL;
+ }
+
so->arrayKeyCount = 0; /* FIXME in _bt_release_scan_state */
/*
@@ -469,6 +505,14 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
scan->numberOfKeys * sizeof(ScanKeyData));
so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
+ if (orderbys && scan->numberOfOrderBys > 0)
+ memmove(scan->orderByData,
+ orderbys,
+ scan->numberOfOrderBys * sizeof(ScanKeyData));
+
+ so->scanDirection = NoMovementScanDirection;
+ so->distanceTypeByVal = true;
+
/* If any keys are SK_SEARCHARRAY type, set up array-key info */
_bt_preprocess_array_keys(scan);
}
@@ -483,6 +527,12 @@ btendscan(IndexScanDesc scan)
_bt_release_scan_state(scan, &so->state, true);
+ if (so->knnState)
+ {
+ _bt_release_scan_state(scan, so->knnState, true);
+ pfree(so->knnState);
+ }
+
/* Release storage */
if (so->keyData != NULL)
pfree(so->keyData);
@@ -494,7 +544,7 @@ btendscan(IndexScanDesc scan)
}
static void
-_bt_mark_current_position(BTScanState state)
+_bt_mark_current_position(BTScanOpaque so, BTScanState state)
{
/* There may be an old mark with a pin (but no lock). */
BTScanPosUnpinIfPinned(state->markPos);
@@ -512,6 +562,21 @@ _bt_mark_current_position(BTScanState state)
BTScanPosInvalidate(state->markPos);
state->markItemIndex = -1;
}
+
+ if (so->knnState)
+ {
+ if (!so->distanceTypeByVal)
+ pfree(DatumGetPointer(state->markDistance));
+
+ state->markIsNull = !BTScanPosIsValid(state->currPos) ||
+ state->currIsNull;
+
+ state->markDistance =
+ state->markIsNull ? PointerGetDatum(NULL)
+ : datumCopy(state->currDistance,
+ so->distanceTypeByVal,
+ so->distanceTypeLen);
+ }
}
/*
@@ -522,7 +587,13 @@ btmarkpos(IndexScanDesc scan)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
- _bt_mark_current_position(&so->state);
+ _bt_mark_current_position(so, &so->state);
+
+ if (so->knnState)
+ {
+ _bt_mark_current_position(so, so->knnState);
+ so->markRightIsNearest = so->currRightIsNearest;
+ }
/* Also record the current positions of any array keys */
if (so->numArrayKeys)
@@ -532,6 +603,8 @@ btmarkpos(IndexScanDesc scan)
static void
_bt_restore_marked_position(IndexScanDesc scan, BTScanState state)
{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+
if (state->markItemIndex >= 0)
{
/*
@@ -567,6 +640,19 @@ _bt_restore_marked_position(IndexScanDesc scan, BTScanState state)
state->markPos.nextTupleOffset);
}
}
+
+ if (so->knnState)
+ {
+ if (!so->distanceTypeByVal)
+ pfree(DatumGetPointer(state->currDistance));
+
+ state->currIsNull = state->markIsNull;
+ state->currDistance =
+ state->markIsNull ? PointerGetDatum(NULL)
+ : datumCopy(state->markDistance,
+ so->distanceTypeByVal,
+ so->distanceTypeLen);
+ }
}
/*
@@ -588,6 +674,7 @@ btinitparallelscan(void *target)
SpinLockInit(&bt_target->btps_mutex);
bt_target->btps_scanPage = InvalidBlockNumber;
+ bt_target->btps_knnScanPage = InvalidBlockNumber;
bt_target->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
bt_target->btps_arrayKeyCount = 0;
ConditionVariableInit(&bt_target->btps_cv);
@@ -614,6 +701,7 @@ btparallelrescan(IndexScanDesc scan)
*/
SpinLockAcquire(&btscan->btps_mutex);
btscan->btps_scanPage = InvalidBlockNumber;
+ btscan->btps_knnScanPage = InvalidBlockNumber;
btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
btscan->btps_arrayKeyCount = 0;
SpinLockRelease(&btscan->btps_mutex);
@@ -638,7 +726,7 @@ btparallelrescan(IndexScanDesc scan)
* Callers should ignore the value of pageno if the return value is false.
*/
bool
-_bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
+_bt_parallel_seize(IndexScanDesc scan, BTScanState state, BlockNumber *pageno)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
BTPS_State pageStatus;
@@ -646,12 +734,17 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
bool status = true;
ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
BTParallelScanDesc btscan;
+ BlockNumber *scanPage;
*pageno = P_NONE;
btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
parallel_scan->ps_offset);
+ scanPage = state == &so->state
+ ? &btscan->btps_scanPage
+ : &btscan->btps_knnScanPage;
+
while (1)
{
SpinLockAcquire(&btscan->btps_mutex);
@@ -677,7 +770,7 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
* of advancing it to a new page!
*/
btscan->btps_pageStatus = BTPARALLEL_ADVANCING;
- *pageno = btscan->btps_scanPage;
+ *pageno = *scanPage;
exit_loop = true;
}
SpinLockRelease(&btscan->btps_mutex);
@@ -696,19 +789,42 @@ _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno)
* can now begin advancing the scan.
*/
void
-_bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page)
+_bt_parallel_release(IndexScanDesc scan, BTScanState state,
+ BlockNumber scan_page)
{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
BTParallelScanDesc btscan;
+ BlockNumber *scanPage;
+ BlockNumber *otherScanPage;
+ bool status_changed = false;
+ bool knnScan = so->knnState != NULL;
btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
parallel_scan->ps_offset);
+ if (!state || state == &so->state)
+ {
+ scanPage = &btscan->btps_scanPage;
+ otherScanPage = &btscan->btps_knnScanPage;
+ }
+ else
+ {
+ scanPage = &btscan->btps_knnScanPage;
+ otherScanPage = &btscan->btps_scanPage;
+ }
SpinLockAcquire(&btscan->btps_mutex);
- btscan->btps_scanPage = scan_page;
- btscan->btps_pageStatus = BTPARALLEL_IDLE;
+ *scanPage = scan_page;
+ /* switch to idle state only if both KNN pages are initialized */
+ if (!knnScan || *otherScanPage != InvalidBlockNumber)
+ {
+ btscan->btps_pageStatus = BTPARALLEL_IDLE;
+ status_changed = true;
+ }
SpinLockRelease(&btscan->btps_mutex);
- ConditionVariableSignal(&btscan->btps_cv);
+
+ if (status_changed)
+ ConditionVariableSignal(&btscan->btps_cv);
}
/*
@@ -719,12 +835,15 @@ _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page)
* advance to the next page.
*/
void
-_bt_parallel_done(IndexScanDesc scan)
+_bt_parallel_done(IndexScanDesc scan, BTScanState state)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
ParallelIndexScanDesc parallel_scan = scan->parallel_scan;
BTParallelScanDesc btscan;
+ BlockNumber *scanPage;
+ BlockNumber *otherScanPage;
bool status_changed = false;
+ bool knnScan = so->knnState != NULL;
/* Do nothing, for non-parallel scans */
if (parallel_scan == NULL)
@@ -733,18 +852,41 @@ _bt_parallel_done(IndexScanDesc scan)
btscan = (BTParallelScanDesc) OffsetToPointer((void *) parallel_scan,
parallel_scan->ps_offset);
+ if (!state || state == &so->state)
+ {
+ scanPage = &btscan->btps_scanPage;
+ otherScanPage = &btscan->btps_knnScanPage;
+ }
+ else
+ {
+ scanPage = &btscan->btps_knnScanPage;
+ otherScanPage = &btscan->btps_scanPage;
+ }
+
/*
* Mark the parallel scan as done for this combination of scan keys,
* unless some other process already did so. See also
* _bt_advance_array_keys.
*/
SpinLockAcquire(&btscan->btps_mutex);
- if (so->arrayKeyCount >= btscan->btps_arrayKeyCount &&
- btscan->btps_pageStatus != BTPARALLEL_DONE)
+
+ Assert(btscan->btps_pageStatus == BTPARALLEL_ADVANCING);
+
+ if (so->arrayKeyCount >= btscan->btps_arrayKeyCount)
{
- btscan->btps_pageStatus = BTPARALLEL_DONE;
+ *scanPage = P_NONE;
status_changed = true;
+
+ /* switch to "done" state only if both KNN scans are done */
+ if (!knnScan || *otherScanPage == P_NONE)
+ btscan->btps_pageStatus = BTPARALLEL_DONE;
+ /* else switch to "idle" state only if both KNN scans are initialized */
+ else if (*otherScanPage != InvalidBlockNumber)
+ btscan->btps_pageStatus = BTPARALLEL_IDLE;
+ else
+ status_changed = false;
}
+
SpinLockRelease(&btscan->btps_mutex);
/* wake up all the workers associated with this parallel scan */
@@ -774,6 +916,7 @@ _bt_parallel_advance_array_keys(IndexScanDesc scan)
if (btscan->btps_pageStatus == BTPARALLEL_DONE)
{
btscan->btps_scanPage = InvalidBlockNumber;
+ btscan->btps_knnScanPage = InvalidBlockNumber;
btscan->btps_pageStatus = BTPARALLEL_NOT_INITIALIZED;
btscan->btps_arrayKeyCount++;
}
@@ -859,6 +1002,12 @@ btrestrpos(IndexScanDesc scan)
_bt_restore_array_keys(scan);
_bt_restore_marked_position(scan, &so->state);
+
+ if (so->knnState)
+ {
+ _bt_restore_marked_position(scan, so->knnState);
+ so->currRightIsNearest = so->markRightIsNearest;
+ }
}
/*
@@ -1394,3 +1543,30 @@ btcanreturn(Relation index, int attno)
{
return true;
}
+
+/*
+ * btmatchorderby() -- Check whether KNN-search strategy is applicable to
+ * the given ORDER BY distance operator.
+ */
+static bool
+btmatchorderby(IndexOptInfo *index, List *pathkeys,
+ List **orderby_clauses_p, List **clause_columns_p)
+{
+ Expr *expr;
+ /* ORDER BY distance to the first index column is only supported */
+ int indexcol = 0;
+
+ if (list_length(pathkeys) != 1)
+ return false; /* only one ORDER BY clause is supported */
+
+ expr = match_orderbyop_pathkey(index, castNode(PathKey, linitial(pathkeys)),
+ &indexcol);
+
+ if (!expr)
+ return false;
+
+ *orderby_clauses_p = lappend(*orderby_clauses_p, expr);
+ *clause_columns_p = lappend_int(*clause_columns_p, 0);
+
+ return true;
+}
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 2b63e0c..6b58dd52 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -32,12 +32,14 @@ static void _bt_saveitem(BTScanState state, int itemIndex,
static bool _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir);
static bool _bt_readnextpage(IndexScanDesc scan, BTScanState state,
BlockNumber blkno, ScanDirection dir);
-static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno,
- ScanDirection dir);
+static bool _bt_parallel_readpage(IndexScanDesc scan, BTScanState state,
+ BlockNumber blkno, ScanDirection dir);
static Buffer _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot);
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
static inline void _bt_initialize_more_data(BTScanState state, ScanDirection dir);
+static BTScanState _bt_alloc_knn_scan(IndexScanDesc scan);
+static bool _bt_start_knn_scan(IndexScanDesc scan, bool left, bool right);
/*
@@ -598,6 +600,156 @@ _bt_load_first_page(IndexScanDesc scan, BTScanState state, ScanDirection dir,
}
/*
+ * _bt_calc_current_dist() -- Calculate distance from the current item
+ * of the scan state to the target order-by ScanKey argument.
+ */
+static void
+_bt_calc_current_dist(IndexScanDesc scan, BTScanState state)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ BTScanPosItem *currItem = &state->currPos.items[state->currPos.itemIndex];
+ IndexTuple itup = (IndexTuple) (state->currTuples + currItem->tupleOffset);
+ ScanKey scankey = &scan->orderByData[0];
+ Datum value;
+
+ value = index_getattr(itup, 1, scan->xs_itupdesc, &state->currIsNull);
+
+ if (state->currIsNull)
+ return; /* NULL distance */
+
+ value = FunctionCall2Coll(&scankey->sk_func,
+ scankey->sk_collation,
+ value,
+ scankey->sk_argument);
+
+ /* free previous distance value for by-ref types */
+ if (!so->distanceTypeByVal && DatumGetPointer(state->currDistance))
+ pfree(DatumGetPointer(state->currDistance));
+
+ state->currDistance = value;
+}
+
+/*
+ * _bt_compare_current_dist() -- Compare current distances of the left and right scan states.
+ *
+ * NULL distances are considered to be greater than any non-NULL distances.
+ *
+ * Returns true if right distance is lesser than left, otherwise false.
+ */
+static bool
+_bt_compare_current_dist(BTScanOpaque so, BTScanState rstate, BTScanState lstate)
+{
+ if (lstate->currIsNull)
+ return true; /* non-NULL < NULL */
+
+ if (rstate->currIsNull)
+ return false; /* NULL > non-NULL */
+
+ return DatumGetBool(FunctionCall2Coll(&so->distanceCmpProc,
+ InvalidOid, /* XXX collation for distance comparison */
+ rstate->currDistance,
+ lstate->currDistance));
+}
+
+/*
+ * _bt_alloc_knn_scan() -- Allocate additional backward scan state for KNN.
+ */
+static BTScanState
+_bt_alloc_knn_scan(IndexScanDesc scan)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ BTScanState lstate = (BTScanState) palloc(sizeof(BTScanStateData));
+
+ _bt_allocate_tuple_workspaces(lstate);
+
+ if (!scan->xs_want_itup)
+ {
+ /* We need to request index tuples for distance comparison. */
+ scan->xs_want_itup = true;
+ _bt_allocate_tuple_workspaces(&so->state);
+ }
+
+ BTScanPosInvalidate(lstate->currPos);
+ lstate->currPos.moreLeft = false;
+ lstate->currPos.moreRight = false;
+ BTScanPosInvalidate(lstate->markPos);
+ lstate->markItemIndex = -1;
+ lstate->killedItems = NULL;
+ lstate->numKilled = 0;
+ lstate->currDistance = PointerGetDatum(NULL);
+ lstate->markDistance = PointerGetDatum(NULL);
+
+ return so->knnState = lstate;
+}
+
+static bool
+_bt_start_knn_scan(IndexScanDesc scan, bool left, bool right)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ BTScanState rstate; /* right (forward) main scan state */
+ BTScanState lstate; /* additional left (backward) KNN scan state */
+
+ if (!left && !right)
+ return false; /* empty result */
+
+ rstate = &so->state;
+ lstate = so->knnState;
+
+ if (left && right)
+ {
+ /*
+ * We have found items in both scan directions,
+ * determine nearest item to return.
+ */
+ _bt_calc_current_dist(scan, rstate);
+ _bt_calc_current_dist(scan, lstate);
+ so->currRightIsNearest = _bt_compare_current_dist(so, rstate, lstate);
+
+ /* Reset right flag if the left item is nearer. */
+ right = so->currRightIsNearest;
+ }
+
+ /* Return current item of the selected scan direction. */
+ return _bt_return_current_item(scan, right ? rstate : lstate);
+}
+
+/*
+ * _bt_init_knn_scan() -- Init additional scan state for KNN search.
+ *
+ * Caller must pin and read-lock scan->state.currPos.buf buffer.
+ *
+ * If empty result was found returned false.
+ * Otherwise prepared current item, and returned true.
+ */
+static bool
+_bt_init_knn_scan(IndexScanDesc scan, OffsetNumber offnum)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ BTScanState rstate = &so->state; /* right (forward) main scan state */
+ BTScanState lstate; /* additional left (backward) KNN scan state */
+ Buffer buf = rstate->currPos.buf;
+ bool left,
+ right;
+
+ lstate = _bt_alloc_knn_scan(scan);
+
+ /* Bump pin and lock count before BTScanPosData copying. */
+ IncrBufferRefCount(buf);
+ LockBuffer(buf, BT_READ);
+
+ memcpy(&lstate->currPos, &rstate->currPos, sizeof(BTScanPosData));
+ lstate->currPos.moreLeft = true;
+ lstate->currPos.moreRight = false;
+
+ /* Load first pages from the both scans. */
+ right = _bt_load_first_page(scan, rstate, ForwardScanDirection, offnum);
+ left = _bt_load_first_page(scan, lstate, BackwardScanDirection,
+ OffsetNumberPrev(offnum));
+
+ return _bt_start_knn_scan(scan, left, right);
+}
+
+/*
* _bt_first() -- Find the first item in a scan.
*
* We need to be clever about the direction of scan, the search
@@ -655,6 +807,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!so->qual_ok)
return false;
+ strat_total = BTEqualStrategyNumber;
+
+ if (scan->numberOfOrderBys > 0)
+ {
+ if (_bt_process_orderings(scan, startKeys, &keysCount, notnullkeys))
+ /* use bidirectional KNN scan */
+ strat_total = BtreeKNNSearchStrategyNumber;
+
+ /* use selected KNN scan direction */
+ if (so->scanDirection != NoMovementScanDirection)
+ dir = so->scanDirection;
+ }
+
/*
* For parallel scans, get the starting page from shared state. If the
* scan has not started, proceed to find out first leaf page in the usual
@@ -663,19 +828,50 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
*/
if (scan->parallel_scan != NULL)
{
- status = _bt_parallel_seize(scan, &blkno);
+ status = _bt_parallel_seize(scan, &so->state, &blkno);
if (!status)
return false;
- else if (blkno == P_NONE)
- {
- _bt_parallel_done(scan);
- return false;
- }
else if (blkno != InvalidBlockNumber)
{
- if (!_bt_parallel_readpage(scan, blkno, dir))
- return false;
- goto readcomplete;
+ bool knn = strat_total == BtreeKNNSearchStrategyNumber;
+ bool right;
+ bool left;
+
+ if (knn)
+ _bt_alloc_knn_scan(scan);
+
+ if (blkno == P_NONE)
+ {
+ _bt_parallel_done(scan, &so->state);
+ right = false;
+ }
+ else
+ right = _bt_parallel_readpage(scan, &so->state, blkno,
+ knn ? ForwardScanDirection : dir);
+
+ if (!knn)
+ return right && _bt_return_current_item(scan, &so->state);
+
+ /* seize additional backward KNN scan */
+ left = _bt_parallel_seize(scan, so->knnState, &blkno);
+
+ if (left)
+ {
+ if (blkno == P_NONE)
+ {
+ _bt_parallel_done(scan, so->knnState);
+ left = false;
+ }
+ else
+ {
+ /* backward scan should be already initialized */
+ Assert(blkno != InvalidBlockNumber);
+ left = _bt_parallel_readpage(scan, so->knnState, blkno,
+ BackwardScanDirection);
+ }
+ }
+
+ return _bt_start_knn_scan(scan, left, right);
}
}
@@ -725,8 +921,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* storing their addresses into the local startKeys[] array.
*----------
*/
- strat_total = BTEqualStrategyNumber;
- if (so->numberOfKeys > 0)
+
+ if (so->numberOfKeys > 0 &&
+ /* startKeys for KNN search already have been initialized */
+ strat_total != BtreeKNNSearchStrategyNumber)
{
AttrNumber curattr;
ScanKey chosen;
@@ -866,7 +1064,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!match)
{
/* No match, so mark (parallel) scan finished */
- _bt_parallel_done(scan);
+ _bt_parallel_done(scan, NULL);
}
return match;
@@ -901,7 +1099,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
Assert(subkey->sk_flags & SK_ROW_MEMBER);
if (subkey->sk_flags & SK_ISNULL)
{
- _bt_parallel_done(scan);
+ _bt_parallel_done(scan, NULL);
return false;
}
memcpy(scankeys + i, subkey, sizeof(ScanKeyData));
@@ -1081,6 +1279,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
break;
case BTGreaterEqualStrategyNumber:
+ case BtreeKNNSearchStrategyNumber:
/*
* Find first item >= scankey. (This is only used for forward
@@ -1128,7 +1327,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* mark parallel scan as done, so that all the workers can finish
* their scan
*/
- _bt_parallel_done(scan);
+ _bt_parallel_done(scan, NULL);
BTScanPosInvalidate(*currPos);
return false;
@@ -1167,17 +1366,21 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
Assert(!BTScanPosIsValid(*currPos));
currPos->buf = buf;
+ if (strat_total == BtreeKNNSearchStrategyNumber)
+ return _bt_init_knn_scan(scan, offnum);
+
if (!_bt_load_first_page(scan, &so->state, dir, offnum))
- return false;
+ return false; /* empty result */
-readcomplete:
/* OK, currPos->itemIndex says what to return */
return _bt_return_current_item(scan, &so->state);
}
/*
- * Advance to next tuple on current page; or if there's no more,
- * try to step to the next page with data.
+ * _bt_next_item() -- Advance to next tuple on current page;
+ * or if there's no more, try to step to the next page with data.
+ *
+ * If there are no more matching records in the given direction
*/
static bool
_bt_next_item(IndexScanDesc scan, BTScanState state, ScanDirection dir)
@@ -1197,6 +1400,51 @@ _bt_next_item(IndexScanDesc scan, BTScanState state, ScanDirection dir)
}
/*
+ * _bt_next_nearest() -- Return next nearest item from bidirectional KNN scan.
+ */
+static bool
+_bt_next_nearest(IndexScanDesc scan)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ BTScanState rstate = &so->state;
+ BTScanState lstate = so->knnState;
+ bool right = BTScanPosIsValid(rstate->currPos);
+ bool left = BTScanPosIsValid(lstate->currPos);
+ bool advanceRight;
+
+ if (right && left)
+ advanceRight = so->currRightIsNearest;
+ else if (right)
+ advanceRight = true;
+ else if (left)
+ advanceRight = false;
+ else
+ return false; /* end of the scan */
+
+ if (advanceRight)
+ right = _bt_next_item(scan, rstate, ForwardScanDirection);
+ else
+ left = _bt_next_item(scan, lstate, BackwardScanDirection);
+
+ if (!left && !right)
+ return false; /* end of the scan */
+
+ if (left && right)
+ {
+ /*
+ * If there are items in both scans we must recalculate distance
+ * in the advanced scan.
+ */
+ _bt_calc_current_dist(scan, advanceRight ? rstate : lstate);
+ so->currRightIsNearest = _bt_compare_current_dist(so, rstate, lstate);
+ right = so->currRightIsNearest;
+ }
+
+ /* return nearest item */
+ return _bt_return_current_item(scan, right ? rstate : lstate);
+}
+
+/*
* _bt_next() -- Get the next item in a scan.
*
* On entry, so->currPos describes the current page, which may be pinned
@@ -1215,6 +1463,10 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ if (so->knnState)
+ /* return next neareset item from KNN scan */
+ return _bt_next_nearest(scan);
+
if (!_bt_next_item(scan, &so->state, dir))
return false;
@@ -1267,9 +1519,9 @@ _bt_readpage(IndexScanDesc scan, BTScanState state, ScanDirection dir,
if (scan->parallel_scan)
{
if (ScanDirectionIsForward(dir))
- _bt_parallel_release(scan, opaque->btpo_next);
+ _bt_parallel_release(scan, state, opaque->btpo_next);
else
- _bt_parallel_release(scan, BufferGetBlockNumber(pos->buf));
+ _bt_parallel_release(scan, state, BufferGetBlockNumber(pos->buf));
}
minoff = P_FIRSTDATAKEY(opaque);
@@ -1443,7 +1695,7 @@ _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir)
* Seize the scan to get the next block number; if the scan has
* ended already, bail out.
*/
- status = _bt_parallel_seize(scan, &blkno);
+ status = _bt_parallel_seize(scan, state, &blkno);
if (!status)
{
/* release the previous buffer, if pinned */
@@ -1475,13 +1727,19 @@ _bt_steppage(IndexScanDesc scan, BTScanState state, ScanDirection dir)
* Seize the scan to get the current block number; if the scan has
* ended already, bail out.
*/
- status = _bt_parallel_seize(scan, &blkno);
+ status = _bt_parallel_seize(scan, state, &blkno);
BTScanPosUnpinIfPinned(*currPos);
if (!status)
{
BTScanPosInvalidate(*currPos);
return false;
}
+ if (blkno == P_NONE)
+ {
+ _bt_parallel_done(scan, state);
+ BTScanPosInvalidate(*currPos);
+ return false;
+ }
}
else
{
@@ -1531,7 +1789,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
*/
if (blkno == P_NONE || !currPos->moreRight)
{
- _bt_parallel_done(scan);
+ _bt_parallel_done(scan, state);
BTScanPosInvalidate(*currPos);
return false;
}
@@ -1554,14 +1812,14 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
else if (scan->parallel_scan != NULL)
{
/* allow next page be processed by parallel worker */
- _bt_parallel_release(scan, opaque->btpo_next);
+ _bt_parallel_release(scan, state, opaque->btpo_next);
}
/* nope, keep going */
if (scan->parallel_scan != NULL)
{
_bt_relbuf(rel, currPos->buf);
- status = _bt_parallel_seize(scan, &blkno);
+ status = _bt_parallel_seize(scan, state, &blkno);
if (!status)
{
BTScanPosInvalidate(*currPos);
@@ -1620,7 +1878,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
if (!currPos->moreLeft)
{
_bt_relbuf(rel, currPos->buf);
- _bt_parallel_done(scan);
+ _bt_parallel_done(scan, state);
BTScanPosInvalidate(*currPos);
return false;
}
@@ -1631,7 +1889,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
/* if we're physically at end of index, return failure */
if (currPos->buf == InvalidBuffer)
{
- _bt_parallel_done(scan);
+ _bt_parallel_done(scan, state);
BTScanPosInvalidate(*currPos);
return false;
}
@@ -1655,7 +1913,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
else if (scan->parallel_scan != NULL)
{
/* allow next page be processed by parallel worker */
- _bt_parallel_release(scan, BufferGetBlockNumber(currPos->buf));
+ _bt_parallel_release(scan, state, BufferGetBlockNumber(currPos->buf));
}
/*
@@ -1667,7 +1925,7 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
if (scan->parallel_scan != NULL)
{
_bt_relbuf(rel, currPos->buf);
- status = _bt_parallel_seize(scan, &blkno);
+ status = _bt_parallel_seize(scan, state, &blkno);
if (!status)
{
BTScanPosInvalidate(*currPos);
@@ -1688,17 +1946,16 @@ _bt_readnextpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
* indicate success.
*/
static bool
-_bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
+_bt_parallel_readpage(IndexScanDesc scan, BTScanState state, BlockNumber blkno,
+ ScanDirection dir)
{
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ _bt_initialize_more_data(state, dir);
- _bt_initialize_more_data(&so->state, dir);
-
- if (!_bt_readnextpage(scan, &so->state, blkno, dir))
+ if (!_bt_readnextpage(scan, state, blkno, dir))
return false;
/* Drop the lock, and maybe the pin, on the current page */
- _bt_drop_lock_and_maybe_pin(scan, &so->state.currPos);
+ _bt_drop_lock_and_maybe_pin(scan, &state->currPos);
return true;
}
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 9bf453c..cd81490 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -20,16 +20,21 @@
#include "access/nbtree.h"
#include "access/reloptions.h"
#include "access/relscan.h"
+#include "catalog/pg_amop.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
+#include "utils/syscache.h"
typedef struct BTSortArrayContext
{
FmgrInfo flinfo;
+ FmgrInfo distflinfo;
+ FmgrInfo distcmpflinfo;
+ ScanKey distkey;
Oid collation;
bool reverse;
} BTSortArrayContext;
@@ -49,6 +54,9 @@ static void _bt_mark_scankey_required(ScanKey skey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, TupleDesc tupdesc,
ScanDirection dir, bool *continuescan);
+static void _bt_get_distance_cmp_proc(ScanKey distkey, Oid opfamily, Oid leftargtype,
+ FmgrInfo *finfo, int16 *typlen, bool *typbyval);
+
/*
@@ -445,6 +453,7 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
{
Relation rel = scan->indexRelation;
Oid elemtype;
+ Oid opfamily;
RegProcedure cmp_proc;
BTSortArrayContext cxt;
int last_non_dup;
@@ -462,6 +471,53 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
if (elemtype == InvalidOid)
elemtype = rel->rd_opcintype[skey->sk_attno - 1];
+ opfamily = rel->rd_opfamily[skey->sk_attno - 1];
+
+ if (scan->numberOfOrderBys <= 0)
+ {
+ cxt.distkey = NULL;
+ cxt.reverse = reverse;
+ }
+ else
+ {
+ /* Init procedures for distance calculation and comparison. */
+ ScanKey distkey = &scan->orderByData[0];
+ ScanKeyData distkey2;
+ Oid disttype = distkey->sk_subtype;
+ Oid distopr;
+ RegProcedure distproc;
+
+ if (!OidIsValid(disttype))
+ disttype = rel->rd_opcintype[skey->sk_attno - 1];
+
+ /* Lookup distance operator in index column's operator family. */
+ distopr = get_opfamily_member(opfamily,
+ elemtype,
+ disttype,
+ distkey->sk_strategy);
+
+ if (!OidIsValid(distopr))
+ elog(ERROR, "missing operator (%u,%u) for strategy %d in opfamily %u",
+ elemtype, disttype, BtreeKNNSearchStrategyNumber, opfamily);
+
+ distproc = get_opcode(distopr);
+
+ if (!RegProcedureIsValid(distproc))
+ elog(ERROR, "missing code for operator %u", distopr);
+
+ fmgr_info(distproc, &cxt.distflinfo);
+
+ distkey2 = *distkey;
+ fmgr_info_copy(&distkey2.sk_func, &cxt.distflinfo, CurrentMemoryContext);
+ distkey2.sk_subtype = disttype;
+
+ _bt_get_distance_cmp_proc(&distkey2, opfamily, elemtype,
+ &cxt.distcmpflinfo, NULL, NULL);
+
+ cxt.distkey = distkey;
+ cxt.reverse = false; /* supported only ascending ordering */
+ }
+
/*
* Look up the appropriate comparison function in the opfamily.
*
@@ -470,19 +526,17 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
* non-cross-type support functions for any datatype that it supports at
* all.
*/
- cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
+ cmp_proc = get_opfamily_proc(opfamily,
elemtype,
elemtype,
BTORDER_PROC);
if (!RegProcedureIsValid(cmp_proc))
elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
- BTORDER_PROC, elemtype, elemtype,
- rel->rd_opfamily[skey->sk_attno - 1]);
+ BTORDER_PROC, elemtype, elemtype, opfamily);
/* Sort the array elements */
fmgr_info(cmp_proc, &cxt.flinfo);
cxt.collation = skey->sk_collation;
- cxt.reverse = reverse;
qsort_arg((void *) elems, nelems, sizeof(Datum),
_bt_compare_array_elements, (void *) &cxt);
@@ -514,6 +568,23 @@ _bt_compare_array_elements(const void *a, const void *b, void *arg)
BTSortArrayContext *cxt = (BTSortArrayContext *) arg;
int32 compare;
+ if (cxt->distkey)
+ {
+ Datum dista = FunctionCall2Coll(&cxt->distflinfo,
+ cxt->collation,
+ da,
+ cxt->distkey->sk_argument);
+ Datum distb = FunctionCall2Coll(&cxt->distflinfo,
+ cxt->collation,
+ db,
+ cxt->distkey->sk_argument);
+ bool cmp = DatumGetBool(FunctionCall2Coll(&cxt->distcmpflinfo,
+ cxt->collation,
+ dista,
+ distb));
+ return cmp ? -1 : 1;
+ }
+
compare = DatumGetInt32(FunctionCall2Coll(&cxt->flinfo,
cxt->collation,
da, db));
@@ -2075,6 +2146,39 @@ btproperty(Oid index_oid, int attno,
*res = true;
return true;
+ case AMPROP_DISTANCE_ORDERABLE:
+ {
+ Oid opclass,
+ opfamily,
+ opcindtype;
+
+ /* answer only for columns, not AM or whole index */
+ if (attno == 0)
+ return false;
+
+ opclass = get_index_column_opclass(index_oid, attno);
+
+ if (!OidIsValid(opclass))
+ {
+ *res = false; /* non-key attribute */
+ return true;
+ }
+
+ if (!get_opclass_opfamily_and_input_type(opclass,
+ &opfamily, &opcindtype))
+ {
+ *isnull = true;
+ return true;
+ }
+
+ *res = SearchSysCacheExists(AMOPSTRATEGY,
+ ObjectIdGetDatum(opfamily),
+ ObjectIdGetDatum(opcindtype),
+ ObjectIdGetDatum(opcindtype),
+ Int16GetDatum(BtreeKNNSearchStrategyNumber));
+ return true;
+ }
+
default:
return false; /* punt to generic code */
}
@@ -2223,3 +2327,264 @@ _bt_allocate_tuple_workspaces(BTScanState state)
state->currTuples = (char *) palloc(BLCKSZ * 2);
state->markTuples = state->currTuples + BLCKSZ;
}
+
+static bool
+_bt_compare_row_key_with_ordering_key(ScanKey row, ScanKey ord, bool *result)
+{
+ ScanKey subkey = (ScanKey) DatumGetPointer(row->sk_argument);
+ int32 cmpresult;
+
+ Assert(subkey->sk_attno == 1);
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+
+ if (subkey->sk_flags & SK_ISNULL)
+ return false;
+
+ /* Perform the test --- three-way comparison not bool operator */
+ cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
+ subkey->sk_collation,
+ ord->sk_argument,
+ subkey->sk_argument));
+
+ if (subkey->sk_flags & SK_BT_DESC)
+ cmpresult = -cmpresult;
+
+ /*
+ * At this point cmpresult indicates the overall result of the row
+ * comparison, and subkey points to the deciding column (or the last
+ * column if the result is "=").
+ */
+ switch (subkey->sk_strategy)
+ {
+ /* EQ and NE cases aren't allowed here */
+ case BTLessStrategyNumber:
+ *result = cmpresult < 0;
+ break;
+ case BTLessEqualStrategyNumber:
+ *result = cmpresult <= 0;
+ break;
+ case BTGreaterEqualStrategyNumber:
+ *result = cmpresult >= 0;
+ break;
+ case BTGreaterStrategyNumber:
+ *result = cmpresult > 0;
+ break;
+ default:
+ elog(ERROR, "unrecognized RowCompareType: %d",
+ (int) subkey->sk_strategy);
+ *result = false; /* keep compiler quiet */
+ }
+
+ return true;
+}
+
+/* _bt_select_knn_search_strategy() -- Determine which KNN scan strategy to use:
+ * bidirectional or unidirectional. We are checking here if the
+ * ordering scankey argument falls into the scan range: if it falls
+ * we must use bidirectional scan, otherwise we use unidirectional.
+ *
+ * Returns BtreeKNNSearchStrategyNumber for bidirectional scan or
+ * strategy number of non-matched scankey for unidirectional.
+ */
+static StrategyNumber
+_bt_select_knn_search_strategy(IndexScanDesc scan, ScanKey ord)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ ScanKey cond;
+
+ for (cond = so->keyData; cond < so->keyData + so->numberOfKeys; cond++)
+ {
+ bool result;
+
+ if (cond->sk_attno != 1)
+ break; /* only interesting in the first index attribute */
+
+ if (cond->sk_strategy == BTEqualStrategyNumber)
+ /* always use simple unidirectional scan for equals operators */
+ return BTEqualStrategyNumber;
+
+ if (cond->sk_flags & SK_ROW_HEADER)
+ {
+ if (!_bt_compare_row_key_with_ordering_key(cond, ord, &result))
+ return BTEqualStrategyNumber; /* ROW(fist_index_attr, ...) IS NULL */
+ }
+ else
+ {
+ if (!_bt_compare_scankey_args(scan, cond, ord, cond, &result))
+ elog(ERROR, "could not compare ordering key");
+ }
+
+ if (!result)
+ /*
+ * Ordering scankey argument is out of scan range,
+ * use unidirectional scan.
+ */
+ return cond->sk_strategy;
+ }
+
+ return BtreeKNNSearchStrategyNumber; /* use bidirectional scan */
+}
+
+static Oid
+_bt_get_sortfamily_for_opfamily_op(Oid opfamily, Oid lefttype, Oid righttype,
+ StrategyNumber strategy)
+{
+ HeapTuple tp;
+ Form_pg_amop amop_tup;
+ Oid sortfamily;
+
+ tp = SearchSysCache4(AMOPSTRATEGY,
+ ObjectIdGetDatum(opfamily),
+ ObjectIdGetDatum(lefttype),
+ ObjectIdGetDatum(righttype),
+ Int16GetDatum(strategy));
+ if (!HeapTupleIsValid(tp))
+ return InvalidOid;
+ amop_tup = (Form_pg_amop) GETSTRUCT(tp);
+ sortfamily = amop_tup->amopsortfamily;
+ ReleaseSysCache(tp);
+
+ return sortfamily;
+}
+
+/*
+ * _bt_get_distance_cmp_proc() -- Init procedure for comparsion of distances
+ * between "leftargtype" and "distkey".
+ */
+static void
+_bt_get_distance_cmp_proc(ScanKey distkey, Oid opfamily, Oid leftargtype,
+ FmgrInfo *finfo, int16 *typlen, bool *typbyval)
+{
+ RegProcedure opcode;
+ Oid sortfamily;
+ Oid opno;
+ Oid distanceType;
+
+ distanceType = get_func_rettype(distkey->sk_func.fn_oid);
+
+ sortfamily = _bt_get_sortfamily_for_opfamily_op(opfamily, leftargtype,
+ distkey->sk_subtype,
+ distkey->sk_strategy);
+
+ if (!OidIsValid(sortfamily))
+ elog(ERROR, "could not find sort family for btree ordering operator");
+
+ opno = get_opfamily_member(sortfamily,
+ distanceType,
+ distanceType,
+ BTLessEqualStrategyNumber);
+
+ if (!OidIsValid(opno))
+ elog(ERROR, "could not find operator for btree distance comparison");
+
+ opcode = get_opcode(opno);
+
+ if (!RegProcedureIsValid(opcode))
+ elog(ERROR,
+ "could not find procedure for btree distance comparison operator");
+
+ fmgr_info(opcode, finfo);
+
+ if (typlen)
+ get_typlenbyval(distanceType, typlen, typbyval);
+}
+
+/*
+ * _bt_init_distance_comparison() -- Init distance typlen/typbyval and its
+ * comparison procedure.
+ */
+static void
+_bt_init_distance_comparison(IndexScanDesc scan, ScanKey ord)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ Relation rel = scan->indexRelation;
+
+ _bt_get_distance_cmp_proc(ord,
+ rel->rd_opfamily[ord->sk_attno - 1],
+ rel->rd_opcintype[ord->sk_attno - 1],
+ &so->distanceCmpProc,
+ &so->distanceTypeLen,
+ &so->distanceTypeByVal);
+
+ if (!so->distanceTypeByVal)
+ {
+ so->state.currDistance = PointerGetDatum(NULL);
+ so->state.markDistance = PointerGetDatum(NULL);
+ }
+}
+
+/*
+ * _bt_process_orderings() -- Process ORDER BY distance scankeys and
+ * select corresponding KNN strategy.
+ *
+ * If bidirectional scan is selected then one scankey is initialized
+ * using bufKeys and placed into startKeys/keysCount, true is returned.
+ *
+ * Otherwise, so->scanDirection is set and false is returned.
+ */
+bool
+_bt_process_orderings(IndexScanDesc scan, ScanKey *startKeys, int *keysCount,
+ ScanKeyData bufKeys[])
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ ScanKey ord = scan->orderByData;
+
+ if (scan->numberOfOrderBys > 1 || ord->sk_attno != 1)
+ /* it should not happen, see btmatchorderby() */
+ elog(ERROR, "only one btree ordering operator "
+ "for the first index column is supported");
+
+ Assert(ord->sk_strategy == BtreeKNNSearchStrategyNumber);
+
+ switch (_bt_select_knn_search_strategy(scan, ord))
+ {
+ case BTLessStrategyNumber:
+ case BTLessEqualStrategyNumber:
+ /*
+ * Ordering key argument is greater than all values in scan range.
+ * select backward scan direction.
+ */
+ so->scanDirection = BackwardScanDirection;
+ return false;
+
+ case BTEqualStrategyNumber:
+ /* Use default unidirectional scan direction. */
+ return false;
+
+ case BTGreaterEqualStrategyNumber:
+ case BTGreaterStrategyNumber:
+ /*
+ * Ordering key argument is lesser than all values in scan range.
+ * select forward scan direction.
+ */
+ so->scanDirection = ForwardScanDirection;
+ return false;
+
+ case BtreeKNNSearchStrategyNumber:
+ /*
+ * Ordering key argument falls into scan range,
+ * use bidirectional scan.
+ */
+ break;
+ }
+
+ _bt_init_distance_comparison(scan, ord);
+
+ /* Init btree search key with ordering key argument. */
+ ScanKeyEntryInitialize(&bufKeys[0],
+ (scan->indexRelation->rd_indoption[ord->sk_attno - 1] <<
+ SK_BT_INDOPTION_SHIFT) |
+ SK_ORDER_BY |
+ SK_SEARCHNULL /* only for invalid procedure oid, see
+ * assert in ScanKeyEntryInitialize */,
+ ord->sk_attno,
+ BtreeKNNSearchStrategyNumber,
+ ord->sk_subtype,
+ ord->sk_collation,
+ InvalidOid,
+ ord->sk_argument);
+
+ startKeys[(*keysCount)++] = &bufKeys[0];
+
+ return true;
+}
diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c
index f24091c..be4e843 100644
--- a/src/backend/access/nbtree/nbtvalidate.c
+++ b/src/backend/access/nbtree/nbtvalidate.c
@@ -22,9 +22,17 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
+#include "utils/lsyscache.h"
#include "utils/regproc.h"
#include "utils/syscache.h"
+#define BTRequiredOperatorSet \
+ ((1 << BTLessStrategyNumber) | \
+ (1 << BTLessEqualStrategyNumber) | \
+ (1 << BTEqualStrategyNumber) | \
+ (1 << BTGreaterEqualStrategyNumber) | \
+ (1 << BTGreaterStrategyNumber))
+
/*
* Validator for a btree opclass.
@@ -132,10 +140,11 @@ btvalidate(Oid opclassoid)
{
HeapTuple oprtup = &oprlist->members[i]->tuple;
Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
+ Oid op_rettype;
/* Check that only allowed strategy numbers exist */
if (oprform->amopstrategy < 1 ||
- oprform->amopstrategy > BTMaxStrategyNumber)
+ oprform->amopstrategy > BtreeMaxStrategyNumber)
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -146,20 +155,29 @@ btvalidate(Oid opclassoid)
result = false;
}
- /* btree doesn't support ORDER BY operators */
- if (oprform->amoppurpose != AMOP_SEARCH ||
- OidIsValid(oprform->amopsortfamily))
+ /* btree supports ORDER BY operators */
+ if (oprform->amoppurpose != AMOP_SEARCH)
{
- ereport(INFO,
- (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
- errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
- opfamilyname, "btree",
- format_operator(oprform->amopopr))));
- result = false;
+ /* ... and operator result must match the claimed btree opfamily */
+ op_rettype = get_op_rettype(oprform->amopopr);
+ if (!opfamily_can_sort_type(oprform->amopsortfamily, op_rettype))
+ {
+ ereport(INFO,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
+ opfamilyname, "btree",
+ format_operator(oprform->amopopr))));
+ result = false;
+ }
+ }
+ else
+ {
+ /* Search operators must always return bool */
+ op_rettype = BOOLOID;
}
/* Check operator signature --- same for all btree strategies */
- if (!check_amop_signature(oprform->amopopr, BOOLOID,
+ if (!check_amop_signature(oprform->amopopr, op_rettype,
oprform->amoplefttype,
oprform->amoprighttype))
{
@@ -214,12 +232,8 @@ btvalidate(Oid opclassoid)
* or support functions for this datatype pair. The only things
* considered optional are the sortsupport and in_range functions.
*/
- if (thisgroup->operatorset !=
- ((1 << BTLessStrategyNumber) |
- (1 << BTLessEqualStrategyNumber) |
- (1 << BTEqualStrategyNumber) |
- (1 << BTGreaterEqualStrategyNumber) |
- (1 << BTGreaterStrategyNumber)))
+ if ((thisgroup->operatorset & BTRequiredOperatorSet) !=
+ BTRequiredOperatorSet)
{
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index e043664..7c9b97b 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -988,6 +988,10 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
* if we are only trying to build bitmap indexscans, nor if we have to
* assume the scan is unordered.
*/
+ useful_pathkeys = NIL;
+ orderbyclauses = NIL;
+ orderbyclausecols = NIL;
+
pathkeys_possibly_useful = (scantype != ST_BITMAPSCAN &&
!found_lower_saop_clause &&
has_useful_pathkeys(root, rel));
@@ -998,10 +1002,10 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
ForwardScanDirection);
useful_pathkeys = truncate_useless_pathkeys(root, rel,
index_pathkeys);
- orderbyclauses = NIL;
- orderbyclausecols = NIL;
}
- else if (index->ammatchorderby && pathkeys_possibly_useful)
+
+ if (useful_pathkeys == NIL &&
+ index->ammatchorderby && pathkeys_possibly_useful)
{
/* see if we can generate ordering operators for query_pathkeys */
match_pathkeys_to_index(index, root->query_pathkeys,
@@ -1012,12 +1016,6 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
else
useful_pathkeys = NIL;
}
- else
- {
- useful_pathkeys = NIL;
- orderbyclauses = NIL;
- orderbyclausecols = NIL;
- }
/*
* 3. Check if an index-only scan is possible. If we're not building
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 388b311..9c8a384 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -460,6 +460,12 @@ typedef struct BTScanStateData
/* keep these last in struct for efficiency */
BTScanPosData currPos; /* current position data */
BTScanPosData markPos; /* marked position, if any */
+
+ /* KNN-search fields: */
+ Datum currDistance; /* current distance */
+ Datum markDistance; /* marked distance */
+ bool currIsNull; /* current item is NULL */
+ bool markIsNull; /* marked item is NULL */
} BTScanStateData, *BTScanState;
typedef struct BTScanOpaqueData
@@ -478,7 +484,17 @@ typedef struct BTScanOpaqueData
BTArrayKeyInfo *arrayKeys; /* info about each equality-type array key */
MemoryContext arrayContext; /* scan-lifespan context for array data */
- BTScanStateData state;
+ BTScanStateData state; /* main scan state */
+
+ /* KNN-search fields: */
+ BTScanState knnState; /* optional scan state for KNN search */
+ ScanDirection scanDirection; /* selected scan direction for
+ * unidirectional KNN scan */
+ FmgrInfo distanceCmpProc; /* distance comparison procedure */
+ int16 distanceTypeLen; /* distance typlen */
+ bool distanceTypeByVal; /* distance typebyval */
+ bool currRightIsNearest; /* current right item is nearest */
+ bool markRightIsNearest; /* marked right item is nearest */
} BTScanOpaqueData;
typedef BTScanOpaqueData *BTScanOpaque;
@@ -524,11 +540,12 @@ extern bool btcanreturn(Relation index, int attno);
/*
* prototypes for internal functions in nbtree.c
*/
-extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno);
-extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page);
-extern void _bt_parallel_done(IndexScanDesc scan);
+extern bool _bt_parallel_seize(IndexScanDesc scan, BTScanState state, BlockNumber *pageno);
+extern void _bt_parallel_release(IndexScanDesc scan, BTScanState state, BlockNumber scan_page);
+extern void _bt_parallel_done(IndexScanDesc scan, BTScanState state);
extern void _bt_parallel_advance_array_keys(IndexScanDesc scan);
+
/*
* prototypes for functions in nbtinsert.c
*/
@@ -609,6 +626,8 @@ extern bool btproperty(Oid index_oid, int attno,
extern IndexTuple _bt_nonkey_truncate(Relation rel, IndexTuple itup);
extern bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum);
extern void _bt_allocate_tuple_workspaces(BTScanState state);
+extern bool _bt_process_orderings(IndexScanDesc scan,
+ ScanKey *startKeys, int *keysCount, ScanKeyData bufKeys[]);
/*
* prototypes for functions in nbtvalidate.c
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index 0db11a1..f44b41a 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -32,7 +32,10 @@ typedef uint16 StrategyNumber;
#define BTGreaterEqualStrategyNumber 4
#define BTGreaterStrategyNumber 5
-#define BTMaxStrategyNumber 5
+#define BTMaxStrategyNumber 5 /* number of canonical B-tree strategies */
+
+#define BtreeKNNSearchStrategyNumber 6 /* for <-> (distance) */
+#define BtreeMaxStrategyNumber 6 /* number of extended B-tree strategies */
/*
diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out
index 6faa9d7..c75ef39 100644
--- a/src/test/regress/expected/alter_generic.out
+++ b/src/test/regress/expected/alter_generic.out
@@ -347,10 +347,10 @@ ROLLBACK;
CREATE OPERATOR FAMILY alt_opf4 USING btree;
ALTER OPERATOR FAMILY alt_opf4 USING invalid_index_method ADD OPERATOR 1 < (int4, int2); -- invalid indexing_method
ERROR: access method "invalid_index_method" does not exist
-ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 6 < (int4, int2); -- operator number should be between 1 and 5
-ERROR: invalid operator number 6, must be between 1 and 5
-ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 5
-ERROR: invalid operator number 0, must be between 1 and 5
+ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 7 < (int4, int2); -- operator number should be between 1 and 6
+ERROR: invalid operator number 7, must be between 1 and 6
+ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 6
+ERROR: invalid operator number 0, must be between 1 and 6
ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types
ERROR: operator argument types must be specified in ALTER OPERATOR FAMILY
ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- function number should be between 1 and 5
@@ -397,11 +397,12 @@ DROP OPERATOR FAMILY alt_opf8 USING btree;
CREATE OPERATOR FAMILY alt_opf9 USING gist;
ALTER OPERATOR FAMILY alt_opf9 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops;
DROP OPERATOR FAMILY alt_opf9 USING gist;
--- Should fail. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY
+-- Should work. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY
+BEGIN TRANSACTION;
CREATE OPERATOR FAMILY alt_opf10 USING btree;
ALTER OPERATOR FAMILY alt_opf10 USING btree ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops;
-ERROR: access method "btree" does not support ordering operators
DROP OPERATOR FAMILY alt_opf10 USING btree;
+ROLLBACK;
-- Should work. Textbook case of ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR ORDER BY
CREATE OPERATOR FAMILY alt_opf11 USING gist;
ALTER OPERATOR FAMILY alt_opf11 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops;
diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql
index 84fd900..73e6e206 100644
--- a/src/test/regress/sql/alter_generic.sql
+++ b/src/test/regress/sql/alter_generic.sql
@@ -295,8 +295,8 @@ ROLLBACK;
-- Should fail. Invalid values for ALTER OPERATOR FAMILY .. ADD / DROP
CREATE OPERATOR FAMILY alt_opf4 USING btree;
ALTER OPERATOR FAMILY alt_opf4 USING invalid_index_method ADD OPERATOR 1 < (int4, int2); -- invalid indexing_method
-ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 6 < (int4, int2); -- operator number should be between 1 and 5
-ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 5
+ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 7 < (int4, int2); -- operator number should be between 1 and 6
+ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 0 < (int4, int2); -- operator number should be between 1 and 6
ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types
ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- function number should be between 1 and 5
ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 6 btint42cmp(int4, int2); -- function number should be between 1 and 5
@@ -340,10 +340,12 @@ CREATE OPERATOR FAMILY alt_opf9 USING gist;
ALTER OPERATOR FAMILY alt_opf9 USING gist ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops;
DROP OPERATOR FAMILY alt_opf9 USING gist;
--- Should fail. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY
+-- Should work. Ensure correct ordering methods in ALTER OPERATOR FAMILY ... ADD OPERATOR .. FOR ORDER BY
+BEGIN TRANSACTION;
CREATE OPERATOR FAMILY alt_opf10 USING btree;
ALTER OPERATOR FAMILY alt_opf10 USING btree ADD OPERATOR 1 < (int4, int4) FOR ORDER BY float_ops;
DROP OPERATOR FAMILY alt_opf10 USING btree;
+ROLLBACK;
-- Should work. Textbook case of ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR ORDER BY
CREATE OPERATOR FAMILY alt_opf11 USING gist;