diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index 2d656168fc..24a976297c 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -178,129 +178,31 @@ typedef struct bool *need_recheck; } GinChkVal; -static GinTernaryValue -checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data) +static TSTernaryValue +checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) { int j; - - /* - * if any val requiring a weight is used or caller needs position - * information then set recheck flag - */ - if (val->weight != 0 || data != NULL) - *(gcv->need_recheck) = true; + GinChkVal *gcv = (GinChkVal *) checkval; /* convert item's number to corresponding entry's (operand's) number */ j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; /* return presence of current entry in indexed value */ - return gcv->check[j]; -} + if (gcv->check[j] == TS_NO) + return TS_NO; -/* - * Wrapper of check condition function for TS_execute. - */ -static bool -checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) -{ - return checkcondition_gin_internal((GinChkVal *) checkval, - val, - data) != GIN_FALSE; + /* + * if any val requiring a weight is used or caller needs position + * information then return MAYBE for later recheck + */ + if (val->weight != 0 || data != NULL) + return TS_MAYBE; + return TS_YES; } -/* - * Evaluate tsquery boolean expression using ternary logic. - * - * Note: the reason we can't use TS_execute() for this is that its API - * for the checkcondition callback doesn't allow a MAYBE result to be - * returned, but we might have MAYBEs in the gcv->check array. - * Perhaps we should change that API. 
+/* Does the same as gin_tsquery_triconsistent() but uses bool check values + * and also converts output to bool */ -static GinTernaryValue -TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase) -{ - GinTernaryValue val1, - val2, - result; - - /* since this function recurses, it could be driven to stack overflow */ - check_stack_depth(); - - if (curitem->type == QI_VAL) - return - checkcondition_gin_internal(gcv, - (QueryOperand *) curitem, - NULL /* don't have position info */ ); - - switch (curitem->qoperator.oper) - { - case OP_NOT: - - /* - * Below a phrase search, force NOT's result to MAYBE. We cannot - * invert a TRUE result from the subexpression to FALSE, since - * TRUE only says that the subexpression matches somewhere, not - * that it matches everywhere, so there might be positions where - * the NOT will match. We could invert FALSE to TRUE, but there's - * little point in distinguishing TRUE from MAYBE, since a recheck - * will have been forced already. - */ - if (in_phrase) - return GIN_MAYBE; - - result = TS_execute_ternary(gcv, curitem + 1, in_phrase); - if (result == GIN_MAYBE) - return result; - return !result; - - case OP_PHRASE: - - /* - * GIN doesn't contain any information about positions, so treat - * OP_PHRASE as OP_AND with recheck requirement, and always - * reporting MAYBE not TRUE. 
- */ - *(gcv->need_recheck) = true; - /* Pass down in_phrase == true in case there's a NOT below */ - in_phrase = true; - - /* FALL THRU */ - - case OP_AND: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, - in_phrase); - if (val1 == GIN_FALSE) - return GIN_FALSE; - val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); - if (val2 == GIN_FALSE) - return GIN_FALSE; - if (val1 == GIN_TRUE && val2 == GIN_TRUE && - curitem->qoperator.oper != OP_PHRASE) - return GIN_TRUE; - else - return GIN_MAYBE; - - case OP_OR: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, - in_phrase); - if (val1 == GIN_TRUE) - return GIN_TRUE; - val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); - if (val2 == GIN_TRUE) - return GIN_TRUE; - if (val1 == GIN_FALSE && val2 == GIN_FALSE) - return GIN_FALSE; - else - return GIN_MAYBE; - - default: - elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); - } - - /* not reachable, but keep compiler quiet */ - return false; -} - Datum gin_tsquery_consistent(PG_FUNCTION_ARGS) { @@ -312,11 +214,9 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) /* int32 nkeys = PG_GETARG_INT32(3); */ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); bool *recheck = (bool *) PG_GETARG_POINTER(5); - bool res = false; /* Initially assume query doesn't require recheck */ *recheck = false; - if (query->size > 0) { GinChkVal gcv; @@ -330,15 +230,22 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) "sizes of GinTernaryValue and bool are not equal"); gcv.check = (GinTernaryValue *) check; gcv.map_item_operand = (int *) (extra_data[0]); - gcv.need_recheck = recheck; - res = TS_execute(GETQUERY(query), - &gcv, - TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, - checkcondition_gin); + switch (TS_execute_recurse(GETQUERY(query), + &gcv, + TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT, + checkcondition_gin)) + { + case TS_MAYBE: + *recheck = true; + PG_RETURN_BOOL(true); + case TS_YES: + PG_RETURN_BOOL(true); + case TS_NO: + 
PG_RETURN_BOOL(false); + } } - - PG_RETURN_BOOL(res); + PG_RETURN_BOOL(false); } Datum @@ -352,10 +259,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) /* int32 nkeys = PG_GETARG_INT32(3); */ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); GinTernaryValue res = GIN_FALSE; - bool recheck; + + /* bool recheck; */ /* Initially assume query doesn't require recheck */ - recheck = false; + /* recheck = false; */ if (query->size > 0) { @@ -368,14 +276,13 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) gcv.first_item = GETQUERY(query); gcv.check = check; gcv.map_item_operand = (int *) (extra_data[0]); - gcv.need_recheck = &recheck; - res = TS_execute_ternary(&gcv, GETQUERY(query), false); + res = (GinTernaryValue) TS_execute_recurse(GETQUERY(query), + &gcv, + TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT, + checkcondition_gin); - if (res == GIN_TRUE && recheck) - res = GIN_MAYBE; } - PG_RETURN_GIN_TERNARY_VALUE(res); } diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index c3f25800e7..79c0053eb2 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -275,7 +275,7 @@ typedef struct /* * is there value 'val' in array or not ? 
*/ -static bool +static TSTernaryValue checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) { int32 *StopLow = ((CHKVAL *) checkval)->arrb; @@ -288,23 +288,22 @@ checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data) * we are not able to find a prefix by hash value */ if (val->prefix) - return true; + return TS_MAYBE; while (StopLow < StopHigh) { StopMiddle = StopLow + (StopHigh - StopLow) / 2; if (*StopMiddle == val->valcrc) - return true; + return TS_MAYBE; else if (*StopMiddle < val->valcrc) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } - - return false; + return TS_NO; } -static bool +static TSTernaryValue checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) { void *key = (SignTSVector *) checkval; @@ -313,8 +312,8 @@ checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data) * we are not able to find a prefix in signature tree */ if (val->prefix) - return true; - return GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key))); + return TS_MAYBE; + return GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key))) ? 
TS_MAYBE : TS_NO; } Datum @@ -327,6 +326,7 @@ gtsvector_consistent(PG_FUNCTION_ARGS) /* Oid subtype = PG_GETARG_OID(3); */ bool *recheck = (bool *) PG_GETARG_POINTER(4); SignTSVector *key = (SignTSVector *) DatumGetPointer(entry->key); + TSTernaryValue res; /* All cases served by this function are inexact */ *recheck = true; @@ -340,10 +340,10 @@ gtsvector_consistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); /* since signature is lossy, cannot specify CALC_NOT here */ - PG_RETURN_BOOL(TS_execute(GETQUERY(query), - key, - TS_EXEC_PHRASE_NO_POS, - checkcondition_bit)); + res = TS_execute_recurse(GETQUERY(query), + key, + TS_EXEC_PHRASE_NO_POS, + checkcondition_bit); } else { /* only leaf pages */ @@ -351,11 +351,12 @@ gtsvector_consistent(PG_FUNCTION_ARGS) chkval.arrb = GETARR(key); chkval.arre = chkval.arrb + ARRNELEM(key); - PG_RETURN_BOOL(TS_execute(GETQUERY(query), - (void *) &chkval, - TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT, - checkcondition_arr)); + res = TS_execute_recurse(GETQUERY(query), + (void *) &chkval, + TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT, + checkcondition_arr); } + PG_RETURN_BOOL(res != TS_NO); } static int32 diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 51619c396c..5914114846 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -67,18 +67,12 @@ typedef struct StatEntry *root; } TSVectorStat; -/* TS_execute requires ternary logic to handle NOT with phrase matches */ -typedef enum +typedef struct TSExecuteCallbackContext { - TS_NO, /* definitely no match */ - TS_YES, /* definitely does match */ - TS_MAYBE /* can't verify match for lack of pos data */ -} TSTernaryValue; + TSExecuteCallback chkcond; + void *arg; +} TSExecuteCallbackContext; - -static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, - uint32 flags, - TSExecuteCallback chkcond); static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len); static Datum 
tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column); @@ -1546,7 +1540,7 @@ TS_phrase_output(ExecPhraseData *data, */ static TSTernaryValue TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, - TSExecuteCallback chkcond, + TSExecuteCallbackTernary chkcond, ExecPhraseData *data) { ExecPhraseData Ldata, @@ -1564,10 +1558,12 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, { if (!chkcond(arg, (QueryOperand *) curitem, data)) return TS_NO; + if (data->npos > 0 || data->negate) return TS_YES; /* If we have no position data, we must return TS_MAYBE */ return TS_MAYBE; + } switch (curitem->qoperator.oper) @@ -1591,6 +1587,8 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, /* change "match nowhere" to "match everywhere" */ Assert(data->npos == 0 && !data->negate); data->negate = true; + + return TS_YES; case TS_YES: if (data->npos > 0) @@ -1784,9 +1782,20 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, return TS_NO; } +/* Wrapper for checkcondition functions which use legacy boolean callback */ +static TSTernaryValue +chkcond_bool(void *arg, QueryOperand *val, ExecPhraseData *data) +{ + TSExecuteCallbackContext *cxt = arg; + + return cxt->chkcond(cxt->arg, val, data) ? TS_YES : TS_NO; +} /* - * Evaluate tsquery boolean expression. + * Evaluate tsquery boolean expression. It is for backward compatibility. + * Actually it is now a wrapper for callers that prefer bool result + * and legacy bool callback TSExecuteCallback. For new code it is + * recommended to call ternary TS_execute_recurse instead. * * curitem: current tsquery item (initially, the first one) * arg: opaque value to pass through to callback function @@ -1800,9 +1809,15 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags, /* * If we get TS_MAYBE from the recursion, return true. We could only see * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no - * need to check again. + * need to check again. 
This function is compatible with legacy + * TSExecuteCallback callback function, which is of boolean type. It is + * wrapped into cxt and transferred from root to leaves of + * TS_execute_recurse recursion where the call is compatible with new + * TSExecuteCallbackTernary ternary type. */ - return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO; + TSExecuteCallbackContext cxt = {chkcond, arg}; + + return (TS_execute_recurse(curitem, &cxt, flags, chkcond_bool) != TS_NO); } /* @@ -1810,22 +1825,25 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags, * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE * operator, we pass it off to TS_phrase_execute which does worry. */ -static TSTernaryValue +TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, - TSExecuteCallback chkcond) + TSExecuteCallbackTernary chkcond) { TSTernaryValue lmatch; + TSTernaryValue rmatch; /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); if (curitem->type == QI_VAL) return chkcond(arg, (QueryOperand *) curitem, - NULL /* don't need position info */ ) ? TS_YES : TS_NO; + NULL /* don't need position info */ ); switch (curitem->qoperator.oper) { case OP_NOT: + if (flags & TS_EXEC_IN_PHRASE) + return TS_MAYBE; if (!(flags & TS_EXEC_CALC_NOT)) return TS_YES; switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond)) @@ -1839,17 +1857,48 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, } break; + case OP_PHRASE: + + /* + * If we get a MAYBE result, and the caller doesn't want that, + * convert it to NO. It would be more consistent, perhaps, to + * return the result of TS_phrase_execute() verbatim and then + * convert MAYBE results at the top of the recursion. But + * converting at the topmost phrase operator gives results that + * are bug-compatible with the old implementation, so do it like + * this for now. 
+ */ + if (!(flags & TS_EXEC_PHRASE_AS_AND)) + { + switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL)) + { + case TS_NO: + return TS_NO; + case TS_YES: + return TS_YES; + case TS_MAYBE: + return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO; + } + break; + } + flags |= TS_EXEC_IN_PHRASE; + /* FALL THRU */ + case OP_AND: lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg, flags, chkcond); if (lmatch == TS_NO) return TS_NO; - switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond)) + rmatch = TS_execute_recurse(curitem + 1, arg, flags, chkcond); + switch (rmatch) { case TS_NO: return TS_NO; case TS_YES: - return lmatch; + if (lmatch == TS_YES && curitem->qoperator.oper != OP_PHRASE) + return TS_YES; + else + return TS_MAYBE; case TS_MAYBE: return TS_MAYBE; } @@ -1871,27 +1920,6 @@ TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, } break; - case OP_PHRASE: - - /* - * If we get a MAYBE result, and the caller doesn't want that, - * convert it to NO. It would be more consistent, perhaps, to - * return the result of TS_phrase_execute() verbatim and then - * convert MAYBE results at the top of the recursion. But - * converting at the topmost phrase operator gives results that - * are bug-compatible with the old implementation, so do it like - * this for now. - */ - switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL)) - { - case TS_NO: - return TS_NO; - case TS_YES: - return TS_YES; - case TS_MAYBE: - return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO; - } - break; default: elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); @@ -1925,7 +1953,9 @@ tsquery_requires_match(QueryItem *curitem) /* * Assume there are no required matches underneath a NOT. For * some cases with nested NOTs, we could prove there's a required - * match, but it seems unlikely to be worth the trouble. + * match, but it seems unlikely to be worth the trouble. 
If + * operand is single ( !a, but not !( a & b) ) and contains weight + * marks but not all of them, its NOT is also a positive match. */ return false; diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index f78fbd9d1a..3bd19fb24a 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -166,8 +166,33 @@ typedef struct ExecPhraseData * NULL, it should be filled with lexeme positions, but function can leave * it as zeroes if position data is not available. */ +typedef enum +{ + TS_NO, /* definitely no match */ + TS_YES, /* definitely does match */ + TS_MAYBE /* can't verify match for lack of pos data */ +} TSTernaryValue; + +/* Legacy bool callback API is for backward compatibility. For new code + * it is recommended to use ternary callback TSExecuteCallbackTernary + * (see below) + */ typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, ExecPhraseData *data); +/* Legacy bool TS_execute is for backward compatibility. Actually + * it is now a wrapper for callers that prefer bool result and + * legacy bool callback TSExecuteCallback. For new code it is + * recommended to call ternary TS_execute_recurse instead. + */ +extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallback chkcond); + +/* Ternary callback to call tri-state check functions */ +typedef TSTernaryValue (*TSExecuteCallbackTernary) (void *arg, QueryOperand *val, ExecPhraseData *data); +/* TS_execute_recurse implements ternary logic internally: it uses a ternary + * callback for the check function and returns a ternary result. + */ +extern TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallbackTernary chkcond); /* * Flag bits for TS_execute @@ -188,9 +213,22 @@ typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, * false if lexeme position information is not available. 
*/ #define TS_EXEC_PHRASE_NO_POS (0x02) +/* If both TS_EXEC_IN_PHRASE and TS_EXEC_PHRASE_AS_AND are set, then NOT + * operator in any phrase part of a query will immediately give MAYBE result + * without diving into it. This is to avoid redundant operation as the result + * in this type of query should be rechecked anyway. + */ +#define TS_EXEC_IN_PHRASE (0x08) +/* TS_EXEC_PHRASE_AS_AND can be used when calling TS_execute_recurse by + * index which doesn't save positional information. This speeds up phrase + * queries by avoiding calls of TS_phrase_execute which is redundant in + * absence of positional information. Also together with TS_EXEC_IN_PHRASE + * it sets MAYBE result and initiates recheck for any phrase parts of queries + * to index without positional information (see above). + */ +#define TS_EXEC_PHRASE_AS_AND (0x10) + + -extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, - TSExecuteCallback chkcond); extern bool tsquery_requires_match(QueryItem *curitem); /* diff --git a/src/test/regress/expected/gin.out b/src/test/regress/expected/gin.out index 83de5220fb..efa7a64f7b 100644 --- a/src/test/regress/expected/gin.out +++ b/src/test/regress/expected/gin.out @@ -202,3 +202,21 @@ from reset enable_seqscan; reset enable_bitmapscan; drop table t_gin_test_tbl; +CREATE TABLE test_weight (fts tsvector); +INSERT INTO test_weight (fts) values ('crew:1C shuttl:2C discovery:3a'::tsvector); +SET enable_seqscan=on; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); + fts +-------------------------------------- + 'crew':1C 'discovery':3A 'shuttl':2C +(1 row) + +CREATE INDEX setweight_fts_idx ON test_weight USING gin (fts); +SET enable_seqscan=off; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); + fts +-------------------------------------- + 'crew':1C 'discovery':3A 'shuttl':2C +(1 row) + +DROP TABLE test_weight; diff --git 
a/src/test/regress/expected/gist.out b/src/test/regress/expected/gist.out index 90edb4061d..f603c6159b 100644 --- a/src/test/regress/expected/gist.out +++ b/src/test/regress/expected/gist.out @@ -317,3 +317,21 @@ reset enable_seqscan; reset enable_bitmapscan; reset enable_indexonlyscan; drop table gist_tbl; +CREATE TABLE test_weight (fts tsvector); +INSERT INTO test_weight (fts) values ('crew:1C shuttl:2C discovery:3a'::tsvector); +SET enable_seqscan=on; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); + fts +-------------------------------------- + 'crew':1C 'discovery':3A 'shuttl':2C +(1 row) + +CREATE INDEX setweight_fts_idx ON test_weight USING gist (fts); +SET enable_seqscan=off; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); + fts +-------------------------------------- + 'crew':1C 'discovery':3A 'shuttl':2C +(1 row) + +DROP TABLE test_weight; diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 7105c67cf8..0110b4d2e0 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -176,6 +176,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; 507 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; + count +------- + 56 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; + count +------- + 58 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; + count +------- + 452 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; + count +------- + 450 +(1 row) + create index wowidx on test_tsvector using gist (a); SET enable_seqscan=OFF; SET enable_indexscan=ON; @@ -308,6 +332,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; 507 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; + count +------- + 56 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; + 
count +------- + 58 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; + count +------- + 452 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; + count +------- + 450 +(1 row) + SET enable_indexscan=OFF; SET enable_bitmapscan=ON; explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; @@ -440,6 +488,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; 507 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; + count +------- + 56 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; + count +------- + 58 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; + count +------- + 452 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; + count +------- + 450 +(1 row) + -- Test siglen parameter of GiST tsvector_ops CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1)); ERROR: unrecognized parameter "foo" @@ -595,6 +667,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; 507 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; + count +------- + 56 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; + count +------- + 58 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; + count +------- + 452 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; + count +------- + 450 +(1 row) + DROP INDEX wowidx2; CREATE INDEX wowidx ON test_tsvector USING gist (a tsvector_ops(siglen=484)); \d test_tsvector @@ -736,6 +832,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; 507 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; + count +------- + 56 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; + count +------- + 58 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; + count +------- + 452 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; + count +------- + 450 +(1 row) + RESET enable_seqscan; RESET 
enable_indexscan; RESET enable_bitmapscan; @@ -873,6 +993,30 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; 507 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; + count +------- + 56 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; + count +------- + 58 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; + count +------- + 452 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; + count +------- + 450 +(1 row) + -- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries EXPLAIN (COSTS OFF) SELECT count(*) FROM test_tsvector WHERE a @@ '!qh'; diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out index ee4a2490bb..2601e312df 100644 --- a/src/test/regress/expected/tstypes.out +++ b/src/test/regress/expected/tstypes.out @@ -551,6 +551,55 @@ SELECT 'wa:1A wb:2D'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "false"; f (1 row) +SELECT 'wa:1A'::tsvector @@ 'w:*A'::tsquery as "true"; + true +------ + t +(1 row) + +SELECT 'wa:1A'::tsvector @@ 'w:*D'::tsquery as "false"; + false +------- + f +(1 row) + +SELECT 'wa:1A'::tsvector @@ '!w:*A'::tsquery as "false"; + false +------- + f +(1 row) + +SELECT 'wa:1A'::tsvector @@ '!w:*D'::tsquery as "true"; + true +------ + t +(1 row) + +-- historically, a stripped tsvector matches queries ignoring weights: +SELECT strip('wa:1A'::tsvector) @@ 'w:*A'::tsquery as "true"; + true +------ + t +(1 row) + +SELECT strip('wa:1A'::tsvector) @@ 'w:*D'::tsquery as "true"; + true +------ + t +(1 row) + +SELECT strip('wa:1A'::tsvector) @@ '!w:*A'::tsquery as "false"; + false +------- + f +(1 row) + +SELECT strip('wa:1A'::tsvector) @@ '!w:*D'::tsquery as "false"; + false +------- + f +(1 row) + SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false"; false ------- diff --git a/src/test/regress/sql/gin.sql b/src/test/regress/sql/gin.sql index abe3575265..307f35c9e5 100644 --- a/src/test/regress/sql/gin.sql +++ 
b/src/test/regress/sql/gin.sql @@ -139,3 +139,11 @@ reset enable_seqscan; reset enable_bitmapscan; drop table t_gin_test_tbl; +CREATE TABLE test_weight (fts tsvector); +INSERT INTO test_weight (fts) values ('crew:1C shuttl:2C discovery:3a'::tsvector); +SET enable_seqscan=on; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); +CREATE INDEX setweight_fts_idx ON test_weight USING gin (fts); +SET enable_seqscan=off; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); +DROP TABLE test_weight; diff --git a/src/test/regress/sql/gist.sql b/src/test/regress/sql/gist.sql index b9d398ea94..5613396dfb 100644 --- a/src/test/regress/sql/gist.sql +++ b/src/test/regress/sql/gist.sql @@ -148,3 +148,12 @@ reset enable_bitmapscan; reset enable_indexonlyscan; drop table gist_tbl; + +CREATE TABLE test_weight (fts tsvector); +INSERT INTO test_weight (fts) values ('crew:1C shuttl:2C discovery:3a'::tsvector); +SET enable_seqscan=on; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); +CREATE INDEX setweight_fts_idx ON test_weight USING gist (fts); +SET enable_seqscan=off; +select * from test_weight where fts @@ to_tsquery('pg_catalog.english', 'shuttle & !(crew:a & discovery:a)'); +DROP TABLE test_weight; diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index e53e44f7ed..8a27fcd8b0 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -61,6 +61,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; +SELECT count(*) FROM test_tsvector WHERE a @@ 
'!wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; create index wowidx on test_tsvector using gist (a); @@ -90,6 +94,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; SET enable_indexscan=OFF; SET enable_bitmapscan=ON; @@ -116,6 +124,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; -- Test siglen parameter of GiST tsvector_ops CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1)); @@ -152,6 +164,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; DROP INDEX wowidx2; @@ -181,6 +197,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) 
FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; RESET enable_seqscan; RESET enable_indexscan; @@ -215,6 +235,10 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!qe <2> qt'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(pl <-> yh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(yh <-> pl)'; SELECT count(*) FROM test_tsvector WHERE a @@ '!(qe <2> qt)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wd:D'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:A'; +SELECT count(*) FROM test_tsvector WHERE a @@ '!wd:D'; -- Test optimization of non-empty GIN_SEARCH_MODE_ALL queries EXPLAIN (COSTS OFF) diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql index 50b4359c9a..30c8c702f0 100644 --- a/src/test/regress/sql/tstypes.sql +++ b/src/test/regress/sql/tstypes.sql @@ -104,6 +104,15 @@ SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true"; SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D & w:*A'::tsquery as "true"; SELECT 'wa:1D wb:2A'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "true"; SELECT 'wa:1A wb:2D'::tsvector @@ 'w:*D <-> w:*A'::tsquery as "false"; +SELECT 'wa:1A'::tsvector @@ 'w:*A'::tsquery as "true"; +SELECT 'wa:1A'::tsvector @@ 'w:*D'::tsquery as "false"; +SELECT 'wa:1A'::tsvector @@ '!w:*A'::tsquery as "false"; +SELECT 'wa:1A'::tsvector @@ '!w:*D'::tsquery as "true"; +-- historically, a stripped tsvector matches queries ignoring weights: +SELECT strip('wa:1A'::tsvector) @@ 'w:*A'::tsquery as "true"; +SELECT strip('wa:1A'::tsvector) @@ 'w:*D'::tsquery as "true"; +SELECT strip('wa:1A'::tsvector) @@ '!w:*A'::tsquery as "false"; +SELECT strip('wa:1A'::tsvector) @@ '!w:*D'::tsquery as "false"; SELECT 'supernova'::tsvector @@ 'super'::tsquery AS 
"false"; SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";