diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index ea5947a3a8..6055fb6b4e 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -490,7 +490,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS) query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), - false); + 0); PG_RETURN_TSQUERY(query); } @@ -520,7 +520,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS) query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), - true); + P_TSQ_PLAIN); PG_RETURN_POINTER(query); } @@ -551,7 +551,7 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS) query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), - true); + P_TSQ_PLAIN); PG_RETURN_TSQUERY(query); } @@ -567,3 +567,35 @@ phraseto_tsquery(PG_FUNCTION_ARGS) ObjectIdGetDatum(cfgId), PointerGetDatum(in))); } + +Datum +websearch_to_tsquery_byid(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_PP(1); + MorphOpaque data; + TSQuery query = NULL; + + data.cfg_id = PG_GETARG_OID(0); + + data.qoperator = OP_AND; + + query = parse_tsquery(text_to_cstring(in), + pushval_morph, + PointerGetDatum(&data), + P_TSQ_WEB); + + PG_RETURN_TSQUERY(query); +} + +Datum +websearch_to_tsquery(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_PP(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(in))); + +} diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 1ccbf79030..695bdb89e9 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -32,14 +32,27 @@ const int tsearch_op_priority[OP_COUNT] = 3 /* OP_PHRASE */ }; +/* + * parser's states + */ +typedef enum +{ + WAITOPERAND = 1, + WAITOPERATOR = 2, + WAITFIRSTOPERAND = 3, + WAITSINGLEOPERAND = 4 +} ts_parserstate; + struct TSQueryParserStateData { /* State for gettoken_query */ char *buffer; /* entire string we are scanning */ char *buf; /* current scan point */ - int state; int count; /* nesting count, incremented by (, * decremented by ) */ + bool in_quotes; /* phrase in quotes "" */ + bool is_web; /* is it a web search? */ + ts_parserstate state; /* polish (prefix) notation in list, filled in by push* functions */ List *polstr; @@ -57,12 +70,6 @@ struct TSQueryParserStateData TSVectorParseState valstate; }; -/* parser's states */ -#define WAITOPERAND 1 -#define WAITOPERATOR 2 -#define WAITFIRSTOPERAND 3 -#define WAITSINGLEOPERAND 4 - /* * subroutine to parse the modifiers (weight and prefix flag currently) * part, like ':AB*' of a query. @@ -197,6 +204,26 @@ err: return buf; } +/* + * Parse OR operator used in websearch_to_tsquery(). + */ +static bool +parse_or_operator(TSQueryParserState state) +{ + char *buf = state->buf; + + if (state->in_quotes) + return false; + + return (t_iseq(&buf[0], 'o') || t_iseq(&buf[0], 'O')) && + (t_iseq(&buf[1], 'r') || t_iseq(&buf[1], 'R')) && + (buf[2] != '\0' && + !t_iseq(&buf[2], '-') && + !t_iseq(&buf[2], '_') && + !t_isalpha(&buf[2]) && + !t_isdigit(&buf[2])); +} + /* * token types for parsing */ @@ -219,10 +246,12 @@ typedef enum * */ static ts_tokentype -gettoken_query(TSQueryParserState state, - int8 *operator, - int *lenval, char **strval, int16 *weight, bool *prefix) +gettoken_query(TSQueryParserState state, int8 *operator, + int *lenval, char **strval, + int16 *weight, bool *prefix) { + bool is_web = state->is_web; + *weight = 0; *prefix = false; @@ -232,28 +261,63 @@ gettoken_query(TSQueryParserState state, { case WAITFIRSTOPERAND: case WAITOPERAND: - if (t_iseq(state->buf, '!')) + if ((! is_web && t_iseq(state->buf, '!')) || + (is_web && t_iseq(state->buf, '-'))) { - (state->buf)++; /* can safely ++, t_iseq guarantee that - * pg_mblen()==1 */ + state->buf++; + + if (state->in_quotes) + continue; + *operator = OP_NOT; state->state = WAITOPERAND; return PT_OPR; } else if (t_iseq(state->buf, '(')) { + state->buf++; + + if (is_web) + continue; + state->count++; - (state->buf)++; state->state = WAITOPERAND; return PT_OPEN; } else if (t_iseq(state->buf, ':')) { + state->buf++; + + if (is_web) + continue; + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error in tsquery: \"%s\"", state->buffer))); } + else if (is_web && t_iseq(state->buf, '"')) + { + state->buf++; + + /* web search tolerates missing quotes */ + if (!state->in_quotes && strchr(state->buf, '"')) + { + /* quoted text should be ordered <-> */ + state->in_quotes = true; + state->state = WAITOPERAND; + } + else + state->in_quotes = false; + + continue; + } + else if (is_web && ISOPERATOR(state->buf)) + { + /* or else gettoken_tsvector() will raise an error */ + state->buf++; + continue; + } else if (!t_isspace(state->buf)) { /* @@ -263,12 +327,22 @@ gettoken_query(TSQueryParserState state, reset_tsvector_parser(state->valstate, state->buf); if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf)) { - state->buf = get_modifiers(state->buf, weight, prefix); + if (!is_web) + { + /* web search does not support weights */ + state->buf = get_modifiers(state->buf, weight, prefix); + } state->state = WAITOPERATOR; return PT_VAL; } else if (state->state == WAITFIRSTOPERAND) return PT_END; + else if (is_web) + { + /* finally, we have to provide an operand */ + pushStop(state); + return PT_END; + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -277,40 +351,95 @@ gettoken_query(TSQueryParserState state, } break; case WAITOPERATOR: - if (t_iseq(state->buf, '&')) + if (! is_web && t_iseq(state->buf, '&')) { + state->buf++; state->state = WAITOPERAND; *operator = OP_AND; - (state->buf)++; return PT_OPR; } - else if (t_iseq(state->buf, '|')) + else if (! is_web && t_iseq(state->buf, '|')) { + state->buf++; state->state = WAITOPERAND; *operator = OP_OR; - (state->buf)++; return PT_OPR; } - else if (t_iseq(state->buf, '<')) + else if (! is_web && t_iseq(state->buf, '<')) { - state->state = WAITOPERAND; - *operator = OP_PHRASE; /* weight var is used as storage for distance */ state->buf = parse_phrase_operator(state->buf, weight); + state->state = WAITOPERAND; + *operator = OP_PHRASE; if (*weight < 0) return PT_ERR; return PT_OPR; } - else if (t_iseq(state->buf, ')')) + else if (! is_web && t_iseq(state->buf, ')')) { - (state->buf)++; + state->buf++; state->count--; return (state->count < 0) ? PT_ERR : PT_CLOSE; } + else if (is_web && t_iseq(state->buf, '"')) + { + state->buf++; + + /* web search tolerates missing quotes */ + if (!state->in_quotes && strchr(state->buf, '"')) + { + /* quoted text should be ordered <-> */ + state->in_quotes = true; + state->state = WAITOPERAND; + + /* put implicit AND after an operand */ + *operator = OP_AND; + return PT_OPR; + } + else + state->in_quotes = false; + + continue; + } + else if (is_web && parse_or_operator(state)) + { + state->buf += 2; /* strlen("OR") */ + state->state = WAITOPERAND; + *operator = OP_OR; + return PT_OPR; + } + else if (is_web && ISOPERATOR(state->buf)) + { + /* just skip disabled operators */ + state->buf++; + continue; + } else if (*(state->buf) == '\0') - return (state->count) ? PT_ERR : PT_END; + { + /* web search tolerates unexpected end of line */ + return (!is_web && state->count) ? PT_ERR : PT_END; + } else if (!t_isspace(state->buf)) + { + if (is_web) + { + if (state->in_quotes) + { + /* put implicit <-> after an operand */ + *operator = OP_PHRASE; + *weight = 1; + } + else + { + /* put implicit AND after an operand */ + *operator = OP_AND; + } + + state->state = WAITOPERAND; + return PT_OPR; + } return PT_ERR; + } break; case WAITSINGLEOPERAND: if (*(state->buf) == '\0') @@ -320,9 +449,6 @@ gettoken_query(TSQueryParserState state, state->buf += strlen(state->buf); state->count++; return PT_VAL; - default: - return PT_ERR; - break; } state->buf += pg_mblen(state->buf); } @@ -605,7 +731,7 @@ TSQuery parse_tsquery(char *buf, PushFunction pushval, Datum opaque, - bool isplain) + int flags) { struct TSQueryParserStateData state; int i; @@ -613,17 +739,28 @@ parse_tsquery(char *buf, int commonlen; QueryItem *ptr; ListCell *cell; - bool needcleanup; + bool needcleanup, + is_plain, + is_web; + int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY; + + is_plain = (flags & P_TSQ_PLAIN) != 0; + is_web = (flags & P_TSQ_WEB) != 0; + + if (is_web) + tsv_flags |= P_TSV_IS_WEB; /* init state */ state.buffer = buf; state.buf = buf; - state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; state.count = 0; + state.in_quotes = false; + state.is_web = is_web; + state.state = is_plain ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; state.polstr = NIL; /* init value parser's state */ - state.valstate = init_tsvector_parser(state.buffer, true, true); + state.valstate = init_tsvector_parser(state.buffer, tsv_flags); /* init list of operand */ state.sumlen = 0; @@ -716,7 +853,7 @@ tsqueryin(PG_FUNCTION_ARGS) { char *in = PG_GETARG_CSTRING(0); - PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false)); + PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0)); } /* diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c index 64e02ef434..7a27bd12a3 100644 --- a/src/backend/utils/adt/tsvector.c +++ b/src/backend/utils/adt/tsvector.c @@ -200,7 +200,7 @@ tsvectorin(PG_FUNCTION_ARGS) char *cur; int buflen = 256; /* allocated size of tmpbuf */ - state = init_tsvector_parser(buf, false, false); + state = init_tsvector_parser(buf, 0); arrlen = 64; arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen); diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c index 7367ba6a40..fed411a842 100644 --- a/src/backend/utils/adt/tsvector_parser.c +++ b/src/backend/utils/adt/tsvector_parser.c @@ -33,6 +33,7 @@ struct TSVectorParseStateData int eml; /* max bytes per character */ bool oprisdelim; /* treat ! | * ( ) as delimiters? */ bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */ + bool is_web; /* we're in websearch_to_tsquery() */ }; @@ -42,7 +43,7 @@ struct TSVectorParseStateData * ! | & ( ) */ TSVectorParseState -init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery) +init_tsvector_parser(char *input, int flags) { TSVectorParseState state; @@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery) state->len = 32; state->word = (char *) palloc(state->len); state->eml = pg_database_encoding_max_length(); - state->oprisdelim = oprisdelim; - state->is_tsquery = is_tsquery; + state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0; + state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0; + state->is_web = (flags & P_TSV_IS_WEB) != 0; return state; } @@ -89,16 +91,6 @@ do { \ } \ } while (0) -/* phrase operator begins with '<' */ -#define ISOPERATOR(x) \ - ( pg_mblen(x) == 1 && ( *(x) == '!' || \ - *(x) == '&' || \ - *(x) == '|' || \ - *(x) == '(' || \ - *(x) == ')' || \ - *(x) == '<' \ - ) ) - /* Fills gettoken_tsvector's output parameters, and returns true */ #define RETURN_TOKEN \ do { \ @@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state, { if (*(state->prsbuf) == '\0') return false; - else if (t_iseq(state->prsbuf, '\'')) + else if (!state->is_web && t_iseq(state->prsbuf, '\'')) statecode = WAITENDCMPLX; - else if (t_iseq(state->prsbuf, '\\')) + else if (!state->is_web && t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } - else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) + else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) || + (state->is_web && t_iseq(state->prsbuf, '"'))) PRSSYNTAXERROR; else if (!t_isspace(state->prsbuf)) { @@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == WAITENDWORD) { - if (t_iseq(state->prsbuf, '\\')) + if (!state->is_web && t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || - (state->oprisdelim && ISOPERATOR(state->prsbuf))) + (state->oprisdelim && ISOPERATOR(state->prsbuf)) || + (state->is_web && t_iseq(state->prsbuf, '"'))) { RESIZEPRSBUF; if (curpos == state->word) @@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == WAITENDCMPLX) { - if (t_iseq(state->prsbuf, '\'')) + if (!state->is_web && t_iseq(state->prsbuf, '\'')) { statecode = WAITCHARCMPLX; } - else if (t_iseq(state->prsbuf, '\\')) + else if (!state->is_web && t_iseq(state->prsbuf, '\\')) { statecode = WAITNEXTCHAR; oldstate = WAITENDCMPLX; @@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == WAITCHARCMPLX) { - if (t_iseq(state->prsbuf, '\'')) + if (!state->is_web && t_iseq(state->prsbuf, '\'')) { RESIZEPRSBUF; COPYCHAR(curpos, state->prsbuf); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index bfc90098f8..00f1a85ae7 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4950,6 +4950,8 @@ DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s DESCR("transform to tsquery"); DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 8889 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery_byid _null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ )); DESCR("transform to tsvector"); DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ )); @@ -4958,6 +4960,8 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s DESCR("transform to tsquery"); DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 8890 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery _null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ )); DESCR("transform jsonb to tsvector"); DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ )); diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index f8ddce5ecb..73e969fe9c 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -25,9 +25,11 @@ struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */ typedef struct TSVectorParseStateData *TSVectorParseState; -extern TSVectorParseState init_tsvector_parser(char *input, - bool oprisdelim, - bool is_tsquery); +#define P_TSV_OPR_IS_DELIM (1 << 0) +#define P_TSV_IS_TSQUERY (1 << 1) +#define P_TSV_IS_WEB (1 << 2) + +extern TSVectorParseState init_tsvector_parser(char *input, int flags); extern void reset_tsvector_parser(TSVectorParseState state, char *input); extern bool gettoken_tsvector(TSVectorParseState state, char **token, int *len, @@ -35,6 +37,16 @@ extern bool gettoken_tsvector(TSVectorParseState state, char **endptr); extern void close_tsvector_parser(TSVectorParseState state); +/* phrase operator begins with '<' */ +#define ISOPERATOR(x) \ + ( pg_mblen(x) == 1 && ( *(x) == '!' || \ + *(x) == '&' || \ + *(x) == '|' || \ + *(x) == '(' || \ + *(x) == ')' || \ + *(x) == '<' \ + ) ) + /* parse_tsquery */ struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */ @@ -46,9 +58,13 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state, * QueryOperand struct */ bool prefix); +#define P_TSQ_PLAIN (1 << 0) +#define P_TSQ_WEB (1 << 1) + extern TSQuery parse_tsquery(char *buf, - PushFunction pushval, - Datum opaque, bool isplain); + PushFunction pushval, + Datum opaque, + int flags); /* Functions for use by PushFunction implementations */ extern void pushValue(TSQueryParserState state, diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index d63fb12f1d..2b1da308df 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -1672,3 +1672,325 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca (1 row) set enable_seqscan = on; +-- test websearch_to_tsquery function +select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat'); + websearch_to_tsquery +--------------------------------------------- + 'i' & 'have' & 'a' & 'fat' & 'abcd' & 'cat' +(1 row) + +select websearch_to_tsquery('simple', 'orange:**AABBCCDD'); + websearch_to_tsquery +----------------------- + 'orange' & 'aabbccdd' +(1 row) + +select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<'); + websearch_to_tsquery +----------------------------------------- + 'fat' & 'a' & 'cat' & 'b' & 'rat' & 'c' +(1 row) + +select websearch_to_tsquery('simple', 'fat:A : cat:B'); + websearch_to_tsquery +--------------------------- + 'fat' & 'a' & 'cat' & 'b' +(1 row) + +select websearch_to_tsquery('simple', 'abc : def'); + websearch_to_tsquery +---------------------- + 'abc' & 'def' +(1 row) + +select websearch_to_tsquery('simple', 'abc:def'); + websearch_to_tsquery +---------------------- + 'abc' & 'def' +(1 row) + +select websearch_to_tsquery('simple', 'a:::b'); + websearch_to_tsquery +---------------------- + 'a' & 'b' +(1 row) + +select websearch_to_tsquery('simple', ':'); +NOTICE: text-search query doesn't contain lexemes: ":" + websearch_to_tsquery +---------------------- + +(1 row) + +select websearch_to_tsquery('english', 'My brand new smartphone'); + websearch_to_tsquery +------------------------------- + 'brand' & 'new' & 'smartphon' +(1 row) + +select websearch_to_tsquery('english', 'My brand "new smartphone"'); + websearch_to_tsquery +--------------------------------- + 'brand' & 'new' <-> 'smartphon' +(1 row) + +select websearch_to_tsquery('english', 'My brand "new -smartphone"'); + websearch_to_tsquery +--------------------------------- + 'brand' & 'new' <-> 'smartphon' +(1 row) + +select websearch_to_tsquery('english', 'My brand:B "new -smartphone"'); + websearch_to_tsquery +--------------------------------------- + 'brand' & 'b' & 'new' <-> 'smartphon' +(1 row) + +select websearch_to_tsquery('simple', 'cat or rat'); + websearch_to_tsquery +---------------------- + 'cat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'cat OR rat'); + websearch_to_tsquery +---------------------- + 'cat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'cat "OR" rat'); + websearch_to_tsquery +---------------------- + 'cat' & 'or' & 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'cat OR'); + websearch_to_tsquery +---------------------- + 'cat' & 'or' +(1 row) + +select websearch_to_tsquery('simple', 'OR rat'); + websearch_to_tsquery +---------------------- + 'or' & 'rat' +(1 row) + +select websearch_to_tsquery('simple', '"fat cat OR rat"'); + websearch_to_tsquery +------------------------------------ + 'fat' <-> 'cat' <-> 'or' <-> 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat (cat OR rat'); + websearch_to_tsquery +----------------------- + 'fat' & 'cat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat*rat'); + websearch_to_tsquery +---------------------- + 'fat' & 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat-rat'); + websearch_to_tsquery +--------------------------- + 'fat-rat' & 'fat' & 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat_rat'); + websearch_to_tsquery +---------------------- + 'fat' & 'rat' +(1 row) + +-- OR is an operator here ... +select websearch_to_tsquery('simple', '"fat cat"or"fat rat"'); + websearch_to_tsquery +----------------------------------- + 'fat' <-> 'cat' | 'fat' <-> 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat or(rat'); + websearch_to_tsquery +---------------------- + 'fat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat or)rat'); + websearch_to_tsquery +---------------------- + 'fat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat or&rat'); + websearch_to_tsquery +---------------------- + 'fat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat or|rat'); + websearch_to_tsquery +---------------------- + 'fat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat or!rat'); + websearch_to_tsquery +---------------------- + 'fat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat orrat'); + websearch_to_tsquery +---------------------- + 'fat' | 'rat' +(1 row) + +select websearch_to_tsquery('simple', 'fat or '); + websearch_to_tsquery +---------------------- + 'fat' +(1 row) + +-- ... but not here +select websearch_to_tsquery('simple', 'abc orange'); + websearch_to_tsquery +---------------------- + 'abc' & 'orange' +(1 row) + +select websearch_to_tsquery('simple', 'abc orтест'); + websearch_to_tsquery +---------------------- + 'abc' & 'orтест' +(1 row) + +select websearch_to_tsquery('simple', 'abc OR1234'); + websearch_to_tsquery +---------------------- + 'abc' & 'or1234' +(1 row) + +select websearch_to_tsquery('simple', 'abc or-abc'); + websearch_to_tsquery +--------------------------------- + 'abc' & 'or-abc' & 'or' & 'abc' +(1 row) + +select websearch_to_tsquery('simple', 'abc OR_abc'); + websearch_to_tsquery +---------------------- + 'abc' & 'or' & 'abc' +(1 row) + +select websearch_to_tsquery('simple', 'abc or'); + websearch_to_tsquery +---------------------- + 'abc' & 'or' +(1 row) + +select websearch_to_tsquery('simple', 'or OR or'); + websearch_to_tsquery +---------------------- + 'or' | 'or' +(1 row) + +select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.'); + websearch_to_tsquery +------------------------------------ + 'fat' <-> 'cat' & 'eaten' & !'rat' +(1 row) + +select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.'); + websearch_to_tsquery +----------------------------------- + 'fat' <-> 'cat' & 'eaten' | 'rat' +(1 row) + +select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)'); + websearch_to_tsquery +------------------------------------ + 'fat' <-> 'cat' & 'eaten' | !'rat' +(1 row) + +select websearch_to_tsquery('english', 'this is ----fine'); + websearch_to_tsquery +---------------------- + !!!!'fine' +(1 row) + +select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good'); + websearch_to_tsquery +---------------------------------------- + !'fine' & 'dear' <-> 'friend' | 'good' +(1 row) + +select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too'); + websearch_to_tsquery +------------------------ + 'old' & 'cat' & 'fine' +(1 row) + +select websearch_to_tsquery('english', '"A the" OR just on'); +NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored + websearch_to_tsquery +---------------------- + +(1 row) + +select websearch_to_tsquery('russian', '"толстая кошка" съела крысу'); + websearch_to_tsquery +-------------------------------------- + 'толст' <-> 'кошк' & 'съел' & 'крыс' +(1 row) + +select to_tsvector('russian', 'съела толстая кошка крысу') @@ +websearch_to_tsquery('russian', '"толстая кошка" съела крысу'); + ?column? +---------- + t +(1 row) + +select to_tsvector('russian', 'съела толстая серая кошка крысу') @@ +websearch_to_tsquery('russian', '"толстая кошка" съела крысу'); + ?column? +---------- + f +(1 row) + +-- cases handled by gettoken_tsvector() +select websearch_to_tsquery(''''); +NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored + websearch_to_tsquery +---------------------- + +(1 row) + +select websearch_to_tsquery('''abc''''def'''); + websearch_to_tsquery +---------------------- + 'abc' & 'def' +(1 row) + +select websearch_to_tsquery('\abc'); + websearch_to_tsquery +---------------------- + 'abc' +(1 row) + +select websearch_to_tsquery('\'); +NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored + websearch_to_tsquery +---------------------- + +(1 row) + diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index 1c8520b3e9..da8d089100 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -539,3 +539,75 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts); set enable_seqscan = off; select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat'); set enable_seqscan = on; + +-- test websearch_to_tsquery function +select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat'); +select websearch_to_tsquery('simple', 'orange:**AABBCCDD'); +select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<'); +select websearch_to_tsquery('simple', 'fat:A : cat:B'); + +select websearch_to_tsquery('simple', 'abc : def'); +select websearch_to_tsquery('simple', 'abc:def'); +select websearch_to_tsquery('simple', 'a:::b'); +select websearch_to_tsquery('simple', ':'); + +select websearch_to_tsquery('english', 'My brand new smartphone'); +select websearch_to_tsquery('english', 'My brand "new smartphone"'); +select websearch_to_tsquery('english', 'My brand "new -smartphone"'); +select websearch_to_tsquery('english', 'My brand:B "new -smartphone"'); + +select websearch_to_tsquery('simple', 'cat or rat'); +select websearch_to_tsquery('simple', 'cat OR rat'); +select websearch_to_tsquery('simple', 'cat "OR" rat'); +select websearch_to_tsquery('simple', 'cat OR'); +select websearch_to_tsquery('simple', 'OR rat'); + +select websearch_to_tsquery('simple', '"fat cat OR rat"'); +select websearch_to_tsquery('simple', 'fat (cat OR rat'); +select websearch_to_tsquery('simple', 'fat*rat'); +select websearch_to_tsquery('simple', 'fat-rat'); +select websearch_to_tsquery('simple', 'fat_rat'); + +-- OR is an operator here ... +select websearch_to_tsquery('simple', '"fat cat"or"fat rat"'); +select websearch_to_tsquery('simple', 'fat or(rat'); +select websearch_to_tsquery('simple', 'fat or)rat'); +select websearch_to_tsquery('simple', 'fat or&rat'); +select websearch_to_tsquery('simple', 'fat or|rat'); +select websearch_to_tsquery('simple', 'fat or!rat'); +select websearch_to_tsquery('simple', 'fat orrat'); +select websearch_to_tsquery('simple', 'fat or '); + +-- ... but not here +select websearch_to_tsquery('simple', 'abc orange'); +select websearch_to_tsquery('simple', 'abc orтест'); +select websearch_to_tsquery('simple', 'abc OR1234'); +select websearch_to_tsquery('simple', 'abc or-abc'); +select websearch_to_tsquery('simple', 'abc OR_abc'); +select websearch_to_tsquery('simple', 'abc or'); + +select websearch_to_tsquery('simple', 'or OR or'); + +select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.'); +select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.'); +select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)'); + +select websearch_to_tsquery('english', 'this is ----fine'); +select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good'); +select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too'); + +select websearch_to_tsquery('english', '"A the" OR just on'); +select websearch_to_tsquery('russian', '"толстая кошка" съела крысу'); + +select to_tsvector('russian', 'съела толстая кошка крысу') @@ +websearch_to_tsquery('russian', '"толстая кошка" съела крысу'); + +select to_tsvector('russian', 'съела толстая серая кошка крысу') @@ +websearch_to_tsquery('russian', '"толстая кошка" съела крысу'); + +-- cases handled by gettoken_tsvector() +select websearch_to_tsquery(''''); +select websearch_to_tsquery('''abc''''def'''); +select websearch_to_tsquery('\abc'); +select websearch_to_tsquery('\');