diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index cf55e39..2972540 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -390,7 +390,8 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len) * and different variants are ORed together. */ static void -pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix) +pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, + int16 weight, bool prefix, bool isphrase) { int32 count = 0; ParsedText prs; @@ -423,7 +424,12 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, /* put placeholders for each missing stop word */ pushStop(state); if (cntpos) - pushOperator(state, data->qoperator, 1); + { + if (isphrase) + pushOperator(state, OP_PHRASE, 1); + else + pushOperator(state, data->qoperator, 1); + } cntpos++; pos++; } @@ -464,7 +470,10 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, if (cntpos) { /* distance may be useful */ - pushOperator(state, data->qoperator, 1); + if (isphrase) + pushOperator(state, OP_PHRASE, 1); + else + pushOperator(state, data->qoperator, 1); } cntpos++; @@ -490,6 +499,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS) query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), + false, false); PG_RETURN_TSQUERY(query); @@ -520,7 +530,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS) query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), - true); + true, + false); PG_RETURN_POINTER(query); } @@ -551,7 +562,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS) query = parse_tsquery(text_to_cstring(in), pushval_morph, PointerGetDatum(&data), - true); + true, + false); PG_RETURN_TSQUERY(query); } @@ -567,3 +579,36 @@ phraseto_tsquery(PG_FUNCTION_ARGS) ObjectIdGetDatum(cfgId), PointerGetDatum(in))); } + +Datum +queryto_tsquery_byid(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_PP(1); + MorphOpaque data; + TSQuery query = NULL; + + data.cfg_id = PG_GETARG_OID(0); + + data.qoperator = OP_AND; + + query = parse_tsquery(text_to_cstring(in), + pushval_morph, + PointerGetDatum(&data), + false, + true); + + PG_RETURN_TSQUERY(query); +} + +Datum +queryto_tsquery(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_PP(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(queryto_tsquery_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(in))); + +} \ No newline at end of file diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 07d56b0..53c9fc7 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -63,6 +63,7 @@ struct TSQueryParserStateData #define WAITOPERATOR 2 #define WAITFIRSTOPERAND 3 #define WAITSINGLEOPERAND 4 +#define WAITQUOTE 5 /* * subroutine to parse the modifiers (weight and prefix flag currently) @@ -285,7 +286,8 @@ parse_around_operator(char *buf, int16 *distance) static ts_tokentype gettoken_query(TSQueryParserState state, int8 *operator, - int *lenval, char **strval, int16 *weight, bool *prefix) + int *lenval, char **strval, int16 *weight, bool *prefix, + bool isquery) { *weight = 0; *prefix = false; @@ -296,7 +298,8 @@ gettoken_query(TSQueryParserState state, { case WAITFIRSTOPERAND: case WAITOPERAND: - if (t_iseq(state->buf, '!')) + if (t_iseq(state->buf, '!') || + (isquery && t_iseq(state->buf, '-'))) { (state->buf)++; /* can safely ++, t_iseq guarantee that * pg_mblen()==1 */ @@ -318,6 +321,20 @@ gettoken_query(TSQueryParserState state, errmsg("syntax error in tsquery: \"%s\"", state->buffer))); } + else if (isquery && t_iseq(state->buf, '"')) + { + char *quote = strchr(state->buf + 1, '"'); + if (quote == NULL) + { + state->buf++; + continue; + } + *strval = state->buf + 1; + *lenval = quote - state->buf - 1; + state->buf = quote + 1; + state->state = WAITQUOTE; + return PT_VAL; + } else if (!t_isspace(state->buf)) { /* @@ -355,6 +372,13 @@ gettoken_query(TSQueryParserState state, (state->buf)++; return PT_OPR; } + else if (isquery && has_prefix(state->buf, "OR ")) + { + state->state = WAITOPERAND; + *operator = OP_OR; + (state->buf) += 3; + return PT_OPR; + } else if (t_iseq(state->buf, '<')) { state->state = WAITOPERAND; @@ -365,7 +389,7 @@ gettoken_query(TSQueryParserState state, return PT_ERR; return PT_OPR; } - else if (has_prefix(state->buf, "AROUND(")) + else if (isquery && has_prefix(state->buf, "AROUND(")) { state->state = WAITOPERAND; *operator = OP_AROUND; @@ -381,8 +405,23 @@ gettoken_query(TSQueryParserState state, state->count--; return (state->count < 0) ? PT_ERR : PT_CLOSE; } + else if (t_iseq(state->buf, '(')) + { + *operator = OP_AND; + state->state = WAITOPERAND; + return PT_OPR; + } else if (*(state->buf) == '\0') return (state->count) ? PT_ERR : PT_END; + else if (isquery && + (t_isalpha(state->buf) || t_iseq(state->buf, '!') + || t_iseq(state->buf, '-') + || t_iseq(state->buf, '"'))) + { + state->state = WAITOPERAND; + *operator = OP_AND; + return PT_OPR; + } else if (!t_isspace(state->buf)) return PT_ERR; break; @@ -394,6 +433,9 @@ gettoken_query(TSQueryParserState state, state->buf += strlen(state->buf); state->count++; return PT_VAL; + case WAITQUOTE: + state->state = WAITOPERATOR; + continue; default: return PT_ERR; break; @@ -550,7 +592,8 @@ cleanOpStack(TSQueryParserState state, static void makepol(TSQueryParserState state, PushFunction pushval, - Datum opaque) + Datum opaque, + bool isquery) { int8 operator = 0; ts_tokentype type; @@ -564,19 +607,20 @@ makepol(TSQueryParserState state, /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); - while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END) + while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, + &prefix, isquery)) != PT_END) { switch (type) { case PT_VAL: - pushval(opaque, state, strval, lenval, weight, prefix); + pushval(opaque, state, strval, lenval, weight, prefix, state->state == WAITQUOTE); break; case PT_OPR: cleanOpStack(state, opstack, &lenstack, operator); pushOpStack(opstack, &lenstack, operator, weight); break; case PT_OPEN: - makepol(state, pushval, opaque); + makepol(state, pushval, opaque, isquery); break; case PT_CLOSE: cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ ); @@ -681,7 +725,8 @@ TSQuery parse_tsquery(char *buf, PushFunction pushval, Datum opaque, - bool isplain) + bool isplain, + bool isquery) { struct TSQueryParserStateData state; int i; @@ -708,7 +753,7 @@ parse_tsquery(char *buf, *(state.curop) = '\0'; /* parse query & make polish notation (postfix, but in reverse order) */ - makepol(&state, pushval, opaque); + makepol(&state, pushval, opaque, isquery); close_tsvector_parser(state.valstate); @@ -779,7 +824,7 @@ parse_tsquery(char *buf, static void pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval, - int16 weight, bool prefix) + int16 weight, bool prefix, bool isphrase) { pushValue(state, strval, lenval, weight, prefix); } @@ -792,7 +837,7 @@ tsqueryin(PG_FUNCTION_ARGS) { char *in = PG_GETARG_CSTRING(0); - PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false)); + PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false, false)); } /* diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c index e679bb5..bb7fbd5 100644 --- a/src/backend/utils/adt/tsquery_cleanup.c +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -278,7 +278,7 @@ clean_stopword_intree(NODE *node, int *ladd, int *radd) node->right = clean_stopword_intree(node->right, &rladd, &rradd); /* Check if current node is OP_PHRASE, get its distance */ - isphrase = (node->valnode->qoperator.oper == OP_PHRASE || + isphrase = (node->valnode->qoperator.oper == OP_PHRASE || node->valnode->qoperator.oper == OP_AROUND); ndistance = isphrase ? node->valnode->qoperator.distance : 0; diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index ce7ee25..e8bc9eb 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -1715,7 +1715,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, { /* straight AND */ return TS_phrase_output(data, &Ldata, &Rdata, - TSPO_BOTH | + TSPO_BOTH | (curitem->qoperator.oper == OP_AROUND ? TSPO_NOT_EXAC : 0), Loffset, Roffset, Min(Ldata.npos, Rdata.npos)); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index c969375..9abde15 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4906,6 +4906,8 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 DESCR("make tsquery"); DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 8889 ( queryto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ queryto_tsquery_byid _null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ )); DESCR("transform to tsquery"); DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ )); @@ -4914,6 +4916,8 @@ DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 DESCR("make tsquery"); DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 8890 ( queryto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ queryto_tsquery _null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ )); DESCR("transform to tsquery"); DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ )); diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index 782548c..ffb1762 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -44,11 +44,12 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state, char *token, int tokenlen, int16 tokenweights, /* bitmap as described in * QueryOperand struct */ - bool prefix); + bool prefix, + bool isphrase); extern TSQuery parse_tsquery(char *buf, PushFunction pushval, - Datum opaque, bool isplain); + Datum opaque, bool isplain, bool isquery); /* Functions for use by PushFunction implementations */ extern void pushValue(TSQueryParserState state, diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index d63fb12..2eb410b 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -1672,3 +1672,115 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca (1 row) set enable_seqscan = on; + +--test queryto_tsquery function +select queryto_tsquery('My brand new smartphone'); + queryto_tsquery +------------------------------- + 'brand' & 'new' & 'smartphon' +(1 row) + +select queryto_tsquery('My brand "new smartphone"'); + queryto_tsquery +--------------------------------- + 'brand' & 'new' <-> 'smartphon' +(1 row) + +select queryto_tsquery('"A fat cat" has just eaten a -rat.'); + queryto_tsquery +------------------------------------ + 'fat' <-> 'cat' & 'eaten' & !'rat' +(1 row) + +select queryto_tsquery('"A fat cat" has just eaten OR -rat.'); + queryto_tsquery +------------------------------------ + 'fat' <-> 'cat' & 'eaten' | !'rat' +(1 row) + +select queryto_tsquery('"A fat cat" has just (eaten OR -rat)'); + queryto_tsquery +---------------------------------------- + 'fat' <-> 'cat' & ( 'eaten' | !'rat' ) +(1 row) + +-- testing AROUND operator evaluation +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"gnu debugger" AROUND(5) runs'); + ?column? +---------- + f +(1 row) + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('run AROUND(5) "gnu debugger"'); + ?column? +---------- + f +(1 row) + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"gnu debugger" AROUND(6) runs'); + ?column? +---------- + t +(1 row) + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('run AROUND(6) "gnu debugger"'); + ?column? +---------- + t +(1 row) + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"many programming languages" AROUND(10) "portable debugger"'); + ?column? +---------- + f +(1 row) + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"portable debugger" AROUND(10) "many programming languages"'); + ?column? +---------- + f +(1 row) + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"many programming languages" AROUND(11) "portable debugger"'); + ?column? +---------- + t +(1 row) + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"portable debugger" AROUND(11) "many programming languages"'); + ?column? +---------- + t +(1 row) + +select queryto_tsquery('"fat cat AROUND(5) rat"'); + queryto_tsquery +------------------------------------------------ + 'fat' <-> 'cat' <-> 'around' <-> '5' <-> 'rat' +(1 row) + +select queryto_tsquery('simple','"fat cat OR rat"'); + queryto_tsquery +------------------------------------ + 'fat' <-> 'cat' <-> 'or' <-> 'rat' +(1 row) + +select queryto_tsquery('fat*rat'); + queryto_tsquery +----------------- + 'fat' & 'rat' +(1 row) + +select queryto_tsquery('fat-rat'); + queryto_tsquery +--------------------------- + 'fat-rat' & 'fat' & 'rat' +(1 row) diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index 1c8520b..65e71da 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -539,3 +539,34 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts); set enable_seqscan = off; select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat'); set enable_seqscan = on; + +--test queryto_tsquery function +select queryto_tsquery('My brand new smartphone'); +select queryto_tsquery('My brand "new smartphone"'); +select queryto_tsquery('"A fat cat" has just eaten a -rat.'); +select queryto_tsquery('"A fat cat" has just eaten OR -rat.'); +select queryto_tsquery('"A fat cat" has just (eaten OR -rat)'); + +-- testing AROUND operator evaluation +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"gnu debugger" AROUND(5) runs'); +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('run AROUND(5) "gnu debugger"'); +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"gnu debugger" AROUND(6) runs'); +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('run AROUND(6) "gnu debugger"'); + +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"many programming languages" AROUND(10) "portable debugger"'); +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"portable debugger" AROUND(10) "many programming languages"'); +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"many programming languages" AROUND(11) "portable debugger"'); +select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@ +queryto_tsquery('"portable debugger" AROUND(11) "many programming languages"'); + +select queryto_tsquery('"fat cat AROUND(5) rat"'); +select queryto_tsquery('simple','"fat cat OR rat"'); +select queryto_tsquery('fat*rat'); +select queryto_tsquery('fat-rat'); \ No newline at end of file