diff --git a/doc/src/sgml/ref/alter_tsconfig.sgml b/doc/src/sgml/ref/alter_tsconfig.sgml index b44aac9..ddbe4e4 100644 --- a/doc/src/sgml/ref/alter_tsconfig.sgml +++ b/doc/src/sgml/ref/alter_tsconfig.sgml @@ -22,8 +22,12 @@ PostgreSQL documentation ALTER TEXT SEARCH CONFIGURATION name + ADD MAPPING FOR token_type [, ... ] WITH dictionary_expression +ALTER TEXT SEARCH CONFIGURATION name ADD MAPPING FOR token_type [, ... ] WITH dictionary_name [, ... ] ALTER TEXT SEARCH CONFIGURATION name + ALTER MAPPING FOR token_type [, ... ] WITH dictionary_expression +ALTER TEXT SEARCH CONFIGURATION name ALTER MAPPING FOR token_type [, ... ] WITH dictionary_name [, ... ] ALTER TEXT SEARCH CONFIGURATION name ALTER MAPPING REPLACE old_dictionary WITH new_dictionary @@ -89,6 +93,16 @@ ALTER TEXT SEARCH CONFIGURATION name SET SCHEMA + dictionary_expression + + + The expression of dictionaries tree. The dctionary expression + is a list of condition/command pairs that define way to process text. + + + + + old_dictionary @@ -133,7 +147,7 @@ ALTER TEXT SEARCH CONFIGURATION name SET SCHEMA - + The ADD MAPPING FOR form installs a list of dictionaries to be @@ -155,6 +169,64 @@ ALTER TEXT SEARCH CONFIGURATION name SET SCHEMA + Dictionaries expression + + + Format + + CASE + WHEN condition THEN command + [ WHEN condition THEN command ] + [ ELSE command ] + END + + + A condition is + + + + dictionary_name [IS [NOT] {NULL|STOPWORD}] [ {AND|OR} ... ] + or + (dictionary_name MAP BY dictionary_name) IS [NOT] {NULL|STOPWORD} [ {AND|OR} ... ] + + + + And command is: + + + + dictionary_name [ {UNION|INTERSECT|EXCEPT|MAP BY} ... ] + + + + + Condition + + Condition used to determine a command for token processing. A condition is + boolean expression. A dictionary can be tested for NULL-output + or stop-word output via options IS [NOT] {NULL|STOPWORD}. If none + of test options is mentied (dictionary_name without additional + options) it is tested for both not NULL and not stop word output. 
+ + + + + Command + + A command describes how PostgreSQL should build + a result set for current token. Output of each dictionary is set of lexemes. + Result of dictionaries can be combined with help of operators + UNION, EXCEPT, INTERSECT and a special + operator MAP BY. MAP BY operator uses output of + right subexpression as an input for left subexpression. If right subexpression + output is NULL, initial token is used instead. If the output contains + multiple lexemes, each lexeme used as token for left subexpression + independently and final results is combined. + + + + + Examples diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 7b4912d..58bf43a 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -732,10 +732,11 @@ SELECT to_tsvector('english', 'a fat cat sat on a mat - it ate a fat rats'); The to_tsvector function internally calls a parser which breaks the document text into tokens and assigns a type to each token. For each token, a list of - dictionaries () is consulted, - where the list can vary depending on the token type. The first dictionary - that recognizes the token emits one or more normalized - lexemes to represent the token. For example, + condition/command pairs is consulted, where the list can vary depending + on the token type, condition and command are logical and set expressions + on dictionaries() respectively. + The first pair with true-resulted condition emits one or more normalized + lexemes to represent the token based on command. For example, rats became rat because one of the dictionaries recognized that the word rats is a plural form of rat. Some words are recognized as @@ -743,7 +744,7 @@ SELECT to_tsvector('english', 'a fat cat sat on a mat - it ate a fat rats'); causes them to be ignored since they occur too frequently to be useful in searching. In our example these are a, on, and it. - If no dictionary in the list recognizes the token then it is also ignored. 
+ If none of conditions is true the token then is also ignored. In this example that happened to the punctuation sign - because there are in fact no dictionaries assigned for its token type (Space symbols), meaning space tokens will never be @@ -2232,7 +2233,9 @@ SELECT alias, description, token FROM ts_debug('http://example.com/stuff/index.h a single lexeme with the TSL_FILTER flag set, to replace the original token with a new token to be passed to subsequent dictionaries (a dictionary that does this is called a - filtering dictionary) + filtering dictionary). This behavior is applicable only + with comma-separated configuration + (see for more information) @@ -2264,38 +2267,85 @@ SELECT alias, description, token FROM ts_debug('http://example.com/stuff/index.h type that the parser can return, a separate list of dictionaries is specified by the configuration. When a token of that type is found by the parser, each dictionary in the list is consulted in turn, - until some dictionary recognizes it as a known word. If it is identified - as a stop word, or if no dictionary recognizes the token, it will be - discarded and not indexed or searched for. - Normally, the first dictionary that returns a non-NULL - output determines the result, and any remaining dictionaries are not - consulted; but a filtering dictionary can replace the given word - with a modified word, which is then passed to subsequent dictionaries. + until command is not selected based on it's condition. If none of cases is + selected it will be discarded and not indexed or searched for. - The general rule for configuring a list of dictionaries + A list of cases is described as condition/command pairs. Each condition is + evaluated in order to select appropriate command to generate resulted set + of lexems. + + + + A condition is a boolean expression with dictionaries used as operands and + basic logic operators AND, OR, NOT and + special operator MAP BY. 
In addition to operators, each operand + could contain IS [NOT] NULL or IS [NOT] STOPWORD option + to mark way to interpret lexemes as boolean value. If no options are mentioned + it is interpret as dictionary IS NOT NULL AND dictionary IS NOT STOPWORD. + + Special operator MAP BY is used to use output of right-hand + subexpression as input for left-hand one. In condition left and right + subexpressions can be either another MAP BY expression or + dictionary expression. Result of MAP BY should be explicitly + makred for boolean interpretation. + + + + A command is a set expression with dictionaries used as operands and basic + set operators UNION, EXCEPT, INTERSECT + and special operator MAP BY. The behavior of MAP BY + operator is similar to one in condition but without restrictions on content + of subexpressions since all operators operates on sets. + + + + The general rule for configuring a list of condition/command pairs is to place first the most narrow, most specific dictionary, then the more - general dictionaries, finishing with a very general dictionary, like + general dictionaries, finishing with a very general dictionaries, like a Snowball stemmer or simple, which - recognizes everything. For example, for an astronomy-specific search + recognizes everything. 
For example, for an astronomy-specific search (astro_en configuration) one could bind token type asciiword (ASCII word) to a synonym dictionary of astronomical terms, a general English dictionary and a Snowball English - stemmer: + stemmer via comma-separated variant of mapping: ALTER TEXT SEARCH CONFIGURATION astro_en ADD MAPPING FOR asciiword WITH astrosyn, english_ispell, english_stem; + + Another example is a configuration for both english and german languages via + operator-separated variant of mapping: + + +ALTER TEXT SEARCH CONFIGURATION multi_en_de + ADD MAPPING FOR asciiword, word WITH + CASE + WHEN english_ispell AND german_ispell THEN + english_ispell UNION german_ispell + WHEN english_ispell THEN + english_ispell UNION german_stem + WHEN german_ispell THEN + german_ispell UNION english_stem + ELSE + english_stem UNION german_stem + END; + + - A filtering dictionary can be placed anywhere in the list, except at the - end where it'd be useless. Filtering dictionaries are useful to partially + A filtering dictionary can be placed anywhere in comma-separated list, + except at the end where it'd be useless. + Filtering dictionaries are useful to partially normalize words to simplify the task of later dictionaries. For example, a filtering dictionary could be used to remove accents from accented letters, as is done by the module. + Otherwise filter dictionary should be placed at righthand of MAP BY + operator. If filter dictionary returns NULL it pass initial token + further in processing chain. 
@@ -2462,9 +2512,9 @@ SELECT ts_lexize('public.simple_dict','The'); SELECT * FROM ts_debug('english', 'Paris'); - alias | description | token | dictionaries | dictionary | lexemes ------------+-----------------+-------+----------------+--------------+--------- - asciiword | Word, all ASCII | Paris | {english_stem} | english_stem | {pari} + alias | description | token | dictionaries | command | lexemes +-----------+-----------------+-------+--------------+--------------+--------- + asciiword | Word, all ASCII | Paris | english_stem | english_stem | {pari} CREATE TEXT SEARCH DICTIONARY my_synonym ( TEMPLATE = synonym, @@ -2476,9 +2526,9 @@ ALTER TEXT SEARCH CONFIGURATION english WITH my_synonym, english_stem; SELECT * FROM ts_debug('english', 'Paris'); - alias | description | token | dictionaries | dictionary | lexemes ------------+-----------------+-------+---------------------------+------------+--------- - asciiword | Word, all ASCII | Paris | {my_synonym,english_stem} | my_synonym | {paris} + alias | description | token | dictionaries | command | lexemes +-----------+-----------------+-------+-------------------------+------------+--------- + asciiword | Word, all ASCII | Paris | my_synonym,english_stem | my_synonym | {paris} @@ -3107,6 +3157,20 @@ CREATE TEXT SEARCH DICTIONARY english_ispell ( ALTER TEXT SEARCH CONFIGURATION pg ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, word, hword, hword_part + WITH + CASE + WHEN pg_dict IS NOT NULL THEN pg_dict + WHEN english_ispell THEN english_ispell + ELSE english_stem + END; + + + Or use alternative comma-separated syntax: + + +ALTER TEXT SEARCH CONFIGURATION pg + ALTER MAPPING FOR asciiword, asciihword, hword_asciipart, + word, hword, hword_part WITH pg_dict, english_ispell, english_stem; @@ -3177,13 +3241,13 @@ SHOW default_text_search_config; -ts_debug( config regconfig, document text, - OUT alias text, - OUT description text, - OUT token text, - OUT dictionaries regdictionary[], - OUT dictionary 
regdictionary, - OUT lexemes text[]) +ts_debug( config regconfig, document text, + OUT alias text, + OUT description text, + OUT token text, + OUT dictionaries text, + OUT command text, + OUT lexemes text[]) returns setof record @@ -3220,20 +3284,20 @@ ts_debug( config re - dictionaries regdictionary[] — the - dictionaries selected by the configuration for this token type + dictionaries text — the + dictionaries defined by the configuration for this token type - dictionary regdictionary — the dictionary - that recognized the token, or NULL if none did + command text — the command that describes + the way to generate output lexemes text[] — the lexeme(s) produced - by the dictionary that recognized the token, or NULL if + by the command selected acording conditions, or NULL if none did; an empty array ({}) means it was recognized as a stop word @@ -3246,32 +3310,32 @@ ts_debug( config re SELECT * FROM ts_debug('english','a fat cat sat on a mat - it ate a fat rats'); - alias | description | token | dictionaries | dictionary | lexemes ------------+-----------------+-------+----------------+--------------+--------- - asciiword | Word, all ASCII | a | {english_stem} | english_stem | {} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | fat | {english_stem} | english_stem | {fat} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | cat | {english_stem} | english_stem | {cat} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | sat | {english_stem} | english_stem | {sat} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | on | {english_stem} | english_stem | {} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | a | {english_stem} | english_stem | {} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | mat | {english_stem} | english_stem | {mat} - blank | Space symbols | | {} | | - blank | Space symbols | - | {} | | - asciiword | Word, all ASCII | it | {english_stem} | english_stem | 
{} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | ate | {english_stem} | english_stem | {ate} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | a | {english_stem} | english_stem | {} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | fat | {english_stem} | english_stem | {fat} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | rats | {english_stem} | english_stem | {rat} + alias | description | token | dictionaries | command | lexemes +-----------+-----------------+-------+--------------+--------------+--------- + asciiword | Word, all ASCII | a | english_stem | english_stem | {} + blank | Space symbols | | | | + asciiword | Word, all ASCII | fat | english_stem | english_stem | {fat} + blank | Space symbols | | | | + asciiword | Word, all ASCII | cat | english_stem | english_stem | {cat} + blank | Space symbols | | | | + asciiword | Word, all ASCII | sat | english_stem | english_stem | {sat} + blank | Space symbols | | | | + asciiword | Word, all ASCII | on | english_stem | english_stem | {} + blank | Space symbols | | | | + asciiword | Word, all ASCII | a | english_stem | english_stem | {} + blank | Space symbols | | | | + asciiword | Word, all ASCII | mat | english_stem | english_stem | {mat} + blank | Space symbols | | | | + blank | Space symbols | - | | | + asciiword | Word, all ASCII | it | english_stem | english_stem | {} + blank | Space symbols | | | | + asciiword | Word, all ASCII | ate | english_stem | english_stem | {ate} + blank | Space symbols | | | | + asciiword | Word, all ASCII | a | english_stem | english_stem | {} + blank | Space symbols | | | | + asciiword | Word, all ASCII | fat | english_stem | english_stem | {fat} + blank | Space symbols | | | | + asciiword | Word, all ASCII | rats | english_stem | english_stem | {rat} @@ -3297,13 +3361,13 @@ ALTER TEXT SEARCH CONFIGURATION public.english SELECT * FROM ts_debug('public.english','The Brightest supernovaes'); - alias | 
description | token | dictionaries | dictionary | lexemes ------------+-----------------+-------------+-------------------------------+----------------+------------- - asciiword | Word, all ASCII | The | {english_ispell,english_stem} | english_ispell | {} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | Brightest | {english_ispell,english_stem} | english_ispell | {bright} - blank | Space symbols | | {} | | - asciiword | Word, all ASCII | supernovaes | {english_ispell,english_stem} | english_stem | {supernova} + alias | description | token | dictionaries | command | lexemes +-----------+-----------------+-------------+-----------------------------+----------------+------------- + asciiword | Word, all ASCII | The | english_ispell,english_stem | english_ispell | {} + blank | Space symbols | | | | + asciiword | Word, all ASCII | Brightest | english_ispell,english_stem | english_ispell | {bright} + blank | Space symbols | | | | + asciiword | Word, all ASCII | supernovaes | english_ispell,english_stem | english_stem | {supernova} diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index dc40cde..74cab6b 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -944,55 +944,13 @@ GRANT SELECT (subdbid, subname, subowner, subenabled, subslotname, subpublicatio -- Tsearch debug function. 
Defined here because it'd be pretty unwieldy -- to put it into pg_proc.h -CREATE FUNCTION ts_debug(IN config regconfig, IN document text, - OUT alias text, - OUT description text, - OUT token text, - OUT dictionaries regdictionary[], - OUT dictionary regdictionary, - OUT lexemes text[]) -RETURNS SETOF record AS -$$ -SELECT - tt.alias AS alias, - tt.description AS description, - parse.token AS token, - ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary - FROM pg_catalog.pg_ts_config_map AS m - WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid - ORDER BY m.mapseqno ) - AS dictionaries, - ( SELECT mapdict::pg_catalog.regdictionary - FROM pg_catalog.pg_ts_config_map AS m - WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid - ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno - LIMIT 1 - ) AS dictionary, - ( SELECT pg_catalog.ts_lexize(mapdict, parse.token) - FROM pg_catalog.pg_ts_config_map AS m - WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid - ORDER BY pg_catalog.ts_lexize(mapdict, parse.token) IS NULL, m.mapseqno - LIMIT 1 - ) AS lexemes -FROM pg_catalog.ts_parse( - (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2 - ) AS parse, - pg_catalog.ts_token_type( - (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ) - ) AS tt -WHERE tt.tokid = parse.tokid -$$ -LANGUAGE SQL STRICT STABLE PARALLEL SAFE; - -COMMENT ON FUNCTION ts_debug(regconfig,text) IS - 'debug function for text search configuration'; CREATE FUNCTION ts_debug(IN document text, OUT alias text, OUT description text, OUT token text, - OUT dictionaries regdictionary[], - OUT dictionary regdictionary, + OUT dictionaries text, + OUT dictionary text, OUT lexemes text[]) RETURNS SETOF record AS $$ diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index adc7cd6..a0f1650 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -39,9 +39,12 @@ #include "nodes/makefuncs.h" #include 
"parser/parse_func.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_public.h" #include "tsearch/ts_utils.h" +#include "tsearch/ts_configmap.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/jsonb.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -52,6 +55,7 @@ static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, HeapTuple tup, Relation relMap); static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt, HeapTuple tup, Relation relMap); +static TSMapRuleList *ParseTSMapList(List *dictMapList); /* --------------------- TS Parser commands ------------------------ */ @@ -935,11 +939,21 @@ makeConfigurationDependencies(HeapTuple tuple, bool removeOld, while (HeapTupleIsValid((maptup = systable_getnext(scan)))) { Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + TSMapRuleList *mapdicts = JsonbToTSMap(DatumGetJsonbP(&cfgmap->mapdicts)); + Oid *dictionaryOids = TSMapGetDictionariesList(mapdicts); + Oid *currentOid = dictionaryOids; - referenced.classId = TSDictionaryRelationId; - referenced.objectId = cfgmap->mapdict; - referenced.objectSubId = 0; - add_exact_object_address(&referenced, addrs); + while (*currentOid != InvalidOid) + { + referenced.classId = TSDictionaryRelationId; + referenced.objectId = *currentOid; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + + currentOid++; + } + pfree(dictionaryOids); + pfree(mapdicts); } systable_endscan(scan); @@ -1091,8 +1105,7 @@ DefineTSConfiguration(List *names, List *parameters, ObjectAddress *copied) mapvalues[Anum_pg_ts_config_map_mapcfg - 1] = cfgOid; mapvalues[Anum_pg_ts_config_map_maptokentype - 1] = cfgmap->maptokentype; - mapvalues[Anum_pg_ts_config_map_mapseqno - 1] = cfgmap->mapseqno; - mapvalues[Anum_pg_ts_config_map_mapdict - 1] = cfgmap->mapdict; + mapvalues[Anum_pg_ts_config_map_mapdicts - 1] = JsonbPGetDatum(&cfgmap->mapdicts); newmaptup = 
heap_form_tuple(mapRel->rd_att, mapvalues, mapnulls); @@ -1195,7 +1208,7 @@ AlterTSConfiguration(AlterTSConfigurationStmt *stmt) relMap = heap_open(TSConfigMapRelationId, RowExclusiveLock); /* Add or drop mappings */ - if (stmt->dicts) + if (stmt->dicts || stmt->dict_map) MakeConfigurationMapping(stmt, tup, relMap); else if (stmt->tokentype) DropConfigurationMapping(stmt, tup, relMap); @@ -1271,6 +1284,105 @@ getTokenTypes(Oid prsId, List *tokennames) return res; } +static TSMapExpression * +ParseTSMapExpression(DictMapExprElem *head) +{ + TSMapExpression *result; + + if (head == NULL) + return NULL; + + result = palloc0(sizeof(TSMapExpression)); + + if (head->kind == DICT_MAP_OPERATOR) + { + result->left = ParseTSMapExpression(head->left); + result->right = ParseTSMapExpression(head->right); + result->operator = head->oper; + result->options = head->options; + } + else if (head->kind == DICT_MAP_CONST_TRUE) + { + result->left = result->right = NULL; + result->is_true = true; + result->options = result->operator = 0; + } + else /* head->kind == DICT_MAP_OPERAND */ + { + result->dictionary = get_ts_dict_oid(head->dictname, false); + result->options = head->options; + } + + return result; +} + +static TSMapRule +ParseTSMapRule(DictMapElem *elem) +{ + TSMapRule result; + + memset(&result, 0, sizeof(result)); + + result.condition.expression = ParseTSMapExpression(elem->condition); + if (elem->commandmaps) + { + result.command.ruleList = ParseTSMapList(elem->commandmaps); + result.command.is_expression = false; + result.command.expression = NULL; + } + else + { + result.command.ruleList = NULL; + result.command.is_expression = true; + result.command.expression = ParseTSMapExpression(elem->command); + } + + return result; +} + +static TSMapRuleList * +ParseTSMapList(List *dictMapList) +{ + int i; + TSMapRuleList *result; + ListCell *c; + + if (list_length(dictMapList) == 1 && ((DictMapElem *) lfirst(dictMapList->head))->dictnames) + { + DictMapElem *elem = (DictMapElem 
*) lfirst(dictMapList->head); + + result = palloc0(sizeof(TSMapRuleList)); + result->count = list_length(elem->dictnames); + result->data = palloc0(sizeof(TSMapRule) * result->count); + + i = 0; + foreach(c, elem->dictnames) + { + List *names = (List *) lfirst(c); + + result->data[i].dictionary = get_ts_dict_oid(names, false); + i++; + } + } + else + { + result = palloc0(sizeof(TSMapRuleList)); + result->count = list_length(dictMapList); + result->data = palloc0(sizeof(TSMapRule) * result->count); + + i = 0; + foreach(c, dictMapList) + { + List *l = (List *) lfirst(c); + + result->data[i] = ParseTSMapRule((DictMapElem *) l); + i++; + } + } + + return result; +} + /* * ALTER TEXT SEARCH CONFIGURATION ADD/ALTER MAPPING */ @@ -1287,8 +1399,9 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, Oid prsId; int *tokens, ntoken; - Oid *dictIds; - int ndict; + Oid *dictIds = NULL; + int ndict = 0; + TSMapRuleList *mapRules = NULL; ListCell *c; prsId = ((Form_pg_ts_config) GETSTRUCT(tup))->cfgparser; @@ -1327,17 +1440,23 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, /* * Convert list of dictionary names to array of dict OIDs */ - ndict = list_length(stmt->dicts); - dictIds = (Oid *) palloc(sizeof(Oid) * ndict); - i = 0; - foreach(c, stmt->dicts) + if (stmt->dicts) { - List *names = (List *) lfirst(c); + ndict = list_length(stmt->dicts); + dictIds = (Oid *) palloc(sizeof(Oid) * ndict); + i = 0; + foreach(c, stmt->dicts) + { + List *names = (List *) lfirst(c); - dictIds[i] = get_ts_dict_oid(names, false); - i++; + dictIds[i] = get_ts_dict_oid(names, false); + i++; + } } + if (stmt->dict_map) + mapRules = ParseTSMapList(stmt->dict_map); + if (stmt->replace) { /* @@ -1357,6 +1476,10 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, while (HeapTupleIsValid((maptup = systable_getnext(scan)))) { Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + Datum repl_val[Natts_pg_ts_config_map]; + bool repl_null[Natts_pg_ts_config_map]; 
+ bool repl_repl[Natts_pg_ts_config_map]; + HeapTuple newtup; /* * check if it's one of target token types @@ -1380,25 +1503,21 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, /* * replace dictionary if match */ - if (cfgmap->mapdict == dictOld) - { - Datum repl_val[Natts_pg_ts_config_map]; - bool repl_null[Natts_pg_ts_config_map]; - bool repl_repl[Natts_pg_ts_config_map]; - HeapTuple newtup; - - memset(repl_val, 0, sizeof(repl_val)); - memset(repl_null, false, sizeof(repl_null)); - memset(repl_repl, false, sizeof(repl_repl)); - - repl_val[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictNew); - repl_repl[Anum_pg_ts_config_map_mapdict - 1] = true; - - newtup = heap_modify_tuple(maptup, - RelationGetDescr(relMap), - repl_val, repl_null, repl_repl); - CatalogTupleUpdate(relMap, &newtup->t_self, newtup); - } + mapRules = JsonbToTSMap(DatumGetJsonbP(&cfgmap->mapdicts)); + TSMapReplaceDictionary(mapRules, dictOld, dictNew); + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + + repl_val[Anum_pg_ts_config_map_mapdicts - 1] = JsonbPGetDatum(TSMapToJsonb(mapRules)); + repl_repl[Anum_pg_ts_config_map_mapdicts - 1] = true; + + newtup = heap_modify_tuple(maptup, + RelationGetDescr(relMap), + repl_val, repl_null, repl_repl); + CatalogTupleUpdate(relMap, &newtup->t_self, newtup); + pfree(mapRules); } systable_endscan(scan); @@ -1408,24 +1527,21 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, /* * Insertion of new entries */ + for (i = 0; i < ntoken; i++) { - for (j = 0; j < ndict; j++) - { - Datum values[Natts_pg_ts_config_map]; - bool nulls[Natts_pg_ts_config_map]; + Datum values[Natts_pg_ts_config_map]; + bool nulls[Natts_pg_ts_config_map]; - memset(nulls, false, sizeof(nulls)); - values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId); - values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]); - 
values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1); - values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]); + memset(nulls, false, sizeof(nulls)); + values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId); + values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]); + values[Anum_pg_ts_config_map_mapdicts - 1] = JsonbPGetDatum(TSMapToJsonb(mapRules)); - tup = heap_form_tuple(relMap->rd_att, values, nulls); - CatalogTupleInsert(relMap, tup); + tup = heap_form_tuple(relMap->rd_att, values, nulls); + CatalogTupleInsert(relMap, tup); - heap_freetuple(tup); - } + heap_freetuple(tup); } } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index c1a83ca..476e8da 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4371,6 +4371,32 @@ _copyReassignOwnedStmt(const ReassignOwnedStmt *from) return newnode; } +static DictMapExprElem * +_copyDictMapExprElem(const DictMapExprElem *from) +{ + DictMapExprElem *newnode = makeNode(DictMapExprElem); + + COPY_NODE_FIELD(dictname); + COPY_NODE_FIELD(left); + COPY_NODE_FIELD(right); + COPY_SCALAR_FIELD(kind); + COPY_SCALAR_FIELD(oper); + COPY_SCALAR_FIELD(options); + + return newnode; +} + +static DictMapElem * +_copyDictMapElem(const DictMapElem *from) +{ + DictMapElem *newnode = makeNode(DictMapElem); + + COPY_NODE_FIELD(condition); + COPY_NODE_FIELD(command); + + return newnode; +} + static AlterTSDictionaryStmt * _copyAlterTSDictionaryStmt(const AlterTSDictionaryStmt *from) { @@ -5373,6 +5399,12 @@ copyObjectImpl(const void *from) case T_ReassignOwnedStmt: retval = _copyReassignOwnedStmt(from); break; + case T_DictMapExprElem: + retval = _copyDictMapExprElem(from); + break; + case T_DictMapElem: + retval = _copyDictMapElem(from); + break; case T_AlterTSDictionaryStmt: retval = _copyAlterTSDictionaryStmt(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 7a70001..4434566 
100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2177,6 +2177,28 @@ _equalReassignOwnedStmt(const ReassignOwnedStmt *a, const ReassignOwnedStmt *b) } static bool +_equalDictMapExprElem(const DictMapExprElem *a, const DictMapExprElem *b) +{ + COMPARE_NODE_FIELD(dictname); + COMPARE_NODE_FIELD(left); + COMPARE_NODE_FIELD(right); + COMPARE_SCALAR_FIELD(kind); + COMPARE_SCALAR_FIELD(oper); + COMPARE_SCALAR_FIELD(options); + + return true; +} + +static bool +_equalDictMapElem(const DictMapElem *a, const DictMapElem *b) +{ + COMPARE_NODE_FIELD(condition); + COMPARE_NODE_FIELD(command); + + return true; +} + +static bool _equalAlterTSDictionaryStmt(const AlterTSDictionaryStmt *a, const AlterTSDictionaryStmt *b) { COMPARE_NODE_FIELD(dictname); @@ -3517,6 +3539,12 @@ equal(const void *a, const void *b) case T_ReassignOwnedStmt: retval = _equalReassignOwnedStmt(a, b); break; + case T_DictMapExprElem: + retval = _equalDictMapExprElem(a, b); + break; + case T_DictMapElem: + retval = _equalDictMapElem(a, b); + break; case T_AlterTSDictionaryStmt: retval = _equalAlterTSDictionaryStmt(a, b); break; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 4c83a63..6a14890 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -52,6 +52,7 @@ #include "catalog/namespace.h" #include "catalog/pg_am.h" #include "catalog/pg_trigger.h" +#include "catalog/pg_ts_config_map.h" #include "commands/defrem.h" #include "commands/trigger.h" #include "nodes/makefuncs.h" @@ -241,6 +242,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); PartitionSpec *partspec; PartitionBoundSpec *partboundspec; RoleSpec *rolespec; + DictMapExprElem *dmapexpr; + DictMapElem *dmap; } %type stmt schema_stmt @@ -396,8 +399,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); relation_expr_list dostmt_opt_list transform_element_list transform_type_list TriggerTransitions 
TriggerReferencing - publication_name_list vacuum_relation_list opt_vacuum_relation_list + publication_name_list dictionary_map_list dictionary_map + dictionary_map_case %type group_by_list %type group_by_item empty_grouping_set rollup_clause cube_clause @@ -581,6 +585,15 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type partbound_datum PartitionRangeDatum %type partbound_datum_list range_datum_list +%type dictionary_map_clause_expr_dict_not dictionary_map_clause_expr_dict_flag +%type dictionary_map_clause dictionary_map_clause_expr_not + dictionary_map_command dictionary_map_command_expr_paren + dictionary_map_dict dictionary_map_clause_expr_or + dictionary_map_clause_expr_and dictionary_map_clause_expr_mapby_ext + dictionary_map_clause_expr_mapby + dictionary_map_clause_expr_paren dictionary_map_clause_expr_dict +%type dictionary_map_else dictionary_map_element + /* * Non-keyword token types. These are hard-wired into the "flex" lexer. * They must be listed first so that their numeric codes do not depend on @@ -648,7 +661,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); LEADING LEAKPROOF LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P LOCKED LOGGED - MAPPING MATCH MATERIALIZED MAXVALUE METHOD MINUTE_P MINVALUE MODE MONTH_P MOVE + MAP MAPPING MATCH MATERIALIZED MAXVALUE METHOD MINUTE_P MINVALUE MODE + MONTH_P MOVE NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NONE NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF @@ -671,7 +685,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); SAVEPOINT SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SQL_P STABLE STANDALONE_P - START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P + START STATEMENT STATISTICS STDIN STDOUT STOPWORD STORAGE 
STRICT_P STRIP_P SUBSCRIPTION SUBSTRING SYMMETRIC SYSID SYSTEM_P TABLE TABLES TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN @@ -10005,24 +10019,26 @@ AlterTSDictionaryStmt: ; AlterTSConfigurationStmt: - ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list any_with any_name_list + ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list any_with dictionary_map { AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); n->kind = ALTER_TSCONFIG_ADD_MAPPING; n->cfgname = $5; n->tokentype = $9; - n->dicts = $11; + n->dict_map = $11; + n->dicts = NULL; n->override = false; n->replace = false; $$ = (Node*)n; } - | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list any_with any_name_list + | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list any_with dictionary_map { AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); n->kind = ALTER_TSCONFIG_ALTER_MAPPING_FOR_TOKEN; n->cfgname = $5; n->tokentype = $9; - n->dicts = $11; + n->dict_map = $11; + n->dicts = NULL; n->override = true; n->replace = false; $$ = (Node*)n; @@ -10074,6 +10090,272 @@ any_with: WITH {} | WITH_LA {} ; +dictionary_map: + dictionary_map_case { $$ = $1; } + | any_name_list + { + DictMapElem *n = makeNode(DictMapElem); + n->condition = NULL; + n->command = NULL; + n->commandmaps = NULL; + n->dictnames = $1; + $$ = list_make1(n); + } + ; + +dictionary_map_case: + CASE dictionary_map_list END_P + { + $$ = $2; + } + | CASE dictionary_map_list dictionary_map_else END_P + { + $$ = lappend($2, $3); + } + ; + +dictionary_map_list: + dictionary_map_element { $$ = list_make1($1); } + | dictionary_map_list dictionary_map_element { $$ = lappend($1, $2); } + ; + +dictionary_map_else: + ELSE dictionary_map_command + { + DictMapElem *n = makeNode(DictMapElem); + n->command = $2; + n->commandmaps = NULL; + n->dictnames = NULL; + + n->condition = makeNode(DictMapExprElem); + n->condition->kind = DICT_MAP_CONST_TRUE; 
+ n->condition->oper = 0; + n->condition->options = 0; + n->condition->left = NULL; + n->condition->right = NULL; + + $$ = n; + } + | ELSE dictionary_map_case + { + DictMapElem *n = makeNode(DictMapElem); + n->command = NULL; + n->commandmaps = $2; + n->dictnames = NULL; + + n->condition = makeNode(DictMapExprElem); + n->condition->kind = DICT_MAP_CONST_TRUE; + n->condition->oper = 0; + n->condition->options = 0; + n->condition->left = NULL; + n->condition->right = NULL; + + $$ = n; + } + ; + +dictionary_map_element: + WHEN dictionary_map_clause THEN dictionary_map_command + { + DictMapElem *n = makeNode(DictMapElem); + n->condition = $2; + n->command = $4; + n->commandmaps = NULL; + n->dictnames = NULL; + $$ = n; + } + | WHEN dictionary_map_clause THEN dictionary_map_case + { + DictMapElem *n = makeNode(DictMapElem); + n->condition = $2; + n->command = NULL; + n->commandmaps = $4; + n->dictnames = NULL; + $$ = n; + } + ; + +dictionary_map_clause: + dictionary_map_clause_expr_or { $$ = $1; } + ; + +dictionary_map_clause_expr_or: + dictionary_map_clause_expr_and OR dictionary_map_clause_expr_or + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_OR; + n->options = 0; + n->left = $1; + n->right = $3; + $$ = n; + } + | dictionary_map_clause_expr_and { $$ = $1; } + ; + +dictionary_map_clause_expr_and: + dictionary_map_clause_expr_not AND dictionary_map_clause_expr_and + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_AND; + n->options = 0; + n->left = $1; + n->right = $3; + $$ = n; + } + | dictionary_map_clause_expr_not { $$ = $1; } + ; + +dictionary_map_clause_expr_mapby_ext: + dictionary_map_clause_expr_dict MAP BY dictionary_map_clause_expr_mapby_ext + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_MAPBY; + n->options = 0; + n->left = $1; + n->right = $4; + $$ = n; + } + | 
dictionary_map_clause_expr_dict { $$ = $1; } + ; + +dictionary_map_clause_expr_mapby: + dictionary_map_clause_expr_dict MAP BY dictionary_map_clause_expr_mapby_ext + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_MAPBY; + n->options = 0; + n->left = $1; + n->right = $4; + $$ = n; + } + ; + +dictionary_map_clause_expr_not: + NOT dictionary_map_clause_expr_not + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_NOT; + n->options = 0; + n->left = NULL; + n->right = $2; + $$ = n; + } + | dictionary_map_clause_expr_paren { $$ = $1; } + ; + +dictionary_map_clause_expr_paren: + '(' dictionary_map_clause_expr_or ')' { $$ = $2; } + | '(' dictionary_map_clause_expr_mapby ')' IS dictionary_map_clause_expr_dict_not dictionary_map_clause_expr_dict_flag + { + $$ = $2; + $$->options = $5 | $6; + } + | '(' dictionary_map_clause_expr_mapby ')' + { + $$ = $2; + $$->options = DICTMAP_OPT_NOT | DICTMAP_OPT_IS_NULL | DICTMAP_OPT_IS_STOP; + } + | dictionary_map_clause_expr_dict { $$ = $1; } + ; + +dictionary_map_clause_expr_dict: + any_name + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERAND; + n->dictname = $1; + n->oper = 0; + n->options = DICTMAP_OPT_NOT | DICTMAP_OPT_IS_NULL | DICTMAP_OPT_IS_STOP; + n->left = n->right = NULL; + $$ = n; + } + | any_name IS dictionary_map_clause_expr_dict_not dictionary_map_clause_expr_dict_flag + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERAND; + n->dictname = $1; + n->oper = 0; + n->options = $3 | $4; + n->left = n->right = NULL; + $$ = n; + } + ; + +dictionary_map_clause_expr_dict_not: + NOT { $$ = DICTMAP_OPT_NOT; } + | /* EMPTY */ { $$ = 0; } + ; + +dictionary_map_clause_expr_dict_flag: + NULL_P { $$ = DICTMAP_OPT_IS_NULL; } + | STOPWORD { $$ = DICTMAP_OPT_IS_STOP; } + ; + +dictionary_map_command: + dictionary_map_command_expr_paren { $$ = $1; } + | 
dictionary_map_command_expr_paren UNION dictionary_map_command_expr_paren + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_UNION; + n->options = 0; + n->left = $1; + n->right = $3; + $$ = n; + } + | dictionary_map_command_expr_paren EXCEPT dictionary_map_command_expr_paren + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_EXCEPT; + n->options = 0; + n->left = $1; + n->right = $3; + $$ = n; + } + | dictionary_map_command_expr_paren INTERSECT dictionary_map_command_expr_paren + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_INTERSECT; + n->options = 0; + n->left = $1; + n->right = $3; + $$ = n; + } + | dictionary_map_command_expr_paren MAP BY dictionary_map_command_expr_paren + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERATOR; + n->oper = DICTMAP_OP_MAPBY; + n->options = 0; + n->left = $1; + n->right = $4; + $$ = n; + } + ; + +dictionary_map_command_expr_paren: + '(' dictionary_map_command ')' { $$ = $2; } + | dictionary_map_dict { $$ = $1; } + ; + +dictionary_map_dict: + any_name + { + DictMapExprElem *n = makeNode(DictMapExprElem); + n->kind = DICT_MAP_OPERAND; + n->dictname = $1; + n->options = 0; + n->left = n->right = NULL; + $$ = n; + } + ; /***************************************************************************** * @@ -14728,6 +15010,7 @@ unreserved_keyword: | LOCK_P | LOCKED | LOGGED + | MAP | MAPPING | MATCH | MATERIALIZED @@ -14831,6 +15114,7 @@ unreserved_keyword: | STATISTICS | STDIN | STDOUT + | STOPWORD | STORAGE | STRICT_P | STRIP_P diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile index 34fe4c5..24e47f2 100644 --- a/src/backend/tsearch/Makefile +++ b/src/backend/tsearch/Makefile @@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES)) OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \ dict_simple.o 
 	dict_synonym.o dict_thesaurus.o \
 	dict_ispell.o regis.o spell.o \
-	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o
+	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o ts_configmap.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/tsearch/ts_configmap.c b/src/backend/tsearch/ts_configmap.c
new file mode 100644
index 0000000..a7d9e0c
--- /dev/null
+++ b/src/backend/tsearch/ts_configmap.c
@@ -0,0 +1,976 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_configmap.c
+ *		internal representation of text search configuration and utilities for it
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/tsearch/ts_configmap.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include 
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/sysattr.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_ts_dict.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_configmap.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+
+/*
+ * Used during the parsing of TSMapRuleList from JSONB into internal
+ * datastructures.
+ */ +typedef enum TSMapRuleParseState +{ + TSMRPS_BEGINING, + TSMRPS_IN_CASES_ARRAY, + TSMRPS_IN_CASE, + TSMRPS_IN_CONDITION, + TSMRPS_IN_COMMAND, + TSMRPS_IN_EXPRESSION +} TSMapRuleParseState; + +typedef enum TSMapRuleParseNodeType +{ + TSMRPT_UNKNOWN, + TSMRPT_NUMERIC, + TSMRPT_EXPRESSION, + TSMRPT_RULE_LIST, + TSMRPT_RULE, + TSMRPT_COMMAND, + TSMRPT_CONDITION, + TSMRPT_BOOL +} TSMapRuleParseNodeType; + +typedef struct TSMapParseNode +{ + TSMapRuleParseNodeType type; + union + { + int num_val; + bool bool_val; + TSMapRule *rule_val; + TSMapCommand *command_val; + TSMapRuleList *rule_list_val; + TSMapCondition *condition_val; + TSMapExpression *expression_val; + }; +} TSMapParseNode; + +static JsonbValue *TSMapToJsonbValue(TSMapRuleList *rules, JsonbParseState *jsonb_state); +static TSMapParseNode *JsonbToTSMapParse(JsonbContainer *root, TSMapRuleParseState *parse_state); + +static void +TSMapPrintDictName(Oid dictId, StringInfo result) +{ + Relation maprel; + Relation mapidx; + ScanKeyData mapskey; + SysScanDesc mapscan; + HeapTuple maptup; + Form_pg_ts_dict dict; + + maprel = heap_open(TSDictionaryRelationId, AccessShareLock); + mapidx = index_open(TSDictionaryOidIndexId, AccessShareLock); + + ScanKeyInit(&mapskey, ObjectIdAttributeNumber, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(dictId)); + mapscan = systable_beginscan_ordered(maprel, mapidx, + NULL, 1, &mapskey); + + maptup = systable_getnext_ordered(mapscan, ForwardScanDirection); + dict = (Form_pg_ts_dict) GETSTRUCT(maptup); + appendStringInfoString(result, dict->dictname.data); + + systable_endscan_ordered(mapscan); + index_close(mapidx, AccessShareLock); + heap_close(maprel, AccessShareLock); +} + +static void +TSMapExpressionPrint(TSMapExpression *expression, StringInfo result) +{ + if (expression->dictionary == InvalidOid && expression->options != 0) + appendStringInfoChar(result, '('); + + if (expression->left) + { + if (expression->left->operator != 0 && expression->left->operator < 
expression->operator) + appendStringInfoChar(result, '('); + + TSMapExpressionPrint(expression->left, result); + + if (expression->left->operator != 0 && expression->left->operator < expression->operator) + appendStringInfoChar(result, ')'); + } + + switch (expression->operator) + { + case DICTMAP_OP_OR: + appendStringInfoString(result, " OR "); + break; + case DICTMAP_OP_AND: + appendStringInfoString(result, " AND "); + break; + case DICTMAP_OP_NOT: + appendStringInfoString(result, " NOT "); + break; + case DICTMAP_OP_UNION: + appendStringInfoString(result, " UNION "); + break; + case DICTMAP_OP_EXCEPT: + appendStringInfoString(result, " EXCEPT "); + break; + case DICTMAP_OP_INTERSECT: + appendStringInfoString(result, " INTERSECT "); + break; + case DICTMAP_OP_MAPBY: + appendStringInfoString(result, " MAP BY "); + break; + } + + if (expression->right) + { + if (expression->right->operator != 0 && expression->right->operator < expression->operator) + appendStringInfoChar(result, '('); + + TSMapExpressionPrint(expression->right, result); + + if (expression->right->operator != 0 && expression->right->operator < expression->operator) + appendStringInfoChar(result, ')'); + } + + if (expression->dictionary == InvalidOid && expression->options != 0) + appendStringInfoChar(result, ')'); + + if (expression->dictionary != InvalidOid || expression->options != 0) + { + if (expression->dictionary != InvalidOid) + TSMapPrintDictName(expression->dictionary, result); + if (expression->options != (DICTMAP_OPT_NOT | DICTMAP_OPT_IS_NULL | DICTMAP_OPT_IS_STOP)) + { + if (expression->options != 0) + appendStringInfoString(result, " IS "); + if (expression->options & DICTMAP_OPT_NOT) + appendStringInfoString(result, "NOT "); + if (expression->options & DICTMAP_OPT_IS_NULL) + appendStringInfoString(result, "NULL "); + if (expression->options & DICTMAP_OPT_IS_STOP) + appendStringInfoString(result, "STOPWORD "); + } + } +} + +void +TSMapPrintRule(TSMapRule *rule, StringInfo result, int 
depth) +{ + int i; + + if (rule->dictionary != InvalidOid) + { + TSMapPrintDictName(rule->dictionary, result); + } + else if (rule->condition.expression->is_true) + { + for (i = 0; i < depth; i++) + appendStringInfoChar(result, '\t'); + appendStringInfoString(result, "ELSE "); + } + else + { + for (i = 0; i < depth; i++) + appendStringInfoChar(result, '\t'); + appendStringInfoString(result, "WHEN "); + TSMapExpressionPrint(rule->condition.expression, result); + appendStringInfoString(result, " THEN\n"); + for (i = 0; i < depth + 1; i++) + appendStringInfoString(result, "\t"); + } + + if (rule->command.is_expression) + { + TSMapExpressionPrint(rule->command.expression, result); + } + else if (rule->dictionary == InvalidOid) + { + TSMapPrintRuleList(rule->command.ruleList, result, depth + 1); + } +} + +void +TSMapPrintRuleList(TSMapRuleList *rules, StringInfo result, int depth) +{ + int i; + + for (i = 0; i < rules->count; i++) + { + if (rules->data[i].dictionary != InvalidOid) /* Comma-separated + * configuration syntax */ + { + if (i > 0) + appendStringInfoString(result, ", "); + TSMapPrintDictName(rules->data[i].dictionary, result); + } + else + { + if (i == 0) + { + int j; + + for (j = 0; j < depth; j++) + appendStringInfoChar(result, '\t'); + appendStringInfoString(result, "CASE\n"); + } + else + appendStringInfoChar(result, '\n'); + TSMapPrintRule(&rules->data[i], result, depth + 1); + } + } + + if (rules->data[0].dictionary == InvalidOid) + { + appendStringInfoChar(result, '\n'); + for (i = 0; i < depth; i++) + appendStringInfoChar(result, '\t'); + appendStringInfoString(result, "END"); + } +} + +Datum +dictionary_map_to_text(PG_FUNCTION_ARGS) +{ + Oid cfgOid = PG_GETARG_OID(0); + int32 tokentype = PG_GETARG_INT32(1); + StringInfo rawResult; + text *result = NULL; + TSConfigCacheEntry *cacheEntry; + + cacheEntry = lookup_ts_config_cache(cfgOid); + rawResult = makeStringInfo(); + initStringInfo(rawResult); + + if (cacheEntry->lenmap > tokentype && 
cacheEntry->map[tokentype]->count > 0) + { + TSMapRuleList *rules = cacheEntry->map[tokentype]; + + TSMapPrintRuleList(rules, rawResult, 0); + } + + if (rawResult) + { + result = cstring_to_text(rawResult->data); + pfree(rawResult); + } + + PG_RETURN_TEXT_P(result); +} + +static JsonbValue * +TSIntToJsonbValue(int int_value) +{ + char buffer[16]; + JsonbValue *value = palloc0(sizeof(JsonbValue)); + + memset(buffer, 0, sizeof(char) * 16); + + pg_ltoa(int_value, buffer); + value->type = jbvNumeric; + value->val.numeric = DatumGetNumeric(DirectFunctionCall3( + numeric_in, + CStringGetDatum(buffer), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1) + )); + return value; + +} + +static JsonbValue * +TSExpressionToJsonb(TSMapExpression *expression, JsonbParseState *jsonb_state) +{ + if (expression == NULL) + return NULL; + if (expression->dictionary != InvalidOid) + { + JsonbValue key; + JsonbValue *value = NULL; + + pushJsonbValue(&jsonb_state, WJB_BEGIN_OBJECT, NULL); + + key.type = jbvString; + key.val.string.len = strlen("options"); + key.val.string.val = "options"; + value = TSIntToJsonbValue(expression->options); + + pushJsonbValue(&jsonb_state, WJB_KEY, &key); + pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + key.type = jbvString; + key.val.string.len = strlen("dictionary"); + key.val.string.val = "dictionary"; + value = TSIntToJsonbValue(expression->dictionary); + + pushJsonbValue(&jsonb_state, WJB_KEY, &key); + pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + return pushJsonbValue(&jsonb_state, WJB_END_OBJECT, NULL); + } + else if (expression->is_true) + { + JsonbValue *value = palloc0(sizeof(JsonbValue)); + + value->type = jbvBool; + value->val.boolean = true; + return value; + } + else + { + JsonbValue key; + JsonbValue *value = NULL; + + pushJsonbValue(&jsonb_state, WJB_BEGIN_OBJECT, NULL); + + key.type = jbvString; + key.val.string.len = strlen("operator"); + key.val.string.val = "operator"; + value = TSIntToJsonbValue(expression->operator); + + 
pushJsonbValue(&jsonb_state, WJB_KEY, &key); + pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + key.type = jbvString; + key.val.string.len = strlen("options"); + key.val.string.val = "options"; + value = TSIntToJsonbValue(expression->options); + + pushJsonbValue(&jsonb_state, WJB_KEY, &key); + pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + key.type = jbvString; + key.val.string.len = strlen("left"); + key.val.string.val = "left"; + + pushJsonbValue(&jsonb_state, WJB_KEY, &key); + value = TSExpressionToJsonb(expression->left, jsonb_state); + if (value && IsAJsonbScalar(value)) + pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + key.type = jbvString; + key.val.string.len = strlen("right"); + key.val.string.val = "right"; + + pushJsonbValue(&jsonb_state, WJB_KEY, &key); + value = TSExpressionToJsonb(expression->right, jsonb_state); + if (value && IsAJsonbScalar(value)) + pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + return pushJsonbValue(&jsonb_state, WJB_END_OBJECT, NULL); + } +} + +static JsonbValue * +TSRuleToJsonbValue(TSMapRule *rule, JsonbParseState *jsonb_state) +{ + if (rule->dictionary != InvalidOid) + { + return TSIntToJsonbValue(rule->dictionary); + } + else + { + JsonbValue key; + JsonbValue *value = NULL; + + pushJsonbValue(&jsonb_state, WJB_BEGIN_OBJECT, NULL); + + key.type = jbvString; + key.val.string.len = strlen("condition"); + key.val.string.val = "condition"; + + pushJsonbValue(&jsonb_state, WJB_KEY, &key); + value = TSExpressionToJsonb(rule->condition.expression, jsonb_state); + + if (IsAJsonbScalar(value)) + pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + key.type = jbvString; + key.val.string.len = strlen("command"); + key.val.string.val = "command"; + + pushJsonbValue(&jsonb_state, WJB_KEY, &key); + if (rule->command.is_expression) + value = TSExpressionToJsonb(rule->command.expression, jsonb_state); + else + value = TSMapToJsonbValue(rule->command.ruleList, jsonb_state); + + if (IsAJsonbScalar(value)) + 
pushJsonbValue(&jsonb_state, WJB_VALUE, value); + + return pushJsonbValue(&jsonb_state, WJB_END_OBJECT, NULL); + } +} + +static JsonbValue * +TSMapToJsonbValue(TSMapRuleList *rules, JsonbParseState *jsonb_state) +{ + JsonbValue *out; + int i; + + pushJsonbValue(&jsonb_state, WJB_BEGIN_ARRAY, NULL); + for (i = 0; i < rules->count; i++) + { + JsonbValue *value = TSRuleToJsonbValue(&rules->data[i], jsonb_state); + + if (IsAJsonbScalar(value)) + pushJsonbValue(&jsonb_state, WJB_ELEM, value); + } + out = pushJsonbValue(&jsonb_state, WJB_END_ARRAY, NULL); + return out; +} + +Jsonb * +TSMapToJsonb(TSMapRuleList *rules) +{ + JsonbParseState *jsonb_state = NULL; + JsonbValue *out; + Jsonb *result; + + out = TSMapToJsonbValue(rules, jsonb_state); + + result = JsonbValueToJsonb(out); + return result; +} + +static inline TSMapExpression * +JsonbToTSMapGetExpression(TSMapParseNode *node) +{ + TSMapExpression *result; + + if (node->type == TSMRPT_NUMERIC) + { + result = palloc0(sizeof(TSMapExpression)); + result->dictionary = node->num_val; + } + else if (node->type == TSMRPT_BOOL) + { + result = palloc0(sizeof(TSMapExpression)); + result->is_true = node->bool_val; + } + else + result = node->expression_val; + + pfree(node); + + return result; +} + +static TSMapParseNode * +JsonbToTSMapParseObject(JsonbValue *value, TSMapRuleParseState *parse_state) +{ + TSMapParseNode *result = palloc0(sizeof(TSMapParseNode)); + char *str; + + switch (value->type) + { + case jbvNumeric: + result->type = TSMRPT_NUMERIC; + str = DatumGetCString( + DirectFunctionCall1(numeric_out, NumericGetDatum(value->val.numeric))); + result->num_val = pg_atoi(str, sizeof(result->num_val), 0); + break; + case jbvArray: + Assert(*parse_state == TSMRPS_IN_COMMAND); + case jbvBinary: + result = JsonbToTSMapParse(value->val.binary.data, parse_state); + break; + case jbvBool: + result->type = TSMRPT_BOOL; + result->bool_val = value->val.boolean; + break; + case jbvObject: + case jbvNull: + case jbvString: + break; + 
} + return result; +} + +static TSMapParseNode * +JsonbToTSMapParse(JsonbContainer *root, TSMapRuleParseState *parse_state) +{ + JsonbIteratorToken r; + JsonbValue val; + JsonbIterator *it; + TSMapParseNode *result; + TSMapParseNode *nested_result; + char *key; + TSMapRuleList *rule_list = NULL; + + it = JsonbIteratorInit(root); + result = palloc0(sizeof(TSMapParseNode)); + result->type = TSMRPT_UNKNOWN; + while ((r = JsonbIteratorNext(&it, &val, true)) != WJB_DONE) + { + switch (r) + { + case WJB_BEGIN_ARRAY: + if (*parse_state == TSMRPS_BEGINING || *parse_state == TSMRPS_IN_EXPRESSION) + { + *parse_state = TSMRPS_IN_CASES_ARRAY; + rule_list = palloc0(sizeof(TSMapRuleList)); + } + break; + case WJB_KEY: + key = palloc0(sizeof(char) * (val.val.string.len + 1)); + memcpy(key, val.val.string.val, sizeof(char) * val.val.string.len); + + r = JsonbIteratorNext(&it, &val, true); + if (*parse_state == TSMRPS_IN_CASE) + { + if (strcmp(key, "command") == 0) + *parse_state = TSMRPS_IN_EXPRESSION; + else if (strcmp(key, "condition") == 0) + *parse_state = TSMRPS_IN_EXPRESSION; + } + + nested_result = JsonbToTSMapParseObject(&val, parse_state); + + if (result->type == TSMRPT_RULE) + { + if (strcmp(key, "command") == 0) + { + result->rule_val->command.is_expression = nested_result->type == TSMRPT_EXPRESSION || + nested_result->type == TSMRPT_NUMERIC; + + if (result->rule_val->command.is_expression) + result->rule_val->command.expression = JsonbToTSMapGetExpression(nested_result); + else + result->rule_val->command.ruleList = nested_result->rule_list_val; + } + else if (strcmp(key, "condition") == 0) + { + result->rule_val->condition.expression = JsonbToTSMapGetExpression(nested_result); + } + *parse_state = TSMRPS_IN_CASE; + } + else if (result->type == TSMRPT_COMMAND) + { + result->command_val->is_expression = nested_result->type == TSMRPT_EXPRESSION; + if (result->command_val->is_expression) + result->command_val->expression = JsonbToTSMapGetExpression(nested_result); + else 
+ result->command_val->ruleList = nested_result->rule_list_val; + *parse_state = TSMRPS_IN_COMMAND; + } + else if (result->type == TSMRPT_CONDITION) + { + result->condition_val->expression = JsonbToTSMapGetExpression(nested_result); + *parse_state = TSMRPS_IN_COMMAND; + } + else if (result->type == TSMRPT_EXPRESSION) + { + if (strcmp(key, "left") == 0) + result->expression_val->left = JsonbToTSMapGetExpression(nested_result); + else if (strcmp(key, "right") == 0) + result->expression_val->right = JsonbToTSMapGetExpression(nested_result); + else if (strcmp(key, "operator") == 0) + result->expression_val->operator = nested_result->num_val; + else if (strcmp(key, "options") == 0) + result->expression_val->options = nested_result->num_val; + else if (strcmp(key, "dictionary") == 0) + result->expression_val->dictionary = nested_result->num_val; + } + + break; + case WJB_BEGIN_OBJECT: + if (*parse_state == TSMRPS_IN_CASES_ARRAY) + { + *parse_state = TSMRPS_IN_CASE; + result->type = TSMRPT_RULE; + result->rule_val = palloc0(sizeof(TSMapRule)); + } + else if (*parse_state == TSMRPS_IN_COMMAND) + { + result->type = TSMRPT_COMMAND; + result->command_val = palloc0(sizeof(TSMapCommand)); + } + else if (*parse_state == TSMRPS_IN_CONDITION) + { + result->type = TSMRPT_CONDITION; + result->condition_val = palloc0(sizeof(TSMapCondition)); + } + else if (*parse_state == TSMRPS_IN_EXPRESSION) + { + result->type = TSMRPT_EXPRESSION; + result->expression_val = palloc0(sizeof(TSMapExpression)); + } + break; + case WJB_END_OBJECT: + if (*parse_state == TSMRPS_IN_CASE) + *parse_state = TSMRPS_IN_CASES_ARRAY; + else if (*parse_state == TSMRPS_IN_CONDITION || *parse_state == TSMRPS_IN_COMMAND) + *parse_state = TSMRPS_IN_CASE; + if (rule_list && result->type == TSMRPT_RULE) + { + rule_list->count++; + if (rule_list->data) + rule_list->data = repalloc(rule_list->data, sizeof(TSMapRule) * rule_list->count); + else + rule_list->data = palloc0(sizeof(TSMapRule) * rule_list->count); + 
memcpy(rule_list->data + rule_list->count - 1, result->rule_val, sizeof(TSMapRule)); + } + else + return result; + case WJB_END_ARRAY: + break; + default: + nested_result = JsonbToTSMapParseObject(&val, parse_state); + if (nested_result->type == TSMRPT_NUMERIC) + { + if (*parse_state == TSMRPS_IN_CASES_ARRAY) + { + /* + * Add dictionary Oid into array (comma-separated + * configuration) + */ + rule_list->count++; + if (rule_list->data) + rule_list->data = repalloc(rule_list->data, sizeof(TSMapRule) * rule_list->count); + else + rule_list->data = palloc0(sizeof(TSMapRule) * rule_list->count); + memset(rule_list->data + rule_list->count - 1, 0, sizeof(TSMapRule)); + rule_list->data[rule_list->count - 1].dictionary = nested_result->num_val; + } + else if (result->type == TSMRPT_UNKNOWN && *parse_state == TSMRPS_IN_EXPRESSION) + { + result->type = TSMRPT_EXPRESSION; + result->expression_val = palloc0(sizeof(TSMapExpression)); + } + if (result->type == TSMRPT_EXPRESSION) + result->expression_val->dictionary = nested_result->num_val; + } + else if (nested_result->type == TSMRPT_RULE && rule_list) + { + rule_list->count++; + if (rule_list->data) + rule_list->data = repalloc(rule_list->data, sizeof(TSMapRule) * rule_list->count); + else + rule_list->data = palloc0(sizeof(TSMapRule) * rule_list->count); + memcpy(rule_list->data + rule_list->count - 1, nested_result->rule_val, sizeof(TSMapRule)); + } + break; + } + } + result->type = TSMRPT_RULE_LIST; + result->rule_list_val = rule_list; + return result; +} + +TSMapRuleList * +JsonbToTSMap(Jsonb *json) +{ + JsonbContainer *root = &json->root; + TSMapRuleList *result = palloc0(sizeof(TSMapRuleList)); + TSMapRuleParseState parse_state = TSMRPS_BEGINING; + TSMapParseNode *parsing_result; + + parsing_result = JsonbToTSMapParse(root, &parse_state); + + Assert(parsing_result->type == TSMRPT_RULE_LIST); + result = parsing_result->rule_list_val; + pfree(parsing_result); + + return result; +} + +static void 
+TSMapReplaceDictionaryParseExpression(TSMapExpression *expr, Oid oldDict, Oid newDict) +{ + if (expr->left) + TSMapReplaceDictionaryParseExpression(expr->left, oldDict, newDict); + if (expr->right) + TSMapReplaceDictionaryParseExpression(expr->right, oldDict, newDict); + + if (expr->dictionary == oldDict) + expr->dictionary = newDict; +} + +static void +TSMapReplaceDictionaryParseMap(TSMapRule *rule, Oid oldDict, Oid newDict) +{ + if (rule->dictionary != InvalidOid) + { + Oid *result; + + result = palloc0(sizeof(Oid) * 2); + result[0] = rule->dictionary; + result[1] = InvalidOid; + } + else + { + TSMapReplaceDictionaryParseExpression(rule->condition.expression, oldDict, newDict); + + if (rule->command.is_expression) + TSMapReplaceDictionaryParseExpression(rule->command.expression, oldDict, newDict); + else + TSMapReplaceDictionary(rule->command.ruleList, oldDict, newDict); + } +} + +void +TSMapReplaceDictionary(TSMapRuleList *rules, Oid oldDict, Oid newDict) +{ + int i; + + for (i = 0; i < rules->count; i++) + TSMapReplaceDictionaryParseMap(&rules->data[i], oldDict, newDict); +} + +static Oid * +TSMapGetDictionariesParseExpression(TSMapExpression *expr) +{ + Oid *left_res; + Oid *right_res; + Oid *result; + + left_res = right_res = NULL; + + if (expr->left && expr->right) + { + Oid *ptr; + int count_l; + int count_r; + + left_res = TSMapGetDictionariesParseExpression(expr->left); + right_res = TSMapGetDictionariesParseExpression(expr->right); + + for (ptr = left_res, count_l = 0; *ptr != InvalidOid; ptr++) + count_l++; + for (ptr = right_res, count_r = 0; *ptr != InvalidOid; ptr++) + count_r++; + + result = palloc0(sizeof(Oid) * (count_l + count_r + 1)); + memcpy(result, left_res, sizeof(Oid) * count_l); + memcpy(result + count_l, right_res, sizeof(Oid) * count_r); + result[count_l + count_r] = InvalidOid; + + pfree(left_res); + pfree(right_res); + } + else + { + result = palloc0(sizeof(Oid) * 2); + result[0] = expr->dictionary; + result[1] = InvalidOid; + } + + 
return result; +} + +static Oid * +TSMapGetDictionariesParseRule(TSMapRule *rule) +{ + Oid *result; + + if (rule->dictionary) + { + result = palloc0(sizeof(Oid) * 2); + result[0] = rule->dictionary; + result[1] = InvalidOid; + } + else + { + if (rule->command.is_expression) + result = TSMapGetDictionariesParseExpression(rule->command.expression); + else + result = TSMapGetDictionariesList(rule->command.ruleList); + } + return result; +} + +Oid * +TSMapGetDictionariesList(TSMapRuleList *rules) +{ + int i; + Oid **results_arr; + int *sizes; + Oid *result; + int size; + int offset; + + results_arr = palloc0(sizeof(Oid *) * rules->count); + sizes = palloc0(sizeof(int) * rules->count); + size = 0; + for (i = 0; i < rules->count; i++) + { + int count; + Oid *ptr; + + results_arr[i] = TSMapGetDictionariesParseRule(&rules->data[i]); + + for (count = 0, ptr = results_arr[i]; *ptr != InvalidOid; ptr++) + count++; + + sizes[i] = count; + size += count; + } + + result = palloc(sizeof(Oid) * (size + 1)); + offset = 0; + for (i = 0; i < rules->count; i++) + { + memcpy(result + offset, results_arr[i], sizeof(Oid) * sizes[i]); + offset += sizes[i]; + pfree(results_arr[i]); + } + result[offset] = InvalidOid; + + pfree(results_arr); + pfree(sizes); + + return result; +} + +ListDictionary * +TSMapGetListDictionary(TSMapRuleList *rules) +{ + ListDictionary *result = palloc0(sizeof(ListDictionary)); + Oid *oids = TSMapGetDictionariesList(rules); + int i; + int count; + Oid *ptr; + + ptr = oids; + count = 0; + while (*ptr != InvalidOid) + { + count++; + ptr++; + } + + result->len = count; + result->dictIds = palloc0(sizeof(Oid) * result->len); + ptr = oids; + i = 0; + while (*ptr != InvalidOid) + result->dictIds[i++] = *(ptr++); + + return result; +} + +static TSMapExpression * +TSMapExpressionMoveToMemoryContext(TSMapExpression *expr, MemoryContext context) +{ + TSMapExpression *result; + + if (expr == NULL) + return NULL; + result = MemoryContextAlloc(context, 
sizeof(TSMapExpression)); + memset(result, 0, sizeof(TSMapExpression)); + if (expr->dictionary != InvalidOid || expr->is_true) + { + result->dictionary = expr->dictionary; + result->is_true = expr->is_true; + result->options = expr->options; + result->left = result->right = NULL; + result->operator = 0; + } + else + { + result->left = TSMapExpressionMoveToMemoryContext(expr->left, context); + result->right = TSMapExpressionMoveToMemoryContext(expr->right, context); + result->operator = expr->operator; + result->options = expr->options; + result->dictionary = InvalidOid; + result->is_true = false; + } + return result; +} + +static TSMapRule +TSMapRuleMoveToMemoryContext(TSMapRule *rule, MemoryContext context) +{ + TSMapRule result; + + memset(&result, 0, sizeof(TSMapRule)); + + if (rule->dictionary) + { + result.dictionary = rule->dictionary; + } + else + { + result.condition.expression = TSMapExpressionMoveToMemoryContext(rule->condition.expression, context); + + result.command.is_expression = rule->command.is_expression; + if (rule->command.is_expression) + result.command.expression = TSMapExpressionMoveToMemoryContext(rule->command.expression, context); + else + result.command.ruleList = TSMapMoveToMemoryContext(rule->command.ruleList, context); + } + + return result; +} + +TSMapRuleList * +TSMapMoveToMemoryContext(TSMapRuleList *rules, MemoryContext context) +{ + int i; + TSMapRuleList *result = MemoryContextAlloc(context, sizeof(TSMapRuleList)); + + memset(result, 0, sizeof(TSMapRuleList)); + + result->count = rules->count; + result->data = MemoryContextAlloc(context, sizeof(TSMapRule) * result->count); + + for (i = 0; i < result->count; i++) + result->data[i] = TSMapRuleMoveToMemoryContext(&rules->data[i], context); + + return result; +} + +static void +TSMapExpressionFree(TSMapExpression *expression) +{ + if (expression->left) + TSMapExpressionFree(expression->left); + if (expression->right) + TSMapExpressionFree(expression->right); + pfree(expression); +} + 
+static void +TSMapRuleFree(TSMapRule rule) +{ + if (rule.dictionary == InvalidOid) + { + if (rule.command.is_expression) + TSMapExpressionFree(rule.command.expression); + else + TSMapFree(rule.command.ruleList); + + TSMapExpressionFree(rule.condition.expression); + } +} + +void +TSMapFree(TSMapRuleList * rules) +{ + int i; + + for (i = 0; i < rules->count; i++) + TSMapRuleFree(rules->data[i]); + pfree(rules->data); + pfree(rules); +} diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c index ad5dddf..c71658b 100644 --- a/src/backend/tsearch/ts_parse.c +++ b/src/backend/tsearch/ts_parse.c @@ -16,6 +16,10 @@ #include "tsearch/ts_cache.h" #include "tsearch/ts_utils.h" +#include "tsearch/ts_configmap.h" +#include "utils/builtins.h" + +#include "funcapi.h" #define IGNORE_LONGLEXEME 1 @@ -28,328 +32,1296 @@ typedef struct ParsedLex int type; char *lemm; int lenlemm; + int maplen; + bool *accepted; + bool *rejected; + bool *notFinished; + bool *holdAccepted; struct ParsedLex *next; + TSMapRule *relatedRule; } ParsedLex; -typedef struct ListParsedLex -{ - ParsedLex *head; - ParsedLex *tail; -} ListParsedLex; +typedef struct ListParsedLex +{ + ParsedLex *head; + ParsedLex *tail; +} ListParsedLex; + +typedef struct DictState +{ + Oid relatedDictionary; + DictSubState subState; + ListParsedLex acceptedTokens; /* Tokens which are processed and + * accepted, used in last returned result + * by the dictionary */ + ListParsedLex intermediateTokens; /* Tokens which are not accepted, but + * were processed by thesaurus-like + * dictionry */ + bool storeToAccepted; /* Should current token be appended to + * accepted or intermediate tokens */ + bool processed; /* Is the dictionary take control during + * current token processing */ + TSLexeme *tmpResult; /* Last result retued by thesaurus-like + * dictionary, if dictionary still waiting for + * more lexemes */ +} DictState; + +typedef struct DictStateList +{ + int listLength; + DictState *states; +} 
DictStateList; + +typedef struct LexemesBufferEntry +{ + Oid dictId; + ParsedLex *token; + TSLexeme *data; +} LexemesBufferEntry; + +typedef struct LexemesBuffer +{ + int size; + LexemesBufferEntry *data; +} LexemesBuffer; + +typedef struct ResultStorage +{ + TSLexeme *lexemes; /* Processed lexemes, which is not yet + * accepted */ + TSLexeme *accepted; +} ResultStorage; + +typedef struct LexizeData +{ + TSConfigCacheEntry *cfg; + DictSubState dictState; + DictStateList dslist; + ListParsedLex towork; /* current list to work */ + ListParsedLex waste; /* list of lexemes that already lexized */ + LexemesBuffer buffer; + ResultStorage delayedResults; + Oid skipDictionary; +} LexizeData; + +typedef struct TSDebugContext +{ + TSConfigCacheEntry *cfg; + TSParserCacheEntry *prsobj; + LexDescr *tokenTypes; + void *prsdata; + LexizeData ldata; + int tokentype; /* Last token tokentype */ + TSLexeme *savedLexemes; /* Last token lexemes stored for ts_debug + * output */ + ParsedLex *leftTokens; /* Corresponded ParsedLex */ + TSMapRule *rule; /* Rule which produced output */ +} TSDebugContext; + +static TSLexeme *LexizeExecMapBy(LexizeData *ld, ParsedLex *token, TSMapExpression *left, TSMapExpression *right); + +static void +LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg) +{ + ld->cfg = cfg; + ld->skipDictionary = InvalidOid; + ld->towork.head = ld->towork.tail = NULL; + ld->waste.head = ld->waste.tail = NULL; + ld->dslist.listLength = 0; + ld->dslist.states = NULL; + ld->buffer.size = 0; + ld->buffer.data = NULL; + ld->delayedResults.lexemes = NULL; + ld->delayedResults.accepted = NULL; +} + +static void +LPLAddTail(ListParsedLex *list, ParsedLex *newpl) +{ + if (list->tail) + { + list->tail->next = newpl; + list->tail = newpl; + } + else + list->head = list->tail = newpl; + newpl->next = NULL; +} + +static void +LPLAddTailCopy(ListParsedLex *list, ParsedLex *newpl) +{ + ParsedLex *copy = palloc0(sizeof(ParsedLex)); + + copy->lenlemm = newpl->lenlemm; + copy->type = 
newpl->type; + copy->lemm = newpl->lemm; + copy->relatedRule = newpl->relatedRule; + copy->next = NULL; + + if (list->tail) + { + list->tail->next = copy; + list->tail = copy; + } + else + list->head = list->tail = copy; +} + +static ParsedLex * +LPLRemoveHead(ListParsedLex *list) +{ + ParsedLex *res = list->head; + + if (list->head) + list->head = list->head->next; + + if (list->head == NULL) + list->tail = NULL; + + return res; +} + +static void +LPLClear(ListParsedLex *list) +{ + ParsedLex *tmp, + *ptr = list->head; + + while (ptr) + { + tmp = ptr->next; + pfree(ptr); + ptr = tmp; + } + + list->head = list->tail = NULL; +} + +static void +LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm) +{ + ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex)); + newpl->type = type; + newpl->lemm = lemm; + newpl->lenlemm = lenlemm; + newpl->relatedRule = NULL; + LPLAddTail(&ld->towork, newpl); +} + +static void +RemoveHead(LexizeData *ld) +{ + LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork)); +} + +static void +setCorrLex(LexizeData *ld, ParsedLex **correspondLexem) +{ + if (correspondLexem) + { + *correspondLexem = ld->waste.head; + } + else + { + LPLClear(&ld->waste); + } + ld->waste.head = ld->waste.tail = NULL; +} + +static DictState * +DictStateListGet(DictStateList *list, Oid dictId) +{ + int i; + DictState *result = NULL; + + for (i = 0; i < list->listLength; i++) + if (list->states[i].relatedDictionary == dictId) + result = &list->states[i]; + + return result; +} + +static void +DictStateListRemove(DictStateList *list, Oid dictId) +{ + int i; + + for (i = 0; i < list->listLength; i++) + if (list->states[i].relatedDictionary == dictId) + break; + + if (i != list->listLength) + { + memcpy(list->states + i, list->states + i + 1, sizeof(DictState) * (list->listLength - i - 1)); + list->listLength--; + if (list->listLength == 0) + list->states = NULL; + else + list->states = repalloc(list->states, sizeof(DictState) * list->listLength); + } +} + +static 
DictState * +DictStateListAdd(DictStateList *list, DictState *state) +{ + DictStateListRemove(list, state->relatedDictionary); + + list->listLength++; + if (list->states) + list->states = repalloc(list->states, sizeof(DictState) * list->listLength); + else + list->states = palloc0(sizeof(DictState) * list->listLength); + + memcpy(list->states + list->listLength - 1, state, sizeof(DictState)); + + return list->states + list->listLength - 1; +} + +static void +DictStateListClear(DictStateList *list) +{ + list->listLength = 0; + if (list->states) + pfree(list->states); + list->states = NULL; +} + +static bool +LexemesBufferContains(LexemesBuffer *buffer, Oid dictId, ParsedLex *token) +{ + int i; + + for (i = 0; i < buffer->size; i++) + if (buffer->data[i].dictId == dictId && buffer->data[i].token == token) + return true; + + return false; +} + +static TSLexeme * +LexemesBufferGet(LexemesBuffer *buffer, Oid dictId, ParsedLex *token) +{ + int i; + TSLexeme *result = NULL; + + for (i = 0; i < buffer->size; i++) + if (buffer->data[i].dictId == dictId && buffer->data[i].token == token) + result = buffer->data[i].data; + + return result; +} + +static void +LexemesBufferRemove(LexemesBuffer *buffer, Oid dictId, ParsedLex *token) +{ + int i; + + for (i = 0; i < buffer->size; i++) + if (buffer->data[i].dictId == dictId && buffer->data[i].token == token) + break; + + if (i != buffer->size) + { + memcpy(buffer->data + i, buffer->data + i + 1, sizeof(LexemesBufferEntry) * (buffer->size - i - 1)); + buffer->size--; + if (buffer->size == 0) + buffer->data = NULL; + else + buffer->data = repalloc(buffer->data, sizeof(LexemesBufferEntry) * buffer->size); + } +} + +static void +LexemesBufferAdd(LexemesBuffer *buffer, Oid dictId, ParsedLex *token, TSLexeme *data) +{ + LexemesBufferRemove(buffer, dictId, token); + + buffer->size++; + if (buffer->data) + buffer->data = repalloc(buffer->data, sizeof(LexemesBufferEntry) * buffer->size); + else + buffer->data = 
palloc0(sizeof(LexemesBufferEntry) * buffer->size); + + buffer->data[buffer->size - 1].token = token; + buffer->data[buffer->size - 1].dictId = dictId; + buffer->data[buffer->size - 1].data = data; +} + +static void +LexemesBufferClear(LexemesBuffer *buffer) +{ + buffer->size = 0; + if (buffer->data) + pfree(buffer->data); + buffer->data = NULL; +} + +/* + * TSLexeme util functions + */ + +static int +TSLexemeGetSize(TSLexeme *lex) +{ + int result = 0; + TSLexeme *ptr = lex; + + while (ptr && ptr->lexeme) + { + result++; + ptr++; + } + + return result; +} + +/* + * Remove same lexemes. Remove copies of whole nvariant groups. + */ +static TSLexeme * +TSLexemeRemoveDuplications(TSLexeme *lexeme) +{ + TSLexeme *res; + int curLexIndex; + int i; + int lexemeSize = TSLexemeGetSize(lexeme); + int shouldCopyCount = lexemeSize; + bool *shouldCopy; + + if (lexeme == NULL) + return NULL; + + shouldCopy = palloc(sizeof(bool) * lexemeSize); + memset(shouldCopy, true, sizeof(bool) * lexemeSize); + + for (curLexIndex = 0; curLexIndex < lexemeSize; curLexIndex++) + { + for (i = curLexIndex + 1; i < lexemeSize; i++) + { + if (!shouldCopy[i]) + continue; + + if (strcmp(lexeme[curLexIndex].lexeme, lexeme[i].lexeme) == 0) + { + if (lexeme[curLexIndex].nvariant == lexeme[i].nvariant) + { + shouldCopy[i] = false; + shouldCopyCount--; + continue; + } + else + { + /* + * Check for same set of lexemes in another nvariant + * series + */ + int nvariantCountL = 0; + int nvariantCountR = 0; + int nvariantOverlap = 1; + int j; + + for (j = 0; j < lexemeSize; j++) + if (lexeme[curLexIndex].nvariant == lexeme[j].nvariant) + nvariantCountL++; + for (j = 0; j < lexemeSize; j++) + if (lexeme[i].nvariant == lexeme[j].nvariant) + nvariantCountR++; + + if (nvariantCountL != nvariantCountR) + continue; + + for (j = 1; j < nvariantCountR; j++) + { + if (strcmp(lexeme[curLexIndex + j].lexeme, lexeme[i + j].lexeme) == 0 + && lexeme[curLexIndex + j].nvariant == lexeme[i + j].nvariant) + nvariantOverlap++; 
+ } + + if (nvariantOverlap != nvariantCountR) + continue; + + for (j = 0; j < nvariantCountR; j++) + { + shouldCopy[i + j] = false; + } + } + } + } + } + + res = palloc0(sizeof(TSLexeme) * (shouldCopyCount + 1)); + + for (i = 0, curLexIndex = 0; curLexIndex < lexemeSize; curLexIndex++) + { + if (shouldCopy[curLexIndex]) + { + memcpy(res + i, lexeme + curLexIndex, sizeof(TSLexeme)); + i++; + } + } + + pfree(shouldCopy); + pfree(lexeme); + return res; +} + +/* + * Combine two lexeme lists with respect to positions + */ +static TSLexeme * +TSLexemeMergePositions(TSLexeme *left, TSLexeme *right) +{ + TSLexeme *result; + int left_size = TSLexemeGetSize(left); + int right_size = TSLexemeGetSize(right); + int left_i = 0; + int right_i = 0; + int left_max_nvariant = 0; + int i; + + if (left == NULL && right == NULL) + { + result = NULL; + } + else + { + result = palloc0(sizeof(TSLexeme) * (left_size + right_size + 1)); + + for (i = 0; i < left_size; i++) + if (left[i].nvariant > left_max_nvariant) + left_max_nvariant = left[i].nvariant; + + for (i = 0; i < right_size; i++) + right[i].nvariant += left_max_nvariant; + if (right && right[0].flags & TSL_ADDPOS) + right[0].flags &= ~TSL_ADDPOS; + + i = 0; + while (i < left_size + right_size) + { + if (left_i < left_size) + { + do + { + result[i++] = left[left_i++]; + } while (left && left[left_i].lexeme && (left[left_i].flags & TSL_ADDPOS) == 0); + } + if (right_i < right_size) + { + do + { + result[i++] = right[right_i++]; + } while (right && right[right_i].lexeme && (right[right_i].flags & TSL_ADDPOS) == 0); + } + } + } + return result; +} + +/* + * Split lexemes generated by regular dictionaries and multi-input dictionaries + * and combine them with respect to positions + */ +static TSLexeme * +TSLexemeFilterMulti(TSLexeme *lexemes) +{ + TSLexeme *result; + TSLexeme *ptr = lexemes; + int multi_lexemes = 0; + + while (ptr && ptr->lexeme) + { + if (ptr->flags & TSL_MULTI) + multi_lexemes++; + ptr++; + } + + if (multi_lexemes 
> 0) + { + TSLexeme *lexemes_multi = palloc0(sizeof(TSLexeme) * (multi_lexemes + 1)); + TSLexeme *lexemes_rest = palloc0(sizeof(TSLexeme) * (TSLexemeGetSize(lexemes) - multi_lexemes + 1)); + int rest_i = 0; + int multi_i = 0; + + ptr = lexemes; + while (ptr && ptr->lexeme) + { + if (ptr->flags & TSL_MULTI) + lexemes_multi[multi_i++] = *ptr; + else + lexemes_rest[rest_i++] = *ptr; + + ptr++; + } + result = TSLexemeMergePositions(lexemes_rest, lexemes_multi); + } + else + { + result = TSLexemeMergePositions(lexemes, NULL); + } + + return result; +} + +/* + * Mark lexemes generated by multi-input (thesaurus-like) dictionary + */ +static void +TSLexemeMarkMulti(TSLexeme *lexemes) +{ + TSLexeme *ptr = lexemes; + + while (ptr && ptr->lexeme) + { + ptr->flags |= TSL_MULTI; + ptr++; + } +} + +/* + * Lexemes set operations + */ + +/* + * Combine left and right lexeme lists into one. + * If append is true, right lexemes added after last left lexeme with TSL_ADDPOS flag + */ +static TSLexeme * +TSLexemeUnionOpt(TSLexeme *left, TSLexeme *right, bool append) +{ + TSLexeme *result; + int left_size = TSLexemeGetSize(left); + int right_size = TSLexemeGetSize(right); + int left_max_nvariant = 0; + int i; + + if (left == NULL && right == NULL) + { + result = NULL; + } + else + { + result = palloc0(sizeof(TSLexeme) * (left_size + right_size + 1)); + + for (i = 0; i < left_size; i++) + if (left[i].nvariant > left_max_nvariant) + left_max_nvariant = left[i].nvariant; + + if (left_size > 0) + memcpy(result, left, sizeof(TSLexeme) * left_size); + if (right_size > 0) + memcpy(result + left_size, right, sizeof(TSLexeme) * right_size); + if (append && left_size > 0 && right_size > 0) + result[left_size].flags |= TSL_ADDPOS; + + for (i = left_size; i < left_size + right_size; i++) + result[i].nvariant += left_max_nvariant; + } + + return result; +} + +static TSLexeme * +TSLexemeUnion(TSLexeme *left, TSLexeme *right) +{ + return TSLexemeUnionOpt(left, right, false); +} + +static TSLexeme * 
+TSLexemeExcept(TSLexeme *left, TSLexeme *right) +{ + TSLexeme *result = NULL; + int i, + j, + k; + int left_size = TSLexemeGetSize(left); + int right_size = TSLexemeGetSize(right); + + result = palloc0(sizeof(TSLexeme) * (left_size + 1)); + + for (k = 0, i = 0; i < left_size; i++) + { + bool found = false; + + for (j = 0; j < right_size; j++) + { + if (strcmp(left[i].lexeme, right[j].lexeme) == 0) + found = true; + } + + if (!found) + result[k++] = left[i]; + } + + return result; +} + +static TSLexeme * +TSLexemeIntersect(TSLexeme *left, TSLexeme *right) +{ + TSLexeme *result = NULL; + int i, + j, + k; + int left_size = TSLexemeGetSize(left); + int right_size = TSLexemeGetSize(right); + + result = palloc0(sizeof(TSLexeme) * (left_size + 1)); + + for (k = 0, i = 0; i < left_size; i++) + { + bool found = false; + + for (j = 0; j < right_size; j++) + { + if (strcmp(left[i].lexeme, right[j].lexeme) == 0) + found = true; + } + + if (found) + result[k++] = left[i]; + } + + return result; +} + +/* + * Result storage functions + */ + +static void +ResultStorageAdd(ResultStorage *storage, ParsedLex *token, TSLexeme *lexs) +{ + TSLexeme *oldLexs = storage->lexemes; + + storage->lexemes = TSLexemeUnionOpt(storage->lexemes, lexs, true); + if (oldLexs) + pfree(oldLexs); +} + +static void +ResultStorageMoveToAccepted(ResultStorage *storage) +{ + if (storage->accepted) + { + TSLexeme *prevAccepted = storage->accepted; + + storage->accepted = TSLexemeUnionOpt(storage->accepted, storage->lexemes, true); + if (prevAccepted) + pfree(prevAccepted); + if (storage->lexemes) + pfree(storage->lexemes); + } + else + { + storage->accepted = storage->lexemes; + } + storage->lexemes = NULL; +} + +static void +ResultStorageClearLexemes(ResultStorage *storage) +{ + if (storage->lexemes) + pfree(storage->lexemes); + storage->lexemes = NULL; +} + +static void +ResultStorageClear(ResultStorage *storage) +{ + ResultStorageClearLexemes(storage); + + if (storage->accepted) + 
pfree(storage->accepted); + storage->accepted = NULL; +} + +/* + * Condition and command execution + */ + +static TSLexeme * +LexizeExecDictionary(LexizeData *ld, ParsedLex *token, Oid dictId) +{ + TSLexeme *res; + TSDictionaryCacheEntry *dict; + DictSubState subState; + + if (ld->skipDictionary == dictId) + return NULL; + + if (LexemesBufferContains(&ld->buffer, dictId, token)) + { + res = LexemesBufferGet(&ld->buffer, dictId, token); + } + else + { + char *curValLemm = token->lemm; + int curValLenLemm = token->lenlemm; + DictState *state = DictStateListGet(&ld->dslist, dictId); + + dict = lookup_ts_dictionary_cache(dictId); + + if (state) + { + subState = state->subState; + state->processed = true; + } + else + { + subState.isend = subState.getnext = false; + subState.private_state = NULL; + } + + res = (TSLexeme *) DatumGetPointer(FunctionCall4( + &(dict->lexize), + PointerGetDatum(dict->dictData), + PointerGetDatum(curValLemm), + Int32GetDatum(curValLenLemm), + PointerGetDatum(&subState) + )); + + + if (subState.getnext) + { + /* + * Dictionary wants next word, so store current context and state + * in the DictStateList + */ + if (state == NULL) + { + state = palloc0(sizeof(DictState)); + state->processed = true; + state->relatedDictionary = dictId; + state->intermediateTokens.head = state->intermediateTokens.tail = NULL; + state->acceptedTokens.head = state->acceptedTokens.tail = NULL; + state->tmpResult = NULL; + + /* + * Add state to the list and update pointer in order to work + * with copy from the list + */ + state = DictStateListAdd(&ld->dslist, state); + } + + state->subState = subState; + state->storeToAccepted = res != NULL; + + if (res) + { + if (state->intermediateTokens.head != NULL) + { + ParsedLex *ptr = state->intermediateTokens.head; + + while (ptr) + { + LPLAddTailCopy(&state->acceptedTokens, ptr); + ptr = ptr->next; + } + state->intermediateTokens.head = state->intermediateTokens.tail = NULL; + } + + if (state->tmpResult) + 
pfree(state->tmpResult); + TSLexemeMarkMulti(res); + state->tmpResult = res; + res = NULL; + } + } + else if (state != NULL) + { + if (res) + { + if (state) + TSLexemeMarkMulti(res); + DictStateListRemove(&ld->dslist, dictId); + } + else + { + /* + * Trigger post-processing in order to check tmpResult and + * restart processing (see LexizeExec function) + */ + state->processed = false; + } + } + LexemesBufferAdd(&ld->buffer, dictId, token, res); + } + + return res; +} + +static bool +LexizeExecDictionaryWaitNext(LexizeData *ld, Oid dictId) +{ + DictState *state = DictStateListGet(&ld->dslist, dictId); + + if (state) + return state->subState.getnext; + else + return false; +} + +static bool +LexizeExecIsNull(LexizeData *ld, ParsedLex *token, Oid dictId) +{ + TSLexeme *lexemes = LexizeExecDictionary(ld, token, dictId); + + if (lexemes) + return false; + else + return !LexizeExecDictionaryWaitNext(ld, dictId); +} + +static bool +LexizeExecIsStop(LexizeData *ld, ParsedLex *token, Oid dictId) +{ + TSLexeme *lex = LexizeExecDictionary(ld, token, dictId); + + return lex != NULL && lex[0].lexeme == NULL; +} + +static bool +LexizeExecExpressionBool(LexizeData *ld, ParsedLex *token, TSMapExpression *expression) +{ + bool result; + + if (expression == NULL) + result = false; + else if (expression->is_true) + result = true; + else if (expression->dictionary != InvalidOid) + { + bool is_null = LexizeExecIsNull(ld, token, expression->dictionary); + bool is_stop = LexizeExecIsStop(ld, token, expression->dictionary); + bool invert = (expression->options & DICTMAP_OPT_NOT) != 0; + + result = true; + if ((expression->options & DICTMAP_OPT_IS_NULL) != 0) + result = result && (invert ? !is_null : is_null); + if ((expression->options & DICTMAP_OPT_IS_STOP) != 0) + result = result && (invert ? 
!is_stop : is_stop); + } + else + { + if (expression->operator == DICTMAP_OP_MAPBY) + { + TSLexeme *mapby_result = LexizeExecMapBy(ld, token, expression->left, expression->right); + bool is_null = mapby_result == NULL; + bool is_stop = mapby_result != NULL && mapby_result[0].lexeme == NULL; + bool invert = (expression->options & DICTMAP_OPT_NOT) != 0; + + if (expression->left->dictionary != InvalidOid && LexizeExecDictionaryWaitNext(ld, expression->left->dictionary)) + is_null = false; + + result = true; + if ((expression->options & DICTMAP_OPT_IS_NULL) != 0) + result = result && (invert ? !is_null : is_null); + if ((expression->options & DICTMAP_OPT_IS_STOP) != 0) + result = result && (invert ? !is_stop : is_stop); + } + else + { + bool res_left = LexizeExecExpressionBool(ld, token, expression->left); + bool res_right = LexizeExecExpressionBool(ld, token, expression->right); + + switch (expression->operator) + { + case DICTMAP_OP_NOT: + result = !res_right; + break; + case DICTMAP_OP_OR: + result = res_left || res_right; + break; + case DICTMAP_OP_AND: + result = res_left && res_right; + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid text search configuration boolean expression"))); + break; + } + } + } + + return result; +} + +static TSLexeme * +LexizeExecExpressionSet(LexizeData *ld, ParsedLex *token, TSMapExpression *expression) +{ + TSLexeme *result; + + if (expression->dictionary != InvalidOid) + { + result = LexizeExecDictionary(ld, token, expression->dictionary); + } + else + { + if (expression->operator == DICTMAP_OP_MAPBY) + { + result = LexizeExecMapBy(ld, token, expression->left, expression->right); + } + else + { + TSLexeme *res_left = LexizeExecExpressionSet(ld, token, expression->left); + TSLexeme *res_right = LexizeExecExpressionSet(ld, token, expression->right); + + switch (expression->operator) + { + case DICTMAP_OP_UNION: + result = TSLexemeUnion(res_left, res_right); + break; + case 
DICTMAP_OP_EXCEPT: + result = TSLexemeExcept(res_left, res_right); + break; + case DICTMAP_OP_INTERSECT: + result = TSLexemeIntersect(res_left, res_right); + break; + default: + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid text search configuration result set expression"))); + result = NULL; + break; + } + } + } + + return result; +} -typedef struct +static TSLexeme * +LexizeExecMapBy(LexizeData *ld, ParsedLex *token, TSMapExpression *left, TSMapExpression *right) { - TSConfigCacheEntry *cfg; - Oid curDictId; - int posDict; - DictSubState dictState; - ParsedLex *curSub; - ListParsedLex towork; /* current list to work */ - ListParsedLex waste; /* list of lexemes that already lexized */ + TSLexeme *right_res = LexizeExecExpressionSet(ld, token, right); + TSLexeme *result = NULL; + int right_size = TSLexemeGetSize(right_res); + int i; - /* - * fields to store last variant to lexize (basically, thesaurus or similar - * to, which wants several lexemes - */ + if (right_res == NULL) + return LexizeExecExpressionSet(ld, token, left); - ParsedLex *lastRes; - TSLexeme *tmpRes; -} LexizeData; + for (i = 0; i < right_size; i++) + { + TSLexeme *tmp_res = NULL; + TSLexeme *prev_res; + ParsedLex tmp_token; + + tmp_token.lemm = right_res[i].lexeme; + tmp_token.lenlemm = strlen(right_res[i].lexeme); + tmp_token.type = token->type; + tmp_token.next = NULL; + + tmp_res = LexizeExecExpressionSet(ld, &tmp_token, left); + prev_res = result; + result = TSLexemeUnion(prev_res, tmp_res); + if (prev_res) + pfree(prev_res); + } -static void -LexizeInit(LexizeData *ld, TSConfigCacheEntry *cfg) -{ - ld->cfg = cfg; - ld->curDictId = InvalidOid; - ld->posDict = 0; - ld->towork.head = ld->towork.tail = ld->curSub = NULL; - ld->waste.head = ld->waste.tail = NULL; - ld->lastRes = NULL; - ld->tmpRes = NULL; + return result; } -static void -LPLAddTail(ListParsedLex *list, ParsedLex *newpl) +static TSLexeme * +LexizeExecCase(LexizeData *ld, ParsedLex *originalToken, 
TSMapRuleList *rules, TSMapRule **selectedRule) { - if (list->tail) + TSLexeme *res = NULL; + ParsedLex token = *originalToken; + + if (ld->cfg->lenmap <= token.type || rules == NULL) { - list->tail->next = newpl; - list->tail = newpl; + res = NULL; } else - list->head = list->tail = newpl; - newpl->next = NULL; -} - -static ParsedLex * -LPLRemoveHead(ListParsedLex *list) -{ - ParsedLex *res = list->head; + { + int i; - if (list->head) - list->head = list->head->next; + for (i = 0; i < rules->count; i++) + { + if (rules->data[i].dictionary != InvalidOid) + { + /* Comma-separated syntax configuration */ + res = LexizeExecDictionary(ld, &token, rules->data[i].dictionary); + if (!LexizeExecIsNull(ld, &token, rules->data[i].dictionary)) + { + if (selectedRule) + *selectedRule = rules->data + i; + originalToken->relatedRule = rules->data + i; + + if (res && (res[0].flags & TSL_FILTER)) + { + token.lemm = res[0].lexeme; + token.lenlemm = strlen(res[0].lexeme); + } + else + { + break; + } + } + } + else if (LexizeExecExpressionBool(ld, &token, rules->data[i].condition.expression)) + { + if (selectedRule) + *selectedRule = rules->data + i; + originalToken->relatedRule = rules->data + i; - if (list->head == NULL) - list->tail = NULL; + if (rules->data[i].command.is_expression) + res = LexizeExecExpressionSet(ld, &token, rules->data[i].command.expression); + else + res = LexizeExecCase(ld, &token, rules->data[i].command.ruleList, selectedRule); + break; + } + } + } return res; } -static void -LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm) +/* + * LexizeExec and helpers functions + */ + +static TSLexeme * +LexizeExecFinishProcessing(LexizeData *ld) { - ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex)); + int i; + TSLexeme *res = NULL; - newpl->type = type; - newpl->lemm = lemm; - newpl->lenlemm = lenlemm; - LPLAddTail(&ld->towork, newpl); - ld->curSub = ld->towork.tail; -} + for (i = 0; i < ld->dslist.listLength; i++) + { + TSLexeme *last_res = res; 
-static void -RemoveHead(LexizeData *ld) -{ - LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork)); + res = TSLexemeUnion(res, ld->dslist.states[i].tmpResult); + if (last_res) + pfree(last_res); + } - ld->posDict = 0; + return res; } -static void -setCorrLex(LexizeData *ld, ParsedLex **correspondLexem) +static TSLexeme * +LexizeExecGetPreviousResults(LexizeData *ld) { - if (correspondLexem) - { - *correspondLexem = ld->waste.head; - } - else - { - ParsedLex *tmp, - *ptr = ld->waste.head; + int i; + TSLexeme *res = NULL; - while (ptr) + for (i = 0; i < ld->dslist.listLength; i++) + { + if (!ld->dslist.states[i].processed) { - tmp = ptr->next; - pfree(ptr); - ptr = tmp; + TSLexeme *last_res = res; + + res = TSLexemeUnion(res, ld->dslist.states[i].tmpResult); + if (last_res) + pfree(last_res); } } - ld->waste.head = ld->waste.tail = NULL; + + return res; } static void -moveToWaste(LexizeData *ld, ParsedLex *stop) +LexizeExecClearDictStates(LexizeData *ld) { - bool go = true; + int i; - while (ld->towork.head && go) + for (i = 0; i < ld->dslist.listLength; i++) { - if (ld->towork.head == stop) + if (!ld->dslist.states[i].processed) { - ld->curSub = stop->next; - go = false; + DictStateListRemove(&ld->dslist, ld->dslist.states[i].relatedDictionary); + i = 0; } - RemoveHead(ld); } } -static void -setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res) +static bool +LexizeExecNotProcessedDictStates(LexizeData *ld) { - if (ld->tmpRes) - { - TSLexeme *ptr; + int i; - for (ptr = ld->tmpRes; ptr->lexeme; ptr++) - pfree(ptr->lexeme); - pfree(ld->tmpRes); - } - ld->tmpRes = res; - ld->lastRes = lex; + for (i = 0; i < ld->dslist.listLength; i++) + if (!ld->dslist.states[i].processed) + return true; + + return false; } static TSLexeme * -LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) +LexizeExec(LexizeData *ld, ParsedLex **correspondLexem, TSMapRule **selectedRule) { + ParsedLex *token; + TSMapRuleList *rules; + TSLexeme *res = NULL; + TSLexeme *prevIterationResult = 
NULL; + bool removeHead = false; + bool resetSkipDictionary = false; + bool accepted = false; int i; - ListDictionary *map; - TSDictionaryCacheEntry *dict; - TSLexeme *res; - if (ld->curDictId == InvalidOid) - { - /* - * usual mode: dictionary wants only one word, but we should keep in - * mind that we should go through all stack - */ + for (i = 0; i < ld->dslist.listLength; i++) + ld->dslist.states[i].processed = false; + if (ld->skipDictionary != InvalidOid) + resetSkipDictionary = true; - while (ld->towork.head) + token = ld->towork.head; + if (token == NULL) + { + setCorrLex(ld, correspondLexem); + return NULL; + } + else + { + rules = ld->cfg->map[token->type]; + if (rules != NULL) { - ParsedLex *curVal = ld->towork.head; - char *curValLemm = curVal->lemm; - int curValLenLemm = curVal->lenlemm; - - map = ld->cfg->map + curVal->type; - - if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0) + res = LexizeExecCase(ld, token, rules, selectedRule); + prevIterationResult = LexizeExecGetPreviousResults(ld); + removeHead = prevIterationResult == NULL; + } + else + { + removeHead = true; + if (token->type == 0) /* Processing EOF-like token */ { - /* skip this type of lexeme */ - RemoveHead(ld); - continue; + res = LexizeExecFinishProcessing(ld); + prevIterationResult = NULL; } + } + + if (LexizeExecNotProcessedDictStates(ld) && (token->type == 0 || rules != NULL)) /* Rollback processing */ + { + int i; + ListParsedLex *intermediateTokens = NULL; + ListParsedLex *acceptedTokens = NULL; - for (i = ld->posDict; i < map->len; i++) + for (i = 0; i < ld->dslist.listLength; i++) { - dict = lookup_ts_dictionary_cache(map->dictIds[i]); - - ld->dictState.isend = ld->dictState.getnext = false; - ld->dictState.private_state = NULL; - res = (TSLexeme *) DatumGetPointer(FunctionCall4( - &(dict->lexize), - PointerGetDatum(dict->dictData), - PointerGetDatum(curValLemm), - Int32GetDatum(curValLenLemm), - PointerGetDatum(&ld->dictState) - )); - - if 
(ld->dictState.getnext) + if (!ld->dslist.states[i].processed) { - /* - * dictionary wants next word, so setup and store current - * position and go to multiword mode - */ - - ld->curDictId = DatumGetObjectId(map->dictIds[i]); - ld->posDict = i + 1; - ld->curSub = curVal->next; - if (res) - setNewTmpRes(ld, curVal, res); - return LexizeExec(ld, correspondLexem); + intermediateTokens = &ld->dslist.states[i].intermediateTokens; + acceptedTokens = &ld->dslist.states[i].acceptedTokens; + if (prevIterationResult == NULL) + ld->skipDictionary = ld->dslist.states[i].relatedDictionary; } + } - if (!res) /* dictionary doesn't know this lexeme */ - continue; - - if (res->flags & TSL_FILTER) + if (intermediateTokens && intermediateTokens->head) + { + ParsedLex *head = ld->towork.head; + + ld->towork.head = intermediateTokens->head; + intermediateTokens->tail->next = head; + head->next = NULL; + ld->towork.tail = head; + removeHead = false; + LPLClear(&ld->waste); + if (acceptedTokens && acceptedTokens->head) { - curValLemm = res->lexeme; - curValLenLemm = strlen(res->lexeme); - continue; + ld->waste.head = acceptedTokens->head; + ld->waste.tail = acceptedTokens->tail; } - - RemoveHead(ld); - setCorrLex(ld, correspondLexem); - return res; } - - RemoveHead(ld); + ResultStorageClearLexemes(&ld->delayedResults); + if (rules != NULL) + res = NULL; } + + if (rules != NULL) + LexizeExecClearDictStates(ld); + else if (token->type == 0) + DictStateListClear(&ld->dslist); } - else - { /* curDictId is valid */ - dict = lookup_ts_dictionary_cache(ld->curDictId); - /* - * Dictionary ld->curDictId asks us about following words - */ + if (prevIterationResult) + { + res = prevIterationResult; + } + else + { + int i; - while (ld->curSub) + for (i = 0; i < ld->dslist.listLength; i++) { - ParsedLex *curVal = ld->curSub; - - map = ld->cfg->map + curVal->type; - - if (curVal->type != 0) + if (ld->dslist.states[i].storeToAccepted) { - bool dictExists = false; - - if (curVal->type >= 
ld->cfg->lenmap || map->len == 0) - { - /* skip this type of lexeme */ - ld->curSub = curVal->next; - continue; - } - - /* - * We should be sure that current type of lexeme is recognized - * by our dictionary: we just check is it exist in list of - * dictionaries ? - */ - for (i = 0; i < map->len && !dictExists; i++) - if (ld->curDictId == DatumGetObjectId(map->dictIds[i])) - dictExists = true; - - if (!dictExists) - { - /* - * Dictionary can't work with current tpe of lexeme, - * return to basic mode and redo all stored lexemes - */ - ld->curDictId = InvalidOid; - return LexizeExec(ld, correspondLexem); - } + LPLAddTailCopy(&ld->dslist.states[i].acceptedTokens, token); + accepted = true; + ld->dslist.states[i].storeToAccepted = false; } - - ld->dictState.isend = (curVal->type == 0) ? true : false; - ld->dictState.getnext = false; - - res = (TSLexeme *) DatumGetPointer(FunctionCall4( - &(dict->lexize), - PointerGetDatum(dict->dictData), - PointerGetDatum(curVal->lemm), - Int32GetDatum(curVal->lenlemm), - PointerGetDatum(&ld->dictState) - )); - - if (ld->dictState.getnext) + else { - /* Dictionary wants one more */ - ld->curSub = curVal->next; - if (res) - setNewTmpRes(ld, curVal, res); - continue; + LPLAddTailCopy(&ld->dslist.states[i].intermediateTokens, token); } + } + } - if (res || ld->tmpRes) - { - /* - * Dictionary normalizes lexemes, so we remove from stack all - * used lexemes, return to basic mode and redo end of stack - * (if it exists) - */ - if (res) - { - moveToWaste(ld, ld->curSub); - } - else - { - res = ld->tmpRes; - moveToWaste(ld, ld->lastRes); - } + if (removeHead) + RemoveHead(ld); - /* reset to initial state */ - ld->curDictId = InvalidOid; - ld->posDict = 0; - ld->lastRes = NULL; - ld->tmpRes = NULL; - setCorrLex(ld, correspondLexem); - return res; - } + if (ld->dslist.listLength > 0) + { + /* + * There is at least one thesaurus dictionary in the middle of + * processing. 
Delay return of the result to avoid wrong lexemes in + * case of thesaurus phrase rejection. + */ + ResultStorageAdd(&ld->delayedResults, token, res); + if (accepted) + ResultStorageMoveToAccepted(&ld->delayedResults); + if (res) + pfree(res); + res = NULL; + } + else + { + if (ld->towork.head == NULL) + { + TSLexeme *oldAccepted = ld->delayedResults.accepted; - /* - * Dict don't want next lexem and didn't recognize anything, redo - * from ld->towork.head - */ - ld->curDictId = InvalidOid; - return LexizeExec(ld, correspondLexem); + ld->delayedResults.accepted = TSLexemeUnionOpt(ld->delayedResults.accepted, ld->delayedResults.lexemes, true); + if (oldAccepted) + pfree(oldAccepted); + } + + /* + * Add accepted delayed results to the output of the parsing. All + * lexemes returned during thesaurus pharse processing should be + * returned simultaniously, since all phrase tokens are processed as + * one. + */ + if (ld->delayedResults.accepted != NULL) + { + TSLexeme *oldRes = res; + + res = TSLexemeUnionOpt(ld->delayedResults.accepted, res, prevIterationResult == NULL); + if (oldRes) + pfree(oldRes); + ResultStorageClear(&ld->delayedResults); } + setCorrLex(ld, correspondLexem); } - setCorrLex(ld, correspondLexem); - return NULL; + if (resetSkipDictionary) + ld->skipDictionary = InvalidOid; + + LexemesBufferClear(&ld->buffer); + res = TSLexemeFilterMulti(res); + if (res) + res = TSLexemeRemoveDuplications(res); + + return res; } /* + * ts_parse API functions + */ + +/* * Parse string and lexize words. * * prs will be filled in. 
@@ -357,7 +1329,7 @@ LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen) { - int type, + int type = -1, lenlemm; char *lemm = NULL; LexizeData ldata; @@ -375,36 +1347,42 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen) LexizeInit(&ldata, cfg); + type = 1; do { - type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), - PointerGetDatum(prsdata), - PointerGetDatum(&lemm), - PointerGetDatum(&lenlemm))); - - if (type > 0 && lenlemm >= MAXSTRLEN) + if (type > 0) { + type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), + PointerGetDatum(prsdata), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))); + + if (type > 0 && lenlemm >= MAXSTRLEN) + { #ifdef IGNORE_LONGLEXEME - ereport(NOTICE, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("word is too long to be indexed"), - errdetail("Words longer than %d characters are ignored.", - MAXSTRLEN))); - continue; + ereport(NOTICE, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are ignored.", + MAXSTRLEN))); + continue; #else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("word is too long to be indexed"), - errdetail("Words longer than %d characters are ignored.", - MAXSTRLEN))); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are ignored.", + MAXSTRLEN))); #endif - } + } - LexizeAddLemm(&ldata, type, lemm, lenlemm); + LexizeAddLemm(&ldata, type, lemm, lenlemm); + } - while ((norms = LexizeExec(&ldata, NULL)) != NULL) + while ((norms = LexizeExec(&ldata, NULL, NULL)) != NULL) { - TSLexeme *ptr = norms; + TSLexeme *ptr; + + ptr = norms; prs->pos++; /* set pos */ @@ -429,12 +1407,200 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen) } pfree(norms); } - } while (type > 0); + } while (type > 0 || ldata.towork.head); 
FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata)); } /* + * Initialize SRF context and text parser for ts_debug execution. + */ +static void +ts_debug_init(Oid cfgId, text *inputText, FunctionCallInfo fcinfo) +{ + TupleDesc tupdesc; + char *buf; + int buflen; + FuncCallContext *funcctx; + MemoryContext oldcontext; + TSDebugContext *context; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + buf = text_to_cstring(inputText); + buflen = strlen(buf); + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + funcctx->user_fctx = palloc0(sizeof(TSDebugContext)); + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + + context = funcctx->user_fctx; + context->cfg = lookup_ts_config_cache(cfgId); + context->prsobj = lookup_ts_parser_cache(context->cfg->prsId); + + context->tokenTypes = (LexDescr *) DatumGetPointer(OidFunctionCall1(context->prsobj->lextypeOid, + (Datum) 0)); + + context->prsdata = (void *) DatumGetPointer(FunctionCall2(&context->prsobj->prsstart, + PointerGetDatum(buf), + Int32GetDatum(buflen))); + LexizeInit(&context->ldata, context->cfg); + context->tokentype = 1; + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Get one token from input text and add it to towork queue. 
+ */ +static void +ts_debug_get_token(FuncCallContext *funcctx) +{ + TSDebugContext *context; + MemoryContext oldcontext; + int lenlemm; + char *lemm = NULL; + + context = funcctx->user_fctx; + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + context->tokentype = DatumGetInt32(FunctionCall3(&(context->prsobj->prstoken), + PointerGetDatum(context->prsdata), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))); + + if (context->tokentype > 0 && lenlemm >= MAXSTRLEN) + { +#ifdef IGNORE_LONGLEXEME + ereport(NOTICE, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are ignored.", + MAXSTRLEN))); +#else + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are ignored.", + MAXSTRLEN))); +#endif + } + + LexizeAddLemm(&context->ldata, context->tokentype, lemm, lenlemm); + MemoryContextSwitchTo(oldcontext); +} + +/* + * Parse text and print debug information for each token, such as + * token type, dictionary map configuration, selected command and lexemes. 
+ * Arguments: regconfiguration(Oid) cfgId, text *inputText + */ +Datum +ts_debug(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + TSDebugContext *context; + MemoryContext oldcontext; + + if (SRF_IS_FIRSTCALL()) + { + Oid cfgId = PG_GETARG_OID(0); + text *inputText = PG_GETARG_TEXT_P(1); + + ts_debug_init(cfgId, inputText, fcinfo); + } + + funcctx = SRF_PERCALL_SETUP(); + context = funcctx->user_fctx; + + while (context->tokentype > 0 && context->leftTokens == NULL) + { + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + ts_debug_get_token(funcctx); + + context->savedLexemes = LexizeExec(&context->ldata, &(context->leftTokens), &(context->rule)); + + MemoryContextSwitchTo(oldcontext); + } + + while (context->leftTokens == NULL && context->ldata.towork.head != NULL) + context->savedLexemes = LexizeExec(&context->ldata, &(context->leftTokens), &(context->rule)); + + if (context->leftTokens && context->leftTokens && context->leftTokens->type > 0) + { + HeapTuple tuple; + Datum result; + char **values; + ParsedLex *lex = context->leftTokens; + StringInfo str = NULL; + TSLexeme *ptr; + + values = palloc0(sizeof(char *) * 6); + str = makeStringInfo(); + initStringInfo(str); + + values[0] = context->tokenTypes[lex->type - 1].alias; + values[1] = context->tokenTypes[lex->type - 1].descr; + + values[2] = palloc0(sizeof(char) * (lex->lenlemm + 1)); + memcpy(values[2], lex->lemm, sizeof(char) * lex->lenlemm); + + if (lex->type < context->ldata.cfg->lenmap && context->ldata.cfg->map[lex->type]) + { + TSMapPrintRuleList(context->ldata.cfg->map[lex->type], str, 0); + values[3] = str->data; + str = makeStringInfo(); + initStringInfo(str); + + if (lex->relatedRule) + { + TSMapPrintRule(lex->relatedRule, str, 0); + values[4] = str->data; + str = makeStringInfo(); + initStringInfo(str); + } + } + + ptr = context->savedLexemes; + if (context->savedLexemes) + appendStringInfoChar(str, '{'); + + while (ptr && ptr->lexeme) + { + if (ptr != context->savedLexemes) + 
appendStringInfoString(str, ", "); + appendStringInfoString(str, ptr->lexeme); + ptr++; + } + if (context->savedLexemes) + appendStringInfoChar(str, '}'); + if (context->savedLexemes) + values[5] = str->data; + else + values[5] = NULL; + + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + result = HeapTupleGetDatum(tuple); + + context->leftTokens = lex->next; + pfree(lex); + if (context->leftTokens == NULL && context->savedLexemes) + pfree(context->savedLexemes); + + SRF_RETURN_NEXT(funcctx, result); + } + + FunctionCall1(&(context->prsobj->prsend), PointerGetDatum(context->prsdata)); + SRF_RETURN_DONE(funcctx); +} + +/* * Headline framework */ static void @@ -532,12 +1698,12 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) { - int type, + int type = -1, lenlemm; char *lemm = NULL; LexizeData ldata; TSLexeme *norms; - ParsedLex *lexs; + ParsedLex *lexs = NULL; TSConfigCacheEntry *cfg; TSParserCacheEntry *prsobj; void *prsdata; @@ -551,45 +1717,50 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu LexizeInit(&ldata, cfg); + type = 1; do { - type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), - PointerGetDatum(prsdata), - PointerGetDatum(&lemm), - PointerGetDatum(&lenlemm))); - - if (type > 0 && lenlemm >= MAXSTRLEN) + if (type > 0) { + type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), + PointerGetDatum(prsdata), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))); + + if (type > 0 && lenlemm >= MAXSTRLEN) + { #ifdef IGNORE_LONGLEXEME - ereport(NOTICE, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("word is too long to be indexed"), - errdetail("Words longer than %d characters are ignored.", - MAXSTRLEN))); - continue; + ereport(NOTICE, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are 
ignored.", + MAXSTRLEN))); + continue; #else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("word is too long to be indexed"), - errdetail("Words longer than %d characters are ignored.", - MAXSTRLEN))); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are ignored.", + MAXSTRLEN))); #endif - } + } - LexizeAddLemm(&ldata, type, lemm, lenlemm); + LexizeAddLemm(&ldata, type, lemm, lenlemm); + } do { - if ((norms = LexizeExec(&ldata, &lexs)) != NULL) + if ((norms = LexizeExec(&ldata, &lexs, NULL)) != NULL) { prs->vectorpos++; addHLParsedLex(prs, query, lexs, norms); } else addHLParsedLex(prs, query, lexs, NULL); + lexs = NULL; } while (norms); - } while (type > 0); + } while (type > 0 || ldata.towork.head); FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata)); } @@ -642,14 +1813,14 @@ generateHeadline(HeadlineParsedText *prs) } else if (!wrd->skip) { - if (wrd->selected) + if (wrd->selected && (wrd == prs->words || !(wrd - 1)->selected)) { memcpy(ptr, prs->startsel, prs->startsellen); ptr += prs->startsellen; } memcpy(ptr, wrd->word, wrd->len); ptr += wrd->len; - if (wrd->selected) + if (wrd->selected && ((wrd + 1 - prs->words) == prs->curwords || !(wrd + 1)->selected)) { memcpy(ptr, prs->stopsel, prs->stopsellen); ptr += prs->stopsellen; diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c index 56d4cf0..3868b3c 100644 --- a/src/backend/tsearch/ts_utils.c +++ b/src/backend/tsearch/ts_utils.c @@ -19,7 +19,17 @@ #include "miscadmin.h" #include "tsearch/ts_locale.h" #include "tsearch/ts_utils.h" - +#include "catalog/indexing.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "storage/lockdefs.h" +#include "access/heapam.h" +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/sysattr.h" +#include "utils/fmgroids.h" +#include "utils/builtins.h" +#include 
"tsearch/ts_cache.h" /* * Given the base name and extension of a tsearch config file, return diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 888edbb..0628b9c 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -828,11 +828,10 @@ static const struct cachedesc cacheinfo[] = { }, {TSConfigMapRelationId, /* TSCONFIGMAP */ TSConfigMapIndexId, - 3, + 2, { Anum_pg_ts_config_map_mapcfg, Anum_pg_ts_config_map_maptokentype, - Anum_pg_ts_config_map_mapseqno, 0 }, 2 diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index da5c8ea..da18387 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -39,10 +39,13 @@ #include "catalog/pg_ts_template.h" #include "commands/defrem.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_utils.h" +#include "tsearch/ts_configmap.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" #include "utils/inval.h" +#include "utils/jsonb.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/regproc.h" @@ -51,13 +54,12 @@ /* - * MAXTOKENTYPE/MAXDICTSPERTT are arbitrary limits on the workspace size + * MAXTOKENTYPE is arbitrary limits on the workspace size * used in lookup_ts_config_cache(). We could avoid hardwiring a limit * by making the workspace dynamically enlargeable, but it seems unlikely * to be worth the trouble. 
*/ -#define MAXTOKENTYPE 256 -#define MAXDICTSPERTT 100 +#define MAXTOKENTYPE 256 static HTAB *TSParserCacheHash = NULL; @@ -414,11 +416,10 @@ lookup_ts_config_cache(Oid cfgId) ScanKeyData mapskey; SysScanDesc mapscan; HeapTuple maptup; - ListDictionary maplists[MAXTOKENTYPE + 1]; - Oid mapdicts[MAXDICTSPERTT]; + TSMapRuleList *mapruleslist[MAXTOKENTYPE + 1]; int maxtokentype; - int ndicts; int i; + TSMapRuleList *rules_tmp; tp = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId)); if (!HeapTupleIsValid(tp)) @@ -449,8 +450,10 @@ lookup_ts_config_cache(Oid cfgId) if (entry->map) { for (i = 0; i < entry->lenmap; i++) - if (entry->map[i].dictIds) - pfree(entry->map[i].dictIds); + { + if (entry->map[i]) + TSMapFree(entry->map[i]); + } pfree(entry->map); } } @@ -464,13 +467,11 @@ lookup_ts_config_cache(Oid cfgId) /* * Scan pg_ts_config_map to gather dictionary list for each token type * - * Because the index is on (mapcfg, maptokentype, mapseqno), we will - * see the entries in maptokentype order, and in mapseqno order for - * each token type, even though we didn't explicitly ask for that. + * Because the index is on (mapcfg, maptokentype), we will see the + * entries in maptokentype order even though we didn't explicitly ask + * for that. 
*/ - MemSet(maplists, 0, sizeof(maplists)); maxtokentype = 0; - ndicts = 0; ScanKeyInit(&mapskey, Anum_pg_ts_config_map_mapcfg, @@ -482,6 +483,7 @@ lookup_ts_config_cache(Oid cfgId) mapscan = systable_beginscan_ordered(maprel, mapidx, NULL, 1, &mapskey); + memset(mapruleslist, 0, sizeof(mapruleslist)); while ((maptup = systable_getnext_ordered(mapscan, ForwardScanDirection)) != NULL) { Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); @@ -491,51 +493,27 @@ lookup_ts_config_cache(Oid cfgId) elog(ERROR, "maptokentype value %d is out of range", toktype); if (toktype < maxtokentype) elog(ERROR, "maptokentype entries are out of order"); - if (toktype > maxtokentype) - { - /* starting a new token type, but first save the prior data */ - if (ndicts > 0) - { - maplists[maxtokentype].len = ndicts; - maplists[maxtokentype].dictIds = (Oid *) - MemoryContextAlloc(CacheMemoryContext, - sizeof(Oid) * ndicts); - memcpy(maplists[maxtokentype].dictIds, mapdicts, - sizeof(Oid) * ndicts); - } - maxtokentype = toktype; - mapdicts[0] = cfgmap->mapdict; - ndicts = 1; - } - else - { - /* continuing data for current token type */ - if (ndicts >= MAXDICTSPERTT) - elog(ERROR, "too many pg_ts_config_map entries for one token type"); - mapdicts[ndicts++] = cfgmap->mapdict; - } + + maxtokentype = toktype; + rules_tmp = JsonbToTSMap(DatumGetJsonbP(&cfgmap->mapdicts)); + mapruleslist[maxtokentype] = TSMapMoveToMemoryContext(rules_tmp, CacheMemoryContext); + TSMapFree(rules_tmp); + rules_tmp = NULL; } systable_endscan_ordered(mapscan); index_close(mapidx, AccessShareLock); heap_close(maprel, AccessShareLock); - if (ndicts > 0) + if (maxtokentype > 0) { - /* save the last token type's dictionaries */ - maplists[maxtokentype].len = ndicts; - maplists[maxtokentype].dictIds = (Oid *) - MemoryContextAlloc(CacheMemoryContext, - sizeof(Oid) * ndicts); - memcpy(maplists[maxtokentype].dictIds, mapdicts, - sizeof(Oid) * ndicts); - /* and save the overall map */ + /* save the overall 
map */ entry->lenmap = maxtokentype + 1; - entry->map = (ListDictionary *) + entry->map = (TSMapRuleList * *) MemoryContextAlloc(CacheMemoryContext, - sizeof(ListDictionary) * entry->lenmap); - memcpy(entry->map, maplists, - sizeof(ListDictionary) * entry->lenmap); + sizeof(TSMapRuleList *) * entry->lenmap); + memcpy(entry->map, mapruleslist, + sizeof(TSMapRuleList *) * entry->lenmap); } entry->isvalid = true; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 8733426..ceff4d1 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -14186,10 +14186,11 @@ dumpTSConfig(Archive *fout, TSConfigInfo *cfginfo) "SELECT\n" " ( SELECT alias FROM pg_catalog.ts_token_type('%u'::pg_catalog.oid) AS t\n" " WHERE t.tokid = m.maptokentype ) AS tokenname,\n" - " m.mapdict::pg_catalog.regdictionary AS dictname\n" + " dictionary_map_to_text(m.mapcfg, m.maptokentype) AS dictname\n" "FROM pg_catalog.pg_ts_config_map AS m\n" "WHERE m.mapcfg = '%u'\n" - "ORDER BY m.mapcfg, m.maptokentype, m.mapseqno", + "GROUP BY m.mapcfg, m.maptokentype\n" + "ORDER BY m.mapcfg, m.maptokentype", cfginfo->cfgparser, cfginfo->dobj.catId.oid); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); @@ -14203,20 +14204,14 @@ dumpTSConfig(Archive *fout, TSConfigInfo *cfginfo) char *tokenname = PQgetvalue(res, i, i_tokenname); char *dictname = PQgetvalue(res, i, i_dictname); - if (i == 0 || - strcmp(tokenname, PQgetvalue(res, i - 1, i_tokenname)) != 0) - { - /* starting a new token type, so start a new command */ - if (i > 0) - appendPQExpBufferStr(q, ";\n"); - appendPQExpBuffer(q, "\nALTER TEXT SEARCH CONFIGURATION %s\n", - fmtId(cfginfo->dobj.name)); - /* tokenname needs quoting, dictname does NOT */ - appendPQExpBuffer(q, " ADD MAPPING FOR %s WITH %s", - fmtId(tokenname), dictname); - } - else - appendPQExpBuffer(q, ", %s", dictname); + /* starting a new token type, so start a new command */ + if (i > 0) + appendPQExpBufferStr(q, ";\n"); + appendPQExpBuffer(q, 
"\nALTER TEXT SEARCH CONFIGURATION %s\n", + fmtId(cfginfo->dobj.name)); + /* tokenname needs quoting, dictname does NOT */ + appendPQExpBuffer(q, " ADD MAPPING FOR %s WITH \n%s", + fmtId(tokenname), dictname); } if (ntups > 0) diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 0688571..98f000b 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4580,13 +4580,7 @@ describeOneTSConfig(const char *oid, const char *nspname, const char *cfgname, " ( SELECT t.alias FROM\n" " pg_catalog.ts_token_type(c.cfgparser) AS t\n" " WHERE t.tokid = m.maptokentype ) AS \"%s\",\n" - " pg_catalog.btrim(\n" - " ARRAY( SELECT mm.mapdict::pg_catalog.regdictionary\n" - " FROM pg_catalog.pg_ts_config_map AS mm\n" - " WHERE mm.mapcfg = m.mapcfg AND mm.maptokentype = m.maptokentype\n" - " ORDER BY mapcfg, maptokentype, mapseqno\n" - " ) :: pg_catalog.text,\n" - " '{}') AS \"%s\"\n" + " dictionary_map_to_text(m.mapcfg, m.maptokentype) AS \"%s\"\n" "FROM pg_catalog.pg_ts_config AS c, pg_catalog.pg_ts_config_map AS m\n" "WHERE c.oid = '%s' AND m.mapcfg = c.oid\n" "GROUP BY m.mapcfg, m.maptokentype, c.cfgparser\n" diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 9a7f5b2..362fd17 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201710161 +#define CATALOG_VERSION_NO 201710181 #endif diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index ef84936..db487cf 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -260,7 +260,7 @@ DECLARE_UNIQUE_INDEX(pg_ts_config_cfgname_index, 3608, on pg_ts_config using btr DECLARE_UNIQUE_INDEX(pg_ts_config_oid_index, 3712, on pg_ts_config using btree(oid oid_ops)); #define TSConfigOidIndexId 3712 -DECLARE_UNIQUE_INDEX(pg_ts_config_map_index, 3609, on pg_ts_config_map using btree(mapcfg oid_ops, maptokentype int4_ops, mapseqno int4_ops)); 
+DECLARE_UNIQUE_INDEX(pg_ts_config_map_index, 3609, on pg_ts_config_map using btree(mapcfg oid_ops, maptokentype int4_ops)); #define TSConfigMapIndexId 3609 DECLARE_UNIQUE_INDEX(pg_ts_dict_dictname_index, 3604, on pg_ts_dict using btree(dictname name_ops, dictnamespace oid_ops)); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 93c031a..572374e 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4925,6 +4925,12 @@ DESCR("transform jsonb to tsvector"); DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ )); DESCR("transform json to tsvector"); +DATA(insert OID = 8891 ( dictionary_map_to_text PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "26 23" _null_ _null_ _null_ _null_ _null_ dictionary_map_to_text _null_ _null_ _null_ )); +DESCR("returns text representation of dictionary configuration map"); + +DATA(insert OID = 8892 ( ts_debug PGNSP PGUID 12 100 1 0 0 f f f f t t s s 2 0 2249 "3734 25" "{3734,25,25,25,25,25,25,1009}" "{i,i,o,o,o,o,o,o}" "{cfgId,inputText,alias,description,token,dictionaries,command,lexemes}" _null_ _null_ ts_debug _null_ _null_ _null_)); +DESCR("debug function for text search configuration"); + DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ )); DESCR("trigger for automatic update of tsvector column"); DATA(insert OID = 3753 ( tsvector_update_trigger_column PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_bycolumn _null_ _null_ _null_ )); diff --git a/src/include/catalog/pg_ts_config_map.h b/src/include/catalog/pg_ts_config_map.h index 3df0519..ea0fd0a 100644 --- a/src/include/catalog/pg_ts_config_map.h +++ b/src/include/catalog/pg_ts_config_map.h @@ 
-22,6 +22,7 @@ #define PG_TS_CONFIG_MAP_H #include "catalog/genbki.h" +#include "utils/jsonb.h" /* ---------------- * pg_ts_config_map definition. cpp turns this into @@ -30,49 +31,106 @@ */ #define TSConfigMapRelationId 3603 +typedef Jsonb jsonb; + CATALOG(pg_ts_config_map,3603) BKI_WITHOUT_OIDS { Oid mapcfg; /* OID of configuration owning this entry */ int32 maptokentype; /* token type from parser */ - int32 mapseqno; /* order in which to consult dictionaries */ - Oid mapdict; /* dictionary to consult */ + jsonb mapdicts; /* dictionary map Jsonb representation */ } FormData_pg_ts_config_map; typedef FormData_pg_ts_config_map *Form_pg_ts_config_map; +typedef struct TSMapExpression +{ + int operator; + Oid dictionary; + int options; + bool is_true; + struct TSMapExpression *left; + struct TSMapExpression *right; +} TSMapExpression; + +typedef struct TSMapCommand +{ + bool is_expression; + void *ruleList; /* this is a TSMapRuleList object */ + TSMapExpression *expression; +} TSMapCommand; + +typedef struct TSMapCondition +{ + TSMapExpression *expression; +} TSMapCondition; + +typedef struct TSMapRule +{ + Oid dictionary; + TSMapCondition condition; + TSMapCommand command; +} TSMapRule; + +typedef struct TSMapRuleList +{ + TSMapRule *data; + int count; +} TSMapRuleList; + /* ---------------- * compiler constants for pg_ts_config_map * ---------------- */ -#define Natts_pg_ts_config_map 4 +#define Natts_pg_ts_config_map 3 #define Anum_pg_ts_config_map_mapcfg 1 #define Anum_pg_ts_config_map_maptokentype 2 -#define Anum_pg_ts_config_map_mapseqno 3 -#define Anum_pg_ts_config_map_mapdict 4 +#define Anum_pg_ts_config_map_mapdicts 3 + +/* ---------------- + * Dictionary map operators + * ---------------- + */ +#define DICTMAP_OP_OR 1 +#define DICTMAP_OP_AND 2 +#define DICTMAP_OP_THEN 3 +#define DICTMAP_OP_MAPBY 4 +#define DICTMAP_OP_UNION 5 +#define DICTMAP_OP_EXCEPT 6 +#define DICTMAP_OP_INTERSECT 7 +#define DICTMAP_OP_NOT 8 + +/* ---------------- + * Dictionary map 
operand options (bit mask) + * ---------------- + */ + +#define DICTMAP_OPT_NOT 1 +#define DICTMAP_OPT_IS_NULL 2 +#define DICTMAP_OPT_IS_STOP 4 /* ---------------- * initial contents of pg_ts_config_map * ---------------- */ -DATA(insert ( 3748 1 1 3765 )); -DATA(insert ( 3748 2 1 3765 )); -DATA(insert ( 3748 3 1 3765 )); -DATA(insert ( 3748 4 1 3765 )); -DATA(insert ( 3748 5 1 3765 )); -DATA(insert ( 3748 6 1 3765 )); -DATA(insert ( 3748 7 1 3765 )); -DATA(insert ( 3748 8 1 3765 )); -DATA(insert ( 3748 9 1 3765 )); -DATA(insert ( 3748 10 1 3765 )); -DATA(insert ( 3748 11 1 3765 )); -DATA(insert ( 3748 15 1 3765 )); -DATA(insert ( 3748 16 1 3765 )); -DATA(insert ( 3748 17 1 3765 )); -DATA(insert ( 3748 18 1 3765 )); -DATA(insert ( 3748 19 1 3765 )); -DATA(insert ( 3748 20 1 3765 )); -DATA(insert ( 3748 21 1 3765 )); -DATA(insert ( 3748 22 1 3765 )); +DATA(insert ( 3748 1 "[3765]" )); +DATA(insert ( 3748 2 "[3765]" )); +DATA(insert ( 3748 3 "[3765]" )); +DATA(insert ( 3748 4 "[3765]" )); +DATA(insert ( 3748 5 "[3765]" )); +DATA(insert ( 3748 6 "[3765]" )); +DATA(insert ( 3748 7 "[3765]" )); +DATA(insert ( 3748 8 "[3765]" )); +DATA(insert ( 3748 9 "[3765]" )); +DATA(insert ( 3748 10 "[3765]" )); +DATA(insert ( 3748 11 "[3765]" )); +DATA(insert ( 3748 15 "[3765]" )); +DATA(insert ( 3748 16 "[3765]" )); +DATA(insert ( 3748 17 "[3765]" )); +DATA(insert ( 3748 18 "[3765]" )); +DATA(insert ( 3748 19 "[3765]" )); +DATA(insert ( 3748 20 "[3765]" )); +DATA(insert ( 3748 21 "[3765]" )); +DATA(insert ( 3748 22 "[3765]" )); #endif /* PG_TS_CONFIG_MAP_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index ffeeb49..d956b56 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -380,6 +380,8 @@ typedef enum NodeTag T_CreateEnumStmt, T_CreateRangeStmt, T_AlterEnumStmt, + T_DictMapExprElem, + T_DictMapElem, T_AlterTSDictionaryStmt, T_AlterTSConfigurationStmt, T_CreateFdwStmt, diff --git a/src/include/nodes/parsenodes.h 
b/src/include/nodes/parsenodes.h index 732e5d6..af4e961 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3369,6 +3369,33 @@ typedef enum AlterTSConfigType ALTER_TSCONFIG_DROP_MAPPING } AlterTSConfigType; +typedef enum DictPipeElemType +{ + DICT_MAP_OPERAND, + DICT_MAP_OPERATOR, + DICT_MAP_CONST_TRUE +} DictPipeType; + +typedef struct DictMapExprElem +{ + NodeTag type; + int8 kind; /* See DictMapExprElemType */ + List *dictname; /* Used in DICT_MAP_EXPR_OPERAND */ + struct DictMapExprElem *left; /* Used in DICT_MAP_EXPR_OPERATOR */ + struct DictMapExprElem *right; /* Used in DICT_MAP_EXPR_OPERATOR */ + int8 oper; /* Used in DICT_MAP_EXPR_OPERATOR */ + int8 options; /* Can be used in the future */ +} DictMapExprElem; + +typedef struct DictMapElem +{ + NodeTag type; + DictMapExprElem *condition; + DictMapExprElem *command; + List *commandmaps; + List *dictnames; +} DictMapElem; + typedef struct AlterTSConfigurationStmt { NodeTag type; @@ -3381,6 +3408,7 @@ typedef struct AlterTSConfigurationStmt */ List *tokentype; /* list of Value strings */ List *dicts; /* list of list of Value strings */ + List *dict_map; bool override; /* if true - remove old variant */ bool replace; /* if true - replace dictionary by another */ bool missing_ok; /* for DROP - skip error if missing? 
*/ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index f50e45e..5100aac 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -240,6 +240,7 @@ PG_KEYWORD("location", LOCATION, UNRESERVED_KEYWORD) PG_KEYWORD("lock", LOCK_P, UNRESERVED_KEYWORD) PG_KEYWORD("locked", LOCKED, UNRESERVED_KEYWORD) PG_KEYWORD("logged", LOGGED, UNRESERVED_KEYWORD) +PG_KEYWORD("map", MAP, UNRESERVED_KEYWORD) PG_KEYWORD("mapping", MAPPING, UNRESERVED_KEYWORD) PG_KEYWORD("match", MATCH, UNRESERVED_KEYWORD) PG_KEYWORD("materialized", MATERIALIZED, UNRESERVED_KEYWORD) @@ -376,6 +377,7 @@ PG_KEYWORD("statement", STATEMENT, UNRESERVED_KEYWORD) PG_KEYWORD("statistics", STATISTICS, UNRESERVED_KEYWORD) PG_KEYWORD("stdin", STDIN, UNRESERVED_KEYWORD) PG_KEYWORD("stdout", STDOUT, UNRESERVED_KEYWORD) +PG_KEYWORD("stopword", STOPWORD, UNRESERVED_KEYWORD) PG_KEYWORD("storage", STORAGE, UNRESERVED_KEYWORD) PG_KEYWORD("strict", STRICT_P, UNRESERVED_KEYWORD) PG_KEYWORD("strip", STRIP_P, UNRESERVED_KEYWORD) diff --git a/src/include/tsearch/ts_cache.h b/src/include/tsearch/ts_cache.h index abff0fd..bfde460 100644 --- a/src/include/tsearch/ts_cache.h +++ b/src/include/tsearch/ts_cache.h @@ -14,6 +14,7 @@ #define TS_CACHE_H #include "utils/guc.h" +#include "catalog/pg_ts_config_map.h" /* @@ -66,6 +67,7 @@ typedef struct { int len; Oid *dictIds; + int32 *dictOptions; } ListDictionary; typedef struct @@ -77,7 +79,7 @@ typedef struct Oid prsId; int lenmap; - ListDictionary *map; + TSMapRuleList **map; } TSConfigCacheEntry; diff --git a/src/include/tsearch/ts_configmap.h b/src/include/tsearch/ts_configmap.h new file mode 100644 index 0000000..73b87de --- /dev/null +++ b/src/include/tsearch/ts_configmap.h @@ -0,0 +1,48 @@ +/*------------------------------------------------------------------------- + * + * ts_configmap.h + * internal representation of text search configuration and utilities for it + * + * Copyright (c) 1998-2017, PostgreSQL Global Development Group + * + * 
src/include/tsearch/ts_configmap.h + * + *------------------------------------------------------------------------- + */ +#ifndef _PG_TS_CONFIGMAP_H_ +#define _PG_TS_CONFIGMAP_H_ + +#include "utils/jsonb.h" +#include "catalog/pg_ts_config_map.h" + +/* + * Configuration storage functions + * Provide interface to convert ts_configuration into JSONB and vice versa + */ + +/* Convert TSMapRuleList structure into JSONB */ +extern Jsonb *TSMapToJsonb(TSMapRuleList *rules); + +/* Extract TSMapRuleList from JSONB formatted data */ +extern TSMapRuleList * JsonbToTSMap(Jsonb *json); +/* Replace all occurrences of oldDict by newDict */ +extern void TSMapReplaceDictionary(TSMapRuleList *rules, Oid oldDict, Oid newDict); + +/* Return list of all dictionaries in rule list in order they are defined in the list as array of Oids */ +extern Oid *TSMapGetDictionariesList(TSMapRuleList *rules); + +/* Return list of all dictionaries in rule list in order they are defined in the list as ListDictionary structure */ +extern ListDictionary *TSMapGetListDictionary(TSMapRuleList *rules); + +/* Move rule list into specified memory context */ +extern TSMapRuleList * TSMapMoveToMemoryContext(TSMapRuleList *rules, MemoryContext context); +/* Free all nodes of the rule list */ +extern void TSMapFree(TSMapRuleList *rules); + +/* Print rule in human-readable format */ +extern void TSMapPrintRule(TSMapRule *rule, StringInfo result, int depth); + +/* Print rule list in human-readable format */ +extern void TSMapPrintRuleList(TSMapRuleList *rules, StringInfo result, int depth); + +#endif /* _PG_TS_CONFIGMAP_H_ */ diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 94ba7fc..e933d7b 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h @@ -14,6 +14,7 @@ #define _PG_TS_PUBLIC_H_ #include "tsearch/ts_type.h" +#include "catalog/pg_ts_config_map.h" /* * Parser's framework @@ -115,6 +116,7 @@ typedef struct #define TSL_ADDPOS 0x01 #define TSL_PREFIX 0x02 
#define TSL_FILTER 0x04 +#define TSL_MULTI 0x08 /* * Struct for supporting complex dictionaries like thesaurus. diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out index 234b44f..40029f3 100644 --- a/src/test/regress/expected/oidjoins.out +++ b/src/test/regress/expected/oidjoins.out @@ -1081,14 +1081,6 @@ WHERE mapcfg != 0 AND ------+-------- (0 rows) -SELECT ctid, mapdict -FROM pg_catalog.pg_ts_config_map fk -WHERE mapdict != 0 AND - NOT EXISTS(SELECT 1 FROM pg_catalog.pg_ts_dict pk WHERE pk.oid = fk.mapdict); - ctid | mapdict -------+--------- -(0 rows) - SELECT ctid, dictnamespace FROM pg_catalog.pg_ts_dict fk WHERE dictnamespace != 0 AND diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index 0744ef8..760673c 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -420,6 +420,145 @@ SELECT ts_lexize('thesaurus', 'one'); {1} (1 row) +-- test dictionary pipeline in configuration +CREATE TEXT SEARCH CONFIGURATION english_multi( + COPY=english +); +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN english_stem OR simple THEN english_stem UNION simple END; +SELECT to_tsvector('english_multi', 'book'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'books'); + to_tsvector +-------------------- + 'book':1 'books':1 +(1 row) + +SELECT to_tsvector('english_multi', 'booking'); + to_tsvector +---------------------- + 'book':1 'booking':1 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN english_stem OR simple THEN english_stem INTERSECT simple END; +SELECT to_tsvector('english_multi', 'book'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'books'); + to_tsvector +------------- + +(1 row) + +SELECT to_tsvector('english_multi', 'booking'); + to_tsvector +------------- + 
+(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN english_stem OR simple THEN simple EXCEPT english_stem END; +SELECT to_tsvector('english_multi', 'book'); + to_tsvector +------------- + +(1 row) + +SELECT to_tsvector('english_multi', 'books'); + to_tsvector +------------- + 'books':1 +(1 row) + +SELECT to_tsvector('english_multi', 'booking'); + to_tsvector +------------- + 'booking':1 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH ispell; +SELECT to_tsvector('english_multi', 'book'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'books'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'booking'); + to_tsvector +---------------------- + 'book':1 'booking':1 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN ispell THEN ispell + ELSE english_stem +END; +SELECT to_tsvector('english_multi', 'book'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'books'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'booking'); + to_tsvector +---------------------- + 'book':1 'booking':1 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN hunspell THEN english_stem MAP BY hunspell + ELSE english_stem +END; +SELECT to_tsvector('english_multi', 'book'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'books'); + to_tsvector +------------- + 'book':1 +(1 row) + +SELECT to_tsvector('english_multi', 'booking'); + to_tsvector +------------- + 'book':1 +(1 row) + -- Test ispell dictionary in configuration CREATE TEXT SEARCH CONFIGURATION ispell_tst ( COPY=english @@ -580,3 +719,74 @@ SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a 'card':3,10 
'invit':2,9 'like':6 'look':5 'order':1,8 (1 row) +CREATE TEXT SEARCH CONFIGURATION english_multi2( + COPY=english_multi +); +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN english_stem OR simple THEN english_stem UNION simple +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + to_tsvector +-------------------------------------------------------------------------------------- + '1987a':6 'mysteri':2 'mysterious':2 'of':4 'ring':3 'rings':3 'supernova':5 'the':1 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN thesaurus ELSE english_stem +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + to_tsvector +--------------------------------------- + '1987a':6 'mysteri':2 'ring':3 'sn':5 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus IS NOT NULL OR english_stem IS NOT NULL THEN thesaurus UNION english_stem +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + to_tsvector +----------------------------------------------------- + '1987a':6 'mysteri':2 'ring':3 'sn':5 'supernova':5 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN simple UNION thesaurus +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + to_tsvector +-------------------------------- + '1987a':2 'sn':1 'supernova':1 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN simple UNION thesaurus + ELSE simple +END; +SELECT to_tsvector('english_multi2', 'one two'); + to_tsvector +------------------------ + '12':1 'one':1 'two':2 +(1 row) + +SELECT to_tsvector('english_multi2', 'one two three'); + to_tsvector +----------------------------------- + '123':1 
'one':1 'three':3 'two':2 +(1 row) + +SELECT to_tsvector('english_multi2', 'one two four'); + to_tsvector +--------------------------------- + '12':1 'four':3 'one':1 'two':2 +(1 row) + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN thesaurus UNION simple + ELSE english_stem UNION simple +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + to_tsvector +--------------------------------------------------------------------------------------------- + '1987a':6 'mysteri':2 'mysterious':2 'of':4 'ring':3 'rings':3 'sn':5 'supernova':5 'the':1 +(1 row) + diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index d63fb12..5b6fe73 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -36,11 +36,11 @@ WHERE cfgnamespace = 0 OR cfgowner = 0 OR cfgparser = 0; -----+--------- (0 rows) -SELECT mapcfg, maptokentype, mapseqno +SELECT mapcfg, maptokentype FROM pg_ts_config_map -WHERE mapcfg = 0 OR mapdict = 0; - mapcfg | maptokentype | mapseqno ---------+--------------+---------- +WHERE mapcfg = 0; + mapcfg | maptokentype +--------+-------------- (0 rows) -- Look for pg_ts_config_map entries that aren't one of parser's token types @@ -51,8 +51,8 @@ RIGHT JOIN pg_ts_config_map AS m ON (tt.cfgid=m.mapcfg AND tt.tokid=m.maptokentype) WHERE tt.cfgid IS NULL OR tt.tokid IS NULL; - cfgid | tokid | mapcfg | maptokentype | mapseqno | mapdict --------+-------+--------+--------------+----------+--------- + cfgid | tokid | mapcfg | maptokentype | mapdicts +-------+-------+--------+--------------+---------- (0 rows) -- test basic text search behavior without indexes, then with @@ -567,66 +567,65 @@ SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://ae -- ts_debug SELECT * from ts_debug('english', 'abc&nm1;def©ghiõjkl'); - alias | description | token | dictionaries | dictionary | lexemes 
------------+-----------------+----------------------------+----------------+--------------+--------- - tag | XML tag | | {} | | - asciiword | Word, all ASCII | abc | {english_stem} | english_stem | {abc} - entity | XML entity | &nm1; | {} | | - asciiword | Word, all ASCII | def | {english_stem} | english_stem | {def} - entity | XML entity | © | {} | | - asciiword | Word, all ASCII | ghi | {english_stem} | english_stem | {ghi} - entity | XML entity | õ | {} | | - asciiword | Word, all ASCII | jkl | {english_stem} | english_stem | {jkl} - tag | XML tag | | {} | | + alias | description | token | dictionaries | command | lexemes +-----------+-----------------+----------------------------+--------------+--------------+--------- + tag | XML tag | | | | + asciiword | Word, all ASCII | abc | english_stem | english_stem | {abc} + entity | XML entity | &nm1; | | | + asciiword | Word, all ASCII | def | english_stem | english_stem | {def} + entity | XML entity | © | | | + asciiword | Word, all ASCII | ghi | english_stem | english_stem | {ghi} + entity | XML entity | õ | | | + asciiword | Word, all ASCII | jkl | english_stem | english_stem | {jkl} + tag | XML tag | | | | (9 rows) -- check parsing of URLs SELECT * from ts_debug('english', 'http://www.harewoodsolutions.co.uk/press.aspx'); - alias | description | token | dictionaries | dictionary | lexemes -----------+---------------+----------------------------------------+--------------+------------+------------------------------------------ - protocol | Protocol head | http:// | {} | | - url | URL | www.harewoodsolutions.co.uk/press.aspx | {simple} | simple | {www.harewoodsolutions.co.uk/press.aspx} - host | Host | www.harewoodsolutions.co.uk | {simple} | simple | {www.harewoodsolutions.co.uk} - url_path | URL path | /press.aspx | {simple} | simple | {/press.aspx} - tag | XML tag | | {} | | + alias | description | token | dictionaries | command | lexemes 
+----------+---------------+----------------------------------------+--------------+---------+------------------------------------------ + protocol | Protocol head | http:// | | | + url | URL | www.harewoodsolutions.co.uk/press.aspx | simple | simple | {www.harewoodsolutions.co.uk/press.aspx} + host | Host | www.harewoodsolutions.co.uk | simple | simple | {www.harewoodsolutions.co.uk} + url_path | URL path | /press.aspx | simple | simple | {/press.aspx} + tag | XML tag | | | | (5 rows) SELECT * from ts_debug('english', 'http://aew.wer0c.ewr/id?ad=qwe&dw'); - alias | description | token | dictionaries | dictionary | lexemes -----------+---------------+----------------------------+--------------+------------+------------------------------ - protocol | Protocol head | http:// | {} | | - url | URL | aew.wer0c.ewr/id?ad=qwe&dw | {simple} | simple | {aew.wer0c.ewr/id?ad=qwe&dw} - host | Host | aew.wer0c.ewr | {simple} | simple | {aew.wer0c.ewr} - url_path | URL path | /id?ad=qwe&dw | {simple} | simple | {/id?ad=qwe&dw} - tag | XML tag | | {} | | + alias | description | token | dictionaries | command | lexemes +----------+---------------+----------------------------+--------------+---------+------------------------------ + protocol | Protocol head | http:// | | | + url | URL | aew.wer0c.ewr/id?ad=qwe&dw | simple | simple | {aew.wer0c.ewr/id?ad=qwe&dw} + host | Host | aew.wer0c.ewr | simple | simple | {aew.wer0c.ewr} + url_path | URL path | /id?ad=qwe&dw | simple | simple | {/id?ad=qwe&dw} + tag | XML tag | | | | (5 rows) SELECT * from ts_debug('english', 'http://5aew.werc.ewr:8100/?'); - alias | description | token | dictionaries | dictionary | lexemes -----------+---------------+----------------------+--------------+------------+------------------------ - protocol | Protocol head | http:// | {} | | - url | URL | 5aew.werc.ewr:8100/? 
| {simple} | simple | {5aew.werc.ewr:8100/?} - host | Host | 5aew.werc.ewr:8100 | {simple} | simple | {5aew.werc.ewr:8100} - url_path | URL path | /? | {simple} | simple | {/?} + alias | description | token | dictionaries | command | lexemes +----------+---------------+----------------------+--------------+---------+------------------------ + protocol | Protocol head | http:// | | | + url | URL | 5aew.werc.ewr:8100/? | simple | simple | {5aew.werc.ewr:8100/?} + host | Host | 5aew.werc.ewr:8100 | simple | simple | {5aew.werc.ewr:8100} + url_path | URL path | /? | simple | simple | {/?} (4 rows) SELECT * from ts_debug('english', '5aew.werc.ewr:8100/?xx'); - alias | description | token | dictionaries | dictionary | lexemes -----------+-------------+------------------------+--------------+------------+-------------------------- - url | URL | 5aew.werc.ewr:8100/?xx | {simple} | simple | {5aew.werc.ewr:8100/?xx} - host | Host | 5aew.werc.ewr:8100 | {simple} | simple | {5aew.werc.ewr:8100} - url_path | URL path | /?xx | {simple} | simple | {/?xx} + alias | description | token | dictionaries | command | lexemes +----------+-------------+------------------------+--------------+---------+-------------------------- + url | URL | 5aew.werc.ewr:8100/?xx | simple | simple | {5aew.werc.ewr:8100/?xx} + host | Host | 5aew.werc.ewr:8100 | simple | simple | {5aew.werc.ewr:8100} + url_path | URL path | /?xx | simple | simple | {/?xx} (3 rows) SELECT token, alias, - dictionaries, dictionaries is null as dnull, array_dims(dictionaries) as ddims, - lexemes, lexemes is null as lnull, array_dims(lexemes) as ldims + dictionaries, lexemes, lexemes is null as lnull, array_dims(lexemes) as ldims from ts_debug('english', 'a title'); - token | alias | dictionaries | dnull | ddims | lexemes | lnull | ldims --------+-----------+----------------+-------+-------+---------+-------+------- - a | asciiword | {english_stem} | f | [1:1] | {} | f | - | blank | {} | f | | | t | - title | asciiword | 
{english_stem} | f | [1:1] | {titl} | f | [1:1] + token | alias | dictionaries | lexemes | lnull | ldims +-------+-----------+--------------+---------+-------+------- + a | asciiword | english_stem | {} | f | + | blank | | | t | + title | asciiword | english_stem | {titl} | f | [1:1] (3 rows) -- to_tsquery diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql index fcf9990..320e220 100644 --- a/src/test/regress/sql/oidjoins.sql +++ b/src/test/regress/sql/oidjoins.sql @@ -541,10 +541,6 @@ SELECT ctid, mapcfg FROM pg_catalog.pg_ts_config_map fk WHERE mapcfg != 0 AND NOT EXISTS(SELECT 1 FROM pg_catalog.pg_ts_config pk WHERE pk.oid = fk.mapcfg); -SELECT ctid, mapdict -FROM pg_catalog.pg_ts_config_map fk -WHERE mapdict != 0 AND - NOT EXISTS(SELECT 1 FROM pg_catalog.pg_ts_dict pk WHERE pk.oid = fk.mapdict); SELECT ctid, dictnamespace FROM pg_catalog.pg_ts_dict fk WHERE dictnamespace != 0 AND diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql index a5a569e..337302b 100644 --- a/src/test/regress/sql/tsdicts.sql +++ b/src/test/regress/sql/tsdicts.sql @@ -117,6 +117,68 @@ CREATE TEXT SEARCH DICTIONARY thesaurus ( SELECT ts_lexize('thesaurus', 'one'); +-- test dictionary pipeline in configuration +CREATE TEXT SEARCH CONFIGURATION english_multi( + COPY=english +); + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN english_stem OR simple THEN english_stem UNION simple END; + +SELECT to_tsvector('english_multi', 'book'); +SELECT to_tsvector('english_multi', 'books'); +SELECT to_tsvector('english_multi', 'booking'); + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN english_stem OR simple THEN english_stem INTERSECT simple END; + +SELECT to_tsvector('english_multi', 'book'); +SELECT to_tsvector('english_multi', 'books'); +SELECT to_tsvector('english_multi', 'booking'); + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER 
MAPPING FOR + asciiword + WITH CASE + WHEN english_stem OR simple THEN simple EXCEPT english_stem END; + +SELECT to_tsvector('english_multi', 'book'); +SELECT to_tsvector('english_multi', 'books'); +SELECT to_tsvector('english_multi', 'booking'); + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH ispell; + +SELECT to_tsvector('english_multi', 'book'); +SELECT to_tsvector('english_multi', 'books'); +SELECT to_tsvector('english_multi', 'booking'); + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN ispell THEN ispell + ELSE english_stem +END; + +SELECT to_tsvector('english_multi', 'book'); +SELECT to_tsvector('english_multi', 'books'); +SELECT to_tsvector('english_multi', 'booking'); + +ALTER TEXT SEARCH CONFIGURATION english_multi ALTER MAPPING FOR + asciiword + WITH CASE + WHEN hunspell THEN english_stem MAP BY hunspell + ELSE english_stem +END; + +SELECT to_tsvector('english_multi', 'book'); +SELECT to_tsvector('english_multi', 'books'); +SELECT to_tsvector('english_multi', 'booking'); + -- Test ispell dictionary in configuration CREATE TEXT SEARCH CONFIGURATION ispell_tst ( COPY=english @@ -188,3 +250,41 @@ ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one'); SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbreviation SN)'); SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); +CREATE TEXT SEARCH CONFIGURATION english_multi2( + COPY=english_multi +); + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN english_stem OR simple THEN english_stem UNION simple +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN thesaurus 
ELSE english_stem +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus IS NOT NULL OR english_stem IS NOT NULL THEN thesaurus UNION english_stem +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN simple UNION thesaurus +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN simple UNION thesaurus + ELSE simple +END; +SELECT to_tsvector('english_multi2', 'one two'); +SELECT to_tsvector('english_multi2', 'one two three'); +SELECT to_tsvector('english_multi2', 'one two four'); + +ALTER TEXT SEARCH CONFIGURATION english_multi2 ALTER MAPPING FOR asciiword WITH CASE + WHEN thesaurus THEN thesaurus UNION simple + ELSE english_stem UNION simple +END; +SELECT to_tsvector('english_multi2', 'The Mysterious Rings of Supernova 1987A'); + diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index 1c8520b..8ef3d71 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -26,9 +26,9 @@ SELECT oid, cfgname FROM pg_ts_config WHERE cfgnamespace = 0 OR cfgowner = 0 OR cfgparser = 0; -SELECT mapcfg, maptokentype, mapseqno +SELECT mapcfg, maptokentype FROM pg_ts_config_map -WHERE mapcfg = 0 OR mapdict = 0; +WHERE mapcfg = 0; -- Look for pg_ts_config_map entries that aren't one of parser's token types SELECT * FROM @@ -146,8 +146,7 @@ SELECT * from ts_debug('english', 'http://aew.wer0c.ewr/id?ad=qwe&dw'); SELECT * from ts_debug('english', 'http://5aew.werc.ewr:8100/?'); SELECT * from ts_debug('english', '5aew.werc.ewr:8100/?xx'); SELECT token, alias, - dictionaries, dictionaries is null as dnull, 
array_dims(dictionaries) as ddims, - lexemes, lexemes is null as lnull, array_dims(lexemes) as ldims + dictionaries, lexemes, lexemes is null as lnull, array_dims(lexemes) as ldims from ts_debug('english', 'a title'); -- to_tsquery