diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
new file mode 100644
index e87210d..0990eb4
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
*************** CREATE TYPE rainbow AS ENUM ('red', 'ora
*** 9080,9085 ****
--- 9080,9135 ----
+ delete
+
+ delete(tsvector, text)
+
+ tsvector
+ remove entry from tsvector
+ delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat')
+ 'cat':3 'rat':5A
+
+
+
+
+ unnest
+
+ unnest(tsvector)
+
+ setof record
+ expand a tsvector to a set of rows. Each row has the following columns: lexeme, postings, weights. postings and weights are null for a lexeme stored without positions.
+ unnest('fat:2,4 cat:3 rat:5A'::tsvector)
+ cat {3} {0}
+ fat {2,4} {0,0}
+ rat {5} {3} (3 rows)
+
+
+
+
+ to_array
+
+ to_array(tsvector)
+
+ text[]
+ convert tsvector to array of lexemes
+ to_array('fat:2,4 cat:3 rat:5A'::tsvector)
+ {cat,fat,rat}
+
+
+
+
+ array_to_tsvector
+
+ to_tsvector(text[])
+
+ tsvector
+ convert array of lexemes to tsvector; the array must not contain nulls
+ to_tsvector('{fat,cat,rat}'::text[])
+ 'cat' 'fat' 'rat'
+
+
+
+
  to_tsquery
  to_tsquery( config regconfig , query text)
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
new file mode 100644
index e822ba8..cc998c2
*** a/src/backend/utils/adt/tsvector_op.c
--- b/src/backend/utils/adt/tsvector_op.c
*************** add_pos(TSVector src, WordEntry *srcptr,
*** 291,296 ****
--- 291,616 ----
  	return *clen - startlen;
  }
  
+ /*
+  * tsvector_delete(tsvector, text)
+  *
+  * Return a copy of the tsvector without the entry whose lexeme is
+  * byte-for-byte equal to the second argument.  If no entry matches, the
+  * input tsvector is returned as is.
+  */
+ Datum
+ tsvector_delete(PG_FUNCTION_ARGS)
+ {
+ 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
+ 				tsout;
+ 	WordEntry  *arrin = ARRPTR(tsin),
+ 			   *arrout;
+ 	char	   *lexin = TextDatumGetCString(PG_GETARG_DATUM(1)),
+ 			   *data,
+ 			   *cur;
+ 	int			i,
+ 				j,
+ 				lexin_len = strlen(lexin),
+ 				skip_index,
+ 				curoff = 0,
+ 				len;
+
+ 	data = STRPTR(tsin);
+ 	for (i = 0; i < tsin->size; i++)
+ 	{
+ 		if (arrin[i].len == lexin_len &&
+ 			!strncmp(lexin, data + arrin[i].pos, arrin[i].len))
+ 			break;
+ 	}
+
+ 	/* nothing to delete */
+ 	if (i == tsin->size)
+ 		PG_RETURN_POINTER(tsin);
+
+ 	/* otherwise we can skip i-th lexeme */
+ 	skip_index = i;
+
+ 	/*
+ 	 * The size of the result is not known yet: dropping an entry removes its
+ 	 * WordEntry, its lexeme bytes and its position data, and it can also
+ 	 * change the SHORTALIGN padding of the entries that follow.  The input
+ 	 * size is used as an upper bound (assuming the input data is packed in
+ 	 * entry order, repacking never moves anything to a higher offset) and
+ 	 * the real size is stored after the copy loop.
+ 	 */
+ 	tsout = (TSVector) palloc0(VARSIZE(tsin));
+ 	tsout->size = tsin->size - 1;
+ 	arrout = ARRPTR(tsout);
+ 	cur = STRPTR(tsout);
+
+ 	for (i = 0, j = 0; i < tsin->size; i++)
+ 	{
+ 		if (i == skip_index)
+ 			continue;
+
+ 		memcpy(cur + curoff, data + arrin[i].pos, arrin[i].len);
+ 		arrout[j].haspos = arrin[i].haspos;
+ 		arrout[j].len = arrin[i].len;
+ 		arrout[j].pos = curoff;
+
+ 		curoff += arrin[i].len;
+
+ 		if (arrin[i].haspos)
+ 		{
+ 			/* position vector: uint16 count followed by the positions */
+ 			curoff = SHORTALIGN(curoff);
+ 			len = POSDATALEN(tsin, arrin + i) * sizeof(WordEntryPos) + sizeof(uint16);
+ 			memcpy(cur + curoff,
+ 				   data + SHORTALIGN(arrin[i].pos + arrin[i].len), len);
+ 			curoff += len;
+ 		}
+
+ 		j++;
+ 	}
+
+ 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+
+ 	PG_FREE_IF_COPY(tsin, 0);
+ 	PG_RETURN_POINTER(tsout);
+ }
+
+ /*
+  * unnest(tsvector)
+  *
+  * Set-returning function that emits one (lexeme, postings, weights) row per
+  * tsvector entry.  postings and weights are NULL for an entry stored without
+  * position data.
+  */
+ Datum
+ tsvector_unnest(PG_FUNCTION_ARGS)
+ {
+ 	FuncCallContext *funcctx;
+ 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
+ 	WordEntry  *arrin = ARRPTR(tsin);
+ 	char	   *data;
+
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		MemoryContext oldcontext;
+ 		TupleDesc	tupdesc;
+
+ 		funcctx = SRF_FIRSTCALL_INIT();
+ 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ 		tupdesc = CreateTemplateTupleDesc(3, false);
+ 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
+ 						   TEXTOID, -1, 0);
+ 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "postings",
+ 						   INT4ARRAYOID, -1, 0);
+ 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
+ 						   INT2ARRAYOID, -1, 0);
+
+ 		funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+ 		MemoryContextSwitchTo(oldcontext);
+ 	}
+
+ 	funcctx = SRF_PERCALL_SETUP();
+
+ 	data = STRPTR(tsin);
+ 	if (funcctx->call_cntr < tsin->size)
+ 	{
+ 		char	   *values[3],
+ 				   *buf;
+ 		HeapTuple	tuple;
+ 		int			i = funcctx->call_cntr,
+ 					j;
+
+ 		/* NUL-terminated copy of the lexeme */
+ 		buf = palloc(arrin[i].len + 1);
+ 		memcpy(buf, data + arrin[i].pos, arrin[i].len);
+ 		buf[arrin[i].len] = '\0';
+ 		values[0] = buf;
+
+ 		if (arrin[i].haspos)
+ 		{
+ 			WordEntryPosVector *posv;
+ 			StringInfo	postings = makeStringInfo();
+ 			StringInfo	weights = makeStringInfo();
+
+ 			posv = (WordEntryPosVector *)
+ 				(data + SHORTALIGN(arrin[i].pos + arrin[i].len));
+
+ 			/* spell both arrays out as array literals, e.g. {2,4} */
+ 			appendStringInfoChar(postings, '{');
+ 			appendStringInfoChar(weights, '{');
+ 			for (j = 0; j < posv->npos; j++)
+ 			{
+ 				if (j > 0)
+ 				{
+ 					appendStringInfoChar(postings, ',');
+ 					appendStringInfoChar(weights, ',');
+ 				}
+ 				appendStringInfo(postings, "%d", WEP_GETPOS(posv->pos[j]));
+ 				appendStringInfo(weights, "%d", WEP_GETWEIGHT(posv->pos[j]));
+ 			}
+ 			appendStringInfoChar(postings, '}');
+ 			appendStringInfoChar(weights, '}');
+
+ 			values[1] = postings->data;
+ 			values[2] = weights->data;
+ 		}
+ 		else
+ 		{
+ 			/*
+ 			 * No position data.  values[1] and values[2] are uninitialized
+ 			 * pointers at this point, so they must be assigned, never
+ 			 * written through; a NULL pointer makes BuildTupleFromCStrings()
+ 			 * produce a SQL NULL for that column.
+ 			 */
+ 			values[1] = NULL;
+ 			values[2] = NULL;
+ 		}
+
+ 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+
+ 		PG_FREE_IF_COPY(tsin, 0);
+ 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ 	}
+ 	else
+ 		SRF_RETURN_DONE(funcctx);
+ }
+
+ /*
+  * to_array(tsvector)
+  *
+  * Return the lexemes of a tsvector as a text[], in stored entry order.
+  */
+ Datum
+ tsvector_to_array(PG_FUNCTION_ARGS)
+ {
+ 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
+ 	WordEntry  *arrin = ARRPTR(tsin);
+ 	char	   *data = STRPTR(tsin);
+ 	Datum	   *elements;
+ 	int			i;
+ 	ArrayType  *array;
+
+ 	/*
+ 	 * The number of entries comes from user data, so the scratch array is
+ 	 * palloc'd rather than declared as a variable-length array on the stack.
+ 	 */
+ 	elements = (Datum *) palloc(tsin->size * sizeof(Datum));
+
+ 	for (i = 0; i < tsin->size; i++)
+ 		elements[i] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos,
+ 															   arrin[i].len));
+
+ 	array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+
+ 	pfree(elements);
+ 	PG_FREE_IF_COPY(tsin, 0);
+ 	PG_RETURN_POINTER(array);
+ }
+
+ /*
+  * qsort comparator for an array of NUL-terminated lexemes: memcmp over the
+  * common prefix, the shorter string first when one is a prefix of the other.
+  *
+  * NOTE(review): meant to give the same order as the entries of a tsvector
+  * built by tsvectorin - confirm against tsCompareString().
+  */
+ static int
+ compare_lexeme_cstrings(const void *a, const void *b)
+ {
+ 	const char *sa = *(char *const *) a;
+ 	const char *sb = *(char *const *) b;
+ 	size_t		lena = strlen(sa);
+ 	size_t		lenb = strlen(sb);
+ 	int			cmp = memcmp(sa, sb, Min(lena, lenb));
+
+ 	if (cmp == 0 && lena != lenb)
+ 		cmp = (lena < lenb) ? -1 : 1;
+
+ 	return cmp;
+ }
+
+ /*
+  * to_tsvector(text[])
+  *
+  * Build a tsvector without position data from an array of lexemes.  The
+  * strings are used as is; they are sorted and duplicates are dropped before
+  * the entries are written.  NULL array elements raise an error.
+  */
+ Datum
+ array_to_tsvector(PG_FUNCTION_ARGS)
+ {
+ 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
+ 	Oid			element_type = ARR_ELEMTYPE(v);
+ 	TSVector	tsout;
+ 	Datum	   *dlexemes;
+ 	WordEntry  *arrout;
+ 	bool	   *nulls;
+ 	int			nitems,
+ 				i,
+ 				j,
+ 				tslen,
+ 				datalen = 0;
+ 	char	   *cur,
+ 			  **lexemes;
+
+ 	if (element_type != TEXTOID)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_DATATYPE_MISMATCH),
+ 				 errmsg("Only arrays of strings can be converted to tsvector")));
+
+ 	deconstruct_array(v, TEXTOID, -1, false, 'i',
+ 					  &dlexemes, &nulls, &nitems);
+
+ 	/* a NULL element carries no text value to convert, so reject it */
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		if (nulls[i])
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ 					 errmsg("lexeme array may not contain nulls")));
+ 	}
+
+ 	lexemes = (char **) palloc(nitems * sizeof(char *));
+ 	for (i = 0; i < nitems; i++)
+ 		lexemes[i] = TextDatumGetCString(dlexemes[i]);
+
+ 	/* sort the lexemes, then squeeze out adjacent duplicates */
+ 	if (nitems > 1)
+ 	{
+ 		qsort(lexemes, nitems, sizeof(char *), compare_lexeme_cstrings);
+
+ 		for (i = 1, j = 0; i < nitems; i++)
+ 		{
+ 			if (strcmp(lexemes[i], lexemes[j]) != 0)
+ 				lexemes[++j] = lexemes[i];
+ 		}
+ 		nitems = j + 1;
+ 	}
+
+ 	for (i = 0; i < nitems; i++)
+ 		datalen += strlen(lexemes[i]);
+
+ 	tslen = CALCDATASIZE(nitems, datalen);
+ 	tsout = (TSVector) palloc0(tslen);
+ 	SET_VARSIZE(tsout, tslen);
+ 	tsout->size = nitems;
+ 	arrout = ARRPTR(tsout);
+ 	cur = STRPTR(tsout);
+
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		int			lexlen = strlen(lexemes[i]);
+
+ 		memcpy(cur, lexemes[i], lexlen);
+ 		arrout[i].haspos = 0;
+ 		arrout[i].len = lexlen;
+ 		arrout[i].pos = cur - STRPTR(tsout);
+ 		cur += lexlen;
+ 	}
+
+ 	PG_FREE_IF_COPY(v, 0);
+ 	PG_RETURN_POINTER(tsout);
+ }
  
  Datum
  tsvector_concat(PG_FUNCTION_ARGS)
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
new file mode 100644
index eb55b3a..f7fb490
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
*************** DATA(insert OID = 3624 ( setweight P
*** 4574,4579 ****
--- 4574,4588 ----
  DESCR("set weight of lexeme's entries");
  DATA(insert OID = 3625 ( tsvector_concat PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ ));
  
+ DATA(insert OID = 3315 ( delete PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 25" _null_ _null_ _null_ _null_ _null_ tsvector_delete _null_ _null_ _null_ ));
+ DESCR("delete lexeme");
+ DATA(insert OID = 3316 ( unnest PGNSP PGUID 12 1 10 0 0 f f f f t t i s 1 0 2249 "3614" "{3614,25,1007,1005}" "{i,o,o,o}" "{tsvector,lexeme,postings,weights}" _null_ _null_ tsvector_unnest _null_ _null_ _null_ ));
+ DESCR("expand tsvector to set of rows");
+ DATA(insert OID = 3317 ( to_array PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 1009 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_to_array _null_ _null_ _null_ ));
+ DESCR("convert to lexeme's array");
+ DATA(insert OID = 3318 ( to_tsvector PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3614 "1009" _null_ _null_ _null_ _null_ _null_ array_to_tsvector _null_ _null_ _null_ ));
+ DESCR("build tsvector from lexeme's array");
+
  DATA(insert OID = 3634 ( ts_match_vq PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3614 3615" _null_ _null_ _null_ _null_ _null_ ts_match_vq _null_ _null_ _null_ ));
  DATA(insert OID = 3635 ( ts_match_qv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3614" _null_ _null_ _null_ _null_ _null_ ts_match_qv _null_ _null_ _null_ ));
  DATA(insert OID = 3760 ( ts_match_tt PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ ts_match_tt _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
new file mode 100644
index 281cdd6..81e7bbb
*** a/src/include/tsearch/ts_type.h
--- b/src/include/tsearch/ts_type.h
*************** extern Datum tsvector_length(PG_FUNCTION
*** 142,147 ****
--- 142,152 ----
  extern Datum tsvector_strip(PG_FUNCTION_ARGS);
  extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
  extern Datum tsvector_concat(PG_FUNCTION_ARGS);
+ extern Datum tsvector_delete(PG_FUNCTION_ARGS);
+ extern Datum tsvector_unnest(PG_FUNCTION_ARGS);
+ extern Datum tsvector_to_array(PG_FUNCTION_ARGS);
+ extern Datum array_to_tsvector(PG_FUNCTION_ARGS);
+
  extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
  extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
  
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
new file mode 100644
index 6284fb6..faa33e5
*** a/src/test/regress/expected/tstypes.out
--- b/src/test/regress/expected/tstypes.out
*************** SELECT ts_rank_cd(' a:1 s:2 d g'::tsvect
*** 625,627 ****
--- 625,708 ----
          0.1
  (1 row)
  
+ SELECT delete(to_tsvector('Rebel spaceships, striking from a hidden base'), 'spaceship');
+                   delete                  
+ ------------------------------------------
+  'base':7 'hidden':6 'rebel':1 'strike':3
+ (1 row)
+
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
+                             delete                            
+ --------------------------------------------------------------
+  'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
+ (1 row)
+
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
+                   delete                  
+ ------------------------------------------
+  'base':7 'hidden':6 'rebel':1 'strike':3
+ (1 row)
+
+ SELECT delete('a b c'::tsvector, 'b');
+  delete  
+ ---------
+  'a' 'c'
+ (1 row)
+
+ SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+                    unnest                    
+ ---------------------------------------------
+  (base,{7},{0})
+  (hidden,{6},{0})
+  (rebel,{1},{0})
+  (spaceship,"{2,33,34,35,36}","{0,3,2,1,0}")
+  (strike,{3},{0})
+ (5 rows)
+
+ SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+   lexeme   |    postings     |   weights   
+ -----------+-----------------+-------------
+  base      | {7}             | {0}
+  hidden    | {6}             | {0}
+  rebel     | {1}             | {0}
+  spaceship | {2,33,34,35,36} | {0,3,2,1,0}
+  strike    | {3}             | {0}
+ (5 rows)
+
+ SELECT lexeme, postings[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+   lexeme   | postings 
+ -----------+----------
+  base      |        7
+  hidden    |        6
+  rebel     |        1
+  spaceship |        2
+  strike    |        3
+ (5 rows)
+
+ SELECT * FROM unnest('base hidden'::tsvector);
+  lexeme | postings | weights 
+ --------+----------+---------
+  base   |          | 
+  hidden |          | 
+ (2 rows)
+
+ SELECT to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+                to_array               
+ --------------------------------------
+  {base,hidden,rebel,spaceship,strike}
+ (1 row)
+
+ SELECT to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
+                  to_tsvector                  
+ ----------------------------------------------
+  'base' 'hidden' 'rebel' 'spaceship' 'strike'
+ (1 row)
+
+ SELECT to_tsvector(ARRAY['fat','cat','rat','cat']);
+     to_tsvector    
+ -------------------
+  'cat' 'fat' 'rat'
+ (1 row)
+
+ SELECT to_tsvector(ARRAY['fat',NULL]);
+ ERROR:  lexeme array may not contain nulls
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
new file mode 100644
index fd7c702..51baf30
*** a/src/test/regress/sql/tstypes.sql
--- b/src/test/regress/sql/tstypes.sql
*************** SELECT ts_rank_cd(' a:1 s:2 d g'::tsvect
*** 115,117 ****
--- 115,130 ----
  SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
  SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
  SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
+
+ SELECT delete(to_tsvector('Rebel spaceships, striking from a hidden base'), 'spaceship');
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
+ SELECT delete('a b c'::tsvector, 'b');
+ SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT lexeme, postings[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT * FROM unnest('base hidden'::tsvector);
+ SELECT to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
+ SELECT to_tsvector(ARRAY['fat','cat','rat','cat']);
+ SELECT to_tsvector(ARRAY['fat',NULL]);