diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
new file mode 100644
index e87210d..0990eb4
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
*************** CREATE TYPE rainbow AS ENUM ('red', 'ora
*** 9080,9085 ****
--- 9080,9135 ----
+ delete
+
+ delete(tsvector, text)
+
+ tsvector
+ remove the entry for the given lexeme from the tsvector
+ delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat')
+ 'cat':3 'rat':5A
+
+
+
+
+ unnest
+
+ unnest(tsvector)
+
+ setof record
+ expand a tsvector to a set of rows. Each row has the following columns: lexeme, postings, weights.
+ unnest('fat:2,4 cat:3 rat:5A'::tsvector)
+ cat {3} {0}
+ fat {2,4} {0,0}
+ rat {5} {3}(3 rows)
+
+
+
+
+ to_array
+
+ to_array(tsvector)
+
+ text[]
+ convert a tsvector to an array of lexemes
+ to_array('fat:2,4 cat:3 rat:5A'::tsvector)
+ {cat,fat,rat}
+
+
+
+
+ array_to_tsvector
+
+ to_tsvector(text[])
+
+ tsvector
+ convert array of lexemes to tsvector
+ to_tsvector('{fat,cat,rat}'::text[])
+ 'fat' 'cat' 'rat'
+
+
+
+
to_tsquery
to_tsquery( config> regconfig> , query> text)
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
new file mode 100644
index e822ba8..cc998c2
*** a/src/backend/utils/adt/tsvector_op.c
--- b/src/backend/utils/adt/tsvector_op.c
*************** add_pos(TSVector src, WordEntry *srcptr,
*** 291,296 ****
--- 291,515 ----
return *clen - startlen;
}
+ /*
+  * tsvector_delete
+  *      Return a copy of the tsvector given as argument 0 without the entry
+  *      whose lexeme equals the text value given as argument 1.
+  *
+  * If no entry matches, the input tsvector itself is returned.  Otherwise a
+  * new tsvector is built by copying every other entry (lexeme bytes and, when
+  * present, its position data) into freshly allocated memory.
+  */
+ Datum
+ tsvector_delete(PG_FUNCTION_ARGS)
+ {
+     TSVector    tsin = PG_GETARG_TSVECTOR(0),
+                 tsout;
+     WordEntry  *arrin = ARRPTR(tsin),
+                *arrout;
+     char       *lexin = TextDatumGetCString(PG_GETARG_DATUM(1)),
+                *data = STRPTR(tsin),
+                *cur;
+     int         i,
+                 j,
+                 lexin_len = strlen(lexin),
+                 skip_index,
+                 curoff = 0,
+                 len;
+
+     /* Look for an entry whose lexeme is byte-for-byte equal to lexin. */
+     for (i = 0; i < tsin->size; i++)
+     {
+         if (arrin[i].len == lexin_len &&
+             memcmp(lexin, data + arrin[i].pos, lexin_len) == 0)
+             break;
+     }
+
+     /* nothing to delete */
+     if (i == tsin->size)
+         PG_RETURN_POINTER(tsin);
+
+     /* otherwise we can skip i-th lexeme */
+     skip_index = i;
+
+     /*
+      * Over-allocate: the exact result size depends on whether the removed
+      * entry had position data and on how the alignment padding of the
+      * entries behind it changes.  The input size is a safe upper bound,
+      * because dropping one entry frees a whole WordEntry in the header while
+      * re-alignment of the remaining position data can add at most one
+      * padding byte.  The exact size is set once everything has been copied.
+      */
+     tsout = (TSVector) palloc0(VARSIZE(tsin));
+     tsout->size = tsin->size - 1;
+     arrout = ARRPTR(tsout);
+     cur = STRPTR(tsout);
+
+     for (i = 0, j = 0; i < tsin->size; i++)
+     {
+         if (i == skip_index)
+             continue;
+
+         /* copy the lexeme itself and describe it in the output header */
+         memcpy(cur + curoff, data + arrin[i].pos, arrin[i].len);
+         arrout[j].haspos = arrin[i].haspos;
+         arrout[j].len = arrin[i].len;
+         arrout[j].pos = curoff;
+         curoff += arrin[i].len;
+
+         if (arrin[i].haspos)
+         {
+             /* position data (uint16 count + positions) is short-aligned */
+             len = POSDATALEN(tsin, arrin + i) * sizeof(WordEntryPos)
+                 + sizeof(uint16);
+             curoff = SHORTALIGN(curoff);
+             memcpy(cur + curoff,
+                    data + SHORTALIGN(arrin[i].pos + arrin[i].len),
+                    len);
+             curoff += len;
+         }
+
+         j++;
+     }
+
+     /* curoff is now the number of data bytes really used */
+     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+
+     PG_FREE_IF_COPY(tsin, 0);
+     PG_RETURN_POINTER(tsout);
+ }
+
+ /*
+  * tsvector_unnest
+  *      Set-returning function: emits one (lexeme, postings, weights) row
+  *      for each entry of the tsvector given as argument 0.
+  *
+  * For an entry that carries position data, postings and weights are built
+  * as array literals ("{...}") from WEP_GETPOS() and WEP_GETWEIGHT() of each
+  * position and converted by BuildTupleFromCStrings().  For an entry without
+  * position data both columns are passed as NULL pointers, so that they are
+  * returned as SQL NULLs.
+  *
+  * The tsvector argument is fetched again on every call; the row to emit is
+  * selected by funcctx->call_cntr.
+  */
+ Datum
+ tsvector_unnest(PG_FUNCTION_ARGS)
+ {
+     FuncCallContext *funcctx;
+     TSVector    tsin = PG_GETARG_TSVECTOR(0);
+     WordEntry  *arrin = ARRPTR(tsin);
+     char       *data;
+
+     if (SRF_IS_FIRSTCALL())
+     {
+         MemoryContext oldcontext;
+         TupleDesc   tupdesc;
+
+         funcctx = SRF_FIRSTCALL_INIT();
+         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+         /* describe the result row: lexeme text, postings int4[], weights int2[] */
+         tupdesc = CreateTemplateTupleDesc(3, false);
+         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
+                            TEXTOID, -1, 0);
+         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "postings",
+                            INT4ARRAYOID, -1, 0);
+         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
+                            INT2ARRAYOID, -1, 0);
+
+         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+         MemoryContextSwitchTo(oldcontext);
+     }
+
+     funcctx = SRF_PERCALL_SETUP();
+
+     data = STRPTR(tsin);
+     if (funcctx->call_cntr < tsin->size)
+     {
+         char       *values[3],
+                    *buf;
+         HeapTuple   tuple;
+         int         i = funcctx->call_cntr,
+                     j;
+
+         /* lexemes are stored without terminator; make a C string copy */
+         buf = palloc(sizeof(char) * (arrin[i].len + 1));
+         memcpy(buf, data + arrin[i].pos, arrin[i].len);
+         buf[arrin[i].len] = '\0';
+         values[0] = buf;
+
+         if (arrin[i].haspos)
+         {
+             WordEntryPosVector *posv;
+             StringInfo  postings = makeStringInfo();
+             StringInfo  weights = makeStringInfo();
+
+             /* position vector follows the lexeme at the next short boundary */
+             posv = (WordEntryPosVector *)
+                 (data + SHORTALIGN(arrin[i].len + arrin[i].pos));
+
+             appendStringInfoChar(postings, '{');
+             appendStringInfoChar(weights, '{');
+             for (j = 0; j < posv->npos; j++)
+             {
+                 if (j > 0)
+                 {
+                     appendStringInfoChar(postings, ',');
+                     appendStringInfoChar(weights, ',');
+                 }
+                 appendStringInfo(postings, "%d", WEP_GETPOS(posv->pos[j]));
+                 appendStringInfo(weights, "%d", WEP_GETWEIGHT(posv->pos[j]));
+             }
+             appendStringInfoChar(postings, '}');
+             appendStringInfoChar(weights, '}');
+
+             values[1] = postings->data;
+             values[2] = weights->data;
+         }
+         else
+         {
+             /*
+              * No position data.  The slots must be assigned, not written
+              * through: values[] is uninitialized here.  A NULL pointer makes
+              * BuildTupleFromCStrings() produce a NULL column.
+              */
+             values[1] = NULL;
+             values[2] = NULL;
+         }
+
+         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+
+         PG_FREE_IF_COPY(tsin, 0);
+         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+     }
+     else
+         SRF_RETURN_DONE(funcctx);
+ }
+
+ /*
+  * tsvector_to_array
+  *      Return the lexemes of the tsvector given as argument 0 as a text[]
+  *      array, in the order in which they are stored in the tsvector.
+  *      Position data is not included in the result.
+  */
+ Datum
+ tsvector_to_array(PG_FUNCTION_ARGS)
+ {
+     TSVector    tsin = PG_GETARG_TSVECTOR(0);
+     WordEntry  *arrin = ARRPTR(tsin);
+     Datum      *elements;
+     int         i;
+     ArrayType  *array;
+
+     /*
+      * The number of lexemes is data-dependent, so keep the scratch array off
+      * the stack: a variable-length array here would be zero-sized for an
+      * empty tsvector and arbitrarily large for a big one.
+      */
+     elements = (Datum *) palloc(tsin->size * sizeof(Datum));
+
+     for (i = 0; i < tsin->size; i++)
+     {
+         elements[i] = PointerGetDatum(
+             cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
+                                      arrin[i].len));
+     }
+
+     array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+
+     pfree(elements);
+     PG_FREE_IF_COPY(tsin, 0);
+     PG_RETURN_POINTER(array);
+ }
+
+ /*
+  * qsort comparator for an array of NUL-terminated lexemes: plain bytewise
+  * order, a string sorting before any longer string it is a prefix of.
+  */
+ static int
+ array_to_tsvector_cmp_lexemes(const void *a, const void *b)
+ {
+     const char *lexa = *(char *const *) a;
+     const char *lexb = *(char *const *) b;
+
+     return strcmp(lexa, lexb);
+ }
+
+ /*
+  * array_to_tsvector
+  *      Build a tsvector without position data from the text[] array given
+  *      as argument 0, one entry per distinct array element.
+  *
+  * Raises an error if the array's element type is not text or if the array
+  * contains a NULL element.  Elements are sorted bytewise and duplicates are
+  * dropped before the result is assembled.
+  *
+  * NOTE(review): the sort order is meant to match the order other tsvector
+  * code expects (tsCompareString()) -- confirm.
+  * NOTE(review): element lengths are not checked against the width of
+  * WordEntry.len / WordEntry.pos, and empty strings are accepted -- confirm
+  * whether both should be rejected here.
+  */
+ Datum
+ array_to_tsvector(PG_FUNCTION_ARGS)
+ {
+     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
+     Oid         element_type = ARR_ELEMTYPE(v);
+     TSVector    tsout;
+     Datum      *dlexemes;
+     WordEntry  *arrout;
+     bool       *nulls;
+     int         nitems,
+                 i,
+                 j,
+                 tslen,
+                *lexlens,
+                 lexlen = 0;
+     char       *cur,
+               **lexemes;
+
+     if (element_type != TEXTOID)
+         ereport(ERROR,
+                 (errcode(ERRCODE_DATATYPE_MISMATCH),
+                  errmsg("Only arrays of strings can be converted to tsvector")));
+
+     deconstruct_array(v, TEXTOID, -1, false, 'i',
+                       &dlexemes, &nulls, &nitems);
+
+     /* Convert every element to a C string; a NULL element has no datum. */
+     lexemes = (char **) palloc(nitems * sizeof(char *));
+     for (i = 0; i < nitems; i++)
+     {
+         if (nulls[i])
+             ereport(ERROR,
+                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                      errmsg("lexeme array may not contain nulls")));
+         lexemes[i] = TextDatumGetCString(dlexemes[i]);
+     }
+
+     /* Sort, then squeeze out adjacent duplicates in place. */
+     if (nitems > 1)
+     {
+         qsort(lexemes, nitems, sizeof(char *), array_to_tsvector_cmp_lexemes);
+
+         j = 0;
+         for (i = 1; i < nitems; i++)
+         {
+             if (strcmp(lexemes[i], lexemes[j]) != 0)
+                 lexemes[++j] = lexemes[i];
+         }
+         nitems = j + 1;
+     }
+
+     /* Total lexeme bytes determine the size of the result. */
+     lexlens = (int *) palloc(nitems * sizeof(int));
+     for (i = 0; i < nitems; i++)
+     {
+         lexlens[i] = strlen(lexemes[i]);
+         lexlen += lexlens[i];
+     }
+
+     tslen = CALCDATASIZE(nitems, lexlen);
+     tsout = (TSVector) palloc0(tslen);
+     SET_VARSIZE(tsout, tslen);
+     tsout->size = nitems;
+     arrout = ARRPTR(tsout);
+     cur = STRPTR(tsout);
+
+     /* Lexemes are packed back to back; no entry has position data. */
+     for (i = 0; i < nitems; i++)
+     {
+         memcpy(cur, lexemes[i], lexlens[i]);
+         arrout[i].haspos = 0;
+         arrout[i].len = lexlens[i];
+         arrout[i].pos = cur - STRPTR(tsout);
+         cur += lexlens[i];
+     }
+
+     PG_FREE_IF_COPY(v, 0);
+     PG_RETURN_POINTER(tsout);
+ }
Datum
tsvector_concat(PG_FUNCTION_ARGS)
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
new file mode 100644
index eb55b3a..f7fb490
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
*************** DATA(insert OID = 3624 ( setweight P
*** 4574,4579 ****
--- 4574,4588 ----
DESCR("set weight of lexeme's entries");
DATA(insert OID = 3625 ( tsvector_concat PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ ));
+ DATA(insert OID = 3315 ( delete PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 25" _null_ _null_ _null_ _null_ _null_ tsvector_delete _null_ _null_ _null_ ));
+ DESCR("delete lexeme");
+ DATA(insert OID = 3316 ( unnest PGNSP PGUID 12 1 10 0 0 f f f f t t i s 1 0 2249 "3614" "{3614,25,1007,1005}" "{i,o,o,o}" "{tsvector,lexeme,postings,weights}" _null_ _null_ tsvector_unnest _null_ _null_ _null_ ));
+ DESCR("expand tsvector to set of rows");
+ DATA(insert OID = 3317 ( to_array PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 1009 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_to_array _null_ _null_ _null_ ));
+ DESCR("convert to lexeme's array");
+ DATA(insert OID = 3318 ( to_tsvector PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3614 "1009" _null_ _null_ _null_ _null_ _null_ array_to_tsvector _null_ _null_ _null_ ));
+ DESCR("build tsvector from lexeme's array");
+
DATA(insert OID = 3634 ( ts_match_vq PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3614 3615" _null_ _null_ _null_ _null_ _null_ ts_match_vq _null_ _null_ _null_ ));
DATA(insert OID = 3635 ( ts_match_qv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3614" _null_ _null_ _null_ _null_ _null_ ts_match_qv _null_ _null_ _null_ ));
DATA(insert OID = 3760 ( ts_match_tt PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ ts_match_tt _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
new file mode 100644
index 281cdd6..81e7bbb
*** a/src/include/tsearch/ts_type.h
--- b/src/include/tsearch/ts_type.h
*************** extern Datum tsvector_length(PG_FUNCTION
*** 142,147 ****
--- 142,152 ----
extern Datum tsvector_strip(PG_FUNCTION_ARGS);
extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
extern Datum tsvector_concat(PG_FUNCTION_ARGS);
+ extern Datum tsvector_delete(PG_FUNCTION_ARGS);
+ extern Datum tsvector_unnest(PG_FUNCTION_ARGS);
+ extern Datum tsvector_to_array(PG_FUNCTION_ARGS);
+ extern Datum array_to_tsvector(PG_FUNCTION_ARGS);
+
extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
new file mode 100644
index 6284fb6..faa33e5
*** a/src/test/regress/expected/tstypes.out
--- b/src/test/regress/expected/tstypes.out
*************** SELECT ts_rank_cd(' a:1 s:2 d g'::tsvect
*** 625,627 ****
--- 625,687 ----
0.1
(1 row)
+ SELECT delete(to_tsvector('Rebel spaceships, striking from a hidden base'), 'spaceship');
+ delete
+ ------------------------------------------
+ 'base':7 'hidden':6 'rebel':1 'strike':3
+ (1 row)
+
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
+ delete
+ --------------------------------------------------------------
+ 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
+ (1 row)
+
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
+ delete
+ ------------------------------------------
+ 'base':7 'hidden':6 'rebel':1 'strike':3
+ (1 row)
+
+ SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ unnest
+ ---------------------------------------------
+ (base,{7},{0})
+ (hidden,{6},{0})
+ (rebel,{1},{0})
+ (spaceship,"{2,33,34,35,36}","{0,3,2,1,0}")
+ (strike,{3},{0})
+ (5 rows)
+
+ SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ lexeme | postings | weights
+ -----------+-----------------+-------------
+ base | {7} | {0}
+ hidden | {6} | {0}
+ rebel | {1} | {0}
+ spaceship | {2,33,34,35,36} | {0,3,2,1,0}
+ strike | {3} | {0}
+ (5 rows)
+
+ SELECT lexeme, postings[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ lexeme | postings
+ -----------+----------
+ base | 7
+ hidden | 6
+ rebel | 1
+ spaceship | 2
+ strike | 3
+ (5 rows)
+
+ SELECT to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ to_array
+ --------------------------------------
+ {base,hidden,rebel,spaceship,strike}
+ (1 row)
+
+ SELECT to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
+ to_tsvector
+ ----------------------------------------------
+ 'base' 'hidden' 'rebel' 'spaceship' 'strike'
+ (1 row)
+
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
new file mode 100644
index fd7c702..51baf30
*** a/src/test/regress/sql/tstypes.sql
--- b/src/test/regress/sql/tstypes.sql
*************** SELECT ts_rank_cd(' a:1 s:2 d g'::tsvect
*** 115,117 ****
--- 115,126 ----
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
+
+ SELECT delete(to_tsvector('Rebel spaceships, striking from a hidden base'), 'spaceship');
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
+ SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
+ SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT lexeme, postings[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+ SELECT to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);