commit f33ee503463137aa1a2ae4c3ab04d1468ae1941c Author: Pavan Deolasee Date: Sat Sep 3 14:51:00 2016 +0530 Use HEAP_TUPLE_LATEST to mark a tuple as the latest tuple in an update chain and use OffsetNumber in t_ctid to store the root line pointer of the chain. The t_ctid field in the tuple header is usually used to store the TID of the next tuple in an update chain. But for the last tuple in the chain, t_ctid is made to point to itself. When t_ctid points to itself, that signals the end of the chain. With this patch, information about a tuple being the last tuple in the chain is stored in a separate HEAP_TUPLE_LATEST flag. This uses another free bit in t_infomask2. When HEAP_TUPLE_LATEST is set, the OffsetNumber field in t_ctid stores the root line pointer of the chain. This will help us quickly find the root of an update chain. diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 6a27ef4..ccf84be 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -93,7 +93,8 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, - HeapTuple newtup, HeapTuple old_key_tup, + HeapTuple newtup, OffsetNumber root_offnum, + HeapTuple old_key_tup, bool all_visible_cleared, bool new_all_visible_cleared); static void HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs, @@ -2250,13 +2251,13 @@ heap_get_latest_tid(Relation relation, */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || HeapTupleHeaderIsOnlyLocked(tp.t_data) || - ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) + HeapTupleHeaderIsHeapLatest(tp.t_data, ctid)) { UnlockReleaseBuffer(buffer); break; } - ctid = tp.t_data->t_ctid; + HeapTupleHeaderGetNextCtid(tp.t_data, &ctid, offnum); priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data); UnlockReleaseBuffer(buffer); } /* end of loop */ @@ -2415,7 
+2416,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, START_CRIT_SECTION(); RelationPutHeapTuple(relation, buffer, heaptup, - (options & HEAP_INSERT_SPECULATIVE) != 0); + (options & HEAP_INSERT_SPECULATIVE) != 0, + InvalidOffsetNumber); if (PageIsAllVisible(BufferGetPage(buffer))) { @@ -2713,7 +2715,8 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit. */ - RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); + RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false, + InvalidOffsetNumber); for (nthispage = 1; ndone + nthispage < ntuples; nthispage++) { HeapTuple heaptup = heaptuples[ndone + nthispage]; @@ -2721,7 +2724,8 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; - RelationPutHeapTuple(relation, buffer, heaptup, false); + RelationPutHeapTuple(relation, buffer, heaptup, false, + InvalidOffsetNumber); /* * We don't use heap_multi_insert for catalog tuples yet, but @@ -2993,6 +2997,7 @@ heap_delete(Relation relation, ItemPointer tid, HeapTupleData tp; Page page; BlockNumber block; + OffsetNumber offnum; Buffer buffer; Buffer vmbuffer = InvalidBuffer; TransactionId new_xmax; @@ -3044,7 +3049,8 @@ heap_delete(Relation relation, ItemPointer tid, LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } - lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); + offnum = ItemPointerGetOffsetNumber(tid); + lp = PageGetItemId(page, offnum); Assert(ItemIdIsNormal(lp)); tp.t_tableOid = RelationGetRelid(relation); @@ -3174,7 +3180,7 @@ l1: result == HeapTupleUpdated || result == HeapTupleBeingUpdated); Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID)); - hufd->ctid = tp.t_data->t_ctid; + HeapTupleHeaderGetNextCtid(tp.t_data, &hufd->ctid, offnum); hufd->xmax = 
HeapTupleHeaderGetUpdateXid(tp.t_data); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(tp.t_data); @@ -3250,8 +3256,8 @@ l1: HeapTupleHeaderClearHotUpdated(tp.t_data); HeapTupleHeaderSetXmax(tp.t_data, new_xmax); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); - /* Make sure there is no forward chain link in t_ctid */ - tp.t_data->t_ctid = tp.t_self; + /* Mark this tuple as the latest tuple in the update chain */ + HeapTupleHeaderSetHeapLatest(tp.t_data); MarkBufferDirty(buffer); @@ -3450,6 +3456,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, bool old_key_copied = false; Page page; BlockNumber block; + OffsetNumber offnum; + OffsetNumber root_offnum; MultiXactStatus mxact_status; Buffer buffer, newbuf, @@ -3506,6 +3514,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, INDEX_ATTR_BITMAP_IDENTITY_KEY); block = ItemPointerGetBlockNumber(otid); + offnum = ItemPointerGetOffsetNumber(otid); buffer = ReadBuffer(relation, block); page = BufferGetPage(buffer); @@ -3789,7 +3798,7 @@ l2: result == HeapTupleUpdated || result == HeapTupleBeingUpdated); Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); - hufd->ctid = oldtup.t_data->t_ctid; + HeapTupleHeaderGetNextCtid(oldtup.t_data, &hufd->ctid, offnum); hufd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); @@ -3968,7 +3977,7 @@ l2: HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* temporarily make it look not-updated, but locked */ - oldtup.t_data->t_ctid = oldtup.t_self; + HeapTupleHeaderSetHeapLatest(oldtup.t_data); /* * Clear all-frozen bit on visibility map if needed. 
We could @@ -4149,6 +4158,20 @@ l2: HeapTupleSetHeapOnly(heaptup); /* Mark the caller's copy too, in case different from heaptup */ HeapTupleSetHeapOnly(newtup); + /* + * For HOT (or WARM) updated tuples, we store the offset of the root + * line pointer of this chain in the ip_posid field of the new tuple. + * Usually this information will be available in the corresponding + * field of the old tuple. But for aborted updates or pg_upgraded + * databases, we might be seeing the old-style CTID chains and hence + * the information must be obtained the hard way + */ + if (HeapTupleHeaderHasRootOffset(oldtup.t_data)) + root_offnum = HeapTupleHeaderGetRootOffset(oldtup.t_data); + else + heap_get_root_tuple_one(page, + ItemPointerGetOffsetNumber(&(oldtup.t_self)), + &root_offnum); } else { @@ -4156,10 +4179,29 @@ l2: HeapTupleClearHotUpdated(&oldtup); HeapTupleClearHeapOnly(heaptup); HeapTupleClearHeapOnly(newtup); + root_offnum = InvalidOffsetNumber; } - RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */ + /* insert new tuple */ + RelationPutHeapTuple(relation, newbuf, heaptup, false, root_offnum); + HeapTupleHeaderSetHeapLatest(heaptup->t_data); + HeapTupleHeaderSetHeapLatest(newtup->t_data); + /* + * Also update the in-memory copy with the root line pointer information + */ + if (OffsetNumberIsValid(root_offnum)) + { + HeapTupleHeaderSetRootOffset(heaptup->t_data, root_offnum); + HeapTupleHeaderSetRootOffset(newtup->t_data, root_offnum); + } + else + { + HeapTupleHeaderSetRootOffset(heaptup->t_data, + ItemPointerGetOffsetNumber(&heaptup->t_self)); + HeapTupleHeaderSetRootOffset(newtup->t_data, + ItemPointerGetOffsetNumber(&heaptup->t_self)); + } /* Clear obsolete visibility flags, possibly set by ourselves above... 
*/ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); @@ -4172,7 +4214,9 @@ l2: HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* record address of new tuple in t_ctid of old one */ - oldtup.t_data->t_ctid = heaptup->t_self; + HeapTupleHeaderSetNextCtid(oldtup.t_data, + ItemPointerGetBlockNumber(&(heaptup->t_self)), + ItemPointerGetOffsetNumber(&(heaptup->t_self))); /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */ if (PageIsAllVisible(BufferGetPage(buffer))) @@ -4211,6 +4255,7 @@ l2: recptr = log_heap_update(relation, buffer, newbuf, &oldtup, heaptup, + root_offnum, old_key_tuple, all_visible_cleared, all_visible_cleared_new); @@ -4573,7 +4618,8 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, ItemId lp; Page page; Buffer vmbuffer = InvalidBuffer; - BlockNumber block; + BlockNumber block; + OffsetNumber offnum; TransactionId xid, xmax; uint16 old_infomask, @@ -4585,6 +4631,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); block = ItemPointerGetBlockNumber(tid); + offnum = ItemPointerGetOffsetNumber(tid); /* * Before locking the buffer, pin the visibility map page if it appears to @@ -4631,7 +4678,7 @@ l3: xwait = HeapTupleHeaderGetRawXmax(tuple->t_data); infomask = tuple->t_data->t_infomask; infomask2 = tuple->t_data->t_infomask2; - ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); + HeapTupleHeaderGetNextCtid(tuple->t_data, &t_ctid, offnum); LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); @@ -5069,7 +5116,7 @@ failed: Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || result == HeapTupleWouldBlock); Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)); - hufd->ctid = tuple->t_data->t_ctid; + HeapTupleHeaderGetNextCtid(tuple->t_data, &hufd->ctid, offnum); hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); @@ -5145,7 +5192,7 @@ failed: * the tuple as 
well. */ if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) - tuple->t_data->t_ctid = *tid; + HeapTupleHeaderSetHeapLatest(tuple->t_data); /* Clear only the all-frozen bit on visibility map if needed */ if (PageIsAllVisible(page) && @@ -5659,6 +5706,7 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, bool cleared_all_frozen = false; Buffer vmbuffer = InvalidBuffer; BlockNumber block; + OffsetNumber offnum; ItemPointerCopy(tid, &tupid); @@ -5667,6 +5715,8 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, new_infomask = 0; new_xmax = InvalidTransactionId; block = ItemPointerGetBlockNumber(&tupid); + offnum = ItemPointerGetOffsetNumber(&tupid); + ItemPointerCopy(&tupid, &(mytup.t_self)); if (!heap_fetch(rel, SnapshotAny, &mytup, &buf, false, NULL)) @@ -5885,7 +5935,7 @@ l4: /* if we find the end of update chain, we're done. */ if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || - ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || + HeapTupleHeaderIsHeapLatest(mytup.t_data, mytup.t_self) || HeapTupleHeaderIsOnlyLocked(mytup.t_data)) { result = HeapTupleMayBeUpdated; @@ -5894,7 +5944,7 @@ l4: /* tail recursion */ priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data); - ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); + HeapTupleHeaderGetNextCtid(mytup.t_data, &tupid, offnum); UnlockReleaseBuffer(buf); if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); @@ -6011,7 +6061,8 @@ heap_finish_speculative(Relation relation, HeapTuple tuple) * Replace the speculative insertion token with a real t_ctid, pointing to * itself like it does on regular tuples. 
*/ - htup->t_ctid = tuple->t_self; + HeapTupleHeaderSetHeapLatest(htup); + HeapTupleHeaderSetRootOffset(htup, offnum); /* XLOG stuff */ if (RelationNeedsWAL(relation)) @@ -6137,7 +6188,9 @@ heap_abort_speculative(Relation relation, HeapTuple tuple) HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); /* Clear the speculative insertion token too */ - tp.t_data->t_ctid = tp.t_self; + HeapTupleHeaderSetNextCtid(tp.t_data, + ItemPointerGetBlockNumber(&tp.t_self), + ItemPointerGetOffsetNumber(&tp.t_self)); MarkBufferDirty(buffer); @@ -7486,6 +7539,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, + OffsetNumber root_offnum, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared) { @@ -7605,6 +7659,7 @@ log_heap_update(Relation reln, Buffer oldbuf, /* Prepare WAL data for the new page */ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self); xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data); + xlrec.root_offnum = root_offnum; bufflags = REGBUF_STANDARD; if (init) @@ -8260,7 +8315,7 @@ heap_xlog_delete(XLogReaderState *record) PageClearAllVisible(page); /* Make sure there is no forward chain link in t_ctid */ - htup->t_ctid = target_tid; + HeapTupleHeaderSetHeapLatest(htup); PageSetLSN(page, lsn); MarkBufferDirty(buffer); } @@ -8350,7 +8405,9 @@ heap_xlog_insert(XLogReaderState *record) htup->t_hoff = xlhdr.t_hoff; HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); - htup->t_ctid = target_tid; + + HeapTupleHeaderSetHeapLatest(htup); + HeapTupleHeaderSetRootOffset(htup, xlrec->offnum); if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber) @@ -8485,8 +8542,9 @@ heap_xlog_multi_insert(XLogReaderState *record) htup->t_hoff = xlhdr->t_hoff; HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); 
HeapTupleHeaderSetCmin(htup, FirstCommandId); - ItemPointerSetBlockNumber(&htup->t_ctid, blkno); - ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); + + HeapTupleHeaderSetHeapLatest(htup); + HeapTupleHeaderSetRootOffset(htup, offnum); offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true); if (offnum == InvalidOffsetNumber) @@ -8622,7 +8680,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) HeapTupleHeaderSetXmax(htup, xlrec->old_xmax); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Set forward chain link in t_ctid */ - htup->t_ctid = newtid; + HeapTupleHeaderSetNextCtid(htup, ItemPointerGetBlockNumber(&newtid), + ItemPointerGetOffsetNumber(&newtid)); /* Mark the page as a candidate for pruning */ PageSetPrunable(page, XLogRecGetXid(record)); @@ -8756,12 +8815,17 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) HeapTupleHeaderSetCmin(htup, FirstCommandId); HeapTupleHeaderSetXmax(htup, xlrec->new_xmax); /* Make sure there is no forward chain link in t_ctid */ - htup->t_ctid = newtid; + HeapTupleHeaderSetHeapLatest(htup); offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true); if (offnum == InvalidOffsetNumber) elog(PANIC, "failed to add tuple"); + if (OffsetNumberIsValid(xlrec->root_offnum)) + HeapTupleHeaderSetRootOffset(htup, xlrec->root_offnum); + else + HeapTupleHeaderSetRootOffset(htup, offnum); + if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); @@ -8889,9 +8953,7 @@ heap_xlog_lock(XLogReaderState *record) { HeapTupleHeaderClearHotUpdated(htup); /* Make sure there is no forward chain link in t_ctid */ - ItemPointerSet(&htup->t_ctid, - BufferGetBlockNumber(buffer), - offnum); + HeapTupleHeaderSetHeapLatest(htup); } HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index c90fb71..e32deb1 100644 --- a/src/backend/access/heap/hio.c +++ 
b/src/backend/access/heap/hio.c @@ -31,12 +31,18 @@ * !!! EREPORT(ERROR) IS DISALLOWED HERE !!! Must PANIC on failure!!! * * Note - caller must hold BUFFER_LOCK_EXCLUSIVE on the buffer. + * + * The caller can optionally tell us to set the root offset to the given value. + * Otherwise, the root offset is set to the offset of the new location once it's + * known. The former is used while updating an existing tuple while the latter is + * used during insertion of a new row. */ void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, - bool token) + bool token, + OffsetNumber root_offnum) { Page pageHeader; OffsetNumber offnum; @@ -69,7 +75,13 @@ RelationPutHeapTuple(Relation relation, ItemId itemId = PageGetItemId(pageHeader, offnum); Item item = PageGetItem(pageHeader, itemId); - ((HeapTupleHeader) item)->t_ctid = tuple->t_self; + HeapTupleHeaderSetHeapLatest((HeapTupleHeader) item); + if (OffsetNumberIsValid(root_offnum)) + HeapTupleHeaderSetRootOffset((HeapTupleHeader) item, + root_offnum); + else + HeapTupleHeaderSetRootOffset((HeapTupleHeader) item, + offnum); } } diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 6ff9251..7c2231a 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -55,6 +55,8 @@ static void heap_prune_record_redirect(PruneState *prstate, static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum); static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum); +static void heap_get_root_tuples_internal(Page page, + OffsetNumber target_offnum, OffsetNumber *root_offsets); /* * Optionally prune and repair fragmentation in the specified page. @@ -740,8 +742,9 @@ heap_page_prune_execute(Buffer buffer, * holds a pin on the buffer. Once pin is released, a tuple might be pruned * and reused by a completely unrelated tuple. 
*/ -void -heap_get_root_tuples(Page page, OffsetNumber *root_offsets) +static void +heap_get_root_tuples_internal(Page page, OffsetNumber target_offnum, + OffsetNumber *root_offsets) { OffsetNumber offnum, maxoff; @@ -820,6 +823,14 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) /* Remember the root line pointer for this item */ root_offsets[nextoffnum - 1] = offnum; + /* + * If the caller is interested in just one offset and we found + * that, just return + */ + if (OffsetNumberIsValid(target_offnum) && + (nextoffnum == target_offnum)) + return; + /* Advance to next chain member, if any */ if (!HeapTupleHeaderIsHotUpdated(htup)) break; @@ -829,3 +840,25 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) } } } + +/* + * Get root line pointer for the given tuple + */ +void +heap_get_root_tuple_one(Page page, OffsetNumber target_offnum, + OffsetNumber *root_offnum) +{ + OffsetNumber offsets[MaxHeapTuplesPerPage]; + heap_get_root_tuples_internal(page, target_offnum, offsets); + *root_offnum = offsets[target_offnum - 1]; +} + +/* + * Get root line pointers for all tuples in the page + */ +void +heap_get_root_tuples(Page page, OffsetNumber *root_offsets) +{ + return heap_get_root_tuples_internal(page, InvalidOffsetNumber, + root_offsets); +} diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 17584ba..09a164c 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -419,14 +419,14 @@ rewrite_heap_tuple(RewriteState state, */ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) && - !(ItemPointerEquals(&(old_tuple->t_self), - &(old_tuple->t_data->t_ctid)))) + !(HeapTupleHeaderIsHeapLatest(old_tuple->t_data, old_tuple->t_self))) { OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data); - hashkey.tid = old_tuple->t_data->t_ctid; + 
HeapTupleHeaderGetNextCtid(old_tuple->t_data, &hashkey.tid, + ItemPointerGetOffsetNumber(&old_tuple->t_self)); mapping = (OldToNewMapping) hash_search(state->rs_old_new_tid_map, &hashkey, @@ -439,7 +439,9 @@ rewrite_heap_tuple(RewriteState state, * set the ctid of this tuple to point to the new location, and * insert it right away. */ - new_tuple->t_data->t_ctid = mapping->new_tid; + HeapTupleHeaderSetNextCtid(new_tuple->t_data, + ItemPointerGetBlockNumber(&mapping->new_tid), + ItemPointerGetOffsetNumber(&mapping->new_tid)); /* We don't need the mapping entry anymore */ hash_search(state->rs_old_new_tid_map, &hashkey, @@ -525,7 +527,9 @@ rewrite_heap_tuple(RewriteState state, new_tuple = unresolved->tuple; free_new = true; old_tid = unresolved->old_tid; - new_tuple->t_data->t_ctid = new_tid; + HeapTupleHeaderSetNextCtid(new_tuple->t_data, + ItemPointerGetBlockNumber(&new_tid), + ItemPointerGetOffsetNumber(&new_tid)); /* * We don't need the hash entry anymore, but don't free its @@ -731,7 +735,10 @@ raw_heap_insert(RewriteState state, HeapTuple tup) newitemid = PageGetItemId(page, newoff); onpage_tup = (HeapTupleHeader) PageGetItem(page, newitemid); - onpage_tup->t_ctid = tup->t_self; + HeapTupleHeaderSetNextCtid(onpage_tup, + ItemPointerGetBlockNumber(&tup->t_self), + ItemPointerGetOffsetNumber(&tup->t_self)); + HeapTupleHeaderSetHeapLatest(onpage_tup); } /* If heaptup is a private copy, release it. */ diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 32bb3f9..079a77f 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -2443,7 +2443,7 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, * As above, it should be safe to examine xmax and t_ctid without the * buffer content lock, because they can't be changing. 
*/ - if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid)) + if (HeapTupleHeaderIsHeapLatest(tuple.t_data, tuple.t_self)) { /* deleted, so forget about it */ ReleaseBuffer(buffer); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index b3a595c..94b46b8 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -188,6 +188,8 @@ extern void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused); +extern void heap_get_root_tuple_one(Page page, OffsetNumber target_offnum, + OffsetNumber *root_offnum); extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); /* in heap/syncscan.c */ diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 06a8242..5a04561 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -193,6 +193,8 @@ typedef struct xl_heap_update uint8 flags; TransactionId new_xmax; /* xmax of the new tuple */ OffsetNumber new_offnum; /* new tuple's offset */ + OffsetNumber root_offnum; /* offset of the root line pointer in case of + HOT or WARM update */ /* * If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are @@ -200,7 +202,7 @@ typedef struct xl_heap_update */ } xl_heap_update; -#define SizeOfHeapUpdate (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber)) +#define SizeOfHeapUpdate (offsetof(xl_heap_update, root_offnum) + sizeof(OffsetNumber)) /* * This is what we need to know about vacuum page cleanup/redirect diff --git a/src/include/access/hio.h b/src/include/access/hio.h index a174b34..82e5b5f 100644 --- a/src/include/access/hio.h +++ b/src/include/access/hio.h @@ -36,7 +36,7 @@ typedef struct BulkInsertStateData extern void RelationPutHeapTuple(Relation relation, Buffer buffer, - HeapTuple tuple, bool token); + HeapTuple tuple, bool token, OffsetNumber root_offnum); extern Buffer 
RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index d7e5fad..d01e0d8 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -260,13 +260,19 @@ struct HeapTupleHeaderData * information stored in t_infomask2: */ #define HEAP_NATTS_MASK 0x07FF /* 11 bits for number of attributes */ -/* bits 0x1800 are available */ +/* bits 0x0800 are available */ +#define HEAP_LATEST_TUPLE 0x1000 /* + * This is the last tuple in chain and + * ip_posid points to the root line + * pointer + */ #define HEAP_KEYS_UPDATED 0x2000 /* tuple was updated and key cols * modified, or tuple deleted */ #define HEAP_HOT_UPDATED 0x4000 /* tuple was HOT-updated */ #define HEAP_ONLY_TUPLE 0x8000 /* this is heap-only tuple */ -#define HEAP2_XACT_MASK 0xE000 /* visibility-related bits */ +#define HEAP2_XACT_MASK 0xF000 /* visibility-related bits */ + /* * HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins. It is @@ -504,6 +510,30 @@ do { \ (tup)->t_infomask2 & HEAP_ONLY_TUPLE \ ) +#define HeapTupleHeaderSetHeapLatest(tup) \ +( \ + (tup)->t_infomask2 |= HEAP_LATEST_TUPLE \ +) + +#define HeapTupleHeaderClearHeapLatest(tup) \ +( \ + (tup)->t_infomask2 &= ~HEAP_LATEST_TUPLE \ +) + +/* + * If HEAP_LATEST_TUPLE is set in the last tuple in the update chain. 
But for + * clusters which are upgraded from pre-10.0 release, we still check if c_tid + * is pointing to itself and declare such tuple as the latest tuple in the + * chain + */ +#define HeapTupleHeaderIsHeapLatest(tup, tid) \ +( \ + ((tup)->t_infomask2 & HEAP_LATEST_TUPLE) || \ + ((ItemPointerGetBlockNumber(&(tup)->t_ctid) == ItemPointerGetBlockNumber(&tid)) && \ + (ItemPointerGetOffsetNumber(&(tup)->t_ctid) == ItemPointerGetOffsetNumber(&tid))) \ +) + + #define HeapTupleHeaderSetHeapOnly(tup) \ ( \ (tup)->t_infomask2 |= HEAP_ONLY_TUPLE \ @@ -542,6 +572,55 @@ do { \ /* + * Set the t_ctid chain and also clear the HEAP_LATEST_TUPLE flag since we + * probably have a new tuple in the chain + */ +#define HeapTupleHeaderSetNextCtid(tup, block, offset) \ +do { \ + ItemPointerSetBlockNumber(&((tup)->t_ctid), (block)); \ + ItemPointerSetOffsetNumber(&((tup)->t_ctid), (offset)); \ + HeapTupleHeaderClearHeapLatest((tup)); \ +} while (0) + +/* + * Get TID of next tuple in the update chain. Traditionally, we have stored + * self TID in the t_ctid field if the tuple is the last tuple in the chain. We + * try to preserve that behaviour by returning self-TID if HEAP_LATEST_TUPLE + * flag is set. 
+ */ +#define HeapTupleHeaderGetNextCtid(tup, next_ctid, offnum) \ +do { \ + if ((tup)->t_infomask2 & HEAP_LATEST_TUPLE) \ + { \ + ItemPointerSet((next_ctid), ItemPointerGetBlockNumber(&(tup)->t_ctid), \ + (offnum)); \ + } \ + else \ + { \ + ItemPointerSet((next_ctid), ItemPointerGetBlockNumber(&(tup)->t_ctid), \ + ItemPointerGetOffsetNumber(&(tup)->t_ctid)); \ + } \ +} while (0) + +#define HeapTupleHeaderSetRootOffset(tup, offset) \ +do { \ + AssertMacro(!HeapTupleHeaderIsHotUpdated(tup)); \ + AssertMacro((tup)->t_infomask2 & HEAP_LATEST_TUPLE); \ + ItemPointerSetOffsetNumber(&(tup)->t_ctid, (offset)); \ +} while (0) + +#define HeapTupleHeaderGetRootOffset(tup) \ +( \ + AssertMacro((tup)->t_infomask2 & HEAP_LATEST_TUPLE), \ + ItemPointerGetOffsetNumber(&(tup)->t_ctid) \ +) + +#define HeapTupleHeaderHasRootOffset(tup) \ +( \ + (tup)->t_infomask2 & HEAP_LATEST_TUPLE \ +) + +/* * BITMAPLEN(NATTS) - * Computes size of null bitmap given number of data columns. */