From efaf90b264d97f08233b6532717e1f5166f1f164 Mon Sep 17 00:00:00 2001
From: Hari Babu Kommi
Date: Fri, 8 Sep 2017 15:45:04 +1000
Subject: [PATCH 7/8] Scan functions are added to storage AM

All the scan functions present in the heapam module are moved into
heapam_storage, and the corresponding function hooks are added.
Replaced HeapTuple with StorageTuple wherever possible. Currently, the
ability to return a slot directly instead of a tuple has been added in
only a limited number of places.
---
 contrib/pgrowlocks/pgrowlocks.c | 6 +-
 contrib/pgstattuple/pgstattuple.c | 6 +-
 src/backend/access/heap/heapam.c | 1504 ++--------------------------
 src/backend/access/heap/heapam_storage.c | 1299 ++++++++++++++++++++++++
 src/backend/access/heap/rewriteheap.c | 2 +-
 src/backend/access/heap/storageam.c | 235 +++++
 src/backend/access/index/genam.c | 7 +-
 src/backend/access/index/indexam.c | 3 +-
 src/backend/access/nbtree/nbtinsert.c | 5 +-
 src/backend/bootstrap/bootstrap.c | 25 +-
 src/backend/catalog/aclchk.c | 13 +-
 src/backend/catalog/index.c | 27 +-
 src/backend/catalog/partition.c | 6 +-
 src/backend/catalog/pg_conversion.c | 7 +-
 src/backend/catalog/pg_db_role_setting.c | 7 +-
 src/backend/catalog/pg_publication.c | 7 +-
 src/backend/catalog/pg_subscription.c | 7 +-
 src/backend/commands/cluster.c | 13 +-
 src/backend/commands/constraint.c | 3 +-
 src/backend/commands/copy.c | 6 +-
 src/backend/commands/dbcommands.c | 19 +-
 src/backend/commands/indexcmds.c | 7 +-
 src/backend/commands/tablecmds.c | 30 +-
 src/backend/commands/tablespace.c | 39 +-
 src/backend/commands/trigger.c | 3 +-
 src/backend/commands/typecmds.c | 13 +-
 src/backend/commands/vacuum.c | 13 +-
 src/backend/executor/execAmi.c | 2 +-
 src/backend/executor/execIndexing.c | 13 +-
 src/backend/executor/execReplication.c | 16 +-
 src/backend/executor/execTuples.c | 8 +-
 src/backend/executor/functions.c | 4 +-
 src/backend/executor/nodeAgg.c | 4 +-
 src/backend/executor/nodeBitmapHeapscan.c | 11 +-
 src/backend/executor/nodeForeignscan.c | 6 +-
 src/backend/executor/nodeGather.c | 8 +-
 src/backend/executor/nodeGatherMerge.c | 12 +-
 src/backend/executor/nodeIndexonlyscan.c | 4 +-
 src/backend/executor/nodeIndexscan.c | 16 +-
 src/backend/executor/nodeSamplescan.c | 21 +-
 src/backend/executor/nodeSeqscan.c | 39 +-
 src/backend/executor/nodeWindowAgg.c | 4 +-
 src/backend/executor/spi.c | 20 +-
 src/backend/executor/tqueue.c | 16 +-
 src/backend/postmaster/autovacuum.c | 18 +-
 src/backend/postmaster/pgstat.c | 7 +-
 src/backend/replication/logical/launcher.c | 7 +-
 src/backend/rewrite/rewriteDefine.c | 7 +-
 src/backend/utils/init/postinit.c | 7 +-
 src/include/access/heapam.h | 30 +-
 src/include/access/heapam_common.h | 8 +
 src/include/access/storageam.h | 42 +-
 src/include/executor/functions.h | 2 +-
 src/include/executor/spi.h | 10 +-
 src/include/executor/tqueue.h | 2 +-
 src/include/funcapi.h | 2 +-
 56 files changed, 1924 insertions(+), 1734 deletions(-)

diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c
index 5f076ef..063e079 100644
--- a/contrib/pgrowlocks/pgrowlocks.c
+++ b/contrib/pgrowlocks/pgrowlocks.c
@@ -125,7 +125,7 @@ pgrowlocks(PG_FUNCTION_ARGS)
 			aclcheck_error(aclresult, ACL_KIND_CLASS,
 						   RelationGetRelationName(rel));
 
-		scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+		scan = storage_beginscan(rel, GetActiveSnapshot(), 0, NULL);
 		mydata = palloc(sizeof(*mydata));
 		mydata->rel = rel;
 		mydata->scan = scan;
@@ -141,7 +141,7 @@ pgrowlocks(PG_FUNCTION_ARGS)
 	scan = mydata->scan;
 
 	/* scan the relation */
-	while ((tuple = 
heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { HTSU_Result htsu; TransactionId xmax; @@ -306,7 +306,7 @@ pgrowlocks(PG_FUNCTION_ARGS) } } - heap_endscan(scan); + storage_endscan(scan); heap_close(mydata->rel, AccessShareLock); SRF_RETURN_DONE(funcctx); diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index f7b68a8..eb33b26 100644 --- a/contrib/pgstattuple/pgstattuple.c +++ b/contrib/pgstattuple/pgstattuple.c @@ -325,13 +325,13 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) StorageAmRoutine *method = rel->rd_stamroutine; /* Disable syncscan because we assume we scan from block zero upwards */ - scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); + scan = storage_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); InitDirtySnapshot(SnapshotDirty); nblocks = scan->rs_nblocks; /* # blocks to be scanned */ /* scan the relation */ - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { CHECK_FOR_INTERRUPTS(); @@ -384,7 +384,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) block++; } - heap_endscan(scan); + storage_endscan(scan); relation_close(rel, AccessShareLock); stat.table_len = (uint64) nblocks * BLCKSZ; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index d20f211..b64fec8 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -81,19 +81,6 @@ /* GUC variable */ bool synchronize_seqscans = true; - -static HeapScanDesc heap_beginscan_internal(Relation relation, - Snapshot snapshot, - int nkeys, ScanKey key, - ParallelHeapScanDesc parallel_scan, - bool allow_strat, - bool allow_sync, - bool allow_pagemode, - bool is_bitmapscan, - bool is_samplescan, - bool temp_snap); -static void heap_parallelscan_startblock_init(HeapScanDesc scan); -static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tup, @@ -112,139 +99,6 @@ static bool Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status, static HeapTuple ExtractReplicaIdentity(Relation rel, HeapTuple tup, bool key_modified, bool *copy); -/* ---------------------------------------------------------------- - * heap support routines - * ---------------------------------------------------------------- - */ - -/* ---------------- - * initscan - scan code common to heap_beginscan and heap_rescan - * ---------------- - */ -static void -initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) -{ - bool allow_strat; - bool allow_sync; - - /* - * Determine the number of blocks we have to scan. - * - * It is sufficient to do this once at scan start, since any tuples added - * while the scan is in progress will be invisible to my snapshot anyway. - * (That is not true when using a non-MVCC snapshot. However, we couldn't - * guarantee to return tuples added after scan start anyway, since they - * might go into pages we already scanned. To guarantee consistent - * results for a non-MVCC snapshot, the caller must hold some higher-level - * lock that ensures the interesting tuple(s) won't change.) 
- */ - if (scan->rs_parallel != NULL) - scan->rs_nblocks = scan->rs_parallel->phs_nblocks; - else - scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd); - - /* - * If the table is large relative to NBuffers, use a bulk-read access - * strategy and enable synchronized scanning (see syncscan.c). Although - * the thresholds for these features could be different, we make them the - * same so that there are only two behaviors to tune rather than four. - * (However, some callers need to be able to disable one or both of these - * behaviors, independently of the size of the table; also there is a GUC - * variable that can disable synchronized scanning.) - * - * Note that heap_parallelscan_initialize has a very similar test; if you - * change this, consider changing that one, too. - */ - if (!RelationUsesLocalBuffers(scan->rs_rd) && - scan->rs_nblocks > NBuffers / 4) - { - allow_strat = scan->rs_allow_strat; - allow_sync = scan->rs_allow_sync; - } - else - allow_strat = allow_sync = false; - - if (allow_strat) - { - /* During a rescan, keep the previous strategy object. */ - if (scan->rs_strategy == NULL) - scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD); - } - else - { - if (scan->rs_strategy != NULL) - FreeAccessStrategy(scan->rs_strategy); - scan->rs_strategy = NULL; - } - - if (scan->rs_parallel != NULL) - { - /* For parallel scan, believe whatever ParallelHeapScanDesc says. */ - scan->rs_syncscan = scan->rs_parallel->phs_syncscan; - } - else if (keep_startblock) - { - /* - * When rescanning, we want to keep the previous startblock setting, - * so that rewinding a cursor doesn't generate surprising results. - * Reset the active syncscan setting, though. - */ - scan->rs_syncscan = (allow_sync && synchronize_seqscans); - } - else if (allow_sync && synchronize_seqscans) - { - scan->rs_syncscan = true; - scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks); - } - else - { - scan->rs_syncscan = false; - scan->rs_startblock = 0; - } - - scan->rs_numblocks = InvalidBlockNumber; - scan->rs_inited = false; - scan->rs_ctup.t_data = NULL; - ItemPointerSetInvalid(&scan->rs_ctup.t_self); - scan->rs_cbuf = InvalidBuffer; - scan->rs_cblock = InvalidBlockNumber; - - /* page-at-a-time fields are always invalid when not rs_inited */ - - /* - * copy the scan key, if appropriate - */ - if (key != NULL) - memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData)); - - /* - * Currently, we don't have a stats counter for bitmap heap scans (but the - * underlying bitmap index scans will be counted) or sample scans (we only - * update stats for tuple fetches there) - */ - if (!scan->rs_bitmapscan && !scan->rs_samplescan) - pgstat_count_heap_scan(scan->rs_rd); -} - -/* - * heap_setscanlimits - restrict range of a heapscan - * - * startBlk is the page to start at - * numBlks is number of pages to scan (InvalidBlockNumber means "all") - */ -void -heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks) -{ - Assert(!scan->rs_inited); /* else too late to change */ - Assert(!scan->rs_syncscan); /* else rs_startblock is significant */ - - /* Check startBlk is valid (but allow case of zero blocks...) 
*/ - Assert(startBlk == 0 || startBlk < scan->rs_nblocks); - - scan->rs_startblock = startBlk; - scan->rs_numblocks = numBlks; -} - /* * heapgetpage - subroutine for heapgettup() * @@ -363,603 +217,6 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) scan->rs_ntuples = ntup; } -/* ---------------- - * heapgettup - fetch next heap tuple - * - * Initialize the scan if not already done; then advance to the next - * tuple as indicated by "dir"; return the next tuple in scan->rs_ctup, - * or set scan->rs_ctup.t_data = NULL if no more tuples. - * - * dir == NoMovementScanDirection means "re-fetch the tuple indicated - * by scan->rs_ctup". - * - * Note: the reason nkeys/key are passed separately, even though they are - * kept in the scan descriptor, is that the caller may not want us to check - * the scankeys. - * - * Note: when we fall off the end of the scan in either direction, we - * reset rs_inited. This means that a further request with the same - * scan direction will restart the scan, which is a bit odd, but a - * request with the opposite scan direction will start a fresh scan - * in the proper direction. The latter is required behavior for cursors, - * while the former case is generally undefined behavior in Postgres - * so we don't care too much. - * ---------------- - */ -static void -heapgettup(HeapScanDesc scan, - ScanDirection dir, - int nkeys, - ScanKey key) -{ - HeapTuple tuple = &(scan->rs_ctup); - Snapshot snapshot = scan->rs_snapshot; - bool backward = ScanDirectionIsBackward(dir); - BlockNumber page; - bool finished; - Page dp; - int lines; - OffsetNumber lineoff; - int linesleft; - ItemId lpp; - - /* - * calculate next starting lineoff, given scan direction - */ - if (ScanDirectionIsForward(dir)) - { - if (!scan->rs_inited) - { - /* - * return null immediately if relation is empty - */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - if (scan->rs_parallel != NULL) - { - heap_parallelscan_startblock_init(scan); - - page = heap_parallelscan_nextpage(scan); - - /* Other processes might have already finished the scan. */ - if (page == InvalidBlockNumber) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - } - else - page = scan->rs_startblock; /* first page */ - heapgetpage(scan, page); - lineoff = FirstOffsetNumber; /* first offnum */ - scan->rs_inited = true; - } - else - { - /* continue from previously returned page/tuple */ - page = scan->rs_cblock; /* current page */ - lineoff = /* next offnum */ - OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self))); - } - - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); - lines = PageGetMaxOffsetNumber(dp); - /* page and lineoff now reference the physically next tid */ - - linesleft = lines - lineoff + 1; - } - else if (backward) - { - /* backward parallel scan not supported */ - Assert(scan->rs_parallel == NULL); - - if (!scan->rs_inited) - { - /* - * return null immediately if relation is empty - */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - - /* - * Disable reporting to syncscan logic in a backwards scan; it's - * not very likely anyone else is doing the same thing at the same - * time, and much more likely that we'll just bollix things for - * forward scanners. 
- */ - scan->rs_syncscan = false; - /* start from last page of the scan */ - if (scan->rs_startblock > 0) - page = scan->rs_startblock - 1; - else - page = scan->rs_nblocks - 1; - heapgetpage(scan, page); - } - else - { - /* continue from previously returned page/tuple */ - page = scan->rs_cblock; /* current page */ - } - - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); - lines = PageGetMaxOffsetNumber(dp); - - if (!scan->rs_inited) - { - lineoff = lines; /* final offnum */ - scan->rs_inited = true; - } - else - { - lineoff = /* previous offnum */ - OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self))); - } - /* page and lineoff now reference the physically previous tid */ - - linesleft = lineoff; - } - else - { - /* - * ``no movement'' scan direction: refetch prior tuple - */ - if (!scan->rs_inited) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - - page = ItemPointerGetBlockNumber(&(tuple->t_self)); - if (page != scan->rs_cblock) - heapgetpage(scan, page); - - /* Since the tuple was previously fetched, needn't lock page here */ - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); - lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); - lpp = PageGetItemId(dp, lineoff); - Assert(ItemIdIsNormal(lpp)); - - tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); - tuple->t_len = ItemIdGetLength(lpp); - - return; - } - - /* - * advance the scan until we find a qualifying tuple or run out of stuff - * to scan - */ - lpp = PageGetItemId(dp, lineoff); - for (;;) - { - while (linesleft > 0) - { - if (ItemIdIsNormal(lpp)) - { - bool valid; - - tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); - tuple->t_len = ItemIdGetLength(lpp); - ItemPointerSet(&(tuple->t_self), page, lineoff); - - /* - * if current tuple qualifies, return it. - */ - valid = HeapTupleSatisfiesVisibility(scan->rs_rd->rd_stamroutine, - tuple, - snapshot, - scan->rs_cbuf); - - CheckForSerializableConflictOut(valid, scan->rs_rd, tuple, - scan->rs_cbuf, snapshot); - - if (valid && key != NULL) - HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), - nkeys, key, valid); - - if (valid) - { - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - return; - } - } - - /* - * otherwise move to the next item on the page - */ - --linesleft; - if (backward) - { - --lpp; /* move back in this page's ItemId array */ - --lineoff; - } - else - { - ++lpp; /* move forward in this page's ItemId array */ - ++lineoff; - } - } - - /* - * if we get here, it means we've exhausted the items on this page and - * it's time to move to the next. - */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - - /* - * advance to next/prior page and detect end of scan - */ - if (backward) - { - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); - if (page == 0) - page = scan->rs_nblocks; - page--; - } - else if (scan->rs_parallel != NULL) - { - page = heap_parallelscan_nextpage(scan); - finished = (page == InvalidBlockNumber); - } - else - { - page++; - if (page >= scan->rs_nblocks) - page = 0; - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); - - /* - * Report our new scan position for synchronization purposes. We - * don't do that when moving backwards, however. That would just - * mess up any other forward-moving scanners. 
- * - * Note: we do this before checking for end of scan so that the - * final state of the position hint is back at the start of the - * rel. That's not strictly necessary, but otherwise when you run - * the same query multiple times the starting position would shift - * a little bit backwards on every invocation, which is confusing. - * We don't guarantee any specific ordering in general, though. - */ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, page); - } - - /* - * return NULL if we've exhausted all the pages - */ - if (finished) - { - if (BufferIsValid(scan->rs_cbuf)) - ReleaseBuffer(scan->rs_cbuf); - scan->rs_cbuf = InvalidBuffer; - scan->rs_cblock = InvalidBlockNumber; - tuple->t_data = NULL; - scan->rs_inited = false; - return; - } - - heapgetpage(scan, page); - - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); - lines = PageGetMaxOffsetNumber((Page) dp); - linesleft = lines; - if (backward) - { - lineoff = lines; - lpp = PageGetItemId(dp, lines); - } - else - { - lineoff = FirstOffsetNumber; - lpp = PageGetItemId(dp, FirstOffsetNumber); - } - } -} - -/* ---------------- - * heapgettup_pagemode - fetch next heap tuple in page-at-a-time mode - * - * Same API as heapgettup, but used in page-at-a-time mode - * - * The internal logic is much the same as heapgettup's too, but there are some - * differences: we do not take the buffer content lock (that only needs to - * happen inside heapgetpage), and we iterate through just the tuples listed - * in rs_vistuples[] rather than all tuples on the page. Notice that - * lineindex is 0-based, where the corresponding loop variable lineoff in - * heapgettup is 1-based. - * ---------------- - */ -static void -heapgettup_pagemode(HeapScanDesc scan, - ScanDirection dir, - int nkeys, - ScanKey key) -{ - HeapTuple tuple = &(scan->rs_ctup); - bool backward = ScanDirectionIsBackward(dir); - BlockNumber page; - bool finished; - Page dp; - int lines; - int lineindex; - OffsetNumber lineoff; - int linesleft; - ItemId lpp; - - /* - * calculate next starting lineindex, given scan direction - */ - if (ScanDirectionIsForward(dir)) - { - if (!scan->rs_inited) - { - /* - * return null immediately if relation is empty - */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - if (scan->rs_parallel != NULL) - { - heap_parallelscan_startblock_init(scan); - - page = heap_parallelscan_nextpage(scan); - - /* Other processes might have already finished the scan. 
*/ - if (page == InvalidBlockNumber) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - } - else - page = scan->rs_startblock; /* first page */ - heapgetpage(scan, page); - lineindex = 0; - scan->rs_inited = true; - } - else - { - /* continue from previously returned page/tuple */ - page = scan->rs_cblock; /* current page */ - lineindex = scan->rs_cindex + 1; - } - - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); - lines = scan->rs_ntuples; - /* page and lineindex now reference the next visible tid */ - - linesleft = lines - lineindex; - } - else if (backward) - { - /* backward parallel scan not supported */ - Assert(scan->rs_parallel == NULL); - - if (!scan->rs_inited) - { - /* - * return null immediately if relation is empty - */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - - /* - * Disable reporting to syncscan logic in a backwards scan; it's - * not very likely anyone else is doing the same thing at the same - * time, and much more likely that we'll just bollix things for - * forward scanners. - */ - scan->rs_syncscan = false; - /* start from last page of the scan */ - if (scan->rs_startblock > 0) - page = scan->rs_startblock - 1; - else - page = scan->rs_nblocks - 1; - heapgetpage(scan, page); - } - else - { - /* continue from previously returned page/tuple */ - page = scan->rs_cblock; /* current page */ - } - - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); - lines = scan->rs_ntuples; - - if (!scan->rs_inited) - { - lineindex = lines - 1; - scan->rs_inited = true; - } - else - { - lineindex = scan->rs_cindex - 1; - } - /* page and lineindex now reference the previous visible tid */ - - linesleft = lineindex + 1; - } - else - { - /* - * ``no movement'' scan direction: refetch prior tuple - */ - if (!scan->rs_inited) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return; - } - - page = ItemPointerGetBlockNumber(&(tuple->t_self)); - if (page != scan->rs_cblock) - heapgetpage(scan, page); - - /* Since the tuple was previously fetched, needn't lock page here */ - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); - lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); - lpp = PageGetItemId(dp, lineoff); - Assert(ItemIdIsNormal(lpp)); - - tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); - tuple->t_len = ItemIdGetLength(lpp); - - /* check that rs_cindex is in sync */ - Assert(scan->rs_cindex < scan->rs_ntuples); - Assert(lineoff == scan->rs_vistuples[scan->rs_cindex]); - - return; - } - - /* - * advance the scan until we find a qualifying tuple or run out of stuff - * to scan - */ - for (;;) - { - while (linesleft > 0) - { - lineoff = scan->rs_vistuples[lineindex]; - lpp = PageGetItemId(dp, lineoff); - Assert(ItemIdIsNormal(lpp)); - - tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); - tuple->t_len = ItemIdGetLength(lpp); - ItemPointerSet(&(tuple->t_self), page, lineoff); - - /* - * if current tuple qualifies, return it. 
- */ - if (key != NULL) - { - bool valid; - - HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), - nkeys, key, valid); - if (valid) - { - scan->rs_cindex = lineindex; - return; - } - } - else - { - scan->rs_cindex = lineindex; - return; - } - - /* - * otherwise move to the next item on the page - */ - --linesleft; - if (backward) - --lineindex; - else - ++lineindex; - } - - /* - * if we get here, it means we've exhausted the items on this page and - * it's time to move to the next. - */ - if (backward) - { - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); - if (page == 0) - page = scan->rs_nblocks; - page--; - } - else if (scan->rs_parallel != NULL) - { - page = heap_parallelscan_nextpage(scan); - finished = (page == InvalidBlockNumber); - } - else - { - page++; - if (page >= scan->rs_nblocks) - page = 0; - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); - - /* - * Report our new scan position for synchronization purposes. We - * don't do that when moving backwards, however. That would just - * mess up any other forward-moving scanners. - * - * Note: we do this before checking for end of scan so that the - * final state of the position hint is back at the start of the - * rel. That's not strictly necessary, but otherwise when you run - * the same query multiple times the starting position would shift - * a little bit backwards on every invocation, which is confusing. - * We don't guarantee any specific ordering in general, though. - */ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, page); - } - - /* - * return NULL if we've exhausted all the pages - */ - if (finished) - { - if (BufferIsValid(scan->rs_cbuf)) - ReleaseBuffer(scan->rs_cbuf); - scan->rs_cbuf = InvalidBuffer; - scan->rs_cblock = InvalidBlockNumber; - tuple->t_data = NULL; - scan->rs_inited = false; - return; - } - - heapgetpage(scan, page); - - dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); - lines = scan->rs_ntuples; - linesleft = lines; - if (backward) - lineindex = lines - 1; - else - lineindex = 0; - } -} - #if defined(DISABLE_COMPLEX_MACRO) /* @@ -1186,317 +443,96 @@ relation_close(Relation relation, LOCKMODE lockmode) UnlockRelationId(&relid, lockmode); } - -/* ---------------- - * heap_open - open a heap relation by relation OID - * - * This is essentially relation_open plus check that the relation - * is not an index nor a composite type. (The caller should also - * check that it's not a view or foreign table before assuming it has - * storage.) - * ---------------- - */ -Relation -heap_open(Oid relationId, LOCKMODE lockmode) -{ - Relation r; - - r = relation_open(relationId, lockmode); - - if (r->rd_rel->relkind == RELKIND_INDEX) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is an index", - RelationGetRelationName(r)))); - else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is a composite type", - RelationGetRelationName(r)))); - - return r; -} - -/* ---------------- - * heap_openrv - open a heap relation specified - * by a RangeVar node - * - * As above, but relation is specified by a RangeVar. 
- * ---------------- - */ -Relation -heap_openrv(const RangeVar *relation, LOCKMODE lockmode) -{ - Relation r; - - r = relation_openrv(relation, lockmode); - - if (r->rd_rel->relkind == RELKIND_INDEX) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is an index", - RelationGetRelationName(r)))); - else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is a composite type", - RelationGetRelationName(r)))); - - return r; -} - -/* ---------------- - * heap_openrv_extended - open a heap relation specified - * by a RangeVar node - * - * As above, but optionally return NULL instead of failing for - * relation-not-found. - * ---------------- - */ -Relation -heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode, - bool missing_ok) -{ - Relation r; - - r = relation_openrv_extended(relation, lockmode, missing_ok); - - if (r) - { - if (r->rd_rel->relkind == RELKIND_INDEX) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is an index", - RelationGetRelationName(r)))); - else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is a composite type", - RelationGetRelationName(r)))); - } - - return r; -} - - -/* ---------------- - * heap_beginscan - begin relation scan - * - * heap_beginscan is the "standard" case. - * - * heap_beginscan_catalog differs in setting up its own temporary snapshot. - * - * heap_beginscan_strat offers an extended API that lets the caller control - * whether a nondefault buffer access strategy can be used, and whether - * syncscan can be chosen (possibly resulting in the scan not starting from - * block zero). Both of these default to TRUE with plain heap_beginscan. - * - * heap_beginscan_bm is an alternative entry point for setting up a - * HeapScanDesc for a bitmap heap scan. Although that scan technology is - * really quite unlike a standard seqscan, there is just enough commonality - * to make it worth using the same data structure. - * - * heap_beginscan_sampling is an alternative entry point for setting up a - * HeapScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth - * using the same data structure although the behavior is rather different. - * In addition to the options offered by heap_beginscan_strat, this call - * also allows control of whether page-mode visibility checking is used. 
- * ---------------- - */ -HeapScanDesc -heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - true, true, true, false, false, false); -} - -HeapScanDesc -heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key) -{ - Oid relid = RelationGetRelid(relation); - Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); - - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - true, true, true, false, false, true); -} - -HeapScanDesc -heap_beginscan_strat(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - allow_strat, allow_sync, true, - false, false, false); -} - -HeapScanDesc -heap_beginscan_bm(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - false, false, true, true, false, false); -} - -HeapScanDesc -heap_beginscan_sampling(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - allow_strat, allow_sync, allow_pagemode, - false, true, false); -} - -static HeapScanDesc -heap_beginscan_internal(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - ParallelHeapScanDesc parallel_scan, - bool allow_strat, - bool allow_sync, - bool allow_pagemode, - bool is_bitmapscan, - bool is_samplescan, - bool temp_snap) -{ - HeapScanDesc scan; - - /* - * increment relation ref count while scanning relation - * - * This is just to make really sure the relcache entry won't go away while - * the scan has a pointer to it. Caller should be holding the rel open - * anyway, so this is redundant in all normal scenarios... - */ - RelationIncrementReferenceCount(relation); - - /* - * allocate and initialize scan descriptor - */ - scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData)); - - scan->rs_rd = relation; - scan->rs_snapshot = snapshot; - scan->rs_nkeys = nkeys; - scan->rs_bitmapscan = is_bitmapscan; - scan->rs_samplescan = is_samplescan; - scan->rs_strategy = NULL; /* set in initscan */ - scan->rs_allow_strat = allow_strat; - scan->rs_allow_sync = allow_sync; - scan->rs_temp_snap = temp_snap; - scan->rs_parallel = parallel_scan; - - /* - * we can use page-at-a-time mode if it's an MVCC-safe snapshot - */ - scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot); - - /* - * For a seqscan in a serializable transaction, acquire a predicate lock - * on the entire relation. This is required not only to lock all the - * matching tuples, but also to conflict with new insertions into the - * table. In an indexscan, we take page locks on the index pages covering - * the range specified in the scan qual, but in a heap scan there is - * nothing more fine-grained to lock. A bitmap scan is a different story, - * there we have already scanned the index and locked the index pages - * covering the predicate. But in that case we still have to lock any - * matching heap tuples. 
- */ - if (!is_bitmapscan) - PredicateLockRelation(relation, snapshot); - - /* we only need to set this up once */ - scan->rs_ctup.t_tableOid = RelationGetRelid(relation); - - /* - * we do this here instead of in initscan() because heap_rescan also calls - * initscan() and we don't want to allocate memory again - */ - if (nkeys > 0) - scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); - else - scan->rs_key = NULL; - - initscan(scan, key, false); - - return scan; -} - + /* ---------------- - * heap_rescan - restart a relation scan + * heap_open - open a heap relation by relation OID + * + * This is essentially relation_open plus check that the relation + * is not an index nor a composite type. (The caller should also + * check that it's not a view or foreign table before assuming it has + * storage.) * ---------------- */ -void -heap_rescan(HeapScanDesc scan, - ScanKey key) +Relation +heap_open(Oid relationId, LOCKMODE lockmode) { - /* - * unpin scan buffers - */ - if (BufferIsValid(scan->rs_cbuf)) - ReleaseBuffer(scan->rs_cbuf); + Relation r; - /* - * reinitialize scan descriptor - */ - initscan(scan, key, true); + r = relation_open(relationId, lockmode); + + if (r->rd_rel->relkind == RELKIND_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is an index", + RelationGetRelationName(r)))); + else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a composite type", + RelationGetRelationName(r)))); + + return r; } /* ---------------- - * heap_rescan_set_params - restart a relation scan after changing params + * heap_openrv - open a heap relation specified + * by a RangeVar node * - * This call allows changing the buffer strategy, syncscan, and pagemode - * options before starting a fresh scan. Note that although the actual use - * of syncscan might change (effectively, enabling or disabling reporting), - * the previously selected startblock will be kept. + * As above, but relation is specified by a RangeVar. * ---------------- */ -void -heap_rescan_set_params(HeapScanDesc scan, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode) +Relation +heap_openrv(const RangeVar *relation, LOCKMODE lockmode) { - /* adjust parameters */ - scan->rs_allow_strat = allow_strat; - scan->rs_allow_sync = allow_sync; - scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot); - /* ... and rescan */ - heap_rescan(scan, key); + Relation r; + + r = relation_openrv(relation, lockmode); + + if (r->rd_rel->relkind == RELKIND_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is an index", + RelationGetRelationName(r)))); + else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a composite type", + RelationGetRelationName(r)))); + + return r; } /* ---------------- - * heap_endscan - end relation scan + * heap_openrv_extended - open a heap relation specified + * by a RangeVar node * - * See how to integrate with index scans. - * Check handling if reldesc caching. + * As above, but optionally return NULL instead of failing for + * relation-not-found. 
* ---------------- */ -void -heap_endscan(HeapScanDesc scan) +Relation +heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode, + bool missing_ok) { - /* Note: no locking manipulations needed */ - - /* - * unpin scan buffers - */ - if (BufferIsValid(scan->rs_cbuf)) - ReleaseBuffer(scan->rs_cbuf); - - /* - * decrement relation reference count and free scan descriptor storage - */ - RelationDecrementReferenceCount(scan->rs_rd); - - if (scan->rs_key) - pfree(scan->rs_key); + Relation r; - if (scan->rs_strategy != NULL) - FreeAccessStrategy(scan->rs_strategy); + r = relation_openrv_extended(relation, lockmode, missing_ok); - if (scan->rs_temp_snap) - UnregisterSnapshot(scan->rs_snapshot); + if (r) + { + if (r->rd_rel->relkind == RELKIND_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is an index", + RelationGetRelationName(r)))); + else if (r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a composite type", + RelationGetRelationName(r)))); + } - pfree(scan); + return r; } /* ---------------- @@ -1550,384 +586,6 @@ heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan) pg_atomic_write_u64(¶llel_scan->phs_nallocated, 0); } -/* ---------------- - * heap_beginscan_parallel - join a parallel scan - * - * Caller must hold a suitable lock on the correct relation. - * ---------------- - */ -HeapScanDesc -heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan) -{ - Snapshot snapshot; - - Assert(RelationGetRelid(relation) == parallel_scan->phs_relid); - snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data); - RegisterSnapshot(snapshot); - - return heap_beginscan_internal(relation, snapshot, 0, NULL, parallel_scan, - true, true, true, false, false, true); -} - -/* ---------------- - * heap_parallelscan_startblock_init - find and set the scan's startblock - * - * Determine where the parallel seq scan should start. This function may - * be called many times, once by each parallel worker. We must be careful - * only to set the startblock once. - * ---------------- - */ -static void -heap_parallelscan_startblock_init(HeapScanDesc scan) -{ - BlockNumber sync_startpage = InvalidBlockNumber; - ParallelHeapScanDesc parallel_scan; - - Assert(scan->rs_parallel); - parallel_scan = scan->rs_parallel; - -retry: - /* Grab the spinlock. */ - SpinLockAcquire(¶llel_scan->phs_mutex); - - /* - * If the scan's startblock has not yet been initialized, we must do so - * now. If this is not a synchronized scan, we just start at block 0, but - * if it is a synchronized scan, we must get the starting position from - * the synchronized scan machinery. We can't hold the spinlock while - * doing that, though, so release the spinlock, get the information we - * need, and retry. If nobody else has initialized the scan in the - * meantime, we'll fill in the value we fetched on the second time - * through. - */ - if (parallel_scan->phs_startblock == InvalidBlockNumber) - { - if (!parallel_scan->phs_syncscan) - parallel_scan->phs_startblock = 0; - else if (sync_startpage != InvalidBlockNumber) - parallel_scan->phs_startblock = sync_startpage; - else - { - SpinLockRelease(¶llel_scan->phs_mutex); - sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks); - goto retry; - } - } - SpinLockRelease(¶llel_scan->phs_mutex); -} - -/* ---------------- - * heap_parallelscan_nextpage - get the next page to scan - * - * Get the next page to scan. 
Even if there are no pages left to scan, - * another backend could have grabbed a page to scan and not yet finished - * looking at it, so it doesn't follow that the scan is done when the - * first backend gets an InvalidBlockNumber return. - * ---------------- - */ -static BlockNumber -heap_parallelscan_nextpage(HeapScanDesc scan) -{ - BlockNumber page; - ParallelHeapScanDesc parallel_scan; - uint64 nallocated; - - Assert(scan->rs_parallel); - parallel_scan = scan->rs_parallel; - - /* - * phs_nallocated tracks how many pages have been allocated to workers - * already. When phs_nallocated >= rs_nblocks, all blocks have been - * allocated. - * - * Because we use an atomic fetch-and-add to fetch the current value, the - * phs_nallocated counter will exceed rs_nblocks, because workers will - * still increment the value, when they try to allocate the next block but - * all blocks have been allocated already. The counter must be 64 bits - * wide because of that, to avoid wrapping around when rs_nblocks is close - * to 2^32. - * - * The actual page to return is calculated by adding the counter to the - * starting block number, modulo nblocks. - */ - nallocated = pg_atomic_fetch_add_u64(¶llel_scan->phs_nallocated, 1); - if (nallocated >= scan->rs_nblocks) - page = InvalidBlockNumber; /* all blocks have been allocated */ - else - page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks; - - /* - * Report scan location. Normally, we report the current page number. - * When we reach the end of the scan, though, we report the starting page, - * not the ending page, just so the starting positions for later scans - * doesn't slew backwards. We only report the position at the end of the - * scan once, though: subsequent callers will report nothing. - */ - if (scan->rs_syncscan) - { - if (page != InvalidBlockNumber) - ss_report_location(scan->rs_rd, page); - else if (nallocated == scan->rs_nblocks) - ss_report_location(scan->rs_rd, parallel_scan->phs_startblock); - } - - return page; -} - -/* ---------------- - * heap_update_snapshot - * - * Update snapshot info in heap scan descriptor. - * ---------------- - */ -void -heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot) -{ - Assert(IsMVCCSnapshot(snapshot)); - - RegisterSnapshot(snapshot); - scan->rs_snapshot = snapshot; - scan->rs_temp_snap = true; -} - -/* ---------------- - * heap_getnext - retrieve next tuple in scan - * - * Fix to work with index relations. - * We don't return the buffer anymore, but you can get it from the - * returned HeapTuple. 
- * ---------------- - */ - -#ifdef HEAPDEBUGALL -#define HEAPDEBUG_1 \ - elog(DEBUG2, "heap_getnext([%s,nkeys=%d],dir=%d) called", \ - RelationGetRelationName(scan->rs_rd), scan->rs_nkeys, (int) direction) -#define HEAPDEBUG_2 \ - elog(DEBUG2, "heap_getnext returning EOS") -#define HEAPDEBUG_3 \ - elog(DEBUG2, "heap_getnext returning tuple") -#else -#define HEAPDEBUG_1 -#define HEAPDEBUG_2 -#define HEAPDEBUG_3 -#endif /* !defined(HEAPDEBUGALL) */ - - -HeapTuple -heap_getnext(HeapScanDesc scan, ScanDirection direction) -{ - /* Note: no locking manipulations needed */ - - HEAPDEBUG_1; /* heap_getnext( info ) */ - - if (scan->rs_pageatatime) - heapgettup_pagemode(scan, direction, - scan->rs_nkeys, scan->rs_key); - else - heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key); - - if (scan->rs_ctup.t_data == NULL) - { - HEAPDEBUG_2; /* heap_getnext returning EOS */ - return NULL; - } - - /* - * if we get here it means we have a new current scan tuple, so point to - * the proper return buffer and return the tuple. - */ - HEAPDEBUG_3; /* heap_getnext returning tuple */ - - pgstat_count_heap_getnext(scan->rs_rd); - - return &(scan->rs_ctup); -} - -/* - * heap_hot_search_buffer - search HOT chain for tuple satisfying snapshot - * - * On entry, *tid is the TID of a tuple (either a simple tuple, or the root - * of a HOT chain), and buffer is the buffer holding this tuple. We search - * for the first chain member satisfying the given snapshot. If one is - * found, we update *tid to reference that tuple's offset number, and - * return TRUE. If no match, return FALSE without modifying *tid. - * - * heapTuple is a caller-supplied buffer. When a match is found, we return - * the tuple here, in addition to updating *tid. If no match is found, the - * contents of this buffer on return are undefined. - * - * If all_dead is not NULL, we check non-visible tuples to see if they are - * globally dead; *all_dead is set TRUE if all members of the HOT chain - * are vacuumable, FALSE if not. - * - * Unlike heap_fetch, the caller must already have pin and (at least) share - * lock on the buffer; it is still pinned/locked at exit. Also unlike - * heap_fetch, we do not report any pgstats count; caller may do so if wanted. - */ -bool -heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, - Snapshot snapshot, HeapTuple heapTuple, - bool *all_dead, bool first_call) -{ - Page dp = (Page) BufferGetPage(buffer); - TransactionId prev_xmax = InvalidTransactionId; - OffsetNumber offnum; - bool at_chain_start; - bool valid; - bool skip; - - /* If this is not the first call, previous call returned a (live!) 
tuple */ - if (all_dead) - *all_dead = first_call; - - Assert(TransactionIdIsValid(RecentGlobalXmin)); - - Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer)); - offnum = ItemPointerGetOffsetNumber(tid); - at_chain_start = first_call; - skip = !first_call; - - heapTuple->t_self = *tid; - - /* Scan through possible multiple members of HOT-chain */ - for (;;) - { - ItemId lp; - - /* check for bogus TID */ - if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp)) - break; - - lp = PageGetItemId(dp, offnum); - - /* check for unused, dead, or redirected items */ - if (!ItemIdIsNormal(lp)) - { - /* We should only see a redirect at start of chain */ - if (ItemIdIsRedirected(lp) && at_chain_start) - { - /* Follow the redirect */ - offnum = ItemIdGetRedirect(lp); - at_chain_start = false; - continue; - } - /* else must be end of chain */ - break; - } - - heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); - heapTuple->t_len = ItemIdGetLength(lp); - heapTuple->t_tableOid = RelationGetRelid(relation); - ItemPointerSetOffsetNumber(&heapTuple->t_self, offnum); - - /* - * Shouldn't see a HEAP_ONLY tuple at chain start. - */ - if (at_chain_start && HeapTupleIsHeapOnly(heapTuple)) - break; - - /* - * The xmin should match the previous xmax value, else chain is - * broken. - */ - if (TransactionIdIsValid(prev_xmax) && - !TransactionIdEquals(prev_xmax, - HeapTupleHeaderGetXmin(heapTuple->t_data))) - break; - - /* - * When first_call is true (and thus, skip is initially false) we'll - * return the first tuple we find. But on later passes, heapTuple - * will initially be pointing to the tuple we returned last time. - * Returning it again would be incorrect (and would loop forever), so - * we skip it and return the next match we find. - */ - if (!skip) - { - /* - * For the benefit of logical decoding, have t_self point at the - * element of the HOT chain we're currently investigating instead - * of the root tuple of the HOT chain. This is important because - * the *Satisfies routine for historical mvcc snapshots needs the - * correct tid to decide about the visibility in some cases. - */ - ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum); - - /* If it's visible per the snapshot, we must return it */ - valid = HeapTupleSatisfiesVisibility(relation->rd_stamroutine, heapTuple, snapshot, buffer); - CheckForSerializableConflictOut(valid, relation, heapTuple, - buffer, snapshot); - /* reset to original, non-redirected, tid */ - heapTuple->t_self = *tid; - - if (valid) - { - ItemPointerSetOffsetNumber(tid, offnum); - PredicateLockTuple(relation, heapTuple, snapshot); - if (all_dead) - *all_dead = false; - return true; - } - } - skip = false; - - /* - * If we can't see it, maybe no one else can either. At caller - * request, check whether all chain members are dead to all - * transactions. - * - * Note: if you change the criterion here for what is "dead", fix the - * planner's get_actual_variable_range() function to match. - */ - if (all_dead && *all_dead && - !HeapTupleIsSurelyDead(heapTuple, RecentGlobalXmin)) - *all_dead = false; - - /* - * Check to see if HOT chain continues past this tuple; if so fetch - * the next offnum and loop around. 
- */ - if (HeapTupleIsHotUpdated(heapTuple)) - { - Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) == - ItemPointerGetBlockNumber(tid)); - offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); - at_chain_start = false; - prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); - } - else - break; /* end of chain */ - } - - return false; -} - -/* - * heap_hot_search - search HOT chain for tuple satisfying snapshot - * - * This has the same API as heap_hot_search_buffer, except that the caller - * does not provide the buffer containing the page, rather we access it - * locally. - */ -bool -heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot, - bool *all_dead) -{ - bool result; - Buffer buffer; - HeapTupleData heapTuple; - - buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); - LockBuffer(buffer, BUFFER_LOCK_SHARE); - result = heap_hot_search_buffer(tid, relation, buffer, snapshot, - &heapTuple, all_dead, true); - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buffer); - return result; -} - - /* * UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends * @@ -4762,32 +3420,6 @@ heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) tuple->t_infomask2 = frz->t_infomask2; } -/* - * heap_freeze_tuple - * Freeze tuple in place, without WAL logging. - * - * Useful for callers like CLUSTER that perform their own WAL logging. - */ -bool -heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, - TransactionId cutoff_multi) -{ - xl_heap_freeze_tuple frz; - bool do_freeze; - bool tuple_totally_frozen; - - do_freeze = heap_prepare_freeze_tuple(tuple, cutoff_xid, cutoff_multi, - &frz, &tuple_totally_frozen); - - /* - * Note that because this is not a WAL-logged operation, we don't need to - * fill in the offset in the freeze record. - */ - - if (do_freeze) - heap_execute_freeze_tuple(tuple, &frz); - return do_freeze; -} /* * For a given MultiXactId, return the hint bits that should be set in the diff --git a/src/backend/access/heap/heapam_storage.c b/src/backend/access/heap/heapam_storage.c index a0e3272..12a8f56 100644 --- a/src/backend/access/heap/heapam_storage.c +++ b/src/backend/access/heap/heapam_storage.c @@ -1666,6 +1666,1094 @@ HeapTupleSatisfiesHistoricMVCC(StorageTuple stup, Snapshot snapshot, return true; } +/* ---------------------------------------------------------------- + * heap support routines + * ---------------------------------------------------------------- + */ + +/* ---------------- + * heap_parallelscan_startblock_init - find and set the scan's startblock + * + * Determine where the parallel seq scan should start. This function may + * be called many times, once by each parallel worker. We must be careful + * only to set the startblock once. + * ---------------- + */ +static void +heap_parallelscan_startblock_init(HeapScanDesc scan) +{ + BlockNumber sync_startpage = InvalidBlockNumber; + ParallelHeapScanDesc parallel_scan; + + Assert(scan->rs_parallel); + parallel_scan = scan->rs_parallel; + +retry: + /* Grab the spinlock. */ + SpinLockAcquire(¶llel_scan->phs_mutex); + + /* + * If the scan's startblock has not yet been initialized, we must do so + * now. If this is not a synchronized scan, we just start at block 0, but + * if it is a synchronized scan, we must get the starting position from + * the synchronized scan machinery. We can't hold the spinlock while + * doing that, though, so release the spinlock, get the information we + * need, and retry. 
If nobody else has initialized the scan in the + * meantime, we'll fill in the value we fetched on the second time + * through. + */ + if (parallel_scan->phs_startblock == InvalidBlockNumber) + { + if (!parallel_scan->phs_syncscan) + parallel_scan->phs_startblock = 0; + else if (sync_startpage != InvalidBlockNumber) + parallel_scan->phs_startblock = sync_startpage; + else + { + SpinLockRelease(¶llel_scan->phs_mutex); + sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks); + goto retry; + } + } + SpinLockRelease(¶llel_scan->phs_mutex); +} + +/* ---------------- + * heap_parallelscan_nextpage - get the next page to scan + * + * Get the next page to scan. Even if there are no pages left to scan, + * another backend could have grabbed a page to scan and not yet finished + * looking at it, so it doesn't follow that the scan is done when the + * first backend gets an InvalidBlockNumber return. + * ---------------- + */ +static BlockNumber +heap_parallelscan_nextpage(HeapScanDesc scan) +{ + BlockNumber page; + ParallelHeapScanDesc parallel_scan; + uint64 nallocated; + + Assert(scan->rs_parallel); + parallel_scan = scan->rs_parallel; + + /* + * phs_nallocated tracks how many pages have been allocated to workers + * already. When phs_nallocated >= rs_nblocks, all blocks have been + * allocated. + * + * Because we use an atomic fetch-and-add to fetch the current value, the + * phs_nallocated counter will exceed rs_nblocks, because workers will + * still increment the value, when they try to allocate the next block but + * all blocks have been allocated already. The counter must be 64 bits + * wide because of that, to avoid wrapping around when rs_nblocks is close + * to 2^32. + * + * The actual page to return is calculated by adding the counter to the + * starting block number, modulo nblocks. + */ + nallocated = pg_atomic_fetch_add_u64(¶llel_scan->phs_nallocated, 1); + if (nallocated >= scan->rs_nblocks) + page = InvalidBlockNumber; /* all blocks have been allocated */ + else + page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks; + + /* + * Report scan location. Normally, we report the current page number. + * When we reach the end of the scan, though, we report the starting page, + * not the ending page, just so the starting positions for later scans + * doesn't slew backwards. We only report the position at the end of the + * scan once, though: subsequent callers will report nothing. + */ + if (scan->rs_syncscan) + { + if (page != InvalidBlockNumber) + ss_report_location(scan->rs_rd, page); + else if (nallocated == scan->rs_nblocks) + ss_report_location(scan->rs_rd, parallel_scan->phs_startblock); + } + + return page; +} + + +/* ---------------- + * initscan - scan code common to heap_beginscan and heap_rescan + * ---------------- + */ +static void +initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) +{ + bool allow_strat; + bool allow_sync; + + /* + * Determine the number of blocks we have to scan. + * + * It is sufficient to do this once at scan start, since any tuples added + * while the scan is in progress will be invisible to my snapshot anyway. + * (That is not true when using a non-MVCC snapshot. However, we couldn't + * guarantee to return tuples added after scan start anyway, since they + * might go into pages we already scanned. To guarantee consistent + * results for a non-MVCC snapshot, the caller must hold some higher-level + * lock that ensures the interesting tuple(s) won't change.) 
+ */ + if (scan->rs_parallel != NULL) + scan->rs_nblocks = scan->rs_parallel->phs_nblocks; + else + scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd); + + /* + * If the table is large relative to NBuffers, use a bulk-read access + * strategy and enable synchronized scanning (see syncscan.c). Although + * the thresholds for these features could be different, we make them the + * same so that there are only two behaviors to tune rather than four. + * (However, some callers need to be able to disable one or both of these + * behaviors, independently of the size of the table; also there is a GUC + * variable that can disable synchronized scanning.) + * + * Note that heap_parallelscan_initialize has a very similar test; if you + * change this, consider changing that one, too. + */ + if (!RelationUsesLocalBuffers(scan->rs_rd) && + scan->rs_nblocks > NBuffers / 4) + { + allow_strat = scan->rs_allow_strat; + allow_sync = scan->rs_allow_sync; + } + else + allow_strat = allow_sync = false; + + if (allow_strat) + { + /* During a rescan, keep the previous strategy object. */ + if (scan->rs_strategy == NULL) + scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD); + } + else + { + if (scan->rs_strategy != NULL) + FreeAccessStrategy(scan->rs_strategy); + scan->rs_strategy = NULL; + } + + if (scan->rs_parallel != NULL) + { + /* For parallel scan, believe whatever ParallelHeapScanDesc says. */ + scan->rs_syncscan = scan->rs_parallel->phs_syncscan; + } + else if (keep_startblock) + { + /* + * When rescanning, we want to keep the previous startblock setting, + * so that rewinding a cursor doesn't generate surprising results. + * Reset the active syncscan setting, though. + */ + scan->rs_syncscan = (allow_sync && synchronize_seqscans); + } + else if (allow_sync && synchronize_seqscans) + { + scan->rs_syncscan = true; + scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks); + } + else + { + scan->rs_syncscan = false; + scan->rs_startblock = 0; + } + + scan->rs_numblocks = InvalidBlockNumber; + scan->rs_inited = false; + scan->rs_ctup.t_data = NULL; + ItemPointerSetInvalid(&scan->rs_ctup.t_self); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + + /* page-at-a-time fields are always invalid when not rs_inited */ + + /* + * copy the scan key, if appropriate + */ + if (key != NULL) + memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData)); + + /* + * Currently, we don't have a stats counter for bitmap heap scans (but the + * underlying bitmap index scans will be counted) or sample scans (we only + * update stats for tuple fetches there) + */ + if (!scan->rs_bitmapscan && !scan->rs_samplescan) + pgstat_count_heap_scan(scan->rs_rd); +} + + +/* ---------------- + * heapgettup - fetch next heap tuple + * + * Initialize the scan if not already done; then advance to the next + * tuple as indicated by "dir"; return the next tuple in scan->rs_ctup, + * or set scan->rs_ctup.t_data = NULL if no more tuples. + * + * dir == NoMovementScanDirection means "re-fetch the tuple indicated + * by scan->rs_ctup". + * + * Note: the reason nkeys/key are passed separately, even though they are + * kept in the scan descriptor, is that the caller may not want us to check + * the scankeys. + * + * Note: when we fall off the end of the scan in either direction, we + * reset rs_inited. 
This means that a further request with the same + * scan direction will restart the scan, which is a bit odd, but a + * request with the opposite scan direction will start a fresh scan + * in the proper direction. The latter is required behavior for cursors, + * while the former case is generally undefined behavior in Postgres + * so we don't care too much. + * ---------------- + */ +static void +heapgettup(HeapScanDesc scan, + ScanDirection dir, + int nkeys, + ScanKey key) +{ + HeapTuple tuple = &(scan->rs_ctup); + Snapshot snapshot = scan->rs_snapshot; + bool backward = ScanDirectionIsBackward(dir); + BlockNumber page; + bool finished; + Page dp; + int lines; + OffsetNumber lineoff; + int linesleft; + ItemId lpp; + + /* + * calculate next starting lineoff, given scan direction + */ + if (ScanDirectionIsForward(dir)) + { + if (!scan->rs_inited) + { + /* + * return null immediately if relation is empty + */ + if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + if (scan->rs_parallel != NULL) + { + heap_parallelscan_startblock_init(scan); + + page = heap_parallelscan_nextpage(scan); + + /* Other processes might have already finished the scan. */ + if (page == InvalidBlockNumber) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + } + else + page = scan->rs_startblock; /* first page */ + heapgetpage(scan, page); + lineoff = FirstOffsetNumber; /* first offnum */ + scan->rs_inited = true; + } + else + { + /* continue from previously returned page/tuple */ + page = scan->rs_cblock; /* current page */ + lineoff = /* next offnum */ + OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self))); + } + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(snapshot, scan->rs_rd, dp); + lines = PageGetMaxOffsetNumber(dp); + /* page and lineoff now reference the physically next tid */ + + linesleft = lines - lineoff + 1; + } + else if (backward) + { + /* backward parallel scan not supported */ + Assert(scan->rs_parallel == NULL); + + if (!scan->rs_inited) + { + /* + * return null immediately if relation is empty + */ + if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. 
+ */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; + heapgetpage(scan, page); + } + else + { + /* continue from previously returned page/tuple */ + page = scan->rs_cblock; /* current page */ + } + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(snapshot, scan->rs_rd, dp); + lines = PageGetMaxOffsetNumber(dp); + + if (!scan->rs_inited) + { + lineoff = lines; /* final offnum */ + scan->rs_inited = true; + } + else + { + lineoff = /* previous offnum */ + OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self))); + } + /* page and lineoff now reference the physically previous tid */ + + linesleft = lineoff; + } + else + { + /* + * ``no movement'' scan direction: refetch prior tuple + */ + if (!scan->rs_inited) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + + page = ItemPointerGetBlockNumber(&(tuple->t_self)); + if (page != scan->rs_cblock) + heapgetpage(scan, page); + + /* Since the tuple was previously fetched, needn't lock page here */ + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(snapshot, scan->rs_rd, dp); + lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); + lpp = PageGetItemId(dp, lineoff); + Assert(ItemIdIsNormal(lpp)); + + tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); + tuple->t_len = ItemIdGetLength(lpp); + + return; + } + + /* + * advance the scan until we find a qualifying tuple or run out of stuff + * to scan + */ + lpp = PageGetItemId(dp, lineoff); + for (;;) + { + while (linesleft > 0) + { + if (ItemIdIsNormal(lpp)) + { + bool valid; + + tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); + tuple->t_len = ItemIdGetLength(lpp); + ItemPointerSet(&(tuple->t_self), page, lineoff); + + /* + * if current tuple qualifies, return it. + */ + valid = HeapTupleSatisfiesVisibility(scan->rs_rd->rd_stamroutine, + tuple, + snapshot, + scan->rs_cbuf); + + CheckForSerializableConflictOut(valid, scan->rs_rd, tuple, + scan->rs_cbuf, snapshot); + + if (valid && key != NULL) + HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), + nkeys, key, valid); + + if (valid) + { + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + return; + } + } + + /* + * otherwise move to the next item on the page + */ + --linesleft; + if (backward) + { + --lpp; /* move back in this page's ItemId array */ + --lineoff; + } + else + { + ++lpp; /* move forward in this page's ItemId array */ + ++lineoff; + } + } + + /* + * if we get here, it means we've exhausted the items on this page and + * it's time to move to the next. + */ + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + /* + * advance to next/prior page and detect end of scan + */ + if (backward) + { + finished = (page == scan->rs_startblock) || + (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else if (scan->rs_parallel != NULL) + { + page = heap_parallelscan_nextpage(scan); + finished = (page == InvalidBlockNumber); + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock) || + (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + + /* + * Report our new scan position for synchronization purposes. We + * don't do that when moving backwards, however. That would just + * mess up any other forward-moving scanners. 
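The end-of-scan tests above reduce to walking a circular sequence of block numbers and stopping when the scan wraps back to its start block; a minimal sketch of the forward case, assuming no rs_numblocks limit (illustrative only, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

/* Advance one page forward around the circular range [0, nblocks). */
static bool
advance_forward(uint32_t *page, uint32_t nblocks, uint32_t startblock)
{
	(*page)++;
	if (*page >= nblocks)
		*page = 0;					/* wrap past the last block */
	return *page == startblock;		/* true => back at the start: scan finished */
}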
+ * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } + + /* + * return NULL if we've exhausted all the pages + */ + if (finished) + { + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + tuple->t_data = NULL; + scan->rs_inited = false; + return; + } + + heapgetpage(scan, page); + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(snapshot, scan->rs_rd, dp); + lines = PageGetMaxOffsetNumber((Page) dp); + linesleft = lines; + if (backward) + { + lineoff = lines; + lpp = PageGetItemId(dp, lines); + } + else + { + lineoff = FirstOffsetNumber; + lpp = PageGetItemId(dp, FirstOffsetNumber); + } + } +} + +/* ---------------- + * heapgettup_pagemode - fetch next heap tuple in page-at-a-time mode + * + * Same API as heapgettup, but used in page-at-a-time mode + * + * The internal logic is much the same as heapgettup's too, but there are some + * differences: we do not take the buffer content lock (that only needs to + * happen inside heapgetpage), and we iterate through just the tuples listed + * in rs_vistuples[] rather than all tuples on the page. Notice that + * lineindex is 0-based, where the corresponding loop variable lineoff in + * heapgettup is 1-based. + * ---------------- + */ +static void +heapgettup_pagemode(HeapScanDesc scan, + ScanDirection dir, + int nkeys, + ScanKey key) +{ + HeapTuple tuple = &(scan->rs_ctup); + bool backward = ScanDirectionIsBackward(dir); + BlockNumber page; + bool finished; + Page dp; + int lines; + int lineindex; + OffsetNumber lineoff; + int linesleft; + ItemId lpp; + + /* + * calculate next starting lineindex, given scan direction + */ + if (ScanDirectionIsForward(dir)) + { + if (!scan->rs_inited) + { + /* + * return null immediately if relation is empty + */ + if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + if (scan->rs_parallel != NULL) + { + heap_parallelscan_startblock_init(scan); + + page = heap_parallelscan_nextpage(scan); + + /* Other processes might have already finished the scan. 
*/ + if (page == InvalidBlockNumber) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + } + else + page = scan->rs_startblock; /* first page */ + heapgetpage(scan, page); + lineindex = 0; + scan->rs_inited = true; + } + else + { + /* continue from previously returned page/tuple */ + page = scan->rs_cblock; /* current page */ + lineindex = scan->rs_cindex + 1; + } + + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + lines = scan->rs_ntuples; + /* page and lineindex now reference the next visible tid */ + + linesleft = lines - lineindex; + } + else if (backward) + { + /* backward parallel scan not supported */ + Assert(scan->rs_parallel == NULL); + + if (!scan->rs_inited) + { + /* + * return null immediately if relation is empty + */ + if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + + /* + * Disable reporting to syncscan logic in a backwards scan; it's + * not very likely anyone else is doing the same thing at the same + * time, and much more likely that we'll just bollix things for + * forward scanners. + */ + scan->rs_syncscan = false; + /* start from last page of the scan */ + if (scan->rs_startblock > 0) + page = scan->rs_startblock - 1; + else + page = scan->rs_nblocks - 1; + heapgetpage(scan, page); + } + else + { + /* continue from previously returned page/tuple */ + page = scan->rs_cblock; /* current page */ + } + + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + lines = scan->rs_ntuples; + + if (!scan->rs_inited) + { + lineindex = lines - 1; + scan->rs_inited = true; + } + else + { + lineindex = scan->rs_cindex - 1; + } + /* page and lineindex now reference the previous visible tid */ + + linesleft = lineindex + 1; + } + else + { + /* + * ``no movement'' scan direction: refetch prior tuple + */ + if (!scan->rs_inited) + { + Assert(!BufferIsValid(scan->rs_cbuf)); + tuple->t_data = NULL; + return; + } + + page = ItemPointerGetBlockNumber(&(tuple->t_self)); + if (page != scan->rs_cblock) + heapgetpage(scan, page); + + /* Since the tuple was previously fetched, needn't lock page here */ + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); + lpp = PageGetItemId(dp, lineoff); + Assert(ItemIdIsNormal(lpp)); + + tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); + tuple->t_len = ItemIdGetLength(lpp); + + /* check that rs_cindex is in sync */ + Assert(scan->rs_cindex < scan->rs_ntuples); + Assert(lineoff == scan->rs_vistuples[scan->rs_cindex]); + + return; + } + + /* + * advance the scan until we find a qualifying tuple or run out of stuff + * to scan + */ + for (;;) + { + while (linesleft > 0) + { + lineoff = scan->rs_vistuples[lineindex]; + lpp = PageGetItemId(dp, lineoff); + Assert(ItemIdIsNormal(lpp)); + + tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); + tuple->t_len = ItemIdGetLength(lpp); + ItemPointerSet(&(tuple->t_self), page, lineoff); + + /* + * if current tuple qualifies, return it. 
+ */ + if (key != NULL) + { + bool valid; + + HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), + nkeys, key, valid); + if (valid) + { + scan->rs_cindex = lineindex; + return; + } + } + else + { + scan->rs_cindex = lineindex; + return; + } + + /* + * otherwise move to the next item on the page + */ + --linesleft; + if (backward) + --lineindex; + else + ++lineindex; + } + + /* + * if we get here, it means we've exhausted the items on this page and + * it's time to move to the next. + */ + if (backward) + { + finished = (page == scan->rs_startblock) || + (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + if (page == 0) + page = scan->rs_nblocks; + page--; + } + else if (scan->rs_parallel != NULL) + { + page = heap_parallelscan_nextpage(scan); + finished = (page == InvalidBlockNumber); + } + else + { + page++; + if (page >= scan->rs_nblocks) + page = 0; + finished = (page == scan->rs_startblock) || + (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + + /* + * Report our new scan position for synchronization purposes. We + * don't do that when moving backwards, however. That would just + * mess up any other forward-moving scanners. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_syncscan) + ss_report_location(scan->rs_rd, page); + } + + /* + * return NULL if we've exhausted all the pages + */ + if (finished) + { + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + tuple->t_data = NULL; + scan->rs_inited = false; + return; + } + + heapgetpage(scan, page); + + dp = BufferGetPage(scan->rs_cbuf); + TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + lines = scan->rs_ntuples; + linesleft = lines; + if (backward) + lineindex = lines - 1; + else + lineindex = 0; + } +} + + +static HeapScanDesc +heapam_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelHeapScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap) +{ + HeapScanDesc scan; + + /* + * increment relation ref count while scanning relation + * + * This is just to make really sure the relcache entry won't go away while + * the scan has a pointer to it. Caller should be holding the rel open + * anyway, so this is redundant in all normal scenarios... 
+ */ + RelationIncrementReferenceCount(relation); + + /* + * allocate and initialize scan descriptor + */ + scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData)); + + scan->rs_rd = relation; + scan->rs_snapshot = snapshot; + scan->rs_nkeys = nkeys; + scan->rs_bitmapscan = is_bitmapscan; + scan->rs_samplescan = is_samplescan; + scan->rs_strategy = NULL; /* set in initscan */ + scan->rs_allow_strat = allow_strat; + scan->rs_allow_sync = allow_sync; + scan->rs_temp_snap = temp_snap; + scan->rs_parallel = parallel_scan; + + /* + * we can use page-at-a-time mode if it's an MVCC-safe snapshot + */ + scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot); + + /* + * For a seqscan in a serializable transaction, acquire a predicate lock + * on the entire relation. This is required not only to lock all the + * matching tuples, but also to conflict with new insertions into the + * table. In an indexscan, we take page locks on the index pages covering + * the range specified in the scan qual, but in a heap scan there is + * nothing more fine-grained to lock. A bitmap scan is a different story, + * there we have already scanned the index and locked the index pages + * covering the predicate. But in that case we still have to lock any + * matching heap tuples. + */ + if (!is_bitmapscan) + PredicateLockRelation(relation, snapshot); + + /* we only need to set this up once */ + scan->rs_ctup.t_tableOid = RelationGetRelid(relation); + + /* + * we do this here instead of in initscan() because heapam_rescan also + * calls initscan() and we don't want to allocate memory again + */ + if (nkeys > 0) + scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); + else + scan->rs_key = NULL; + + initscan(scan, key, false); + + return scan; +} + +/* ---------------- + * heapam_rescan - restart a relation scan + * ---------------- + */ +static void +heapam_rescan(HeapScanDesc scan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + if (set_params) + { + scan->rs_allow_strat = allow_strat; + scan->rs_allow_sync = allow_sync; + scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot); + } + + /* + * unpin scan buffers + */ + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + + /* + * reinitialize scan descriptor + */ + initscan(scan, key, true); + + /* + * reset parallel scan, if present + */ + if (scan->rs_parallel != NULL) + { + ParallelHeapScanDesc parallel_scan; + + /* + * Caller is responsible for making sure that all workers have + * finished the scan before calling this. + */ + parallel_scan = scan->rs_parallel; + pg_atomic_write_u64(&parallel_scan->phs_nallocated, 0); + } +} + +/* ---------------- + * heapam_endscan - end relation scan + * + * See how to integrate with index scans. + * Check handling of reldesc caching. + * ---------------- + */ +static void +heapam_endscan(HeapScanDesc scan) +{ + /* Note: no locking manipulations needed */ + + /* + * unpin scan buffers + */ + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + + /* + * decrement relation reference count and free scan descriptor storage + */ + RelationDecrementReferenceCount(scan->rs_rd); + + if (scan->rs_key) + pfree(scan->rs_key); + + if (scan->rs_strategy != NULL) + FreeAccessStrategy(scan->rs_strategy); + + if (scan->rs_temp_snap) + UnregisterSnapshot(scan->rs_snapshot); + + pfree(scan); +} + +/* ---------------- + * heapam_scan_update_snapshot + * + * Update snapshot info in heap scan descriptor.
+ * ---------------- + */ +static void +heapam_scan_update_snapshot(HeapScanDesc scan, Snapshot snapshot) +{ + Assert(IsMVCCSnapshot(snapshot)); + + RegisterSnapshot(snapshot); + scan->rs_snapshot = snapshot; + scan->rs_temp_snap = true; +} + +/* ---------------- + * heapam_getnext - retrieve next tuple in scan + * + * Fix to work with index relations. + * We don't return the buffer anymore, but you can get it from the + * returned HeapTuple. + * ---------------- + */ + +#ifdef HEAPAMDEBUGALL +#define HEAPAMDEBUG_1 \ + elog(DEBUG2, "heapam_getnext([%s,nkeys=%d],dir=%d) called", \ + RelationGetRelationName(scan->rs_rd), scan->rs_nkeys, (int) direction) +#define HEAPAMDEBUG_2 \ + elog(DEBUG2, "heapam_getnext returning EOS") +#define HEAPAMDEBUG_3 \ + elog(DEBUG2, "heapam_getnext returning tuple") +#else +#define HEAPAMDEBUG_1 +#define HEAPAMDEBUG_2 +#define HEAPAMDEBUG_3 +#endif /* !defined(HEAPAMDEBUGALL) */ + + +static StorageTuple +heapam_getnext(HeapScanDesc sscan, ScanDirection direction) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + + /* Note: no locking manipulations needed */ + + HEAPAMDEBUG_1; /* heapam_getnext( info ) */ + + if (scan->rs_pageatatime) + heapgettup_pagemode(scan, direction, + scan->rs_nkeys, scan->rs_key); + else + heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key); + + if (scan->rs_ctup.t_data == NULL) + { + HEAPAMDEBUG_2; /* heapam_getnext returning EOS */ + return NULL; + } + + /* + * if we get here it means we have a new current scan tuple, so point to + * the proper return buffer and return the tuple. + */ + HEAPAMDEBUG_3; /* heapam_getnext returning tuple */ + + pgstat_count_heap_getnext(scan->rs_rd); + + return &(scan->rs_ctup); +} + +#ifdef HEAPAMSLOTDEBUGALL +#define HEAPAMSLOTDEBUG_1 \ + elog(DEBUG2, "heapam_getnextslot([%s,nkeys=%d],dir=%d) called", \ + RelationGetRelationName(scan->rs_rd), scan->rs_nkeys, (int) direction) +#define HEAPAMSLOTDEBUG_2 \ + elog(DEBUG2, "heapam_getnextslot returning EOS") +#define HEAPAMSLOTDEBUG_3 \ + elog(DEBUG2, "heapam_getnextslot returning tuple") +#else +#define HEAPAMSLOTDEBUG_1 +#define HEAPAMSLOTDEBUG_2 +#define HEAPAMSLOTDEBUG_3 +#endif + +static TupleTableSlot * +heapam_getnextslot(HeapScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + + /* Note: no locking manipulations needed */ + + HEAPAMSLOTDEBUG_1; /* heapam_getnextslot( info ) */ + + if (scan->rs_pageatatime) + heapgettup_pagemode(scan, direction, + scan->rs_nkeys, scan->rs_key); + else + heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key); + + if (scan->rs_ctup.t_data == NULL) + { + HEAPAMSLOTDEBUG_2; /* heapam_getnextslot returning EOS */ + ExecClearTuple(slot); + return slot; + } + + /* + * if we get here it means we have a new current scan tuple, so point to + * the proper return buffer and return the tuple. + */ + HEAPAMSLOTDEBUG_3; /* heapam_getnextslot returning tuple */ + + pgstat_count_heap_getnext(scan->rs_rd); + return ExecStoreTuple(heap_copytuple(&(scan->rs_ctup)), + slot, InvalidBuffer, true); +} /* * heap_fetch - retrieve tuple with given tid @@ -1818,9 +2906,210 @@ heap_fetch(Relation relation, return false; } +/* + * heapam_hot_search_buffer - search HOT chain for tuple satisfying snapshot + * + * On entry, *tid is the TID of a tuple (either a simple tuple, or the root + * of a HOT chain), and buffer is the buffer holding this tuple. We search + * for the first chain member satisfying the given snapshot. If one is + * found, we update *tid to reference that tuple's offset number, and + * return TRUE.
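Because heapam_getnextslot signals end-of-scan with a cleared slot rather than a NULL pointer, a consumer loop looks slightly different from the tuple-based one. A hypothetical caller, written against the storage_* wrappers this patch adds in storageam.c (sketch only; rel is assumed to be an already-opened relation):

TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel));
HeapScanDesc scan = storage_beginscan(rel, GetActiveSnapshot(), 0, NULL);

for (;;)
{
	slot = storage_getnextslot(scan, ForwardScanDirection, slot);
	if (TupIsNull(slot))			/* cleared slot marks end of scan */
		break;
	/* ... process one tuple through the slot ... */
}
storage_endscan(scan);
ExecDropSingleTupleTableSlot(slot);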
If no match, return FALSE without modifying *tid. + * + * heapTuple is a caller-supplied buffer. When a match is found, we return + * the tuple here, in addition to updating *tid. If no match is found, the + * contents of this buffer on return are undefined. + * + * If all_dead is not NULL, we check non-visible tuples to see if they are + * globally dead; *all_dead is set TRUE if all members of the HOT chain + * are vacuumable, FALSE if not. + * + * Unlike heap_fetch, the caller must already have pin and (at least) share + * lock on the buffer; it is still pinned/locked at exit. Also unlike + * heap_fetch, we do not report any pgstats count; caller may do so if wanted. + */ +static bool +heapam_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, + Snapshot snapshot, HeapTuple heapTuple, + bool *all_dead, bool first_call) +{ + Page dp = (Page) BufferGetPage(buffer); + TransactionId prev_xmax = InvalidTransactionId; + OffsetNumber offnum; + bool at_chain_start; + bool valid; + bool skip; + + /* If this is not the first call, previous call returned a (live!) tuple */ + if (all_dead) + *all_dead = first_call; + + Assert(TransactionIdIsValid(RecentGlobalXmin)); + + Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer)); + offnum = ItemPointerGetOffsetNumber(tid); + at_chain_start = first_call; + skip = !first_call; + + heapTuple->t_self = *tid; + /* Scan through possible multiple members of HOT-chain */ + for (;;) + { + ItemId lp; + /* check for bogus TID */ + if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp)) + break; + lp = PageGetItemId(dp, offnum); + + /* check for unused, dead, or redirected items */ + if (!ItemIdIsNormal(lp)) + { + /* We should only see a redirect at start of chain */ + if (ItemIdIsRedirected(lp) && at_chain_start) + { + /* Follow the redirect */ + offnum = ItemIdGetRedirect(lp); + at_chain_start = false; + continue; + } + /* else must be end of chain */ + break; + } + + heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); + heapTuple->t_len = ItemIdGetLength(lp); + heapTuple->t_tableOid = RelationGetRelid(relation); + ItemPointerSetOffsetNumber(&heapTuple->t_self, offnum); + + /* + * Shouldn't see a HEAP_ONLY tuple at chain start. + */ + if (at_chain_start && HeapTupleIsHeapOnly(heapTuple)) + break; + + /* + * The xmin should match the previous xmax value, else chain is + * broken. + */ + if (TransactionIdIsValid(prev_xmax) && + !TransactionIdEquals(prev_xmax, + HeapTupleHeaderGetXmin(heapTuple->t_data))) + break; + + /* + * When first_call is true (and thus, skip is initially false) we'll + * return the first tuple we find. But on later passes, heapTuple + * will initially be pointing to the tuple we returned last time. + * Returning it again would be incorrect (and would loop forever), so + * we skip it and return the next match we find. + */ + if (!skip) + { + /* + * For the benefit of logical decoding, have t_self point at the + * element of the HOT chain we're currently investigating instead + * of the root tuple of the HOT chain. This is important because + * the *Satisfies routine for historical mvcc snapshots needs the + * correct tid to decide about the visibility in some cases. 
+ */ + ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum); + + /* If it's visible per the snapshot, we must return it */ + valid = HeapTupleSatisfiesVisibility(relation->rd_stamroutine, heapTuple, snapshot, buffer); + CheckForSerializableConflictOut(valid, relation, heapTuple, + buffer, snapshot); + /* reset to original, non-redirected, tid */ + heapTuple->t_self = *tid; + + if (valid) + { + ItemPointerSetOffsetNumber(tid, offnum); + PredicateLockTuple(relation, heapTuple, snapshot); + if (all_dead) + *all_dead = false; + return true; + } + } + skip = false; + + /* + * If we can't see it, maybe no one else can either. At caller + * request, check whether all chain members are dead to all + * transactions. + */ + if (all_dead && *all_dead && + !HeapTupleIsSurelyDead(heapTuple, RecentGlobalXmin)) + *all_dead = false; + + /* + * Check to see if HOT chain continues past this tuple; if so fetch + * the next offnum and loop around. + */ + if (HeapTupleIsHotUpdated(heapTuple)) + { + Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) == + ItemPointerGetBlockNumber(tid)); + offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); + at_chain_start = false; + prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + } + else + break; /* end of chain */ + } + + return false; +} + +/* + * heapam_setscanlimits - restrict range of a heapscan + * + * startBlk is the page to start at + * numBlks is number of pages to scan (InvalidBlockNumber means "all") + */ +static void +heapam_setscanlimits(HeapScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + + Assert(!scan->rs_inited); /* else too late to change */ + Assert(!scan->rs_syncscan); /* else rs_startblock is significant */ + + /* Check startBlk is valid (but allow case of zero blocks...) */ + Assert(startBlk == 0 || startBlk < scan->rs_nblocks); + + scan->rs_startblock = startBlk; + scan->rs_numblocks = numBlks; +} + + +/* + * heapam_freeze_tuple + * Freeze tuple in place, without WAL logging. + * + * Useful for callers like CLUSTER that perform their own WAL logging. + */ +static bool +heapam_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, + TransactionId cutoff_multi) +{ + xl_heap_freeze_tuple frz; + bool do_freeze; + bool tuple_totally_frozen; + + do_freeze = heap_prepare_freeze_tuple(tuple, cutoff_xid, cutoff_multi, + &frz, &tuple_totally_frozen); + + /* + * Note that because this is not a WAL-logged operation, we don't need to + * fill in the offset in the freeze record. 
+ */ + + if (do_freeze) + heap_execute_freeze_tuple(tuple, &frz); + return do_freeze; +} /* ---------------------------------------------------------------- * storage AM support routines for heapam * ---------------------------------------------------------------- */ @@ -3793,6 +5082,16 @@ heapam_storage_handler(PG_FUNCTION_ARGS) { StorageAmRoutine *amroutine = makeNode(StorageAmRoutine); + amroutine->scan_begin = heapam_beginscan; + amroutine->scansetlimits = heapam_setscanlimits; + amroutine->scan_getnext = heapam_getnext; + amroutine->scan_getnextslot = heapam_getnextslot; + amroutine->scan_end = heapam_endscan; + amroutine->scan_rescan = heapam_rescan; + amroutine->scan_update_snapshot = heapam_scan_update_snapshot; + amroutine->tuple_freeze = heapam_freeze_tuple; + amroutine->hot_search_buffer = heapam_hot_search_buffer; + amroutine->tuple_fetch = heapam_fetch; amroutine->tuple_insert = heapam_heap_insert; amroutine->tuple_delete = heapam_heap_delete; diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 8fba61c..a475a85 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -409,7 +409,7 @@ rewrite_heap_tuple(RewriteState state, * While we have our hands on the tuple, we may as well freeze any * eligible xmin or xmax, so that future VACUUM effort can be saved. */ - heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid, + storage_freeze_tuple(state->rs_new_rel, new_tuple->t_data, state->rs_freeze_xid, state->rs_cutoff_multi); /* diff --git a/src/backend/access/heap/storageam.c b/src/backend/access/heap/storageam.c index d1d7364..76b94dc 100644 --- a/src/backend/access/heap/storageam.c +++ b/src/backend/access/heap/storageam.c @@ -48,6 +48,174 @@ #include "utils/tqual.h" + +/* ---------------- + * storage_beginscan_parallel - join a parallel scan + * + * Caller must hold a suitable lock on the correct relation. + * ---------------- + */ +HeapScanDesc +storage_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan) +{ + Snapshot snapshot; + + Assert(RelationGetRelid(relation) == parallel_scan->phs_relid); + snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data); + RegisterSnapshot(snapshot); + + return relation->rd_stamroutine->scan_begin(relation, snapshot, 0, NULL, parallel_scan, + true, true, true, false, false, true); +} + +/* + * storage_setscanlimits - restrict range of a heapscan + * + * startBlk is the page to start at + * numBlks is number of pages to scan (InvalidBlockNumber means "all") + */ +void +storage_setscanlimits(HeapScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks) +{ + sscan->rs_rd->rd_stamroutine->scansetlimits(sscan, startBlk, numBlks); +} + + +/* ---------------- + * storage_beginscan - begin relation scan + * + * storage_beginscan is the "standard" case. + * + * storage_beginscan_catalog differs in setting up its own temporary snapshot. + * + * storage_beginscan_strat offers an extended API that lets the caller control + * whether a nondefault buffer access strategy can be used, and whether + * syncscan can be chosen (possibly resulting in the scan not starting from + * block zero). Both of these default to TRUE with plain storage_beginscan. + * + * storage_beginscan_bm is an alternative entry point for setting up a + * HeapScanDesc for a bitmap heap scan. Although that scan technology is + * really quite unlike a standard seqscan, there is just enough commonality + * to make it worth using the same data structure. + * + * storage_beginscan_sampling is an alternative entry point for setting up a + * HeapScanDesc for a TABLESAMPLE scan.
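The hook table filled in above is the whole point of the indirection: any storage AM whose handler fills in the same StorageAmRoutine callbacks becomes reachable through the storage_* wrappers without touching any caller. A hypothetical handler for some other AM might look like this (all myam_* names invented for illustration):

Datum
myam_storage_handler(PG_FUNCTION_ARGS)
{
	StorageAmRoutine *amroutine = makeNode(StorageAmRoutine);

	amroutine->scan_begin = myam_beginscan;		/* this AM's own scan code */
	amroutine->scan_getnext = myam_getnext;
	amroutine->scan_getnextslot = myam_getnextslot;
	amroutine->scan_end = myam_endscan;
	/* ... and so on for the remaining callbacks ... */

	PG_RETURN_POINTER(amroutine);
}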
As with bitmap scans, it's worth + * using the same data structure although the behavior is rather different. + * In addition to the options offered by storage_beginscan_strat, this call + * also allows control of whether page-mode visibility checking is used. + * ---------------- + */ +HeapScanDesc +storage_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key) +{ + return relation->rd_stamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + true, true, true, false, false, false); +} + +HeapScanDesc +storage_beginscan_catalog(Relation relation, int nkeys, ScanKey key) +{ + Oid relid = RelationGetRelid(relation); + Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + + return relation->rd_stamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + true, true, true, false, false, true); +} + +HeapScanDesc +storage_beginscan_strat(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + bool allow_strat, bool allow_sync) +{ + return relation->rd_stamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + allow_strat, allow_sync, true, + false, false, false); +} + +HeapScanDesc +storage_beginscan_bm(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key) +{ + return relation->rd_stamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + false, false, true, true, false, false); +} + +HeapScanDesc +storage_beginscan_sampling(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + return relation->rd_stamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + allow_strat, allow_sync, allow_pagemode, + false, true, false); +} + +/* ---------------- + * storage_rescan - restart a relation scan + * ---------------- + */ +void +storage_rescan(HeapScanDesc scan, + ScanKey key) +{ + scan->rs_rd->rd_stamroutine->scan_rescan(scan, key, false, false, false, false); +} + +/* ---------------- + * storage_rescan_set_params - restart a relation scan after changing params + * + * This call allows changing the buffer strategy, syncscan, and pagemode + * options before starting a fresh scan. Note that although the actual use + * of syncscan might change (effectively, enabling or disabling reporting), + * the previously selected startblock will be kept. + * ---------------- + */ +void +storage_rescan_set_params(HeapScanDesc scan, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + scan->rs_rd->rd_stamroutine->scan_rescan(scan, key, true, + allow_strat, allow_sync, (allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot))); +} + +/* ---------------- + * storage_endscan - end relation scan + * + * See how to integrate with index scans. + * Check handling of reldesc caching. + * ---------------- + */ +void +storage_endscan(HeapScanDesc scan) +{ + scan->rs_rd->rd_stamroutine->scan_end(scan); +} + + +/* ---------------- + * storage_update_snapshot + * + * Update snapshot info in heap scan descriptor.
+ * ---------------- + */ +void +storage_update_snapshot(HeapScanDesc scan, Snapshot snapshot) +{ + scan->rs_rd->rd_stamroutine->scan_update_snapshot(scan, snapshot); +} + +StorageTuple +storage_getnext(HeapScanDesc sscan, ScanDirection direction) +{ + return sscan->rs_rd->rd_stamroutine->scan_getnext(sscan, direction); +} + +TupleTableSlot * +storage_getnextslot(HeapScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + return sscan->rs_rd->rd_stamroutine->scan_getnextslot(sscan, direction, slot); +} /* * storage_fetch - retrieve tuple with given tid * @@ -99,6 +267,73 @@ storage_fetch(Relation relation, userbuf, keep_buf, stats_relation); } +/* + * storage_hot_search_buffer - search HOT chain for tuple satisfying snapshot + * + * On entry, *tid is the TID of a tuple (either a simple tuple, or the root + * of a HOT chain), and buffer is the buffer holding this tuple. We search + * for the first chain member satisfying the given snapshot. If one is + * found, we update *tid to reference that tuple's offset number, and + * return TRUE. If no match, return FALSE without modifying *tid. + * + * heapTuple is a caller-supplied buffer. When a match is found, we return + * the tuple here, in addition to updating *tid. If no match is found, the + * contents of this buffer on return are undefined. + * + * If all_dead is not NULL, we check non-visible tuples to see if they are + * globally dead; *all_dead is set TRUE if all members of the HOT chain + * are vacuumable, FALSE if not. + * + * Unlike heap_fetch, the caller must already have pin and (at least) share + * lock on the buffer; it is still pinned/locked at exit. Also unlike + * heap_fetch, we do not report any pgstats count; caller may do so if wanted. + */ +bool +storage_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, + Snapshot snapshot, HeapTuple heapTuple, + bool *all_dead, bool first_call) +{ + return relation->rd_stamroutine->hot_search_buffer(tid, relation, buffer, + snapshot, heapTuple, all_dead, first_call); +} + +/* + * storage_hot_search - search HOT chain for tuple satisfying snapshot + * + * This has the same API as storage_hot_search_buffer, except that the caller + * does not provide the buffer containing the page, rather we access it + * locally. + */ +bool +storage_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot, + bool *all_dead) +{ + bool result; + Buffer buffer; + HeapTupleData heapTuple; + + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + result = relation->rd_stamroutine->hot_search_buffer(tid, relation, buffer, + snapshot, &heapTuple, all_dead, true); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + return result; +} + +/* + * storage_freeze_tuple + * Freeze tuple in place, without WAL logging. + * + * Useful for callers like CLUSTER that perform their own WAL logging.
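From a caller's point of view the wrapper reduces the whole HOT-chain walk to a single probe; the pattern below mirrors the unique_key_recheck and _bt_check_unique call sites converted later in this patch (sketch only; tid_of_interest and rel are hypothetical inputs):

ItemPointerData tmptid = *tid_of_interest;	/* root TID to probe */
bool		all_dead;

if (storage_hot_search(&tmptid, rel, SnapshotSelf, &all_dead))
{
	/* some chain member is visible; tmptid now points at it */
}
else if (all_dead)
{
	/* every member of the chain is dead to all transactions */
}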
+ */ +bool +storage_freeze_tuple(Relation rel, HeapTupleHeader tuple, TransactionId cutoff_xid, + TransactionId cutoff_multi) +{ + return rel->rd_stamroutine->tuple_freeze(tuple, cutoff_xid, cutoff_multi); +} + /* * storage_lock_tuple - lock a tuple in shared or exclusive mode diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 01321a2..db5c93b 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/storageam.h" #include "access/transam.h" #include "catalog/index.h" #include "lib/stringinfo.h" @@ -394,7 +395,7 @@ systable_beginscan(Relation heapRelation, * disadvantage; and there are no compensating advantages, because * it's unlikely that such scans will occur in parallel. */ - sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, + sysscan->scan = storage_beginscan_strat(heapRelation, snapshot, nkeys, key, true, false); sysscan->iscan = NULL; @@ -432,7 +433,7 @@ systable_getnext(SysScanDesc sysscan) elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); } else - htup = heap_getnext(sysscan->scan, ForwardScanDirection); + htup = storage_getnext(sysscan->scan, ForwardScanDirection); return htup; } @@ -504,7 +505,7 @@ systable_endscan(SysScanDesc sysscan) index_close(sysscan->irel, AccessShareLock); } else - heap_endscan(sysscan->scan); + storage_endscan(sysscan->scan); if (sysscan->snapshot) UnregisterSnapshot(sysscan->snapshot); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index bef4255..349a127 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -71,6 +71,7 @@ #include "access/amapi.h" #include "access/relscan.h" +#include "access/storageam.h" #include "access/transam.h" #include "access/xlog.h" #include "catalog/catalog.h" @@ -605,7 +606,7 @@ index_fetch_heap(IndexScanDesc scan) /* Obtain share-lock on the buffer so we can examine visibility */ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); - got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation, + got_heap_tuple = storage_hot_search_buffer(tid, scan->heapRelation, scan->xs_cbuf, scan->xs_snapshot, &scan->xs_ctup, diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index bf963fc..0e25e9a 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -18,6 +18,7 @@ #include "access/heapam.h" #include "access/nbtree.h" #include "access/nbtxlog.h" +#include "access/storageam.h" #include "access/transam.h" #include "access/xloginsert.h" #include "miscadmin.h" @@ -325,7 +326,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * that satisfies SnapshotDirty. This is necessary because we * have just a single index entry for the entire chain. */ - else if (heap_hot_search(&htid, heapRel, &SnapshotDirty, + else if (storage_hot_search(&htid, heapRel, &SnapshotDirty, &all_dead)) { TransactionId xwait; @@ -379,7 +380,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * entry. 
*/ htid = itup->t_tid; - if (heap_hot_search(&htid, heapRel, SnapshotSelf, NULL)) + if (storage_hot_search(&htid, heapRel, SnapshotSelf, NULL)) { /* Normal case --- it's still live */ } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 0453fd4..975cd5b 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -18,6 +18,7 @@ #include #include "access/htup_details.h" +#include "access/storageam.h" #include "access/xact.h" #include "bootstrap/bootstrap.h" #include "catalog/index.h" @@ -573,18 +574,18 @@ boot_openrel(char *relname) { /* We can now load the pg_type data */ rel = heap_open(TypeRelationId, NoLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = storage_beginscan_catalog(rel, 0, NULL); i = 0; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = storage_getnext(scan, ForwardScanDirection)) != NULL) ++i; - heap_endscan(scan); + storage_endscan(scan); app = Typ = ALLOC(struct typmap *, i + 1); while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = storage_beginscan_catalog(rel, 0, NULL); app = Typ; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = storage_getnext(scan, ForwardScanDirection)) != NULL) { (*app)->am_oid = HeapTupleGetOid(tup); memcpy((char *) &(*app)->am_typ, @@ -592,7 +593,7 @@ boot_openrel(char *relname) sizeof((*app)->am_typ)); app++; } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, NoLock); } @@ -903,25 +904,25 @@ gettype(char *type) } elog(DEBUG4, "external type: %s", type); rel = heap_open(TypeRelationId, NoLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = storage_beginscan_catalog(rel, 0, NULL); i = 0; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = storage_getnext(scan, ForwardScanDirection)) != NULL) ++i; - heap_endscan(scan); + storage_endscan(scan); app = Typ = ALLOC(struct typmap *, i + 1); while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = storage_beginscan_catalog(rel, 0, NULL); app = Typ; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = storage_getnext(scan, ForwardScanDirection)) != NULL) { (*app)->am_oid = HeapTupleGetOid(tup); memmove((char *) &(*app++)->am_typ, (char *) GETSTRUCT(tup), sizeof((*app)->am_typ)); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, NoLock); return gettype(type); } diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index ccde66a..d2a8a06 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -20,6 +20,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/sysattr.h" #include "access/xact.h" #include "catalog/binary_upgrade.h" @@ -797,14 +798,14 @@ objectsInSchemaToOids(GrantObjectType objtype, List *nspnames) ObjectIdGetDatum(namespaceId)); rel = heap_open(ProcedureRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 1, key); + scan = storage_beginscan_catalog(rel, 1, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { objects = lappend_oid(objects, HeapTupleGetOid(tuple)); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); } break; @@ -842,14 +843,14 @@ 
getRelationsInNamespace(Oid namespaceId, char relkind) CharGetDatum(relkind)); rel = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 2, key); + scan = storage_beginscan_catalog(rel, 2, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { relations = lappend_oid(relations, HeapTupleGetOid(tuple)); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); return relations; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0240df7..68c46f7 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -26,6 +26,7 @@ #include "access/amapi.h" #include "access/multixact.h" #include "access/relscan.h" +#include "access/storageam.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/visibilitymap.h" @@ -1904,10 +1905,10 @@ index_update_stats(Relation rel, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); - pg_class_scan = heap_beginscan_catalog(pg_class, 1, key); - tuple = heap_getnext(pg_class_scan, ForwardScanDirection); + pg_class_scan = storage_beginscan_catalog(pg_class, 1, key); + tuple = storage_getnext(pg_class_scan, ForwardScanDirection); tuple = heap_copytuple(tuple); - heap_endscan(pg_class_scan); + storage_endscan(pg_class_scan); } else { @@ -2279,7 +2280,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, } method = heapRelation->rd_stamroutine; - scan = heap_beginscan_strat(heapRelation, /* relation */ + scan = storage_beginscan_strat(heapRelation, /* relation */ snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ @@ -2288,7 +2289,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, /* set our scan endpoints */ if (!allow_sync) - heap_setscanlimits(scan, start_blockno, numblocks); + storage_setscanlimits(scan, start_blockno, numblocks); else { /* syncscan can only be requested on whole relation */ @@ -2301,7 +2302,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, /* * Scan all tuples in the base relation. */ - while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((heapTuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { bool tupleIsAlive; @@ -2613,7 +2614,7 @@ IndexBuildHeapRangeScan(Relation heapRelation, } } - heap_endscan(scan); + storage_endscan(scan); /* we can now forget our snapshot, if set */ if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent) @@ -2684,14 +2685,14 @@ IndexCheckExclusion(Relation heapRelation, * Scan all live tuples in the base relation. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan_strat(heapRelation, /* relation */ + scan = storage_beginscan_strat(heapRelation, /* relation */ snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ true); /* syncscan OK */ - while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((heapTuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { CHECK_FOR_INTERRUPTS(); @@ -2727,7 +2728,7 @@ IndexCheckExclusion(Relation heapRelation, estate, true); } - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); @@ -3004,7 +3005,7 @@ validate_index_heapscan(Relation heapRelation, * here, because it's critical that we read from block zero forward to * match the sorted TIDs. 
*/ - scan = heap_beginscan_strat(heapRelation, /* relation */ + scan = storage_beginscan_strat(heapRelation, /* relation */ snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ @@ -3014,7 +3015,7 @@ validate_index_heapscan(Relation heapRelation, /* * Scan all tuples matching the snapshot. */ - while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((heapTuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { ItemPointer heapcursor = &heapTuple->t_self; ItemPointerData rootTuple; @@ -3171,7 +3172,7 @@ validate_index_heapscan(Relation heapRelation, } } - heap_endscan(scan); + storage_endscan(scan); ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 73eff17..b8137f2 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -986,7 +986,7 @@ check_default_allows_bound(Relation parent, Relation default_rel, econtext = GetPerTupleExprContext(estate); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(part_rel, snapshot, 0, NULL); + scan = storage_beginscan(part_rel, snapshot, 0, NULL); tupslot = MakeSingleTupleTableSlot(tupdesc); /* @@ -995,7 +995,7 @@ check_default_allows_bound(Relation parent, Relation default_rel, */ oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { ExecStoreTuple(tuple, tupslot, InvalidBuffer, false); econtext->ecxt_scantuple = tupslot; @@ -1011,7 +1011,7 @@ check_default_allows_bound(Relation parent, Relation default_rel, } MemoryContextSwitchTo(oldCxt); - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(tupslot); FreeExecutorState(estate); diff --git a/src/backend/catalog/pg_conversion.c b/src/backend/catalog/pg_conversion.c index 5746dc3..1d048e6 100644 --- a/src/backend/catalog/pg_conversion.c +++ b/src/backend/catalog/pg_conversion.c @@ -16,6 +16,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/sysattr.h" #include "catalog/dependency.h" #include "catalog/indexing.h" @@ -161,14 +162,14 @@ RemoveConversionById(Oid conversionOid) /* open pg_conversion */ rel = heap_open(ConversionRelationId, RowExclusiveLock); - scan = heap_beginscan_catalog(rel, 1, &scanKeyData); + scan = storage_beginscan_catalog(rel, 1, &scanKeyData); /* search for the target tuple */ - if (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection))) + if (HeapTupleIsValid(tuple = storage_getnext(scan, ForwardScanDirection))) CatalogTupleDelete(rel, &tuple->t_self); else elog(ERROR, "could not find tuple for conversion %u", conversionOid); - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, RowExclusiveLock); } diff --git a/src/backend/catalog/pg_db_role_setting.c b/src/backend/catalog/pg_db_role_setting.c index 323471b..517e310 100644 --- a/src/backend/catalog/pg_db_role_setting.c +++ b/src/backend/catalog/pg_db_role_setting.c @@ -13,6 +13,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" #include "catalog/pg_db_role_setting.h" @@ -196,12 +197,12 @@ DropSetting(Oid databaseid, Oid roleid) numkeys++; } - scan = heap_beginscan_catalog(relsetting, numkeys, keys); - while (HeapTupleIsValid(tup = heap_getnext(scan, 
ForwardScanDirection))) + scan = storage_beginscan_catalog(relsetting, numkeys, keys); + while (HeapTupleIsValid(tup = storage_getnext(scan, ForwardScanDirection))) { CatalogTupleDelete(relsetting, &tup->t_self); } - heap_endscan(scan); + storage_endscan(scan); heap_close(relsetting, RowExclusiveLock); } diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index 3ef7ba8..145e3c1 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -21,6 +21,7 @@ #include "access/hash.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -324,9 +325,9 @@ GetAllTablesPublicationRelations(void) BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(RELKIND_RELATION)); - scan = heap_beginscan_catalog(classRel, 1, key); + scan = storage_beginscan_catalog(classRel, 1, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { Oid relid = HeapTupleGetOid(tuple); Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple); @@ -335,7 +336,7 @@ GetAllTablesPublicationRelations(void) result = lappend_oid(result, relid); } - heap_endscan(scan); + storage_endscan(scan); heap_close(classRel, AccessShareLock); return result; diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index fb53d71..a51f2e4 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -19,6 +19,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/xact.h" #include "catalog/indexing.h" @@ -402,12 +403,12 @@ RemoveSubscriptionRel(Oid subid, Oid relid) } /* Do the search and delete what we found. 
*/ - scan = heap_beginscan_catalog(rel, nkeys, skey); - while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + scan = storage_beginscan_catalog(rel, nkeys, skey); + while (HeapTupleIsValid(tup = storage_getnext(scan, ForwardScanDirection))) { CatalogTupleDelete(rel, &tup->t_self); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, RowExclusiveLock); } diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index dbcc5bc..e0f6973 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -21,6 +21,7 @@ #include "access/multixact.h" #include "access/relscan.h" #include "access/rewriteheap.h" +#include "access/storageam.h" #include "access/storageamapi.h" #include "access/transam.h" #include "access/tuptoaster.h" @@ -909,7 +910,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, } else { - heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL); + heapScan = storage_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL); indexScan = NULL; } @@ -959,7 +960,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, } else { - tuple = heap_getnext(heapScan, ForwardScanDirection); + tuple = storage_getnext(heapScan, ForwardScanDirection); if (tuple == NULL) break; @@ -1045,7 +1046,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (indexScan != NULL) index_endscan(indexScan); if (heapScan != NULL) - heap_endscan(heapScan); + storage_endscan(heapScan); /* * In scan-and-sort mode, complete the sort, then read out all live tuples @@ -1656,8 +1657,8 @@ get_tables_to_cluster(MemoryContext cluster_context) Anum_pg_index_indisclustered, BTEqualStrategyNumber, F_BOOLEQ, BoolGetDatum(true)); - scan = heap_beginscan_catalog(indRelation, 1, &entry); - while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan_catalog(indRelation, 1, &entry); + while ((indexTuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { index = (Form_pg_index) GETSTRUCT(indexTuple); @@ -1677,7 +1678,7 @@ get_tables_to_cluster(MemoryContext cluster_context) MemoryContextSwitchTo(old_context); } - heap_endscan(scan); + storage_endscan(scan); relation_close(indRelation, AccessShareLock); diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index e2544e5..6727d15 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/storageam.h" #include "catalog/index.h" #include "commands/trigger.h" #include "executor/executor.h" @@ -102,7 +103,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) * removed. */ tmptid = new_row->t_self; - if (!heap_hot_search(&tmptid, trigdata->tg_relation, SnapshotSelf, NULL)) + if (!storage_hot_search(&tmptid, trigdata->tg_relation, SnapshotSelf, NULL)) { /* * All rows in the HOT chain are dead, so skip the check. 
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index c81ddf5..00e71e3 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2028,10 +2028,10 @@ CopyTo(CopyState cstate) values = (Datum *) palloc(num_phys_attrs * sizeof(Datum)); nulls = (bool *) palloc(num_phys_attrs * sizeof(bool)); - scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); + scandesc = storage_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); processed = 0; - while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scandesc, ForwardScanDirection)) != NULL) { CHECK_FOR_INTERRUPTS(); @@ -2043,7 +2043,7 @@ CopyTo(CopyState cstate) processed++; } - heap_endscan(scandesc); + storage_endscan(scandesc); pfree(values); pfree(nulls); diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index e138539..39850b1 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -26,6 +26,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/xact.h" #include "access/xloginsert.h" #include "access/xlogutils.h" @@ -590,8 +591,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) * each one to the new database. */ rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan_catalog(rel, 0, NULL); + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { Oid srctablespace = HeapTupleGetOid(tuple); Oid dsttablespace; @@ -643,7 +644,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); } } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); /* @@ -1875,8 +1876,8 @@ remove_dbtablespaces(Oid db_id) HeapTuple tuple; rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan_catalog(rel, 0, NULL); + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { Oid dsttablespace = HeapTupleGetOid(tuple); char *dstpath; @@ -1917,7 +1918,7 @@ remove_dbtablespaces(Oid db_id) pfree(dstpath); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); } @@ -1942,8 +1943,8 @@ check_db_file_conflict(Oid db_id) HeapTuple tuple; rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan_catalog(rel, 0, NULL); + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { Oid dsttablespace = HeapTupleGetOid(tuple); char *dstpath; @@ -1966,7 +1967,7 @@ check_db_file_conflict(Oid db_id) pfree(dstpath); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); return result; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index b61aaac..46bc3da 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -18,6 +18,7 @@ #include "access/amapi.h" #include "access/htup_details.h" #include "access/reloptions.h" +#include "access/storageam.h" #include "access/sysattr.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -1948,8 +1949,8 @@ ReindexMultipleTables(const char 
*objectName, ReindexObjectType objectKind, * rels will be processed indirectly by reindex_relation). */ relationRelation = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(relationRelation, num_keys, scan_keys); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan_catalog(relationRelation, num_keys, scan_keys); + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple); Oid relid = HeapTupleGetOid(tuple); @@ -1989,7 +1990,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, MemoryContextSwitchTo(old); } - heap_endscan(scan); + storage_endscan(scan); heap_close(relationRelation, AccessShareLock); /* Now reindex each rel in a separate transaction */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index ec6523e..6b6beb9 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -4540,7 +4540,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) * checking all the constraints. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(oldrel, snapshot, 0, NULL); + scan = storage_beginscan(oldrel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -4548,7 +4548,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) */ oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { if (tab->rewrite > 0) { @@ -4661,7 +4661,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) } MemoryContextSwitchTo(oldCxt); - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(oldslot); @@ -5064,9 +5064,9 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(typeOid)); - scan = heap_beginscan_catalog(classRel, 1, key); + scan = storage_beginscan_catalog(classRel, 1, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { if (behavior == DROP_RESTRICT) ereport(ERROR, @@ -5078,7 +5078,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be result = lappend_oid(result, HeapTupleGetOid(tuple)); } - heap_endscan(scan); + storage_endscan(scan); heap_close(classRel, AccessShareLock); return result; @@ -8243,7 +8243,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) econtext->ecxt_scantuple = slot; snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = storage_beginscan(rel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -8251,7 +8251,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) */ oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { ExecStoreTuple(tuple, slot, InvalidBuffer, false); @@ -8266,7 +8266,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) } MemoryContextSwitchTo(oldcxt); - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); 
ExecDropSingleTupleTableSlot(slot); FreeExecutorState(estate); @@ -8321,9 +8321,9 @@ validateForeignKeyConstraint(char *conname, * ereport(ERROR) and that's that. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = storage_beginscan(rel, snapshot, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { FunctionCallInfoData fcinfo; TriggerData trigdata; @@ -8352,7 +8352,7 @@ validateForeignKeyConstraint(char *conname, RI_FKey_check_ins(&fcinfo); } - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); } @@ -10802,8 +10802,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) ObjectIdGetDatum(orig_tablespaceoid)); rel = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 1, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan_catalog(rel, 1, key); + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { Oid relOid = HeapTupleGetOid(tuple); Form_pg_class relForm; @@ -10862,7 +10862,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) relations = lappend_oid(relations, relOid); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); if (relations == NIL) diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 8559c3b..cdfa8ff 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -53,6 +53,7 @@ #include "access/heapam.h" #include "access/reloptions.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -416,8 +417,8 @@ DropTableSpace(DropTableSpaceStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tuple = heap_getnext(scandesc, ForwardScanDirection); + scandesc = storage_beginscan_catalog(rel, 1, entry); + tuple = storage_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tuple)) { @@ -434,7 +435,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) (errmsg("tablespace \"%s\" does not exist, skipping", tablespacename))); /* XXX I assume I need one or both of these next two calls */ - heap_endscan(scandesc); + storage_endscan(scandesc); heap_close(rel, NoLock); } return; @@ -461,7 +462,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) */ CatalogTupleDelete(rel, &tuple->t_self); - heap_endscan(scandesc); + storage_endscan(scandesc); /* * Remove any comments or security labels on this tablespace. 
@@ -925,8 +926,8 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(oldname)); - scan = heap_beginscan_catalog(rel, 1, entry); - tup = heap_getnext(scan, ForwardScanDirection); + scan = storage_beginscan_catalog(rel, 1, entry); + tup = storage_getnext(scan, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), @@ -937,7 +938,7 @@ RenameTableSpace(const char *oldname, const char *newname) newtuple = heap_copytuple(tup); newform = (Form_pg_tablespace) GETSTRUCT(newtuple); - heap_endscan(scan); + storage_endscan(scan); /* Must be owner */ if (!pg_tablespace_ownercheck(HeapTupleGetOid(newtuple), GetUserId())) @@ -955,15 +956,15 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(newname)); - scan = heap_beginscan_catalog(rel, 1, entry); - tup = heap_getnext(scan, ForwardScanDirection); + scan = storage_beginscan_catalog(rel, 1, entry); + tup = storage_getnext(scan, ForwardScanDirection); if (HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("tablespace \"%s\" already exists", newname))); - heap_endscan(scan); + storage_endscan(scan); /* OK, update the entry */ namestrcpy(&(newform->spcname), newname); @@ -1005,8 +1006,8 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(stmt->tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tup = heap_getnext(scandesc, ForwardScanDirection); + scandesc = storage_beginscan_catalog(rel, 1, entry); + tup = storage_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), @@ -1047,7 +1048,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) heap_freetuple(newtuple); /* Conclude heap scan. 
*/ - heap_endscan(scandesc); + storage_endscan(scandesc); heap_close(rel, NoLock); return tablespaceoid; @@ -1396,8 +1397,8 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tuple = heap_getnext(scandesc, ForwardScanDirection); + scandesc = storage_beginscan_catalog(rel, 1, entry); + tuple = storage_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ if (HeapTupleIsValid(tuple)) @@ -1405,7 +1406,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) else result = InvalidOid; - heap_endscan(scandesc); + storage_endscan(scandesc); heap_close(rel, AccessShareLock); if (!OidIsValid(result) && !missing_ok) @@ -1442,8 +1443,8 @@ get_tablespace_name(Oid spc_oid) ObjectIdAttributeNumber, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(spc_oid)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tuple = heap_getnext(scandesc, ForwardScanDirection); + scandesc = storage_beginscan_catalog(rel, 1, entry); + tuple = storage_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ if (HeapTupleIsValid(tuple)) @@ -1451,7 +1452,7 @@ get_tablespace_name(Oid spc_oid) else result = NULL; - heap_endscan(scandesc); + storage_endscan(scandesc); heap_close(rel, AccessShareLock); return result; diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 5d83506..a9452d4 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -15,8 +15,9 @@ #include "access/genam.h" #include "access/heapam.h" -#include "access/sysattr.h" #include "access/htup_details.h" +#include "access/storageam.h" +#include "access/sysattr.h" #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/dependency.h" diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 7ed16ae..c07f508 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -32,6 +32,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/xact.h" #include "catalog/binary_upgrade.h" #include "catalog/catalog.h" @@ -2315,8 +2316,8 @@ AlterDomainNotNull(List *names, bool notNull) /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(testrel, snapshot, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan(testrel, snapshot, 0, NULL); + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { int i; @@ -2345,7 +2346,7 @@ AlterDomainNotNull(List *names, bool notNull) } } } - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); /* Close each rel after processing, but keep lock */ @@ -2711,8 +2712,8 @@ validateDomainConstraint(Oid domainoid, char *ccbin) /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(testrel, snapshot, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan(testrel, snapshot, 0, NULL); + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { int i; @@ -2755,7 +2756,7 @@ validateDomainConstraint(Oid domainoid, char *ccbin) ResetExprContext(econtext); } - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); /* Hold relation 
lock till commit (XXX bad for concurrency) */ diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index faa1812..e24ac9f 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -28,6 +28,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" +#include "access/storageam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/namespace.h" @@ -447,9 +448,9 @@ get_rel_oids(Oid relid, const RangeVar *vacrel) pgclass = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(pgclass, 0, NULL); + scan = storage_beginscan_catalog(pgclass, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); @@ -469,7 +470,7 @@ get_rel_oids(Oid relid, const RangeVar *vacrel) MemoryContextSwitchTo(oldcontext); } - heap_endscan(scan); + storage_endscan(scan); heap_close(pgclass, AccessShareLock); } @@ -1121,9 +1122,9 @@ vac_truncate_clog(TransactionId frozenXID, */ relation = heap_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan_catalog(relation, 0, NULL); + scan = storage_beginscan_catalog(relation, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(scan, ForwardScanDirection)) != NULL) { volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple); TransactionId datfrozenxid = dbform->datfrozenxid; @@ -1160,7 +1161,7 @@ vac_truncate_clog(TransactionId frozenXID, } } - heap_endscan(scan); + storage_endscan(scan); heap_close(relation, AccessShareLock); diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index f1636a5..6ade9df 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -544,7 +544,7 @@ static bool IndexSupportsBackwardScan(Oid indexid) { bool result; - HeapTuple ht_idxrel; + StorageTuple ht_idxrel; Form_pg_class idxrelrec; IndexAmRoutine *amroutine; diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 89e189f..5e9daea 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -650,7 +650,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, Oid *index_collations = index->rd_indcollation; int index_natts = index->rd_index->indnatts; IndexScanDesc index_scan; - HeapTuple tup; + StorageTuple tup; ScanKeyData scankeys[INDEX_MAX_KEYS]; SnapshotData DirtySnapshot; int i; @@ -732,12 +732,13 @@ retry: bool existing_isnull[INDEX_MAX_KEYS]; char *error_new; char *error_existing; + tuple_data t_data = storage_tuple_get_data(heap, tup, TID); /* * Ignore the entry for the tuple we're trying to check. */ if (ItemPointerIsValid(tupleid) && - ItemPointerEquals(tupleid, &tup->t_self)) + ItemPointerEquals(tupleid, &(t_data.tid))) { if (found_self) /* should not happen */ elog(ERROR, "found self tuple multiple times in index \"%s\"", @@ -785,7 +786,8 @@ retry: DirtySnapshot.speculativeToken && TransactionIdPrecedes(GetCurrentTransactionId(), xwait)))) { - ctid_wait = tup->t_data->t_ctid; + t_data = storage_tuple_get_data(heap, tup, CTID); + ctid_wait = t_data.tid; reason_wait = indexInfo->ii_ExclusionOps ? 
XLTW_RecheckExclusionConstr : XLTW_InsertIndex; index_endscan(index_scan); @@ -805,7 +807,10 @@ retry: { conflict = true; if (conflictTid) - *conflictTid = tup->t_self; + { + t_data = storage_tuple_get_data(heap, tup, TID); + *conflictTid = t_data.tid; + } break; } diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 8d625b6..6f6861f 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -118,7 +118,7 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, TupleTableSlot *searchslot, TupleTableSlot *outslot) { - HeapTuple scantuple; + StorageTuple scantuple; ScanKeyData skey[INDEX_MAX_KEYS]; IndexScanDesc scan; SnapshotData snap; @@ -228,8 +228,7 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { TupleTableSlot *scanslot; - HeapTuple scantuple; - HeapScanDesc scan; + StorageScanDesc scan; SnapshotData snap; TransactionId xwait; bool found; @@ -239,19 +238,20 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, /* Start an index scan. */ InitDirtySnapshot(snap); - scan = heap_beginscan(rel, &snap, 0, NULL); + scan = storage_beginscan(rel, &snap, 0, NULL); scanslot = MakeSingleTupleTableSlot(desc); retry: found = false; - heap_rescan(scan, NULL); + storage_rescan(scan, NULL); /* Try to find the tuple */ - while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((scanslot = storage_getnextslot(scan, ForwardScanDirection, scanslot)) + && !TupIsNull(scanslot)) { - ExecStoreTuple(scantuple, scanslot, InvalidBuffer, false); + if (!ExecSlotCompare(scanslot, searchslot)) continue; @@ -313,7 +313,7 @@ retry: } } - heap_endscan(scan); + storage_endscan(scan); ExecDropSingleTupleTableSlot(scanslot); return found; diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index b7a2cbc..0a5098d 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -681,7 +681,7 @@ ExecFetchSlotMinimalTuple(TupleTableSlot *slot) Datum ExecFetchSlotTupleDatum(TupleTableSlot *slot) { - HeapTuple tup; + StorageTuple tup; TupleDesc tupdesc; /* Fetch slot's contents in regular-physical-tuple form */ @@ -765,7 +765,7 @@ ExecHeapifySlot(TupleTableSlot *slot) TupleTableSlot * ExecCopySlot(TupleTableSlot *dstslot, TupleTableSlot *srcslot) { - HeapTuple newTuple; + StorageTuple newTuple; MemoryContext oldContext; /* @@ -1085,7 +1085,7 @@ TupleDescGetAttInMetadata(TupleDesc tupdesc) * values is an array of C strings, one for each attribute of the return tuple. * A NULL string pointer indicates we want to create a NULL field. */ -HeapTuple +StorageTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values) { TupleDesc tupdesc = attinmeta->tupdesc; @@ -1093,7 +1093,7 @@ BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values) Datum *dvalues; bool *nulls; int i; - HeapTuple tuple; + StorageTuple tuple; dvalues = (Datum *) palloc(natts * sizeof(Datum)); nulls = (bool *) palloc(natts * sizeof(bool)); diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 42a4ca9..79b74ee 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -181,7 +181,7 @@ static void sqlfunction_destroy(DestReceiver *self); * polymorphic arguments. 
*/ SQLFunctionParseInfoPtr -prepare_sql_fn_parse_info(HeapTuple procedureTuple, +prepare_sql_fn_parse_info(StorageTuple procedureTuple, Node *call_expr, Oid inputCollation) { @@ -597,7 +597,7 @@ init_sql_fcache(FmgrInfo *finfo, Oid collation, bool lazyEvalOK) MemoryContext fcontext; MemoryContext oldcontext; Oid rettype; - HeapTuple procedureTuple; + StorageTuple procedureTuple; Form_pg_proc procedureStruct; SQLFunctionCachePtr fcache; List *raw_parsetree_list; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 0ae5873..d94169c 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -3097,7 +3097,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) Oid inputTypes[FUNC_MAX_ARGS]; int numArguments; int numDirectArgs; - HeapTuple aggTuple; + StorageTuple aggTuple; Form_pg_aggregate aggform; AclResult aclresult; Oid transfn_oid, @@ -3212,7 +3212,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* Check that aggregate owner has permission to call component fns */ { - HeapTuple procTuple; + StorageTuple procTuple; Oid aggOwner; procTuple = SearchSysCache1(PROCOID, diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 60a6cb0..7921025 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -38,6 +38,7 @@ #include #include "access/relscan.h" +#include "access/storageam.h" #include "access/transam.h" #include "executor/execdebug.h" #include "executor/nodeBitmapHeapscan.h" @@ -400,7 +401,7 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) HeapTupleData heapTuple; ItemPointerSet(&tid, page, offnum); - if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, + if (storage_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, &heapTuple, NULL, true)) scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid); } @@ -685,7 +686,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) PlanState *outerPlan = outerPlanState(node); /* rescan to release any page pin */ - heap_rescan(node->ss.ss_currentScanDesc, NULL); + storage_rescan(node->ss.ss_currentScanDesc, NULL); if (node->tbmiterator) tbm_end_iterate(node->tbmiterator); @@ -764,7 +765,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) /* * close heap scan */ - heap_endscan(scanDesc); + storage_endscan(scanDesc); /* * close the heap relation. @@ -865,7 +866,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) * Even though we aren't going to do a conventional seqscan, it is useful * to create a HeapScanDesc --- most of the fields in it are usable. 
*/ - scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation, + scanstate->ss.ss_currentScanDesc = storage_beginscan_bm(currentRelation, estate->es_snapshot, 0, NULL); @@ -1023,5 +1024,5 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc) node->pstate = pstate; snapshot = RestoreSnapshot(pstate->phs_snapshot_data); - heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot); + storage_update_snapshot(node->ss.ss_currentScanDesc, snapshot); } diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c index 02f6c81..abec3a9 100644 --- a/src/backend/executor/nodeForeignscan.c +++ b/src/backend/executor/nodeForeignscan.c @@ -62,9 +62,9 @@ ForeignNext(ForeignScanState *node) */ if (plan->fsSystemCol && !TupIsNull(slot)) { - HeapTuple tup = ExecHeapifySlot(slot); - - tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation); + ExecMaterializeSlot(slot); + ExecSlotUpdateTupleTableoid(slot, + RelationGetRelid(node->ss.ss_currentRelation)); } return slot; diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c index 022d75b..5f4a294 100644 --- a/src/backend/executor/nodeGather.c +++ b/src/backend/executor/nodeGather.c @@ -45,7 +45,7 @@ static TupleTableSlot *ExecGather(PlanState *pstate); static TupleTableSlot *gather_getnext(GatherState *gatherstate); -static HeapTuple gather_readnext(GatherState *gatherstate); +static StorageTuple gather_readnext(GatherState *gatherstate); static void ExecShutdownGatherWorkers(GatherState *node); @@ -252,7 +252,7 @@ gather_getnext(GatherState *gatherstate) TupleTableSlot *outerTupleSlot; TupleTableSlot *fslot = gatherstate->funnel_slot; MemoryContext tupleContext = gatherstate->ps.ps_ExprContext->ecxt_per_tuple_memory; - HeapTuple tup; + StorageTuple tup; while (gatherstate->nreaders > 0 || gatherstate->need_to_scan_locally) { @@ -295,7 +295,7 @@ gather_getnext(GatherState *gatherstate) /* * Attempt to read a tuple from one of our parallel workers. */ -static HeapTuple +static StorageTuple gather_readnext(GatherState *gatherstate) { int nvisited = 0; @@ -303,7 +303,7 @@ gather_readnext(GatherState *gatherstate) for (;;) { TupleQueueReader *reader; - HeapTuple tup; + StorageTuple tup; bool readerdone; /* Check for async events, particularly messages from workers. 
*/ diff --git a/src/backend/executor/nodeGatherMerge.c b/src/backend/executor/nodeGatherMerge.c index d20d466..ef9d9f1 100644 --- a/src/backend/executor/nodeGatherMerge.c +++ b/src/backend/executor/nodeGatherMerge.c @@ -44,7 +44,7 @@ */ typedef struct GMReaderTupleBuffer { - HeapTuple *tuple; /* array of length MAX_TUPLE_STORE */ + StorageTuple *tuple; /* array of length MAX_TUPLE_STORE */ int nTuples; /* number of tuples currently stored */ int readCounter; /* index of next tuple to extract */ bool done; /* true if reader is known exhausted */ @@ -53,7 +53,7 @@ typedef struct GMReaderTupleBuffer static TupleTableSlot *ExecGatherMerge(PlanState *pstate); static int32 heap_compare_slots(Datum a, Datum b, void *arg); static TupleTableSlot *gather_merge_getnext(GatherMergeState *gm_state); -static HeapTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader, +static StorageTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait, bool *done); static void ExecShutdownGatherMergeWorkers(GatherMergeState *node); static void gather_merge_setup(GatherMergeState *gm_state); @@ -399,7 +399,7 @@ gather_merge_setup(GatherMergeState *gm_state) { /* Allocate the tuple array with length MAX_TUPLE_STORE */ gm_state->gm_tuple_buffers[i].tuple = - (HeapTuple *) palloc0(sizeof(HeapTuple) * MAX_TUPLE_STORE); + (StorageTuple *) palloc0(sizeof(StorageTuple) * MAX_TUPLE_STORE); /* Initialize tuple slot for worker */ gm_state->gm_slots[i + 1] = ExecInitExtraTupleSlot(gm_state->ps.state); @@ -617,7 +617,7 @@ static bool gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait) { GMReaderTupleBuffer *tuple_buffer; - HeapTuple tup; + StorageTuple tup; /* * If we're being asked to generate a tuple from the leader, then we just @@ -689,12 +689,12 @@ gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait) /* * Attempt to read a tuple from given worker. */ -static HeapTuple +static StorageTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait, bool *done) { TupleQueueReader *reader; - HeapTuple tup; + StorageTuple tup; MemoryContext oldContext; MemoryContext tupleContext; diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 5351cb8..f770bc4 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -117,7 +117,7 @@ IndexOnlyNext(IndexOnlyScanState *node) */ while ((tid = index_getnext_tid(scandesc, direction)) != NULL) { - HeapTuple tuple = NULL; + StorageTuple tuple = NULL; CHECK_FOR_INTERRUPTS(); @@ -186,7 +186,7 @@ IndexOnlyNext(IndexOnlyScanState *node) /* * Fill the scan tuple slot with data from the index. This might be - * provided in either HeapTuple or IndexTuple format. Conceivably an + * provided in either StorageTuple or IndexTuple format. Conceivably an * index AM might fill both fields, in which case we prefer the heap * format, since it's probably a bit cheaper to fill a slot from. 
*/ diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 638b17b..7330ff9 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -51,7 +51,7 @@ typedef struct { pairingheap_node ph_node; - HeapTuple htup; + StorageTuple htup; Datum *orderbyvals; bool *orderbynulls; } ReorderTuple; @@ -65,9 +65,9 @@ static int cmp_orderbyvals(const Datum *adist, const bool *anulls, IndexScanState *node); static int reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg); -static void reorderqueue_push(IndexScanState *node, HeapTuple tuple, +static void reorderqueue_push(IndexScanState *node, StorageTuple tuple, Datum *orderbyvals, bool *orderbynulls); -static HeapTuple reorderqueue_pop(IndexScanState *node); +static StorageTuple reorderqueue_pop(IndexScanState *node); /* ---------------------------------------------------------------- @@ -84,7 +84,7 @@ IndexNext(IndexScanState *node) ExprContext *econtext; ScanDirection direction; IndexScanDesc scandesc; - HeapTuple tuple; + StorageTuple tuple; TupleTableSlot *slot; /* @@ -185,7 +185,7 @@ IndexNextWithReorder(IndexScanState *node) EState *estate; ExprContext *econtext; IndexScanDesc scandesc; - HeapTuple tuple; + StorageTuple tuple; TupleTableSlot *slot; ReorderTuple *topmost = NULL; bool was_exact; @@ -483,7 +483,7 @@ reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b, * Helper function to push a tuple to the reorder queue. */ static void -reorderqueue_push(IndexScanState *node, HeapTuple tuple, +reorderqueue_push(IndexScanState *node, StorageTuple tuple, Datum *orderbyvals, bool *orderbynulls) { IndexScanDesc scandesc = node->iss_ScanDesc; @@ -516,10 +516,10 @@ reorderqueue_push(IndexScanState *node, HeapTuple tuple, /* * Helper function to pop the next tuple from the reorder queue. 
 */
-static HeapTuple
+static StorageTuple
 reorderqueue_pop(IndexScanState *node)
 {
-	HeapTuple	result;
+	StorageTuple result;
 	ReorderTuple *topmost;
 	int			i;
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 6a118d1..04f85e5 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -29,9 +29,9 @@ static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
 static TupleTableSlot *SampleNext(SampleScanState *node);
 static void tablesample_init(SampleScanState *scanstate);
-static HeapTuple tablesample_getnext(SampleScanState *scanstate);
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
-				   HeapScanDesc scan);
+static StorageTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(StorageTuple tuple, OffsetNumber tupoffset,
+				   HeapScanDesc scan);	/* XXX: still takes a HeapScanDesc */
 
 /* ----------------------------------------------------------------
  *						Scan Support
@@ -47,7 +47,7 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
 {
-	HeapTuple	tuple;
+	StorageTuple tuple;
 	TupleTableSlot *slot;
 
 	/*
@@ -66,7 +66,8 @@ SampleNext(SampleScanState *node)
 	if (tuple)
 		ExecStoreTuple(tuple,	/* tuple to store */
 					   slot,	/* slot to store in */
-					   node->ss.ss_currentScanDesc->rs_cbuf,	/* tuple's buffer */
+					   /* XXX was node->ss.ss_currentScanDesc->rs_cbuf (tuple's buffer) */
+					   InvalidBuffer,
 					   false);	/* don't pfree this pointer */
 	else
 		ExecClearTuple(slot);
@@ -244,7 +245,7 @@ ExecEndSampleScan(SampleScanState *node)
 	 * close heap scan
 	 */
 	if (node->ss.ss_currentScanDesc)
-		heap_endscan(node->ss.ss_currentScanDesc);
+		storage_endscan(node->ss.ss_currentScanDesc);
 
 	/*
 	 * close the heap relation.
@@ -349,7 +350,7 @@ tablesample_init(SampleScanState *scanstate)
 	if (scanstate->ss.ss_currentScanDesc == NULL)
 	{
 		scanstate->ss.ss_currentScanDesc =
-			heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+			storage_beginscan_sampling(scanstate->ss.ss_currentRelation,
 									scanstate->ss.ps.state->es_snapshot,
 									0, NULL,
 									scanstate->use_bulkread,
@@ -358,7 +359,7 @@ tablesample_init(SampleScanState *scanstate)
 	}
 	else
 	{
-		heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+		storage_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
 							   scanstate->use_bulkread,
 							   allow_sync,
 							   scanstate->use_pagemode);
@@ -376,7 +377,7 @@ tablesample_init(SampleScanState *scanstate)
  * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
  * perhaps be better to refactor to share more code.
  */
-static HeapTuple
+static StorageTuple
 tablesample_getnext(SampleScanState *scanstate)
 {
 	TsmRoutine *tsm = scanstate->tsmroutine;
@@ -554,7 +555,7 @@ tablesample_getnext(SampleScanState *scanstate)
  * Check visibility of the tuple.
  */
 static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+SampleTupleVisible(StorageTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)	/* XXX: still takes a HeapScanDesc */
 {
 	if (scan->rs_pageatatime)
 	{
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index d4ac939..839d3a6 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -28,6 +28,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/storageam.h"
 #include "executor/execdebug.h"
 #include "executor/nodeSeqscan.h"
 #include "utils/rel.h"
@@ -49,8 +50,7 @@ static TupleTableSlot *SeqNext(SeqScanState *node);
 static TupleTableSlot *
 SeqNext(SeqScanState *node)
 {
-	HeapTuple	tuple;
-	HeapScanDesc scandesc;
+	StorageScanDesc scandesc;
 	EState	   *estate;
 	ScanDirection direction;
 	TupleTableSlot *slot;
@@ -69,7 +69,7 @@ SeqNext(SeqScanState *node)
 		 * We reach here if the scan is not parallel, or if we're executing a
 		 * scan that was intended to be parallel serially.
 		 */
-		scandesc = heap_beginscan(node->ss.ss_currentRelation,
+		scandesc = storage_beginscan(node->ss.ss_currentRelation,
 								  estate->es_snapshot, 0, NULL);
 		node->ss.ss_currentScanDesc = scandesc;
 	}
@@ -78,26 +78,7 @@ SeqNext(SeqScanState *node)
 	/*
 	 * get the next tuple from the table
 	 */
-	tuple = heap_getnext(scandesc, direction);
-
-	/*
-	 * save the tuple and the buffer returned to us by the access methods in
-	 * our scan tuple slot and return the slot.  Note: we pass 'false' because
-	 * tuples returned by heap_getnext() are pointers onto disk pages and were
-	 * not created with palloc() and so should not be pfree()'d.  Note also
-	 * that ExecStoreTuple will increment the refcount of the buffer; the
-	 * refcount will not be dropped until the tuple table slot is cleared.
-	 */
-	if (tuple)
-		ExecStoreTuple(tuple,	/* tuple to store */
-					   slot,	/* slot to store in */
-					   scandesc->rs_cbuf,	/* buffer associated with this
-											 * tuple */
-					   false);	/* don't pfree this pointer */
-	else
-		ExecClearTuple(slot);
-
-	return slot;
+	return storage_getnextslot(scandesc, direction, slot);
 }
 
 /*
@@ -225,7 +206,7 @@ void
 ExecEndSeqScan(SeqScanState *node)
 {
 	Relation	relation;
-	HeapScanDesc scanDesc;
+	StorageScanDesc scanDesc;
 
 	/*
 	 * get information from node
 	 */
@@ -248,7 +229,7 @@ ExecEndSeqScan(SeqScanState *node)
 	 * close heap scan
 	 */
 	if (scanDesc != NULL)
-		heap_endscan(scanDesc);
+		storage_endscan(scanDesc);
 
 	/*
 	 * close the heap relation.
@@ -270,12 +251,12 @@ ExecEndSeqScan(SeqScanState *node) void ExecReScanSeqScan(SeqScanState *node) { - HeapScanDesc scan; + StorageScanDesc scan; scan = node->ss.ss_currentScanDesc; if (scan != NULL) - heap_rescan(scan, /* scan desc */ + storage_rescan(scan, /* scan desc */ NULL); /* new scan keys */ ExecScanReScan((ScanState *) node); @@ -322,7 +303,7 @@ ExecSeqScanInitializeDSM(SeqScanState *node, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - heap_beginscan_parallel(node->ss.ss_currentRelation, pscan); + storage_beginscan_parallel(node->ss.ss_currentRelation, pscan); } /* ---------------------------------------------------------------- @@ -353,5 +334,5 @@ ExecSeqScanInitializeWorker(SeqScanState *node, shm_toc *toc) pscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - heap_beginscan_parallel(node->ss.ss_currentRelation, pscan); + storage_beginscan_parallel(node->ss.ss_currentRelation, pscan); } diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 80be460..d55a752 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2091,7 +2091,7 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, { Oid inputTypes[FUNC_MAX_ARGS]; int numArguments; - HeapTuple aggTuple; + StorageTuple aggTuple; Form_pg_aggregate aggform; Oid aggtranstype; AttrNumber initvalAttNo; @@ -2159,7 +2159,7 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, /* Check that aggregate owner has permission to call component fns */ { - HeapTuple procTuple; + StorageTuple procTuple; Oid aggOwner; procTuple = SearchSysCache1(PROCOID, diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index afe231f..418c2a6 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -627,11 +627,11 @@ SPI_freeplan(SPIPlanPtr plan) return 0; } -HeapTuple -SPI_copytuple(HeapTuple tuple) +StorageTuple +SPI_copytuple(StorageTuple tuple) { MemoryContext oldcxt; - HeapTuple ctuple; + StorageTuple ctuple; if (tuple == NULL) { @@ -655,7 +655,7 @@ SPI_copytuple(HeapTuple tuple) } HeapTupleHeader -SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc) +SPI_returntuple(StorageTuple tuple, TupleDesc tupdesc) { MemoryContext oldcxt; HeapTupleHeader dtup; @@ -686,7 +686,7 @@ SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc) return dtup; } -HeapTuple +StorageTuple SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum, Datum *Values, const char *Nulls) { @@ -854,7 +854,7 @@ char * SPI_gettype(TupleDesc tupdesc, int fnumber) { Oid typoid; - HeapTuple typeTuple; + StorageTuple typeTuple; char *result; SPI_result = 0; @@ -962,7 +962,7 @@ SPI_datumTransfer(Datum value, bool typByVal, int typLen) } void -SPI_freetuple(HeapTuple tuple) +SPI_freetuple(StorageTuple tuple) { /* No longer need to worry which context tuple was in... 
*/ heap_freetuple(tuple); @@ -1683,7 +1683,7 @@ spi_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo) /* set up initial allocations */ tuptable->alloced = tuptable->free = 128; - tuptable->vals = (HeapTuple *) palloc(tuptable->alloced * sizeof(HeapTuple)); + tuptable->vals = (StorageTuple *) palloc(tuptable->alloced * sizeof(StorageTuple)); tuptable->tupdesc = CreateTupleDescCopy(typeinfo); MemoryContextSwitchTo(oldcxt); @@ -1714,8 +1714,8 @@ spi_printtup(TupleTableSlot *slot, DestReceiver *self) /* Double the size of the pointer array */ tuptable->free = tuptable->alloced; tuptable->alloced += tuptable->free; - tuptable->vals = (HeapTuple *) repalloc_huge(tuptable->vals, - tuptable->alloced * sizeof(HeapTuple)); + tuptable->vals = (StorageTuple *) repalloc_huge(tuptable->vals, + tuptable->alloced * sizeof(StorageTuple)); } tuptable->vals[tuptable->alloced - tuptable->free] = diff --git a/src/backend/executor/tqueue.c b/src/backend/executor/tqueue.c index 81964d7..81d0adc 100644 --- a/src/backend/executor/tqueue.c +++ b/src/backend/executor/tqueue.c @@ -192,12 +192,12 @@ static void TQSendRecordInfo(TQueueDestReceiver *tqueue, int32 typmod, TupleDesc tupledesc); static void TupleQueueHandleControlMessage(TupleQueueReader *reader, Size nbytes, char *data); -static HeapTuple TupleQueueHandleDataMessage(TupleQueueReader *reader, +static StorageTuple TupleQueueHandleDataMessage(TupleQueueReader *reader, Size nbytes, HeapTupleHeader data); -static HeapTuple TQRemapTuple(TupleQueueReader *reader, +static StorageTuple TQRemapTuple(TupleQueueReader *reader, TupleDesc tupledesc, TupleRemapInfo **field_remapinfo, - HeapTuple tuple); + StorageTuple tuple); static Datum TQRemap(TupleQueueReader *reader, TupleRemapInfo *remapinfo, Datum value, bool *changed); static Datum TQRemapArray(TupleQueueReader *reader, ArrayRemapInfo *remapinfo, @@ -682,7 +682,7 @@ DestroyTupleQueueReader(TupleQueueReader *reader) * accumulate bytes from a partially-read message, so it's useful to call * this with nowait = true even if nothing is returned. */ -HeapTuple +StorageTuple TupleQueueReaderNext(TupleQueueReader *reader, bool nowait, bool *done) { shm_mq_result result; @@ -737,7 +737,7 @@ TupleQueueReaderNext(TupleQueueReader *reader, bool nowait, bool *done) /* * Handle a data message - that is, a tuple - from the remote side. */ -static HeapTuple +static StorageTuple TupleQueueHandleDataMessage(TupleQueueReader *reader, Size nbytes, HeapTupleHeader data) @@ -766,11 +766,11 @@ TupleQueueHandleDataMessage(TupleQueueReader *reader, /* * Copy the given tuple, remapping any transient typmods contained in it. */ -static HeapTuple +static StorageTuple TQRemapTuple(TupleQueueReader *reader, TupleDesc tupledesc, TupleRemapInfo **field_remapinfo, - HeapTuple tuple) + StorageTuple tuple) { Datum *values; bool *isnull; @@ -1128,7 +1128,7 @@ TupleQueueHandleControlMessage(TupleQueueReader *reader, Size nbytes, static TupleRemapInfo * BuildTupleRemapInfo(Oid typid, MemoryContext mycontext) { - HeapTuple tup; + StorageTuple tup; Form_pg_type typ; /* This is recursive, so it could be driven to stack overflow. 
*/ diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 776b1c0..fec203d 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -1882,9 +1882,9 @@ get_database_list(void) (void) GetTransactionSnapshot(); rel = heap_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = storage_beginscan_catalog(rel, 0, NULL); - while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + while (HeapTupleIsValid(tup = storage_getnext(scan, ForwardScanDirection))) { Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup); avw_dbase *avdb; @@ -1911,7 +1911,7 @@ get_database_list(void) MemoryContextSwitchTo(oldcxt); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); CommitTransactionCommand(); @@ -2042,13 +2042,13 @@ do_autovacuum(void) * wide tables there might be proportionally much more activity in the * TOAST table than in its parent. */ - relScan = heap_beginscan_catalog(classRel, 0, NULL); + relScan = storage_beginscan_catalog(classRel, 0, NULL); /* * On the first pass, we collect main tables to vacuum, and also the main * table relid to TOAST relid mapping. */ - while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) + while ((tuple = storage_getnext(relScan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); PgStat_StatTabEntry *tabentry; @@ -2134,7 +2134,7 @@ do_autovacuum(void) } } - heap_endscan(relScan); + storage_endscan(relScan); /* second pass: check TOAST tables */ ScanKeyInit(&key, @@ -2142,8 +2142,8 @@ do_autovacuum(void) BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(RELKIND_TOASTVALUE)); - relScan = heap_beginscan_catalog(classRel, 1, &key); - while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) + relScan = storage_beginscan_catalog(classRel, 1, &key); + while ((tuple = storage_getnext(relScan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); PgStat_StatTabEntry *tabentry; @@ -2189,7 +2189,7 @@ do_autovacuum(void) table_oids = lappend_oid(table_oids, relid); } - heap_endscan(relScan); + storage_endscan(relScan); heap_close(classRel, AccessShareLock); /* diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index accf302..74113a7 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -36,6 +36,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/transam.h" #include "access/twophase_rmgr.h" #include "access/xact.h" @@ -1221,8 +1222,8 @@ pgstat_collect_oids(Oid catalogid) rel = heap_open(catalogid, AccessShareLock); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = storage_beginscan(rel, snapshot, 0, NULL); + while ((tup = storage_getnext(scan, ForwardScanDirection)) != NULL) { Oid thisoid = HeapTupleGetOid(tup); @@ -1230,7 +1231,7 @@ pgstat_collect_oids(Oid catalogid) (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL); } - heap_endscan(scan); + storage_endscan(scan); UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 6c89442..fca56d4 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c 
@@ -24,6 +24,7 @@ #include "access/heapam.h" #include "access/htup.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/xact.h" #include "catalog/pg_subscription.h" @@ -124,9 +125,9 @@ get_subscription_list(void) (void) GetTransactionSnapshot(); rel = heap_open(SubscriptionRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = storage_beginscan_catalog(rel, 0, NULL); - while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + while (HeapTupleIsValid(tup = storage_getnext(scan, ForwardScanDirection))) { Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup); Subscription *sub; @@ -152,7 +153,7 @@ get_subscription_list(void) MemoryContextSwitchTo(oldcxt); } - heap_endscan(scan); + storage_endscan(scan); heap_close(rel, AccessShareLock); CommitTransactionCommand(); diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 071b3a9..4924dac 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -17,6 +17,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" +#include "access/storageam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -435,13 +436,13 @@ DefineQueryRewrite(char *rulename, RelationGetRelationName(event_relation)))); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scanDesc = heap_beginscan(event_relation, snapshot, 0, NULL); - if (heap_getnext(scanDesc, ForwardScanDirection) != NULL) + scanDesc = storage_beginscan(event_relation, snapshot, 0, NULL); + if (storage_getnext(scanDesc, ForwardScanDirection) != NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not convert table \"%s\" to a view because it is not empty", RelationGetRelationName(event_relation)))); - heap_endscan(scanDesc); + storage_endscan(scanDesc); UnregisterSnapshot(snapshot); if (event_relation->rd_rel->relhastriggers) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index eb6960d..82c042a 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -21,6 +21,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/storageam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -1208,10 +1209,10 @@ ThereIsAtLeastOneRole(void) pg_authid_rel = heap_open(AuthIdRelationId, AccessShareLock); - scan = heap_beginscan_catalog(pg_authid_rel, 0, NULL); - result = (heap_getnext(scan, ForwardScanDirection) != NULL); + scan = storage_beginscan_catalog(pg_authid_rel, 0, NULL); + result = (storage_getnext(scan, ForwardScanDirection) != NULL); - heap_endscan(scan); + storage_endscan(scan); heap_close(pg_authid_rel, AccessShareLock); return result; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index cdd45ef..4cddd73 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -106,40 +106,16 @@ typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc; */ #define HeapScanIsValid(scan) PointerIsValid(scan) -extern HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key); -extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys, - ScanKey key); -extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync); -extern HeapScanDesc heap_beginscan_bm(Relation relation, 
Snapshot snapshot, - int nkeys, ScanKey key); -extern HeapScanDesc heap_beginscan_sampling(Relation relation, - Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode); -extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, - BlockNumber endBlk); extern void heapgetpage(HeapScanDesc scan, BlockNumber page); -extern void heap_rescan(HeapScanDesc scan, ScanKey key); -extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode); -extern void heap_endscan(HeapScanDesc scan); -extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); extern Size heap_parallelscan_estimate(Snapshot snapshot); extern void heap_parallelscan_initialize(ParallelHeapScanDesc target, Relation relation, Snapshot snapshot); extern void heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan); -extern HeapScanDesc heap_beginscan_parallel(Relation, ParallelHeapScanDesc); extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate); -extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation, - Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, - bool *all_dead, bool first_call); -extern bool heap_hot_search(ItemPointer tid, Relation relation, - Snapshot snapshot, bool *all_dead); + extern void setLastTid(const ItemPointer tid); extern BulkInsertState GetBulkInsertState(void); @@ -147,8 +123,6 @@ extern void FreeBulkInsertState(BulkInsertState); extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); extern void heap_inplace_update(Relation relation, HeapTuple tuple); -extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, - TransactionId cutoff_multi); extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); @@ -158,8 +132,6 @@ extern void simple_heap_delete(Relation relation, ItemPointer tid); extern void simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup); -extern void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot); - /* in heap/pruneheap.c */ extern void heap_page_prune_opt(Relation relation, Buffer buffer); extern int heap_page_prune(Relation relation, Buffer buffer, diff --git a/src/include/access/heapam_common.h b/src/include/access/heapam_common.h index 799b4ed..66a96d7 100644 --- a/src/include/access/heapam_common.h +++ b/src/include/access/heapam_common.h @@ -107,6 +107,9 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] = /* Get the LOCKMODE for a given MultiXactStatus */ #define LOCKMODE_from_mxstatus(status) \ (tupleLockExtraInfo[TUPLOCK_from_mxstatus((status))].hwlock) + +extern bool synchronize_seqscans; + extern HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); @@ -136,6 +139,11 @@ extern void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 in extern MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update); extern void heap_inplace_update(Relation relation, HeapTuple tuple); + +extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, + MultiXactId cutoff_multi, Buffer buf); +extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); + extern bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy 
wait_policy, bool *have_tuple_lock); diff --git a/src/include/access/storageam.h b/src/include/access/storageam.h index 9502c92..507af71 100644 --- a/src/include/access/storageam.h +++ b/src/include/access/storageam.h @@ -19,6 +19,7 @@ /* A physical tuple coming from a storage AM scan */ typedef void *StorageTuple; +typedef void *StorageScanDesc; typedef union tuple_data { @@ -36,6 +37,34 @@ typedef enum tuple_data_flags CTID } tuple_data_flags; +extern HeapScanDesc storage_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan); + +extern void storage_setscanlimits(HeapScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks); +extern HeapScanDesc storage_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key); +extern HeapScanDesc storage_beginscan_catalog(Relation relation, int nkeys, ScanKey key); +extern HeapScanDesc storage_beginscan_strat(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + bool allow_strat, bool allow_sync); +extern HeapScanDesc storage_beginscan_bm(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key); +extern HeapScanDesc storage_beginscan_sampling(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode); + +extern void storage_endscan(HeapScanDesc scan); +extern void storage_rescan(HeapScanDesc scan, ScanKey key); +extern void storage_rescan_set_params(HeapScanDesc scan, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode); +extern void storage_update_snapshot(HeapScanDesc scan, Snapshot snapshot); + +extern StorageTuple storage_getnext(HeapScanDesc sscan, ScanDirection direction); +extern TupleTableSlot* storage_getnextslot(HeapScanDesc sscan, ScanDirection direction, TupleTableSlot *slot); + +extern void storage_get_latest_tid(Relation relation, + Snapshot snapshot, + ItemPointer tid); + extern bool storage_fetch(Relation relation, ItemPointer tid, Snapshot snapshot, @@ -44,6 +73,15 @@ extern bool storage_fetch(Relation relation, bool keep_buf, Relation stats_relation); +extern bool storage_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, + Snapshot snapshot, HeapTuple heapTuple, + bool *all_dead, bool first_call); +extern bool storage_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot, + bool *all_dead); + +extern bool storage_freeze_tuple(Relation rel, HeapTupleHeader tuple, TransactionId cutoff_xid, + TransactionId cutoff_multi); + extern HTSU_Result storage_lock_tuple(Relation relation, ItemPointer tid, StorageTuple *stuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, @@ -72,10 +110,6 @@ extern bool storage_tuple_is_heaponly(Relation relation, StorageTuple tuple); extern StorageTuple storage_tuple_by_datum(Relation relation, Datum data, Oid tableoid); -extern void storage_get_latest_tid(Relation relation, - Snapshot snapshot, - ItemPointer tid); - extern void storage_sync(Relation rel); #endif diff --git a/src/include/executor/functions.h b/src/include/executor/functions.h index 718d894..7f9bef1 100644 --- a/src/include/executor/functions.h +++ b/src/include/executor/functions.h @@ -22,7 +22,7 @@ typedef struct SQLFunctionParseInfo *SQLFunctionParseInfoPtr; extern Datum fmgr_sql(PG_FUNCTION_ARGS); -extern SQLFunctionParseInfoPtr prepare_sql_fn_parse_info(HeapTuple procedureTuple, +extern SQLFunctionParseInfoPtr prepare_sql_fn_parse_info(StorageTuple procedureTuple, Node *call_expr, Oid inputCollation); diff --git a/src/include/executor/spi.h 
b/src/include/executor/spi.h index acade7e..d466c99 100644 --- a/src/include/executor/spi.h +++ b/src/include/executor/spi.h @@ -25,7 +25,7 @@ typedef struct SPITupleTable uint64 alloced; /* # of alloced vals */ uint64 free; /* # of free vals */ TupleDesc tupdesc; /* tuple descriptor */ - HeapTuple *vals; /* tuples */ + StorageTuple *vals; /* tuples */ slist_node next; /* link for internal bookkeeping */ SubTransactionId subid; /* subxact in which tuptable was created */ } SPITupleTable; @@ -117,9 +117,9 @@ extern const char *SPI_result_code_string(int code); extern List *SPI_plan_get_plan_sources(SPIPlanPtr plan); extern CachedPlan *SPI_plan_get_cached_plan(SPIPlanPtr plan); -extern HeapTuple SPI_copytuple(HeapTuple tuple); -extern HeapTupleHeader SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc); -extern HeapTuple SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, +extern StorageTuple SPI_copytuple(StorageTuple tuple); +extern HeapTupleHeader SPI_returntuple(StorageTuple tuple, TupleDesc tupdesc); +extern StorageTuple SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum, Datum *Values, const char *Nulls); extern int SPI_fnumber(TupleDesc tupdesc, const char *fname); extern char *SPI_fname(TupleDesc tupdesc, int fnumber); @@ -133,7 +133,7 @@ extern void *SPI_palloc(Size size); extern void *SPI_repalloc(void *pointer, Size size); extern void SPI_pfree(void *pointer); extern Datum SPI_datumTransfer(Datum value, bool typByVal, int typLen); -extern void SPI_freetuple(HeapTuple pointer); +extern void SPI_freetuple(StorageTuple pointer); extern void SPI_freetuptable(SPITupleTable *tuptable); extern Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, diff --git a/src/include/executor/tqueue.h b/src/include/executor/tqueue.h index a717ac6..4156767 100644 --- a/src/include/executor/tqueue.h +++ b/src/include/executor/tqueue.h @@ -27,7 +27,7 @@ extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle); extern TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle, TupleDesc tupledesc); extern void DestroyTupleQueueReader(TupleQueueReader *reader); -extern HeapTuple TupleQueueReaderNext(TupleQueueReader *reader, +extern StorageTuple TupleQueueReaderNext(TupleQueueReader *reader, bool nowait, bool *done); #endif /* TQUEUE_H */ diff --git a/src/include/funcapi.h b/src/include/funcapi.h index 951af2a..ab0e091 100644 --- a/src/include/funcapi.h +++ b/src/include/funcapi.h @@ -229,7 +229,7 @@ extern TupleDesc TypeGetTupleDesc(Oid typeoid, List *colaliases); /* from execTuples.c */ extern TupleDesc BlessTupleDesc(TupleDesc tupdesc); extern AttInMetadata *TupleDescGetAttInMetadata(TupleDesc tupdesc); -extern HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values); +extern StorageTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values); extern Datum HeapTupleHeaderGetDatum(HeapTupleHeader tuple); extern TupleTableSlot *TupleDescGetSlot(TupleDesc tupdesc); -- 2.7.4.windows.1
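For the executor-facing paths the patch goes one step further:
storage_getnextslot() stores the next tuple straight into a TupleTableSlot,
so callers such as SeqNext() and RelationFindReplTupleSeq() above no longer
touch a HeapTuple or a buffer at all. A minimal sketch of that loop,
mirroring the end-of-scan test used in execReplication.c (the snapshot and
slot arguments are assumed to be set up by the caller and are illustrative
here):

	/* Sketch: slot-based variant of the same scan. */
	static void
	scan_all_slots(Relation rel, Snapshot snapshot, TupleTableSlot *slot)
	{
		StorageScanDesc scan = storage_beginscan(rel, snapshot, 0, NULL);
		TupleTableSlot *res;

		/*
		 * storage_getnextslot() fills the caller-supplied slot in place;
		 * as in RelationFindReplTupleSeq(), treat a NULL return or an
		 * empty slot as end of scan.
		 */
		while ((res = storage_getnextslot(scan, ForwardScanDirection, slot)) != NULL &&
			   !TupIsNull(res))
		{
			/* consume the slot; no per-tuple HeapTuple is materialized */
		}

		storage_endscan(scan);
	}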