From f640e1b75b09999644ac41b006ac2c69b81b7ddf Mon Sep 17 00:00:00 2001 From: Claudio Freire Date: Fri, 28 Jul 2017 21:31:59 -0300 Subject: [PATCH 1/4] Vacuum: Update FSM more frequently Make Vacuum update the FSM more frequently, to avoid the case where autovacuum fails to reach the point where it updates the FSM in highly contended tables. Implement heap block range FSM vacuuming, and make vacuumlazy use it to vacuum the affected FSM at intermediate steps. Intermediate FSM vacuums are only supposed to make enough free space visible to avoid extension until the final (non-partial) FSM vacuum. Run partial FSM vacuums after each index pass, which is a point at which whole ranges of the heap have been thoroughly cleaned, and we can expect no further updates to those ranges of the FSM save for concurrent activity. When there are no indexes, and thus no index passes, run partial FSM vacuums every 8GB of dirtied pages or 1/8th of the relation, whichever is larger. This allows some partial work to be made visible without incurring quadratic cost. In any case, the FSM is small in relation to the table, so even when quadratic cost is involved, it should not be problematic. Index passes already incur quadratic cost, and the addition of the FSM is unlikely to be measurable. --- src/backend/commands/vacuumlazy.c | 57 +++++++++++++++++++++++++- src/backend/storage/freespace/freespace.c | 67 ++++++++++++++++++++++++++++--- src/include/storage/freespace.h | 2 + 3 files changed, 118 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index f9da24c..c44996f 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -85,6 +85,19 @@ #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ /* + * When a table has no indexes, vacuum the FSM at most once per 1/Nth + * of the relation vacuumed, to prevent bloat during long-running + * vacuums. This specifies the N. 
+ */ +#define VACUUM_FSM_EVERY_FRACTION 8 + +/* + * When a table has no indexes, vacuum the FSM at most once every 8GB + * worth of dirty pages. + */ +#define VACUUM_FSM_EVERY_PAGES (((Size)8 * 1024 * 1024 * 1024) / BLCKSZ) + +/* * Guesstimation of number of dead tuples per page. This is used to * provide an upper limit to memory allocated when vacuuming small * tables. @@ -465,7 +478,10 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid; TransactionId relminmxid = onerel->rd_rel->relminmxid; BlockNumber empty_pages, - vacuumed_pages; + vacuumed_pages, + freespace_updated_pages, + vacuum_fsm_every_pages, + last_fsm_vacuumed_block; double num_tuples, /* total number of nonremovable tuples */ live_tuples, /* live tuples (reltuples estimate) */ tups_vacuumed, /* tuples cleaned up by vacuum */ @@ -500,8 +516,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, get_namespace_name(RelationGetNamespace(onerel)), relname))); - empty_pages = vacuumed_pages = 0; + empty_pages = vacuumed_pages = freespace_updated_pages = 0; num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0; + last_fsm_vacuumed_block = (BlockNumber) 0; indstats = (IndexBulkDeleteResult **) palloc0(nindexes * sizeof(IndexBulkDeleteResult *)); @@ -513,6 +530,16 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vacrelstats->nonempty_pages = 0; vacrelstats->latestRemovedXid = InvalidTransactionId; + /* + * Vacuum the FSM a few times in the middle if the relation is big + * and has no indexes. This is done once per fixed amount of dirtied pages, or + * fraction of the relation, whichever is bigger, to avoid quadratic cost. + * If it has indexes, this is ignored, and the FSM is vacuumed after + * each index pass. 
+ */ + vacuum_fsm_every_pages = nblocks / VACUUM_FSM_EVERY_FRACTION; + vacuum_fsm_every_pages = Max(vacuum_fsm_every_pages, VACUUM_FSM_EVERY_PAGES); + lazy_space_alloc(vacrelstats, nblocks); frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); @@ -752,12 +779,30 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, vacrelstats->num_dead_tuples = 0; vacrelstats->num_index_scans++; + /* + * Vacuum the Free Space Map to make the changes we made visible. + */ + FreeSpaceMapVacuumRange(onerel, last_fsm_vacuumed_block, blkno); + last_fsm_vacuumed_block = blkno; + /* Report that we are once again scanning the heap */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_SCAN_HEAP); } /* + * If there are no indexes then we should periodically vacuum the FSM + * on huge relations to make free space visible early. + */ + else if (nindexes == 0 && freespace_updated_pages > vacuum_fsm_every_pages) + { + /* Vacuum the Free Space Map */ + FreeSpaceMapVacuumRange(onerel, last_fsm_vacuumed_block, blkno); + freespace_updated_pages = 0; + last_fsm_vacuumed_block = blkno; + } + + /* * Pin the visibility map page in case we need to mark the page * all-visible. In most cases this will be very cheap, because we'll * already have the correct page pinned anyway. 
However, it's @@ -873,7 +918,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, MarkBufferDirty(buf); UnlockReleaseBuffer(buf); + freespace_updated_pages++; RecordPageWithFreeSpace(onerel, blkno, freespace); + continue; } @@ -912,6 +959,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, } UnlockReleaseBuffer(buf); + freespace_updated_pages++; RecordPageWithFreeSpace(onerel, blkno, freespace); continue; } @@ -1200,6 +1248,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, */ vacrelstats->num_dead_tuples = 0; vacuumed_pages++; + freespace_updated_pages++; } freespace = PageGetHeapFreeSpace(page); @@ -1302,7 +1351,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * taken if there are no indexes.) */ if (vacrelstats->num_dead_tuples == prev_dead_count) + { RecordPageWithFreeSpace(onerel, blkno, freespace); + } } /* report that everything is scanned and vacuumed */ @@ -1425,6 +1476,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * space on their pages. Pages not having dead tuples recorded from * lazy_scan_heap are not visited at all. * + * Returns the maximum amount of free space on vacuumed pages. + * * Note: the reason for doing this as a second pass is we cannot remove * the tuples until we've removed their index entries, and we want to * process index entry removal in batches as large as possible. 
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index dd8ae52..6fce3ab 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -108,7 +108,8 @@ static Size fsm_space_cat_to_avail(uint8 cat); static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot, uint8 newValue, uint8 minValue); static BlockNumber fsm_search(Relation rel, uint8 min_cat); -static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof); +static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof, + BlockNumber start, BlockNumber end); static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr); static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat); @@ -384,7 +385,25 @@ FreeSpaceMapVacuum(Relation rel) * Traverse the tree in depth-first order. The tree is stored physically * in depth-first order, so this should be pretty I/O efficient. */ - fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy); + fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy, + (BlockNumber) 0, MaxBlockNumber); +} + +/* + * FreeSpaceMapVacuumRange - scan and fix any inconsistencies in the FSM + * + * Only FSM slots covering the given block range will be visited. + */ +void +FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end) +{ + bool dummy; + + /* + * Traverse the tree in depth-first order. The tree is stored physically + * in depth-first order, so this should be pretty I/O efficient. + */ + fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy, start, end); } /******** Internal routines ********/ @@ -783,13 +802,19 @@ fsm_search(Relation rel, uint8 min_cat) /* * Recursive guts of FreeSpaceMapVacuum + * + * On upper-level pages, only the child slots that cover heap blocks + * start through end are recursed into; other branches are skipped. 
*/ static uint8 -fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) +fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p, + BlockNumber start, BlockNumber end) { Buffer buf; Page page; uint8 max_avail; + FSMAddress fsm_start, fsm_end; + uint16 fsm_start_slot, fsm_end_slot; /* Read the page if it exists, or return EOF */ buf = fsm_readbuf(rel, addr, false); @@ -809,10 +834,39 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) */ if (addr.level > FSM_BOTTOM_LEVEL) { - int slot; + int slot, start_slot, end_slot; bool eof = false; - for (slot = 0; slot < SlotsPerFSMPage; slot++) + /* + * Get the location in the FSM of the target block/slot range + * at the right level to know which slots to recurse into + */ + fsm_start = fsm_get_location(start, &fsm_start_slot); + fsm_end = fsm_get_location(end, &fsm_end_slot); + + while (fsm_start.level < addr.level) + { + fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot); + fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot); + } + + Assert(fsm_start.level == addr.level); + + if (fsm_start.logpageno == addr.logpageno) + start_slot = fsm_start_slot; + else if (fsm_start.logpageno > addr.logpageno) + start_slot = SlotsPerFSMPage; + else + start_slot = 0; + + if (fsm_end.logpageno == addr.logpageno) + end_slot = fsm_end_slot+1; + else if (fsm_end.logpageno < addr.logpageno) + end_slot = 0; + else + end_slot = SlotsPerFSMPage; + + for (slot = start_slot; slot < end_slot; slot++) { int child_avail; @@ -820,7 +874,8 @@ fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p) /* After we hit end-of-file, just clear the rest of the slots */ if (!eof) - child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof); + child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof, + start, end); else child_avail = 0; diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h index a517d7e..13f6380 100644 --- a/src/include/storage/freespace.h +++ b/src/include/storage/freespace.h 
@@ -32,6 +32,8 @@ extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks); extern void FreeSpaceMapVacuum(Relation rel); +extern void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, + BlockNumber end); extern void UpdateFreeSpaceMap(Relation rel, BlockNumber startBlkNum, BlockNumber endBlkNum, -- 1.8.4.5