diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 4d8ad754f8..50b55f5d01 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -817,6 +817,18 @@ analyze threshold = analyze base threshold + analyze scale factor * number of tu + For declaratively partitioned tables, only analyze is supported. + The same analyze threshold defined above is used, + but the number of tuples is the sum of their children's + pg_class.reltuples. + Also, the total number of tuples inserted, updated, or deleted since the last + ANALYZE, which is compared with the threshold, is calculated by adding up + the children's changed-tuple counts as of their previous ANALYZE. + This is because partitioned tables don't have any data. So analyze on partitioned + tables is one lap behind their children. + + + Temporary tables cannot be accessed by autovacuum. Therefore, appropriate vacuum and analyze operations should be performed via session SQL commands. diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 569f4c9da7..b4ad435966 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1337,8 +1337,6 @@ WITH ( MODULUS numeric_literal, REM If a table parameter value is set and the equivalent toast. parameter is not, the TOAST table will use the table's parameter value. - Specifying these parameters for partitioned tables is not supported, - but you may specify them for individual leaf partitions. 
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 8ccc228a8c..35bc2e5bdb 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -108,7 +108,7 @@ static relopt_bool boolRelOpts[] = { "autovacuum_enabled", "Enables autovacuum in this relation", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST | RELOPT_KIND_PARTITIONED, ShareUpdateExclusiveLock }, true @@ -246,7 +246,7 @@ static relopt_int intRelOpts[] = { "autovacuum_analyze_threshold", "Minimum number of tuple inserts, updates or deletes prior to analyze", - RELOPT_KIND_HEAP, + RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED, ShareUpdateExclusiveLock }, -1, 0, INT_MAX @@ -420,7 +420,7 @@ static relopt_real realRelOpts[] = { "autovacuum_analyze_scale_factor", "Number of tuple inserts, updates or deletes prior to analyze as a fraction of reltuples", - RELOPT_KIND_HEAP, + RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED, ShareUpdateExclusiveLock }, -1, 0.0, 100.0 @@ -1961,13 +1961,12 @@ build_local_reloptions(local_relopts *relopts, Datum options, bool validate) bytea * partitioned_table_reloptions(Datum reloptions, bool validate) { + /* - * There are no options for partitioned tables yet, but this is able to do - * some validation. + * autovacuum_enabled, autovacuum_analyze_threshold and + * autovacuum_analyze_scale_factor are supported for partitioned tables. 
*/ - return (bytea *) build_reloptions(reloptions, validate, - RELOPT_KIND_PARTITIONED, - 0, NULL, 0); + return default_reloptions(reloptions, validate, RELOPT_KIND_PARTITIONED); } /* diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index b140c210bc..d7762aa3eb 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -591,7 +591,7 @@ CREATE VIEW pg_stat_all_tables AS FROM pg_class C LEFT JOIN pg_index I ON C.oid = I.indrelid LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) - WHERE C.relkind IN ('r', 't', 'm') + WHERE C.relkind IN ('r', 't', 'm', 'p') GROUP BY C.oid, N.nspname, C.relname; CREATE VIEW pg_stat_xact_all_tables AS @@ -611,7 +611,7 @@ CREATE VIEW pg_stat_xact_all_tables AS FROM pg_class C LEFT JOIN pg_index I ON C.oid = I.indrelid LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) - WHERE C.relkind IN ('r', 't', 'm') + WHERE C.relkind IN ('r', 't', 'm', 'p') GROUP BY C.oid, N.nspname, C.relname; CREATE VIEW pg_stat_sys_tables AS diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 8af12b5c6b..9feb21f660 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -30,6 +30,7 @@ #include "catalog/catalog.h" #include "catalog/index.h" #include "catalog/indexing.h" +#include "catalog/partition.h" #include "catalog/pg_collation.h" #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" @@ -38,6 +39,7 @@ #include "commands/progress.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" +#include "common/hashfn.h" #include "executor/executor.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -107,6 +109,45 @@ static void update_attstats(Oid relid, bool inh, static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); +typedef struct analyze_oident +{ + Oid oid; + char status; +} analyze_oident; + 
+StaticAssertDecl(sizeof(Oid) == 4, "oid is not compatible with uint32"); +#define SH_PREFIX analyze_oids +#define SH_ELEMENT_TYPE analyze_oident +#define SH_KEY_TYPE Oid +#define SH_KEY oid +#define SH_HASH_KEY(tb, key) hash_bytes_uint32(key) +#define SH_EQUAL(tb, a, b) (a == b) +#define SH_SCOPE static inline +#define SH_DEFINE +#define SH_DECLARE +#include "lib/simplehash.h" + +#define ANALYZED_OIDS_HASH_SIZE 128 +analyze_oids_hash *analyzed_reloids = NULL; + +void +analyze_init_status(void) +{ + if (analyzed_reloids) + analyze_oids_destroy(analyzed_reloids); + + analyzed_reloids = analyze_oids_create(CurrentMemoryContext, + ANALYZED_OIDS_HASH_SIZE, NULL); +} + +void +analyze_destroy_status(void) +{ + if (analyzed_reloids) + analyze_oids_destroy(analyzed_reloids); + + analyzed_reloids = NULL; +} /* * analyze_rel() -- analyze one relation @@ -312,6 +353,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, Oid save_userid; int save_sec_context; int save_nestlevel; + bool found; if (inh) ereport(elevel, @@ -644,15 +686,70 @@ do_analyze_rel(Relation onerel, VacuumParams *params, } /* - * Report ANALYZE to the stats collector, too. However, if doing - * inherited stats we shouldn't report, because the stats collector only - * tracks per-table stats. Reset the changes_since_analyze counter only - * if we analyzed all columns; otherwise, there is still work for - * auto-analyze to do. + * Report ANALYZE to the stats collector, too. Regarding inherited stats, + * we report only in the case of declarative partitioning. Reset the + * changes_since_analyze counter only if we analyzed all columns; + * otherwise, there is still work for auto-analyze to do. 
*/ - if (!inh) + if (!inh || onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + List *ancestors; + ListCell *lc; + Datum oiddatum = ObjectIdGetDatum(RelationGetRelid(onerel)); + Datum countdatum; + int64 change_count; + + if (onerel->rd_rel->relispartition && + !(onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)) + { + + /* collect all ancestors of this relation */ + ancestors = get_partition_ancestors(RelationGetRelid(onerel)); + + /* + * Read current value of n_mod_since_analyze of this relation. This + * might be a bit stale but we don't need such correctness here. + */ + countdatum = + DirectFunctionCall1(pg_stat_get_mod_since_analyze, oiddatum); + change_count = DatumGetInt64(countdatum); + + /* + * To let partitioned relations be analyzed, we need to update + * changes_since_analyze also for partitioned relations, which don't + * have storage. We move the count of leaf-relations to ancestors + * before resetting. We could instead bump up the counter of all + * ancestors every time leaf relations are updated but that is too + * complex. + */ + foreach (lc, ancestors) + { + Oid toreloid = lfirst_oid(lc); + + /* + * Don't propagate the count to ancestors that have already been + * analyzed in this analyze command or this iteration of + * autoanalyze. 
+ */ + if (analyze_oids_lookup(analyzed_reloids, toreloid) == NULL) + { + Relation rel; + + rel = table_open(toreloid, AccessShareLock); + pgstat_report_partchanges(rel, change_count); + table_close(rel, AccessShareLock); + } + + } + list_free(ancestors); + } pgstat_report_analyze(onerel, totalrows, totaldeadrows, (va_cols == NIL)); + } + + /* Record this relation as "analyzed" */ + analyze_oids_insert(analyzed_reloids, onerel->rd_id, &found); + /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */ if (!(params->options & VACOPT_VACUUM)) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 98270a1049..336d9e297a 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -437,6 +437,7 @@ vacuum(List *relations, VacuumParams *params, VacuumSharedCostBalance = NULL; VacuumActiveNWorkers = NULL; + analyze_init_status(); /* * Loop to process each selected relation. */ @@ -487,6 +488,7 @@ vacuum(List *relations, VacuumParams *params, { in_vacuum = false; VacuumCostActive = false; + analyze_destroy_status(); } PG_END_TRY(); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 7e28944d2f..3c18602e76 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -75,6 +75,7 @@ #include "catalog/dependency.h" #include "catalog/namespace.h" #include "catalog/pg_database.h" +#include "catalog/pg_inherits.h" #include "commands/dbcommands.h" #include "commands/vacuum.h" #include "lib/ilist.h" @@ -2056,11 +2057,11 @@ do_autovacuum(void) * Scan pg_class to determine which tables to vacuum. * * We do this in two passes: on the first one we collect the list of plain - * relations and materialized views, and on the second one we collect - * TOAST tables. 
The reason for doing the second pass is that during it we - * want to use the main relation's pg_class.reloptions entry if the TOAST - * table does not have any, and we cannot obtain it unless we know - * beforehand what's the main table OID. + * relations, materialized views and partitioned tables, and on the second + * one we collect TOAST tables. The reason for doing the second pass is that + * during it we want to use the main relation's pg_class.reloptions entry + * if the TOAST table does not have any, and we cannot obtain it unless we + * know beforehand what's the main table OID. * * We need to check TOAST tables separately because in cases with short, * wide tables there might be proportionally much more activity in the @@ -2083,7 +2084,8 @@ do_autovacuum(void) bool wraparound; if (classForm->relkind != RELKIND_RELATION && - classForm->relkind != RELKIND_MATVIEW) + classForm->relkind != RELKIND_MATVIEW && + classForm->relkind != RELKIND_PARTITIONED_TABLE) continue; relid = classForm->oid; @@ -2746,6 +2748,7 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc) Assert(((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_RELATION || ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW || + ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_PARTITIONED_TABLE || ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE); relopts = extractRelOptions(tup, pg_class_desc, NULL); @@ -3161,7 +3164,41 @@ relation_needs_vacanalyze(Oid relid, */ if (PointerIsValid(tabentry) && AutoVacuumingActive()) { - reltuples = classForm->reltuples; + if (classForm->relkind != RELKIND_PARTITIONED_TABLE) + reltuples = classForm->reltuples; + else + { + /* + * If the relation is a partitioned table, we must add up children's + * reltuples. 
+ */ + List *children; + ListCell *lc; + + reltuples = 0; + + /* Find all members of inheritance set taking AccessShareLock */ + children = find_all_inheritors(relid, AccessShareLock, NULL); + + foreach(lc, children) + { + Oid childOID = lfirst_oid(lc); + HeapTuple childtuple; + Form_pg_class childclass; + + childtuple = SearchSysCache1(RELOID, ObjectIdGetDatum(childOID)); + childclass = (Form_pg_class) GETSTRUCT(childtuple); + + /* Skip a partitioned table and foreign partitions */ + if (RELKIND_HAS_STORAGE(childclass->relkind)) + { + /* Sum up the child's reltuples for its parent table */ + reltuples += childclass->reltuples; + } + ReleaseSysCache(childtuple); + } + } + vactuples = tabentry->n_dead_tuples; instuples = tabentry->inserts_since_vacuum; anltuples = tabentry->changes_since_analyze; diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 7c75a25d21..76075007bf 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -369,6 +369,7 @@ static void pgstat_recv_resetreplslotcounter(PgStat_MsgResetreplslotcounter *msg static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len); static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len); static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len); +static void pgstat_recv_partchanges(PgStat_MsgPartChanges *msg, int len); static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len); static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); static void pgstat_recv_wal(PgStat_MsgWal *msg, int len); @@ -1567,6 +1568,9 @@ pgstat_report_vacuum(Oid tableoid, bool shared, * * Caller must provide new live- and dead-tuples estimates, as well as a * flag indicating whether to reset the changes_since_analyze counter. + * As an exception, only changes_since_analyze is tracked for partitioned + * tables, even though they don't have any data. This counter tells us + * whether partitioned tables need autoanalyze or not. 
* -------- */ void @@ -1587,22 +1591,29 @@ pgstat_report_analyze(Relation rel, * off these counts from what we send to the collector now, else they'll * be double-counted after commit. (This approach also ensures that the * collector ends up with the right numbers if we abort instead of - * committing.) + * committing.) However, for partitioned tables, we will not report both + * livetuples and deadtuples because those tables don't have any data. */ if (rel->pgstat_info != NULL) { PgStat_TableXactStatus *trans; - for (trans = rel->pgstat_info->trans; trans; trans = trans->upper) + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + /* If this rel is partitioned, skip modifying */ + livetuples = deadtuples = 0; + else { - livetuples -= trans->tuples_inserted - trans->tuples_deleted; - deadtuples -= trans->tuples_updated + trans->tuples_deleted; + for (trans = rel->pgstat_info->trans; trans; trans = trans->upper) + { + livetuples -= trans->tuples_inserted - trans->tuples_deleted; + deadtuples -= trans->tuples_updated + trans->tuples_deleted; + } + /* count stuff inserted by already-aborted subxacts, too */ + deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples; + /* Since ANALYZE's counts are estimates, we could have underflowed */ + livetuples = Max(livetuples, 0); + deadtuples = Max(deadtuples, 0); } - /* count stuff inserted by already-aborted subxacts, too */ - deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples; - /* Since ANALYZE's counts are estimates, we could have underflowed */ - livetuples = Max(livetuples, 0); - deadtuples = Max(deadtuples, 0); } pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE); @@ -1617,6 +1628,30 @@ pgstat_report_analyze(Relation rel, } /* -------- + * pgstat_report_partchanges() - + * + * + * Called when a leaf partition is analyzed to tell the collector about + * its parent's changed_tuples. 
+ * -------- + */ +void +pgstat_report_partchanges(Relation rel, PgStat_Counter changed_tuples) +{ + PgStat_MsgPartChanges msg; + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_PARTCHANGES); + msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId; + msg.m_tableoid = RelationGetRelid(rel); + msg.m_changed_tuples = changed_tuples; + pgstat_send(&msg, sizeof(msg)); +} + + +/* -------- * pgstat_report_recovery_conflict() - * * Tell the collector about a Hot Standby recovery conflict. @@ -1932,7 +1967,8 @@ pgstat_initstats(Relation rel) char relkind = rel->rd_rel->relkind; /* We only count stats for things that have storage */ - if (!RELKIND_HAS_STORAGE(relkind)) + if (!RELKIND_HAS_STORAGE(relkind) && + relkind != RELKIND_PARTITIONED_TABLE) { rel->pgstat_info = NULL; return; @@ -4870,6 +4906,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_analyze(&msg.msg_analyze, len); break; + case PGSTAT_MTYPE_PARTCHANGES: + pgstat_recv_partchanges(&msg.msg_partchanges, len); + break; + case PGSTAT_MTYPE_ARCHIVER: pgstat_recv_archiver(&msg.msg_archiver, len); break; @@ -6739,6 +6779,18 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len) } } +static void +pgstat_recv_partchanges(PgStat_MsgPartChanges *msg, int len) +{ + PgStat_StatDBEntry *dbentry; + PgStat_StatTabEntry *tabentry; + + dbentry = pgstat_get_db_entry(msg->m_databaseid, true); + + tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true); + + tabentry->changes_since_analyze += msg->m_changed_tuples; +} /* ---------- * pgstat_recv_archiver() - diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index a4cd721400..c6dcf23898 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -280,6 +280,8 @@ extern Relation vacuum_open_relation(Oid relid, RangeVar *relation, int options, bool verbose, LOCKMODE lmode); /* in commands/analyze.c */ +extern void 
analyze_init_status(void); +extern void analyze_destroy_status(void); extern void analyze_rel(Oid relid, RangeVar *relation, VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5954068dec..d5a0ec6467 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -60,6 +60,7 @@ typedef enum StatMsgType PGSTAT_MTYPE_AUTOVAC_START, PGSTAT_MTYPE_VACUUM, PGSTAT_MTYPE_ANALYZE, + PGSTAT_MTYPE_PARTCHANGES, PGSTAT_MTYPE_ARCHIVER, PGSTAT_MTYPE_BGWRITER, PGSTAT_MTYPE_WAL, @@ -419,6 +420,18 @@ typedef struct PgStat_MsgAnalyze PgStat_Counter m_dead_tuples; } PgStat_MsgAnalyze; +/* ---------- + * PgStat_MsgPartChanges Sent by the autovacuum daemon + * after ANALYZE of leaf partitions + * ---------- + */ +typedef struct PgStat_MsgPartChanges +{ + PgStat_MsgHdr m_hdr; + Oid m_databaseid; + Oid m_tableoid; + PgStat_Counter m_changed_tuples; +} PgStat_MsgPartChanges; /* ---------- * PgStat_MsgArchiver Sent by the archiver to update statistics. 
@@ -643,6 +656,7 @@ typedef union PgStat_Msg PgStat_MsgAutovacStart msg_autovacuum_start; PgStat_MsgVacuum msg_vacuum; PgStat_MsgAnalyze msg_analyze; + PgStat_MsgPartChanges msg_partchanges; PgStat_MsgArchiver msg_archiver; PgStat_MsgBgWriter msg_bgwriter; PgStat_MsgWal msg_wal; @@ -1393,7 +1407,7 @@ extern void pgstat_report_vacuum(Oid tableoid, bool shared, extern void pgstat_report_analyze(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, bool resetcounter); - +extern void pgstat_report_partchanges(Relation rel, PgStat_Counter changed_tuples); extern void pgstat_report_recovery_conflict(int reason); extern void pgstat_report_deadlock(void); extern void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 6293ab57bc..e44f82ec73 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1804,7 +1804,7 @@ pg_stat_all_tables| SELECT c.oid AS relid, FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) - WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char"])) + WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char", 'p'::"char"])) GROUP BY c.oid, n.nspname, c.relname; pg_stat_archiver| SELECT s.archived_count, s.last_archived_wal, @@ -2175,7 +2175,7 @@ pg_stat_xact_all_tables| SELECT c.oid AS relid, FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) - WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char"])) + WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char", 'p'::"char"])) GROUP BY c.oid, n.nspname, c.relname; pg_stat_xact_sys_tables| SELECT pg_stat_xact_all_tables.relid, pg_stat_xact_all_tables.schemaname,