diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 277639f..75c3473 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -157,7 +157,7 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, FileFdwPlanState *fdw_private, Cost *startup_cost, Cost *total_cost); static int file_acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows); @@ -1071,9 +1071,10 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, */ static int file_acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows) { + int targrows = *(int *) target_info; int numrows = 0; double rowstoskip = -1; /* -1 means not set yet */ ReservoirStateData rstate; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index e8cb2d0..215aa7a 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -391,7 +391,7 @@ static void process_query_params(ExprContext *econtext, List *param_exprs, const char **param_values); static int postgresAcquireSampleRowsFunc(Relation relation, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows); static void analyze_row_processor(PGresult *res, int row, @@ -3551,10 +3551,11 @@ postgresAnalyzeForeignTable(Relation relation, */ static int postgresAcquireSampleRowsFunc(Relation relation, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows) { + int targrows = *(int *) target_info; PgFdwAnalyzeState astate; ForeignTable *table; ForeignServer *server; diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index e930731..a5b2f7f 100644 --- a/doc/src/sgml/monitoring.sgml +++ 
b/doc/src/sgml/monitoring.sgml @@ -521,6 +521,13 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser VACUUM, showing current progress. See . + + + pg_stat_progress_analyzepg_stat_progress_analyze + One row for each backend running + ANALYZE, showing current progress. + See the ANALYZE Progress Reporting section. + @@ -3175,9 +3182,8 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, PostgreSQL has the ability to report the progress of - certain commands during command execution. Currently, the only command - which supports progress reporting is VACUUM. This may be - expanded in the future. + certain commands during command execution. Currently, the commands + that support progress reporting are VACUUM and ANALYZE. @@ -3367,7 +3373,149 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, + + + + ANALYZE Progress Reporting + + + Whenever ANALYZE is running, the + pg_stat_progress_analyze view will contain + one row for each backend that is currently analyzing. + The tables below describe the information that will be reported and + provide information about how to interpret it. + + + + <structname>pg_stat_progress_analyze</structname> View + + + + Column + Type + Description + + + + + pid + integer + Process ID of backend. + + + datid + oid + OID of the database to which this backend is connected. + + + datname + name + Name of the database to which this backend is connected. + + + relid + oid + OID of the table being analyzed. + + + phase + text + + Current processing phase of ANALYZE. See the ANALYZE phases table. + + + + num_target_sample_rows + bigint + + Target number of sample rows to collect from the relation. The sample + is taken randomly. Its size depends on the default_statistics_target + parameter value. + + + + num_rows_sampled + bigint + + Total number of rows sampled so far. If the table being analyzed + has one or more children, ANALYZE will gather sample + rows twice: once from the parent table only, and a second time + from the parent table together with all of its children. 
+ + + + num_rows_processed + bigint + + Total number of rows processed so far. + + + + +
+ + + ANALYZE phases + + + + Phase + Description + + + + + + initializing + + ANALYZE is preparing to collect sample rows. + + + + collecting sample rows + + ANALYZE is currently collecting the sample rows of + a single relation. The sample is taken randomly. Its size + depends on the default_statistics_target + parameter value. + + + + collecting inherited sample rows + + ANALYZE is currently collecting inherited sample rows. + If the table being analyzed has one or more children, this phase will + collect sample rows from the parent table and all associated child relations. + The sample is taken randomly. Its size depends on + the default_statistics_target parameter value. + + + + computing statistics + + ANALYZE is currently computing statistical information + from the collected sample, such as the percentage of NULL values, + the average width of a row, and the number of distinct values. + It also stores the most common values and their frequencies. + The number of these values depends on the value of the default_statistics_target + parameter. + + + + computing inherited statistics + + ANALYZE is currently computing statistical information + for the parent table together with its child tables from the collected sample, such as the percentage of NULL values, + the average width of a row, and the number of distinct values. + It also stores the most common values and their frequencies. + The number of these values depends on the value of the default_statistics_target + parameter. + + + + +
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 80d1429..8b6e4c0 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -876,6 +876,22 @@ CREATE VIEW pg_stat_progress_vacuum AS FROM pg_stat_get_progress_info('VACUUM') AS S LEFT JOIN pg_database D ON S.datid = D.oid; +CREATE VIEW pg_stat_progress_analyze AS + SELECT + S.pid AS pid, S.datid AS datid, D.datname AS datname, + S.relid AS relid, + CASE S.param1 WHEN 0 THEN 'initializing' + WHEN 1 THEN 'collecting sample rows' + WHEN 2 THEN 'collecting inherited sample rows' + WHEN 3 THEN 'computing statistics' + WHEN 4 THEN 'computing inherited statistics' + END AS phase, + S.param2 AS num_target_sample_rows, + S.param3 AS num_rows_sampled, + S.param4 AS num_rows_processed + FROM pg_stat_get_progress_info('ANALYZE') AS S + LEFT JOIN pg_database D ON S.datid = D.oid; + CREATE VIEW pg_user_mappings AS SELECT U.oid AS umid, diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 055338f..8e18c23 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -31,6 +31,7 @@ #include "commands/dbcommands.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" +#include "commands/progress.h" #include "executor/executor.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -70,6 +71,12 @@ typedef struct AnlIndexData /* Default statistics target (GUC parameter) */ int default_statistics_target = 100; +typedef struct AcquireSampleRowsInfo +{ + int rows_collected; + int rows_to_collect; +} AcquireSampleRowsInfo; + /* A few variables that don't seem worth passing around as parameters */ static MemoryContext anl_context = NULL; static BufferAccessStrategy vac_strategy; @@ -86,7 +93,7 @@ static void compute_index_stats(Relation onerel, double totalrows, static VacAttrStats *examine_attribute(Relation onerel, int attnum, Node *index_expr); static int acquire_sample_rows(Relation onerel, int 
elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows); static int compare_rows(const void *a, const void *b); static int acquire_inherited_sample_rows(Relation onerel, int elevel, @@ -257,10 +264,16 @@ analyze_rel(Oid relid, RangeVar *relation, int options, MyPgXact->vacuumFlags |= PROC_IN_ANALYZE; LWLockRelease(ProcArrayLock); + /* Report that we are going to start analyzing onerel. */ + pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE, + RelationGetRelid(onerel)); + /* * Do the normal non-recursive ANALYZE. We can skip this for partitioned * tables, which don't contain any rows. */ + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COLLECT_SAMPLE_ROWS); if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages, false, in_outer_xact, elevel); @@ -269,8 +282,15 @@ analyze_rel(Oid relid, RangeVar *relation, int options, * If there are child tables, do recursive ANALYZE. */ if (onerel->rd_rel->relhassubclass) + { + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COLLECT_INH_SAMPLE_ROWS); do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages, true, in_outer_xact, elevel); + } + + /* We're done analyzing. 
*/ + pgstat_progress_end_command(); /* * Close source relation now, but keep lock so that no one deletes it @@ -487,15 +507,24 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, /* * Acquire the sample rows */ + + /* Report the number of target sample rows */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_TARGET_SAMPLE_ROWS, + targrows); + rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); if (inh) numrows = acquire_inherited_sample_rows(onerel, elevel, rows, targrows, &totalrows, &totaldeadrows); else + { + AcquireSampleRowsInfo target = {0, targrows}; + numrows = (*acquirefunc) (onerel, elevel, - rows, targrows, + rows, (void *) &target, &totalrows, &totaldeadrows); + } /* * Compute the statistics. Temporary results during the calculations for @@ -503,6 +532,15 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, * responsible to make sure that whatever they store into the VacAttrStats * structure is allocated in anl_context. */ + + /* Report that statistics will now be computed. 
*/ + if (inh) + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COMPUTE_INH_STATS); + else + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COMPUTE_STATS); + if (numrows > 0) { MemoryContext col_context, @@ -540,6 +578,9 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, } MemoryContextResetAndDeleteChildren(col_context); + + /* Reset rows processed to zero for the next column */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_PROCESSED, 0); } if (hasindex) @@ -982,7 +1023,7 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr) */ static int acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows) { int numrows = 0; /* # rows now in reservoir */ @@ -994,8 +1035,9 @@ acquire_sample_rows(Relation onerel, int elevel, TransactionId OldestXmin; BlockSamplerData bs; ReservoirStateData rstate; + AcquireSampleRowsInfo *target = (AcquireSampleRowsInfo *) target_info; - Assert(targrows > 0); + Assert(target->rows_to_collect > 0); totalblocks = RelationGetNumberOfBlocks(onerel); @@ -1003,9 +1045,9 @@ acquire_sample_rows(Relation onerel, int elevel, OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM); /* Prepare for sampling block numbers */ - BlockSampler_Init(&bs, totalblocks, targrows, random()); + BlockSampler_Init(&bs, totalblocks, target->rows_to_collect, random()); /* Prepare for sampling rows */ - reservoir_init_selection_state(&rstate, targrows); + reservoir_init_selection_state(&rstate, target->rows_to_collect); /* Outer loop over blocks to sample */ while (BlockSampler_HasMore(&bs)) @@ -1142,7 +1184,7 @@ acquire_sample_rows(Relation onerel, int elevel, * we've passed over so far, so when we fall off the end of * the relation we're done. 
*/ - if (numrows < targrows) + if (numrows < target->rows_to_collect) rows[numrows++] = heap_copytuple(&targtuple); else { @@ -1153,7 +1195,8 @@ acquire_sample_rows(Relation onerel, int elevel, * t. */ if (rowstoskip < 0) - rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); + rowstoskip = reservoir_get_next_S(&rstate, samplerows, + target->rows_to_collect); if (rowstoskip <= 0) { @@ -1161,9 +1204,10 @@ acquire_sample_rows(Relation onerel, int elevel, * Found a suitable tuple, so save it, replacing one * old tuple at random */ - int k = (int) (targrows * sampler_random_fract(rstate.randstate)); + int k = (int) (target->rows_to_collect * + sampler_random_fract(rstate.randstate)); - Assert(k >= 0 && k < targrows); + Assert(k >= 0 && k < target->rows_to_collect); heap_freetuple(rows[k]); rows[k] = heap_copytuple(&targtuple); } @@ -1172,6 +1216,10 @@ acquire_sample_rows(Relation onerel, int elevel, } samplerows += 1; + + /* Report number of rows sampled so far */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_SAMPLED, + target->rows_collected + numrows); } } @@ -1187,7 +1235,7 @@ acquire_sample_rows(Relation onerel, int elevel, * (itempointer). It's not worth worrying about corner cases where the * tuples are already sorted. 
*/ - if (numrows == targrows) + if (numrows == target->rows_to_collect) qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); /* @@ -1417,13 +1465,14 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, childtargrows = Min(childtargrows, targrows - numrows); if (childtargrows > 0) { + AcquireSampleRowsInfo target = {numrows, childtargrows}; int childrows; double trows, tdrows; /* Fetch a random sample of the child's rows */ childrows = (*acquirefunc) (childrel, elevel, - rows + numrows, childtargrows, + rows + numrows, (void *) &target, &trows, &tdrows); /* We may need to convert from child's rowtype to parent's */ @@ -1828,6 +1877,9 @@ compute_trivial_stats(VacAttrStatsP stats, vacuum_delay_point(); + /* Update the number of processed rows. */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_PROCESSED, i+1); + value = fetchfunc(stats, i, &isnull); /* Check for null/nonnull */ @@ -1944,6 +1996,9 @@ compute_distinct_stats(VacAttrStatsP stats, vacuum_delay_point(); + /* Update the number of processed rows. */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_PROCESSED, i+1); + value = fetchfunc(stats, i, &isnull); /* Check for null/nonnull */ @@ -2301,6 +2356,9 @@ compute_scalar_stats(VacAttrStatsP stats, vacuum_delay_point(); + /* Update the number of processed rows. */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_PROCESSED, i+1); + value = fetchfunc(stats, i, &isnull); /* Check for null/nonnull */ diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index 017435c..e9f85a6 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -18,7 +18,7 @@ #include "commands/vacuum.h" #include "tsearch/ts_type.h" #include "utils/builtins.h" - +#include "commands/progress.h" /* A hash key for lexemes */ typedef struct @@ -206,6 +206,10 @@ compute_tsvector_stats(VacAttrStats *stats, vacuum_delay_point(); + /* Update the number of processed rows. 
*/ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_PROCESSED, + vector_no+1); + value = fetchfunc(stats, vector_no, &isnull); /* diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c index 85b7a43..89f4766 100644 --- a/src/backend/utils/adt/array_typanalyze.c +++ b/src/backend/utils/adt/array_typanalyze.c @@ -22,7 +22,7 @@ #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/typcache.h" - +#include "commands/progress.h" /* * To avoid consuming too much memory, IO and CPU load during analysis, and/or @@ -318,6 +318,10 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, vacuum_delay_point(); + /* Update the number of processed rows. */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_PROCESSED, + array_no+1); + value = fetchfunc(stats, array_no, &isnull); if (isnull) { diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index a987d0d..eb1dd63 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -465,6 +465,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS) /* Translate command name into command type code. 
*/ if (pg_strcasecmp(cmd, "VACUUM") == 0) cmdtype = PROGRESS_COMMAND_VACUUM; + else if (pg_strcasecmp(cmd, "ANALYZE") == 0) + cmdtype = PROGRESS_COMMAND_ANALYZE; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index a8d585c..a63581d 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -29,6 +29,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rangetypes.h" +#include "commands/progress.h" static int float8_qsort_cmp(const void *a1, const void *a2); static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg); @@ -129,6 +130,9 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, vacuum_delay_point(); + /* Update the number of processed rows. */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_PROCESSED, range_no+1); + value = fetchfunc(stats, range_no, &isnull); if (isnull) { diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 9472ecc..acad833 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -34,4 +34,16 @@ #define PROGRESS_VACUUM_PHASE_TRUNCATE 5 #define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP 6 +/* Progress parameters for analyze */ +#define PROGRESS_ANALYZE_PHASE 0 +#define PROGRESS_ANALYZE_NUM_TARGET_SAMPLE_ROWS 1 +#define PROGRESS_ANALYZE_NUM_ROWS_SAMPLED 2 +#define PROGRESS_ANALYZE_NUM_ROWS_PROCESSED 3 + +/* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */ +#define PROGRESS_ANALYZE_PHASE_COLLECT_SAMPLE_ROWS 1 +#define PROGRESS_ANALYZE_PHASE_COLLECT_INH_SAMPLE_ROWS 2 +#define PROGRESS_ANALYZE_PHASE_COMPUTE_STATS 3 +#define PROGRESS_ANALYZE_PHASE_COMPUTE_INH_STATS 4 + #endif diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 6ca44f7..8349268 100644 --- a/src/include/foreign/fdwapi.h +++ 
b/src/include/foreign/fdwapi.h @@ -132,9 +132,10 @@ typedef void (*ExplainDirectModify_function) (ForeignScanState *node, struct ExplainState *es); typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, - double *totaldeadrows); + HeapTuple *rows, + void *target_info, + double *totalrows, + double *totaldeadrows); typedef bool (*AnalyzeForeignTable_function) (Relation relation, AcquireSampleRowsFunc *func, diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 2015625..3ceeac0 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -892,7 +892,8 @@ typedef enum typedef enum ProgressCommandType { PROGRESS_COMMAND_INVALID, - PROGRESS_COMMAND_VACUUM + PROGRESS_COMMAND_VACUUM, + PROGRESS_COMMAND_ANALYZE } ProgressCommandType; #define PGSTAT_NUM_PROGRESS_PARAM 10 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c4c8450..5b211f4 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1795,6 +1795,23 @@ pg_stat_database_conflicts| SELECT d.oid AS datid, pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin, pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock FROM pg_database d; +pg_stat_progress_analyze| SELECT s.pid, + s.datid, + d.datname, + s.relid, + CASE s.param1 + WHEN 0 THEN 'initializing'::text + WHEN 1 THEN 'collecting sample rows'::text + WHEN 2 THEN 'collecting inherited sample rows'::text + WHEN 3 THEN 'computing statistics'::text + WHEN 4 THEN 'computing inherited statistics'::text + ELSE NULL::text + END AS phase, + s.param2 AS num_target_sample_rows, + s.param3 AS num_rows_sampled, + s.param4 AS num_rows_processed + FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10) + LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_vacuum| SELECT s.pid, s.datid, d.datname,