diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 735b794..4af317f 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -157,7 +157,7 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, FileFdwPlanState *fdw_private, Cost *startup_cost, Cost *total_cost); static int file_acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows); @@ -1069,9 +1069,10 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, */ static int file_acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows) { + int targrows = *(int *) target_info; int numrows = 0; double rowstoskip = -1; /* -1 means not set yet */ ReservoirStateData rstate; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index e8cb2d0..215aa7a 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -391,7 +391,7 @@ static void process_query_params(ExprContext *econtext, List *param_exprs, const char **param_values); static int postgresAcquireSampleRowsFunc(Relation relation, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows); static void analyze_row_processor(PGresult *res, int row, @@ -3551,10 +3551,11 @@ postgresAnalyzeForeignTable(Relation relation, */ static int postgresAcquireSampleRowsFunc(Relation relation, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows) { + int targrows = *(int *) target_info; PgFdwAnalyzeState astate; ForeignTable *table; ForeignServer *server; diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index dcb2d33..f97e8bf 100644 --- a/doc/src/sgml/monitoring.sgml +++ 
b/doc/src/sgml/monitoring.sgml @@ -521,6 +521,13 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser VACUUM, showing current progress. See . + + + pg_stat_progress_analyzepg_stat_progress_analyze + One row for each backend running + ANALYZE, showing current progress. + See . + @@ -3095,9 +3102,8 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, PostgreSQL has the ability to report the progress of - certain commands during command execution. Currently, the only command - which supports progress reporting is VACUUM. This may be - expanded in the future. + certain commands during command execution. Currently, the commands that support progress + reporting are VACUUM and ANALYZE. @@ -3287,7 +3293,131 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, + + + + ANALYZE Progress Reporting + + + Whenever ANALYZE is running, the + pg_stat_progress_analyze view will contain + one row for each backend that is currently analyzing. + The tables below describe the information that will be reported and + provide information about how to interpret it. + + + + <structname>pg_stat_progress_analyze</structname> View + + + + Column + Type + Description + + + + + + pid + integer + Process ID of backend. + + + datid + oid + OID of the database to which this backend is connected. + + + datname + name + Name of the database to which this backend is connected. + + + relid + oid + OID of the table being analyzed. + + + phase + text + + Current processing phase of analyze. See . + + + + num_target_sample_rows + bigint + + Target number of rows to sample from the relation. The sample + is taken randomly. Its size depends on the + parameter value. + + + + num_rows_sampled + bigint + + Total number of rows sampled so far. If the table being analyzed + has one or more children, ANALYZE will gather sample + rows twice: once on the rows of the parent table only, and a second time + on the rows of the parent table together with all of its children. + + + + +
+ + + ANALYZE phases + + + + Phase + Description + + + + + initializing + + ANALYZE is preparing to collect sample rows. + + + + collecting sample rows + + ANALYZE is currently collecting the sample rows of + a single relation. The sample it reads is taken randomly. Its size + depends on the + parameter value. + + + + collecting inherited sample rows + + ANALYZE is currently collecting inherited sample rows. + If the table being analyzed has one or more children, this phase will + collect sample rows of the parent table and all associated child relations. + The sample it reads is taken randomly. Its size depends on + the parameter value. + + + + computing statistics + + On the collected sample, ANALYZE is currently computing some + statistical information, such as the percentage of NULL values, + the average width of a row, the number of distinct values, etc. + It also stores the most common values and their frequencies. + The number of these values depends on the value of the + parameter. + + + + +
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index b6552da..30ffe62 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -872,6 +872,20 @@ CREATE VIEW pg_stat_progress_vacuum AS FROM pg_stat_get_progress_info('VACUUM') AS S LEFT JOIN pg_database D ON S.datid = D.oid; +CREATE VIEW pg_stat_progress_analyze AS + SELECT + S.pid AS pid, S.datid AS datid, D.datname AS datname, + S.relid AS relid, + CASE S.param1 WHEN 0 THEN 'initializing' + WHEN 1 THEN 'collecting sample rows' + WHEN 2 THEN 'collecting inherited sample rows' + WHEN 3 THEN 'computing statistics' + END AS phase, + S.param2 AS num_target_sample_rows, + S.param3 AS num_rows_sampled + FROM pg_stat_get_progress_info('ANALYZE') AS S + LEFT JOIN pg_database D ON S.datid = D.oid; + CREATE VIEW pg_user_mappings AS SELECT U.oid AS umid, diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index b91df98..7bcb519 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -31,6 +31,7 @@ #include "commands/dbcommands.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" +#include "commands/progress.h" #include "executor/executor.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -70,6 +71,12 @@ typedef struct AnlIndexData /* Default statistics target (GUC parameter) */ int default_statistics_target = 100; +typedef struct AcquireSampleRowsInfo +{ + int rows_to_collect; /* must stay first: FDW callbacks read *(int *) target_info */ + int rows_collected; +} AcquireSampleRowsInfo; + /* A few variables that don't seem worth passing around as parameters */ static MemoryContext anl_context = NULL; static BufferAccessStrategy vac_strategy; @@ -86,7 +93,7 @@ static void compute_index_stats(Relation onerel, double totalrows, static VacAttrStats *examine_attribute(Relation onerel, int attnum, Node *index_expr); static int acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info,
double *totalrows, double *totaldeadrows); static int compare_rows(const void *a, const void *b); static int acquire_inherited_sample_rows(Relation onerel, int elevel, @@ -257,10 +264,16 @@ analyze_rel(Oid relid, RangeVar *relation, int options, MyPgXact->vacuumFlags |= PROC_IN_ANALYZE; LWLockRelease(ProcArrayLock); + /* Report that we are going to start analyzing onerel. */ + pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE, + RelationGetRelid(onerel)); + /* * Do the normal non-recursive ANALYZE. We can skip this for partitioned * tables, which don't contain any rows. */ + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COLLECT_SAMPLE_ROWS); if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages, false, in_outer_xact, elevel); @@ -269,8 +282,15 @@ analyze_rel(Oid relid, RangeVar *relation, int options, * If there are child tables, do recursive ANALYZE. */ if (onerel->rd_rel->relhassubclass) + { + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COLLECT_INH_SAMPLE_ROWS); do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages, true, in_outer_xact, elevel); + } + + /* We're done analyzing.
*/ + pgstat_progress_end_command(); /* * Close source relation now, but keep lock so that no one deletes it @@ -487,15 +507,24 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, /* * Acquire the sample rows */ + + /* Report the number of target sample rows */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_TARGET_SAMPLE_ROWS, + targrows); + rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); if (inh) numrows = acquire_inherited_sample_rows(onerel, elevel, rows, targrows, &totalrows, &totaldeadrows); else + { + AcquireSampleRowsInfo target = {targrows, 0}; + numrows = (*acquirefunc) (onerel, elevel, - rows, targrows, + rows, (void *) &target, &totalrows, &totaldeadrows); + } /* * Compute the statistics. Temporary results during the calculations for @@ -503,6 +532,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, * responsible to make sure that whatever they store into the VacAttrStats * structure is allocated in anl_context. */ + + /* Report that statistics will now be computed.
*/ + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COMPUTE_STATS); if (numrows > 0) { MemoryContext col_context, @@ -982,7 +1015,7 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr) */ static int acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, + HeapTuple *rows, void *target_info, double *totalrows, double *totaldeadrows) { int numrows = 0; /* # rows now in reservoir */ @@ -994,8 +1027,9 @@ acquire_sample_rows(Relation onerel, int elevel, TransactionId OldestXmin; BlockSamplerData bs; ReservoirStateData rstate; + AcquireSampleRowsInfo *target = (AcquireSampleRowsInfo *) target_info; - Assert(targrows > 0); + Assert(target->rows_to_collect > 0); totalblocks = RelationGetNumberOfBlocks(onerel); @@ -1003,9 +1037,9 @@ acquire_sample_rows(Relation onerel, int elevel, OldestXmin = GetOldestXmin(onerel, true); /* Prepare for sampling block numbers */ - BlockSampler_Init(&bs, totalblocks, targrows, random()); + BlockSampler_Init(&bs, totalblocks, target->rows_to_collect, random()); /* Prepare for sampling rows */ - reservoir_init_selection_state(&rstate, targrows); + reservoir_init_selection_state(&rstate, target->rows_to_collect); /* Outer loop over blocks to sample */ while (BlockSampler_HasMore(&bs)) @@ -1142,7 +1176,7 @@ acquire_sample_rows(Relation onerel, int elevel, * we've passed over so far, so when we fall off the end of * the relation we're done. */ - if (numrows < targrows) + if (numrows < target->rows_to_collect) rows[numrows++] = heap_copytuple(&targtuple); else { @@ -1153,7 +1187,8 @@ acquire_sample_rows(Relation onerel, int elevel, * t.
*/ if (rowstoskip < 0) - rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); + rowstoskip = reservoir_get_next_S(&rstate, samplerows, + target->rows_to_collect); if (rowstoskip <= 0) { @@ -1161,9 +1196,10 @@ acquire_sample_rows(Relation onerel, int elevel, * Found a suitable tuple, so save it, replacing one * old tuple at random */ - int k = (int) (targrows * sampler_random_fract(rstate.randstate)); + int k = (int) (target->rows_to_collect * + sampler_random_fract(rstate.randstate)); - Assert(k >= 0 && k < targrows); + Assert(k >= 0 && k < target->rows_to_collect); heap_freetuple(rows[k]); rows[k] = heap_copytuple(&targtuple); } @@ -1172,6 +1208,10 @@ acquire_sample_rows(Relation onerel, int elevel, } samplerows += 1; + + /* Report number of rows sampled so far */ + pgstat_progress_update_param(PROGRESS_ANALYZE_NUM_ROWS_SAMPLED, + target->rows_collected + numrows); } } @@ -1187,7 +1227,7 @@ acquire_sample_rows(Relation onerel, int elevel, * (itempointer). It's not worth worrying about corner cases where the * tuples are already sorted.
*/ - if (numrows == targrows) + if (numrows == target->rows_to_collect) qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); /* @@ -1417,13 +1457,14 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, childtargrows = Min(childtargrows, targrows - numrows); if (childtargrows > 0) { + AcquireSampleRowsInfo target = {childtargrows, numrows}; int childrows; double trows, tdrows; /* Fetch a random sample of the child's rows */ childrows = (*acquirefunc) (childrel, elevel, - rows + numrows, childtargrows, + rows + numrows, (void *) &target, &trows, &tdrows); /* We may need to convert from child's rowtype to parent's */ diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index a987d0d..eb1dd63 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -465,6 +465,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS) /* Translate command name into command type code. */ if (pg_strcasecmp(cmd, "VACUUM") == 0) cmdtype = PROGRESS_COMMAND_VACUUM; + else if (pg_strcasecmp(cmd, "ANALYZE") == 0) + cmdtype = PROGRESS_COMMAND_ANALYZE; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 9472ecc..6718c06 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -34,4 +34,14 @@ #define PROGRESS_VACUUM_PHASE_TRUNCATE 5 #define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP 6 +/* Progress parameters for analyze */ +#define PROGRESS_ANALYZE_PHASE 0 +#define PROGRESS_ANALYZE_NUM_TARGET_SAMPLE_ROWS 1 +#define PROGRESS_ANALYZE_NUM_ROWS_SAMPLED 2 + +/* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */ +#define PROGRESS_ANALYZE_PHASE_COLLECT_SAMPLE_ROWS 1 +#define PROGRESS_ANALYZE_PHASE_COLLECT_INH_SAMPLE_ROWS 2 +#define PROGRESS_ANALYZE_PHASE_COMPUTE_STATS 3 + #endif diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 6ca44f7..8349268 100644 ---
a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -132,9 +132,10 @@ typedef void (*ExplainDirectModify_function) (ForeignScanState *node, struct ExplainState *es); typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, - double *totaldeadrows); + HeapTuple *rows, + void *target_info, + double *totalrows, + double *totaldeadrows); typedef bool (*AnalyzeForeignTable_function) (Relation relation, AcquireSampleRowsFunc *func, diff --git a/src/include/pgstat.h b/src/include/pgstat.h index f2daf32..7dc3392 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -890,7 +890,8 @@ typedef enum typedef enum ProgressCommandType { PROGRESS_COMMAND_INVALID, - PROGRESS_COMMAND_VACUUM + PROGRESS_COMMAND_VACUUM, + PROGRESS_COMMAND_ANALYZE } ProgressCommandType; #define PGSTAT_NUM_PROGRESS_PARAM 10 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index bd13ae6..7ab2832 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1795,6 +1795,21 @@ pg_stat_database_conflicts| SELECT d.oid AS datid, pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin, pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock FROM pg_database d; +pg_stat_progress_analyze| SELECT s.pid, + s.datid, + d.datname, + s.relid, + CASE s.param1 + WHEN 0 THEN 'initializing'::text + WHEN 1 THEN 'collecting sample rows'::text + WHEN 2 THEN 'collecting inherited sample rows'::text + WHEN 3 THEN 'computing statistics'::text + ELSE NULL::text + END AS phase, + s.param2 AS num_target_sample_rows, + s.param3 AS num_rows_sampled + FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10) + LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_vacuum| SELECT s.pid, s.datid, d.datname,