diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 8cd3d690..ba923a2b 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7388,6 +7388,27 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + track_wal_io_timing (boolean) + + track_wal_io_timing configuration parameter + + + + + Enables timing of WAL I/O calls. This parameter is off by default, + because it will repeatedly query the operating system for + the current time, which may cause significant overhead on some + platforms. You can use the tool to + measure the overhead of timing on your system. + I/O timing information is + displayed in + pg_stat_wal. Only superusers can + change this setting. + + + + track_functions (enum) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 52a69a53..ce4f652d 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3479,7 +3479,51 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i wal_buffers_full bigint - Number of times WAL data was written to the disk because WAL buffers got full + Total number of times WAL data was written to the disk because WAL buffers got full + + + + + + wal_write bigint + + + Total number of times WAL data was written to the disk + + + + + + wal_write_time double precision + + + Total amount of time that has been spent in the portion of + WAL data was written to disk, in milliseconds + (if is enabled, otherwise zero). + To avoid standby server's performance degradation, they don't collect + this statistics + + + + + + wal_sync bigint + + + Total number of times WAL data was synced to the disk + + + + + + wal_sync_time double precision + + + Total amount of time that has been spent in the portion of + WAL data was synced to disk, in milliseconds + (if is enabled, otherwise zero). + To avoid standby server's performance degradation, they don't collect + this statistics diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7e81ce4f..18d6ecc5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -109,6 +109,7 @@ int CommitDelay = 0; /* precommit delay in microseconds */ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ int wal_retrieve_retry_interval = 5000; int max_slot_wal_keep_size_mb = -1; +bool track_wal_io_timing = false; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -970,6 +971,8 @@ static void WALInsertLockAcquireExclusive(void); static void WALInsertLockRelease(void); static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); +static bool fsyncMethodCalled(); + /* * Insert an XLOG record represented by an already-constructed chain of data * chunks. This is a low-level routine; to construct the WAL record header @@ -2528,6 +2531,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) Size nbytes; Size nleft; int written; + instr_time start; + instr_time duration; /* OK to write the page(s) */ from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; @@ -2536,9 +2541,24 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) do { errno = 0; + + /* Measure i/o timing to write WAL data */ + if (track_wal_io_timing) + INSTR_TIME_SET_CURRENT(start); + pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE); written = pg_pwrite(openLogFile, from, nleft, startoffset); pgstat_report_wait_end(); + + if (track_wal_io_timing) + { + INSTR_TIME_SET_CURRENT(duration); + INSTR_TIME_SUBTRACT(duration, start); + WalStats.m_wal_write_time += INSTR_TIME_GET_MILLISEC(duration); + } + + WalStats.m_wal_write++; + if (written <= 0) { char xlogfname[MAXFNAMELEN]; @@ -10433,6 +10453,27 @@ assign_xlog_sync_method(int new_sync_method, void *extra) } } +/* + * Check if fsync mothod is called. + */ +static bool +fsyncMethodCalled() +{ + if (!enableFsync) + return false; + + switch (sync_method) + { + case SYNC_METHOD_FSYNC: + case SYNC_METHOD_FSYNC_WRITETHROUGH: + case SYNC_METHOD_FDATASYNC: + return true; + default: + /* others don't have a specific fsync method */ + return false; + } +} + /* * Issue appropriate kind of fsync (if any) for an XLOG output file. @@ -10443,8 +10484,19 @@ assign_xlog_sync_method(int new_sync_method, void *extra) void issue_xlog_fsync(int fd, XLogSegNo segno) { + instr_time start; + instr_time duration; char *msg = NULL; + /* + * Measure i/o timing to fsync WAL data. + * + * The wal receiver skip to collect it to avoid performance degradation of standy servers. + * If sync_method doesn't have its fsync method, to skip too. + */ + if (!AmWalReceiverProcess() && track_wal_io_timing && fsyncMethodCalled()) + INSTR_TIME_SET_CURRENT(start); + pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC); switch (sync_method) { @@ -10488,6 +10540,19 @@ issue_xlog_fsync(int fd, XLogSegNo segno) } pgstat_report_wait_end(); + + /* increment the i/o timing and the number of times to fsync WAL data */ + if (fsyncMethodCalled()) + { + if (!AmWalReceiverProcess() && track_wal_io_timing) + { + INSTR_TIME_SET_CURRENT(duration); + INSTR_TIME_SUBTRACT(duration, start); + WalStats.m_wal_sync_time += INSTR_TIME_GET_MILLISEC(duration); + } + + WalStats.m_wal_sync++; + } } /* diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index b140c210..da4e8139 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -997,6 +997,10 @@ CREATE VIEW pg_stat_wal AS w.wal_fpi, w.wal_bytes, w.wal_buffers_full, + w.wal_write, + w.wal_write_time, + w.wal_sync, + w.wal_sync_time, w.stats_reset FROM pg_stat_get_wal() w; diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 7c75a25d..4bf83e4c 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -6801,6 +6801,10 @@ pgstat_recv_wal(PgStat_MsgWal *msg, int len) walStats.wal_fpi += msg->m_wal_fpi; walStats.wal_bytes += msg->m_wal_bytes; walStats.wal_buffers_full += msg->m_wal_buffers_full; + walStats.wal_write += msg->m_wal_write; + walStats.wal_write_time += msg->m_wal_write_time; + walStats.wal_sync += msg->m_wal_sync; + walStats.wal_sync_time += msg->m_wal_sync_time; } /* ---------- diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index a52832fe..ce9f4b7c 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -253,6 +253,9 @@ WalWriterMain(void) else if (left_till_hibernate > 0) left_till_hibernate--; + /* Send WAL statistics */ + pgstat_send_wal(); + /* * Sleep until we are signaled or WalWriterDelay has elapsed. If we * haven't done anything useful for quite some time, lengthen the diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 6afe1b6f..711a30ab 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1703,7 +1703,7 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS) Datum pg_stat_get_wal(PG_FUNCTION_ARGS) { -#define PG_STAT_GET_WAL_COLS 5 +#define PG_STAT_GET_WAL_COLS 9 TupleDesc tupdesc; Datum values[PG_STAT_GET_WAL_COLS]; bool nulls[PG_STAT_GET_WAL_COLS]; @@ -1724,7 +1724,15 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) NUMERICOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "wal_buffers_full", INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 5, "stats_reset", + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "wal_write", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "wal_write_time", + FLOAT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "wal_sync", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "wal_sync_time", + FLOAT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "stats_reset", TIMESTAMPTZOID, -1, 0); BlessTupleDesc(tupdesc); @@ -1744,7 +1752,17 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) Int32GetDatum(-1)); values[3] = Int64GetDatum(wal_stats->wal_buffers_full); - values[4] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); + values[4] = Int64GetDatum(wal_stats->wal_write); + + /* time is already in msec, just convert to double for presentation */ + values[5] = Float8GetDatum((double) wal_stats->wal_write_time); + + values[6] = Int64GetDatum(wal_stats->wal_sync); + + /* time is already in msec, just convert to double for presentation */ + values[7] = Float8GetDatum((double) wal_stats->wal_sync_time); + + values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 635d91d5..fc24745a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1482,6 +1482,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"track_wal_io_timing", PGC_SUSET, STATS_COLLECTOR, + gettext_noop("Collects timing statistics for WAL I/O activity."), + NULL + }, + &track_wal_io_timing, + false, + NULL, NULL, NULL + }, { {"update_process_title", PGC_SUSET, PROCESS_TITLE, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 9c9091e6..64da738b 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -583,6 +583,7 @@ #track_activities = on #track_counts = on #track_io_timing = off +#track_wal_io_timing = off #track_functions = none # none, pl, all #track_activity_query_size = 1024 # (change requires restart) #stats_temp_directory = 'pg_stat_tmp' diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 221af87e..ce695708 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -131,6 +131,7 @@ extern int recovery_min_apply_delay; extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; +extern bool track_wal_io_timing; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index fc2202b8..91673385 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5497,13 +5497,13 @@ proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' }, -{ oid => '1136', descr => 'statistics: information about WAL activity', - proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', - proparallel => 'r', prorettype => 'record', proargtypes => '', - proallargtypes => '{int8,int8,numeric,int8,timestamptz}', - proargmodes => '{o,o,o,o,o}', - proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,stats_reset}', - prosrc => 'pg_stat_get_wal' }, + { oid => '1136', descr => 'statistics: information about WAL activity', + proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', + proparallel => 'r', prorettype => 'record', proargtypes => '', + proallargtypes => '{int8,int8,numeric,int8,int8,float8,int8,float8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o}', + proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_write_time,wal_sync,wal_sync_time,stats_reset}', + prosrc => 'pg_stat_get_wal' }, { oid => '2306', descr => 'statistics: information about SLRU caches', proname => 'pg_stat_get_slru', prorows => '100', proisstrict => 'f', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5954068d..a63619bc 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -463,6 +463,10 @@ typedef struct PgStat_MsgWal PgStat_Counter m_wal_fpi; uint64 m_wal_bytes; PgStat_Counter m_wal_buffers_full; + PgStat_Counter m_wal_write; + PgStat_Counter m_wal_write_time; /* accumulate times in milliseconds */ + PgStat_Counter m_wal_sync; + PgStat_Counter m_wal_sync_time; /* accumulate times in milliseconds */ } PgStat_MsgWal; /* ---------- @@ -805,6 +809,10 @@ typedef struct PgStat_WalStats PgStat_Counter wal_fpi; uint64 wal_bytes; PgStat_Counter wal_buffers_full; + PgStat_Counter wal_write; + PgStat_Counter wal_write_time; + PgStat_Counter wal_sync; + PgStat_Counter wal_sync_time; TimestampTz stat_reset_timestamp; } PgStat_WalStats; diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 6293ab57..b3bf1216 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2142,8 +2142,12 @@ pg_stat_wal| SELECT w.wal_records, w.wal_fpi, w.wal_bytes, w.wal_buffers_full, + w.wal_write, + w.wal_write_time, + w.wal_sync, + w.wal_sync_time, w.stats_reset - FROM pg_stat_get_wal() w(wal_records, wal_fpi, wal_bytes, wal_buffers_full, stats_reset); + FROM pg_stat_get_wal() w(wal_records, wal_fpi, wal_bytes, wal_buffers_full, wal_write, wal_write_time, wal_sync, wal_sync_time, stats_reset); pg_stat_wal_receiver| SELECT s.pid, s.status, s.receive_start_lsn,