From 9c67eb101bbbdebf28b8bbb8a8a186f0b1c69669 Mon Sep 17 00:00:00 2001 From: Vignesh C Date: Wed, 27 Jan 2021 18:20:13 +0530 Subject: [PATCH v6] Print backtrace of specified postgres process. The idea here is to implement & expose pg_print_backtrace function, internally what this function does is, the connected backend will send SIGUSR1 signal by setting PROCSIG_PRINT_BACKTRACE to postgres backend whose pid matches the specified process id. Once the backend process receives this signal it will print the backtrace of the process to the log file based on the logging configuration, if logging is disabled backtrace will be printed to the console where postmaster was started. --- doc/src/sgml/func.sgml | 77 ++++++++++++ src/backend/postmaster/autovacuum.c | 7 ++ src/backend/postmaster/checkpointer.c | 8 ++ src/backend/postmaster/interrupt.c | 8 ++ src/backend/storage/ipc/procsignal.c | 18 +++ src/backend/storage/ipc/signalfuncs.c | 134 +++++++++++++++++---- src/backend/tcop/postgres.c | 7 ++ src/backend/utils/error/elog.c | 20 ++- src/backend/utils/init/globals.c | 1 + src/include/catalog/pg_proc.dat | 5 + src/include/miscadmin.h | 2 + src/include/storage/procsignal.h | 2 + src/include/utils/elog.h | 2 + .../test_misc/t/002_print_backtrace_validation.pl | 73 +++++++++++ 14 files changed, 334 insertions(+), 30 deletions(-) create mode 100644 src/test/modules/test_misc/t/002_print_backtrace_validation.pl diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index f30eaa3..750a0ba 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -24739,6 +24739,33 @@ SELECT collation for ('foo' COLLATE "de_DE"); however only superusers can terminate superuser backends. + + + + + pg_print_backtrace + + pg_print_backtrace ( pid integer ) + boolean + + + Prints the backtrace whose backend process has the specified process ID. + Backtrace will be printed based on the log configuration set. See + for more information. This + will help in identifying where exactly the backend process is currently + executing. This information will be useful in reporting the hangs and + also helps the developers in diagonising the problems. This is + allowed if the calling role is a member of the role whose backend + backtrace is being printed or the calling role has been granted + pg_signal_backend, however only superusers can + print backtrace of superuser backends. This feature is not supported + for postmaster, logging and statistics process. This feature will be + available, if the installer was generated on a platform which had + backtrace capturing capability. If not available it will return false + by throwing the following warning "WARNING: backtrace generation is + not supported by this installation". + + @@ -24758,6 +24785,56 @@ SELECT collation for ('foo' COLLATE "de_DE"); pg_stat_activity view. + + pg_print_backtrace can be used to print backtrace of + a backend process. For example: + +postgres=# select pg_print_backtrace(pg_backend_pid()); + pg_print_backtrace +-------------------- + t +(1 row) + +The backtrace will be logged to the log file if logging is enabled, if logging +is disabled backtrace will be logged to the console where the postmaster was +started. For example: +2021-01-27 11:33:50.247 IST [111735] LOG: current backtrace: + postgres: postgresdba postgres [local] SELECT(set_backtrace+0x38) [0xae06c5] + postgres: postgresdba postgres [local] SELECT(ProcessInterrupts+0x788) [0x950c34] + postgres: postgresdba postgres [local] SELECT() [0x761e89] + postgres: postgresdba postgres [local] SELECT() [0x71bbda] + postgres: postgresdba postgres [local] SELECT() [0x71e380] + postgres: postgresdba postgres [local] SELECT(standard_ExecutorRun+0x1d6) [0x71c1fe] + postgres: postgresdba postgres [local] SELECT(ExecutorRun+0x55) [0x71c026] + postgres: postgresdba postgres [local] SELECT() [0x953fc5] + postgres: postgresdba postgres [local] SELECT(PortalRun+0x262) [0x953c7e] + postgres: postgresdba postgres [local] SELECT() [0x94db78] + postgres: postgresdba postgres [local] SELECT(PostgresMain+0x7d7) [0x951e72] + postgres: postgresdba postgres [local] SELECT() [0x896b2f] + postgres: postgresdba postgres [local] SELECT() [0x8964b5] + postgres: postgresdba postgres [local] SELECT() [0x892a79] + postgres: postgresdba postgres [local] SELECT(PostmasterMain+0x116b) [0x892350] + postgres: postgresdba postgres [local] SELECT() [0x795f72] + /lib64/libc.so.6(__libc_start_main+0xf5) [0x7f2107bbd505] + postgres: postgresdba postgres [local] SELECT() [0x4842a9] + + + You can get the file name and line number by using gdb/addr2line in + linux platforms, as a prerequisite users must ensure gdb/addr2line is + already installed: + +1) "info line *address" from gdb on postgres executable. For example: +gdb ./postgres +(gdb) info line *0x71c25d +Line 378 of "execMain.c" starts at address 0x71c25d <standard_ExecutorRun+470> and ends at 0x71c263 <standard_ExecutorRun+476>. +OR +2) Using "addr2line -e postgres address", For example: +addr2line -e ./postgres 0x71c25d +/home/postgresdba/src/backend/executor/execMain.c:378 + + + + diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 47e60ca..3f72929 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -836,6 +836,13 @@ HandleAutoVacLauncherInterrupts(void) if (ProcSignalBarrierPending) ProcessProcSignalBarrier(); + /* Process printing backtrace */ + if (PrintBacktracePending) + { + PrintBacktracePending = false; + set_backtrace(NULL, 0); + } + /* Process sinval catchup interrupts that happened while sleeping */ ProcessCatchupInterrupt(); } diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 54a818b..5fae328 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -57,6 +57,7 @@ #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" +#include "tcop/tcopprot.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/resowner.h" @@ -547,6 +548,13 @@ HandleCheckpointerInterrupts(void) if (ProcSignalBarrierPending) ProcessProcSignalBarrier(); + /* Process printing backtrace */ + if (PrintBacktracePending) + { + PrintBacktracePending = false; + set_backtrace(NULL, 0); + } + if (ConfigReloadPending) { ConfigReloadPending = false; diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index dd9136a..59aff6c 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -21,6 +21,7 @@ #include "storage/ipc.h" #include "storage/latch.h" #include "storage/procsignal.h" +#include "tcop/tcopprot.h" #include "utils/guc.h" volatile sig_atomic_t ConfigReloadPending = false; @@ -41,6 +42,13 @@ HandleMainLoopInterrupts(void) ProcessConfigFile(PGC_SIGHUP); } + /* Process printing backtrace */ + if (PrintBacktracePending) + { + PrintBacktracePending = false; + set_backtrace(NULL, 0); + } + if (ShutdownRequestPending) proc_exit(0); } diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index c43cdd6..f2f0fe7 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -453,6 +453,21 @@ HandleProcSignalBarrierInterrupt(void) } /* + * Handle receipt of an interrupt indicating a print backtrace. + * + * Note: this is called within a signal handler! All we can do is set + * a flag that will cause the next CHECK_FOR_INTERRUPTS to invoke + * set_backtrace function which will log the backtrace. + */ +static void +HandlePrintBacktraceInterrupt(void) +{ + InterruptPending = true; + PrintBacktracePending = true; + /* latch will be set by procsignal_sigusr1_handler */ +} + +/* * Perform global barrier related interrupt checking. * * Any backend that participates in ProcSignal signaling must arrange to @@ -686,6 +701,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + if (CheckProcSignal(PROCSIG_PRINT_BACKTRACE)) + HandlePrintBacktraceInterrupt(); + SetLatch(MyLatch); latch_sigusr1_handler(); diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c index 69fe23a..c29f765 100644 --- a/src/backend/storage/ipc/signalfuncs.c +++ b/src/backend/storage/ipc/signalfuncs.c @@ -22,6 +22,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -40,12 +41,22 @@ * be emitted. For permission errors, doing that is the responsibility of * the caller. */ -#define SIGNAL_BACKEND_SUCCESS 0 -#define SIGNAL_BACKEND_ERROR 1 -#define SIGNAL_BACKEND_NOPERMISSION 2 -#define SIGNAL_BACKEND_NOSUPERUSER 3 -static int -pg_signal_backend(int pid, int sig) +typedef enum SignalBackendStatus +{ + SIGNAL_BACKEND_SUCCESS = 0, + SIGNAL_BACKEND_ERROR, + SIGNAL_BACKEND_NOPERMISSION, + SIGNAL_BACKEND_NOSUPERUSER +} SignalBackendStatus; + +/* + * check_privilege_to_send_singal + * + * Check if the specified pid is a valid backend process and the user has + * permissions to signal to the process. + */ +static SignalBackendStatus +check_privilege_to_send_singal(int pid) { PGPROC *proc = BackendPidGetProc(pid); @@ -77,6 +88,17 @@ pg_signal_backend(int pid, int sig) !has_privs_of_role(GetUserId(), DEFAULT_ROLE_SIGNAL_BACKENDID)) return SIGNAL_BACKEND_NOPERMISSION; + return SIGNAL_BACKEND_SUCCESS; +} + +static SignalBackendStatus +pg_signal_backend(int pid, int sig) +{ + SignalBackendStatus ret = check_privilege_to_send_singal(pid); + + if (ret != SIGNAL_BACKEND_SUCCESS) + return ret; + /* * Can the process we just validated above end, followed by the pid being * recycled for a new process, before reaching here? Then we'd be trying @@ -110,19 +132,23 @@ pg_signal_backend(int pid, int sig) Datum pg_cancel_backend(PG_FUNCTION_ARGS) { - int r = pg_signal_backend(PG_GETARG_INT32(0), SIGINT); + SignalBackendStatus r = pg_signal_backend(PG_GETARG_INT32(0), SIGINT); - if (r == SIGNAL_BACKEND_NOSUPERUSER) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be a superuser to cancel superuser query"))); + switch (r) + { + case SIGNAL_BACKEND_NOSUPERUSER: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a superuser to cancel superuser query"))); - if (r == SIGNAL_BACKEND_NOPERMISSION) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be a member of the role whose query is being canceled or member of pg_signal_backend"))); + case SIGNAL_BACKEND_NOPERMISSION: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a member of the role whose query is being canceled or member of pg_signal_backend"))); - PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS); + default: + PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS); + } } /* @@ -134,19 +160,23 @@ pg_cancel_backend(PG_FUNCTION_ARGS) Datum pg_terminate_backend(PG_FUNCTION_ARGS) { - int r = pg_signal_backend(PG_GETARG_INT32(0), SIGTERM); + SignalBackendStatus r = pg_signal_backend(PG_GETARG_INT32(0), SIGTERM); - if (r == SIGNAL_BACKEND_NOSUPERUSER) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be a superuser to terminate superuser process"))); + switch (r) + { + case SIGNAL_BACKEND_NOSUPERUSER: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a superuser to terminate superuser process"))); - if (r == SIGNAL_BACKEND_NOPERMISSION) - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend"))); + case SIGNAL_BACKEND_NOPERMISSION: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend"))); - PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS); + default: + PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS); + } } /* @@ -215,3 +245,55 @@ pg_rotate_logfile_v2(PG_FUNCTION_ARGS) SendPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE); PG_RETURN_BOOL(true); } + +/* + * pg_print_backtrace - print backtrace of backend process. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_print_backtrace(PG_FUNCTION_ARGS) +{ +#ifdef HAVE_BACKTRACE_SYMBOLS + int pid = PG_GETARG_INT32(0); + SignalBackendStatus r = check_privilege_to_send_singal(pid); + + switch (r) + { + case SIGNAL_BACKEND_NOSUPERUSER: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a superuser to print backtrace of superuser process"))); + + case SIGNAL_BACKEND_NOPERMISSION: + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be a member of the role whose backtrace is being logged or member of pg_signal_backend"))); + + case SIGNAL_BACKEND_SUCCESS: + + /* + * Send SIGUSR1 to postgres backend whose pid matches pid by + * setting PROCSIG_PRINT_BACKTRACE, the backend process will print + * the backtrace once the signal is received. + */ + if (!SendProcSignal(pid, PROCSIG_PRINT_BACKTRACE, InvalidBackendId)) + PG_RETURN_BOOL(true); + else + { + ereport(WARNING, + (errmsg("failed to send signal to postmaster: %m"))); + break; /* Return false */ + } + + default: + break; /* Not a supported process, return false */ + } +#else + ereport(WARNING, + (errmsg("backtrace generation is not supported by this installation"))); +#endif + + PG_RETURN_BOOL(false); +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index cb5a961..8f2c4f7 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3274,6 +3274,13 @@ ProcessInterrupts(void) if (ParallelMessagePending) HandleParallelMessages(); + + /* Process printing backtrace */ + if (PrintBacktracePending) + { + PrintBacktracePending = false; + set_backtrace(NULL, 0); + } } diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 80c2672..a98a048 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -172,7 +172,6 @@ static char formatted_log_time[FORMATTED_TS_LEN]; static const char *err_gettext(const char *str) pg_attribute_format_arg(1); -static pg_noinline void set_backtrace(ErrorData *edata, int num_skip); static void set_errdata_field(MemoryContextData *cxt, char **ptr, const char *str); static void write_console(const char *line, int len); static void setup_formatted_log_time(void); @@ -955,9 +954,10 @@ errbacktrace(void) * Compute backtrace data and add it to the supplied ErrorData. num_skip * specifies how many inner frames to skip. Use this to avoid showing the * internal backtrace support functions in the backtrace. This requires that - * this and related functions are not inlined. + * this and related functions are not inlined. If edata pointer is valid + * backtrace information will set in edata. */ -static void +void set_backtrace(ErrorData *edata, int num_skip) { StringInfoData errtrace; @@ -984,7 +984,19 @@ set_backtrace(ErrorData *edata, int num_skip) "backtrace generation is not supported by this installation"); #endif - edata->backtrace = errtrace.data; + if (edata) + edata->backtrace = errtrace.data; + else + { + /* + * LOG_SERVER_ONLY is used intentionally to make sure this information + * is not sent to client based on client_min_messages. We don't want + * to mess up a different session as pg_print_backtrace will be + * sending SIGNAL to a different backend. + */ + elog(LOG_SERVER_ONLY, "current backtrace:%s", errtrace.data); + pfree(errtrace.data); + } } /* diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index a5976ad..e2b5ad5 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -37,6 +37,7 @@ volatile sig_atomic_t ProcSignalBarrierPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; volatile uint32 CritSectionCount = 0; +volatile sig_atomic_t PrintBacktracePending = false; int MyProcPid; pg_time_t MyStartTime; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index f817406..9668786 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11379,4 +11379,9 @@ proname => 'is_normalized', prorettype => 'bool', proargtypes => 'text text', prosrc => 'unicode_is_normalized' }, +# function to get the backtrace of server process +{ oid => '6105', descr => 'print backtrace of process', + proname => 'pg_print_backtrace', provolatile => 'v', prorettype => 'bool', + proargtypes => 'int4', prosrc => 'pg_print_backtrace' }, + ] diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 1bdc97e..0d00ff9 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -92,6 +92,8 @@ extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount; extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount; extern PGDLLIMPORT volatile uint32 CritSectionCount; +extern PGDLLIMPORT volatile sig_atomic_t PrintBacktracePending; + /* in tcop/postgres.c */ extern void ProcessInterrupts(void); diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 4ae7dc3..1c7532c 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -43,6 +43,8 @@ typedef enum PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + PROCSIG_PRINT_BACKTRACE, /* ask backend to print the current backtrace */ + NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index 3c0e576..5fa108b 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -450,4 +450,6 @@ extern void set_syslog_parameters(const char *ident, int facility); */ extern void write_stderr(const char *fmt,...) pg_attribute_printf(1, 2); +extern pg_noinline void set_backtrace(ErrorData *edata, int num_skip); + #endif /* ELOG_H */ diff --git a/src/test/modules/test_misc/t/002_print_backtrace_validation.pl b/src/test/modules/test_misc/t/002_print_backtrace_validation.pl new file mode 100644 index 0000000..00b2cae --- /dev/null +++ b/src/test/modules/test_misc/t/002_print_backtrace_validation.pl @@ -0,0 +1,73 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 2; +use Time::HiRes qw(usleep); + +# Set up node with logging collector +my $node = get_new_node('primary'); +$node->init(); +$node->append_conf( + 'postgresql.conf', qq( +logging_collector = on +lc_messages = 'C' +)); + +$node->start(); + +# Verify that log output gets to the file +$node->psql('postgres', 'select pg_print_backtrace(pg_backend_pid())'); + +# might need to retry if logging collector process is slow... +my $max_attempts = 180 * 10; + +my $current_logfiles; +for (my $attempts = 0; $attempts < $max_attempts; $attempts++) +{ + eval { + $current_logfiles = slurp_file($node->data_dir . '/current_logfiles'); + }; + last unless $@; + usleep(100_000); +} +die $@ if $@; + +note "current_logfiles = $current_logfiles"; + +like( + $current_logfiles, + qr|^stderr log/postgresql-.*log$|, + 'current_logfiles is sane'); + +my $lfname = $current_logfiles; +$lfname =~ s/^stderr //; +chomp $lfname; + +my $first_logfile; +my $bt_occurence_count; + +# Verify that the backtraces of the processes are logged into logfile. +for (my $attempts = 0; $attempts < $max_attempts; $attempts++) +{ + $first_logfile = $node->data_dir . '/' . $lfname; + chomp $first_logfile; + print "file is $first_logfile"; + open my $fh, '<', $first_logfile + or die "Could not open '$first_logfile' $!"; + while (my $line = <$fh>) + { + chomp $line; + if ($line =~ m/current backtrace/) + { + $bt_occurence_count++; + } + } + last if $bt_occurence_count == 1; + usleep(100_000); +} + +is($bt_occurence_count, 1, 'found expected backtrace in the log file'); + +$node->stop(); -- 1.8.3.1