From b48849d091302dfdac4fc004195b3c532fdb9dcb Mon Sep 17 00:00:00 2001 From: Vignesh C Date: Sat, 25 Jul 2020 06:38:40 +0530 Subject: [PATCH v3] Fix for Parallel worker hangs while handling errors. A parallel worker is not able to receive signals while it is processing an error. Signals are masked using sigprocmask when the worker is started, and are unblocked later by calling BackgroundWorkerUnblockSignals in ParallelWorkerMain. When an error is raised, the worker jumps back to the sigsetjmp call in StartBackgroundWorker, where the signals are in the blocked state again, so the worker never receives the signal it is waiting for and hangs. Authors: Vignesh C, Bharath Rupireddy --- src/backend/postmaster/bgworker.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index beb5e85..0b9f214 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -671,6 +671,23 @@ bgworker_sigusr1_handler(SIGNAL_ARGS) } /* + * update_parallel_worker_sigmask - add or remove a signal from sigmask. + */ +static void +update_parallel_worker_sigmask(BackgroundWorker *worker, int signum, + bool isadd) +{ + if ((worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) + { + if (isadd) + sigaddset(&BlockSig, signum); + else + sigdelset(&BlockSig, signum); + PG_SETMASK(&BlockSig); + } +} + +/* * Start a new background worker * * This is the main entry point for background worker, to be called from @@ -747,6 +764,16 @@ StartBackgroundWorker(void) */ if (sigsetjmp(local_sigjmp_buf, 1) != 0) { + /* + * In case of parallel workers, unblock the SIGUSR1 signal, which was + * blocked when the postmaster forked us. The leader process will send + * SIGUSR1 to the worker process (which will be waiting because there + * is no space available) to indicate that shared memory space has been + * freed up. 
Once the signal is received, the worker process will continue populating + * the error message. + */ + update_parallel_worker_sigmask(worker, SIGUSR1, false); + /* Since not using PG_TRY, must reset error stack by hand */ error_context_stack = NULL; @@ -757,6 +784,14 @@ StartBackgroundWorker(void) EmitErrorReport(); /* + * Undo the unblocking of SIGUSR1 done above, so that leaving + * SIGUSR1 unblocked does not cause any further issues during + * the execution of callbacks and other processing that will be + * done during proc_exit(). + */ + update_parallel_worker_sigmask(worker, SIGUSR1, true); + + /* * Do we need more cleanup here? For shmem-connected bgworkers, we * will call InitProcess below, which will install ProcKill as exit * callback. That will take care of releasing locks, etc. -- 1.8.3.1