From b2a58ae322922aa6266bdf2ff2f7eb64f4f6e9e8 Mon Sep 17 00:00:00 2001 From: Alexander Kukushkin Date: Wed, 21 Jan 2026 14:17:36 +0100 Subject: [PATCH] Fix pg_cron fast shutdown hang with sync replication Fast shutdown may hang indefinitely when `synchronous_standby_names` requirement cannot be satisfied due to an insufficient number of synchronous replicas. In this situation, pg_cron can block waiting for a synchronous replication acknowledgment. Example: ``` postgres -D testdb --shared_preload_libraries=pg_cron --synchronous_standby_names=foobar \_ postgres: io worker 0 \_ postgres: io worker 1 \_ postgres: io worker 2 \_ postgres: checkpointer \_ postgres: pg_cron launcher waiting for 0/A2DDC88 ``` gdb: ``` (gdb) bt #0 0x00007f7b2a5b3e5a in epoll_wait (epfd=5, events=0x56096e95dc08, maxevents=1, timeout=timeout@entry=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30 #1 0x0000560942c65aa6 in WaitEventSetWaitBlock (set=set@entry=0x56096e95dba0, cur_timeout=cur_timeout@entry=-1, occurred_events=occurred_events@entry=0x7fff16aa23d0, nevents=nevents@entry=1) at waiteventset.c:1191 #2 0x0000560942c664b5 in WaitEventSetWait (set=0x56096e95dba0, timeout=timeout@entry=-1, occurred_events=occurred_events@entry=0x7fff16aa23d0, nevents=nevents@entry=1, wait_event_info=wait_event_info@entry=134217780) at waiteventset.c:1139 #3 0x0000560942c5884a in WaitLatch (latch=<optimized out>, wakeEvents=wakeEvents@entry=17, timeout=timeout@entry=-1, wait_event_info=wait_event_info@entry=134217780) at latch.c:196 #4 0x0000560942c0f6c4 in SyncRepWaitForLSN (lsn=170777736, commit=commit@entry=true) at syncrep.c:388 #5 0x00005609428d87cd in RecordTransactionCommit () at xact.c:1557 #6 0x00005609428d88f2 in CommitTransaction () at xact.c:2365 #7 0x00005609428d9831 in CommitTransactionCommandInternal () at xact.c:3202 #8 0x00005609428d9bbb in CommitTransactionCommand () at xact.c:3163 #9 0x00007f7b2b4b3b19 in MarkPendingRunsAsFailed () at src/job_metadata.c:1456 #10 0x00007f7b2b4b66a4 in PgCronLauncherMain 
(arg=<optimized out>) at src/pg_cron.c:588 #11 0x0000560942bc1798 in BackgroundWorkerMain (startup_data=<optimized out>, startup_data_len=<optimized out>) at bgworker.c:879 #12 0x0000560942bc3a4b in postmaster_child_launch (child_type=child_type@entry=B_BG_WORKER, child_slot=238, startup_data=startup_data@entry=0x56096e9f67b0, startup_data_len=startup_data_len@entry=1472, client_sock=client_sock@entry=0x0) at launch_backend.c:290 #13 0x0000560942bc5bf2 in StartBackgroundWorker (rw=rw@entry=0x56096e9f67b0) at postmaster.c:4164 #14 0x0000560942bc5e43 in maybe_start_bgworkers () at postmaster.c:4330 #15 0x0000560942bc6be3 in LaunchMissingBackgroundProcesses () at postmaster.c:3404 #16 0x0000560942bc89f9 in ServerLoop () at postmaster.c:1717 #17 0x0000560942bc9e08 in PostmasterMain (argc=argc@entry=5, argv=argv@entry=0x56096e95d2e0) at postmaster.c:1400 #18 0x0000560942acfc06 in main (argc=5, argv=0x56096e95d2e0) at main.c:227 ``` This happens because pg_cron installs a custom `SIGTERM` handler that does not set `ProcDiePending`, causing `SyncRepWaitForLSN()` to never exit its wait loop. Fix this by switching to the standard `SIGTERM` handler (`die()`). Additionally, remove the custom `SIGHUP` handler and rely on `SignalHandlerForConfigReload()` instead. 
--- src/pg_cron.c | 67 +++++++++++---------------------------------------- 1 file changed, 14 insertions(+), 53 deletions(-) diff --git a/src/pg_cron.c b/src/pg_cron.c index e202d14..0426071 100644 --- a/src/pg_cron.c +++ b/src/pg_cron.c @@ -72,6 +72,11 @@ #include "mb/pg_wchar.h" #include "parser/analyze.h" #include "pgstat.h" +#if PG_VERSION_NUM >= 130000 +#include "postmaster/interrupt.h" +#else +#define SignalHandlerForConfigReload PostgresSigHupHandler +#endif #include "postmaster/postmaster.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -120,8 +125,6 @@ typedef enum /* forward declarations */ void _PG_init(void); void _PG_fini(void); -static void pg_cron_sigterm(SIGNAL_ARGS); -static void pg_cron_sighup(SIGNAL_ARGS); PGDLLEXPORT void PgCronLauncherMain(Datum arg); PGDLLEXPORT void CronBackgroundWorker(Datum arg); @@ -154,10 +157,6 @@ static void bgw_generate_returned_message(StringInfoData *display_msg, ErrorData char *CronTableDatabaseName = "postgres"; static bool CronLogStatement = true; static bool CronLogRun = true; -static bool CronReloadConfig = false; - -/* flags set by signal handlers */ -static volatile sig_atomic_t got_sigterm = false; /* global variables */ static int CronTaskStartTimeout = 10000; /* maximum connection time */ @@ -348,39 +347,6 @@ _PG_init(void) } -/* - * Signal handler for SIGTERM - * Set a flag to let the main loop to terminate, and set our latch to wake - * it up. - */ -static void -pg_cron_sigterm(SIGNAL_ARGS) -{ - got_sigterm = true; - - if (MyProc != NULL) - { - SetLatch(&MyProc->procLatch); - } -} - - -/* - * Signal handler for SIGHUP - * Set a flag to tell the main loop to reload the cron jobs. 
- */ -static void -pg_cron_sighup(SIGNAL_ARGS) -{ - CronJobCacheValid = false; - CronReloadConfig = true; - - if (MyProc != NULL) - { - SetLatch(&MyProc->procLatch); - } -} - /* * pg_cron_cmdTuples - * mainly copy/pasted from PQcmdTuples @@ -563,9 +529,9 @@ PgCronLauncherMain(Datum arg) struct rlimit limit; /* Establish signal handlers before unblocking signals. */ - pqsignal(SIGHUP, pg_cron_sighup); + pqsignal(SIGHUP, SignalHandlerForConfigReload); pqsignal(SIGINT, SIG_IGN); - pqsignal(SIGTERM, pg_cron_sigterm); + pqsignal(SIGTERM, die); /* We're now ready to receive signals */ BackgroundWorkerUnblockSignals(); @@ -630,7 +596,7 @@ PgCronLauncherMain(Datum arg) MemoryContextSwitchTo(CronLoopContext); - while (!got_sigterm) + for (;;) { List *taskList = NIL; TimestampTz currentTime = 0; @@ -639,20 +605,18 @@ PgCronLauncherMain(Datum arg) AcceptInvalidationMessages(); - if (CronReloadConfig) + if (ConfigReloadPending) { /* set the desired log_min_messages */ ProcessConfigFile(PGC_SIGHUP); SetConfigOption("log_min_messages", cron_error_severity(CronLogMinMessages), PGC_POSTMASTER, PGC_S_OVERRIDE); - CronReloadConfig = false; + ConfigReloadPending = false; + + /* Some settings might have changed, force RefreshTaskHash() */ + CronJobCacheValid = false; } - /* - * Both CronReloadConfig and CronJobCacheValid are triggered by SIGHUP. - * ProcessConfigFile should come first, because RefreshTaskHash depends - * on settings that might have changed. - */ if (!CronJobCacheValid) { RefreshTaskHash(); @@ -669,10 +633,7 @@ PgCronLauncherMain(Datum arg) MemoryContextReset(CronLoopContext); } - ereport(LOG, (errmsg("pg_cron scheduler shutting down"))); - - /* return error code to trigger restart */ - proc_exit(1); + /* Not reachable */ }