You've already forked pgbackrest
							
							
				mirror of
				https://github.com/pgbackrest/pgbackrest.git
				synced 2025-10-30 23:37:45 +02:00 
			
		
		
		
	Prevent defunct processes in asynchronous archive commands.
The main improvement is a double-fork to prevent zombie processes if the parent process exits after the (child) async process. This is a real possibility since the parent process sticks around to monitor the results of the async process. In the first fork, ignore SIGCHLD in the very unlikely case that the async process exits before the first fork. This is probably only possible if the async process exits immediately, perhaps due to a chdir() failure. Set SIGCHLD back to default in the async process so waitpid() will work as expected. Also update the comment on chdir() to more accurately reflect what is happening. Finally, add a test in certain debug builds to ensure the first fork exits very quickly. This only works when valgrind is not in use because valgrind makes forking so slow that it is hard to tell if the async process performed work or not (in the case that the second fork goes missing and the async process is a direct child).
This commit is contained in:
		| @@ -15,6 +15,16 @@ | ||||
|         <release date="XXXX-XX-XX" version="2.24dev" title="UNDER DEVELOPMENT"> | ||||
|             <release-core-list> | ||||
|                 <release-bug-list> | ||||
|                     <release-item> | ||||
|                         <release-item-contributor-list> | ||||
|                              <release-item-ideator id="adam.brusselback"/> | ||||
|                              <release-item-ideator id="ejberdecia"/> | ||||
|                              <release-item-reviewer id="stephen.frost"/> | ||||
|                         </release-item-contributor-list> | ||||
|  | ||||
|                         <p>Prevent defunct processes in asynchronous archive commands.</p> | ||||
|                     </release-item> | ||||
|  | ||||
|                     <release-item> | ||||
|                         <release-item-contributor-list> | ||||
|                             <release-item-ideator id="christian.roux"/> | ||||
|   | ||||
| @@ -6,6 +6,7 @@ Archive Common | ||||
| #include <stdint.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <sys/wait.h> | ||||
| #include <unistd.h> | ||||
|  | ||||
| #include "command/archive/common.h" | ||||
| @@ -242,7 +243,9 @@ archiveAsyncExec(ArchiveMode archiveMode, const StringList *commandExec) | ||||
|     ASSERT(commandExec != NULL); | ||||
|  | ||||
|     // Fork off the async process | ||||
|     if (forkSafe() == 0) | ||||
|     pid_t pid = forkSafe(); | ||||
|  | ||||
|     if (pid == 0) | ||||
|     { | ||||
|         // Disable logging and close log file | ||||
|         logClose(); | ||||
| @@ -256,6 +259,26 @@ archiveAsyncExec(ArchiveMode archiveMode, const StringList *commandExec) | ||||
|             "unable to execute asynchronous '%s'", archiveMode == archiveModeGet ? CFGCMD_ARCHIVE_GET : CFGCMD_ARCHIVE_PUSH); | ||||
|     } | ||||
|  | ||||
| #ifdef DEBUG_EXEC_TIME | ||||
|     // Get the time to measure how long it takes for the forked process to exit | ||||
|     TimeMSec timeBegin = timeMSec(); | ||||
| #endif | ||||
|  | ||||
|     // The process that was just forked should return immediately | ||||
|     int processStatus; | ||||
|  | ||||
|     THROW_ON_SYS_ERROR(waitpid(pid, &processStatus, 0) == -1, ExecuteError, "unable to wait for forked process"); | ||||
|  | ||||
|     // The first fork should exit with success.  If not, something went wrong during the second fork. | ||||
|     CHECK(WIFEXITED(processStatus) && WEXITSTATUS(processStatus) == 0); | ||||
|  | ||||
| #ifdef DEBUG_EXEC_TIME | ||||
|     // If the process does not exit immediately then something probably went wrong with the double fork.  It's possible that this | ||||
|     // test will fail on very slow systems so it may need to be tuned.  The idea is to make sure that the waitpid() above is not | ||||
|     // waiting on the async process. | ||||
|     ASSERT(timeMSec() - timeBegin < 10); | ||||
| #endif | ||||
|  | ||||
|     FUNCTION_LOG_RETURN_VOID(); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -3,6 +3,8 @@ Fork Handler | ||||
| ***********************************************************************************************************************************/ | ||||
| #include "build.auto.h" | ||||
|  | ||||
| #include <signal.h> | ||||
| #include <stdlib.h> | ||||
| #include <unistd.h> | ||||
|  | ||||
| #include "common/debug.h" | ||||
| @@ -33,12 +35,27 @@ forkDetach(void) | ||||
| { | ||||
|     FUNCTION_LOG_VOID(logLevelTrace); | ||||
|  | ||||
|     // Change the working directory to / so we won't error if the old working directory goes away | ||||
|     THROW_ON_SYS_ERROR(chdir("/") == -1, PathMissingError, "unable to change directory to '/'"); | ||||
|  | ||||
|     // Make this process a group leader so the parent process won't block waiting for it to finish | ||||
|     THROW_ON_SYS_ERROR(setsid() == -1, KernelError, "unable to create new session group"); | ||||
|  | ||||
|     // The process should never receive a SIGHUP but ignore it just in case | ||||
|     signal(SIGHUP, SIG_IGN); | ||||
|  | ||||
|     // There should be no way the child process can exit first (after the next fork) but just in case ignore SIGCHLD.  This means | ||||
|     // that the child process will automatically be reaped by the kernel should it finish first rather than becoming defunct. | ||||
|     signal(SIGCHLD, SIG_IGN); | ||||
|  | ||||
|     // Fork again and let the parent process terminate to ensure that we get rid of the session leading process. Only session | ||||
|     // leaders may get a TTY again. | ||||
|     if (forkSafe() != 0) | ||||
|         exit(0); | ||||
|  | ||||
|     // Reset SIGCHLD to the default handler so waitpid() calls in the process will work as expected | ||||
|     signal(SIGCHLD, SIG_DFL); | ||||
|  | ||||
|     // Change the working directory to / so there is no dependency on the original working directory | ||||
|     THROW_ON_SYS_ERROR(chdir("/") == -1, PathMissingError, "unable to change directory to '/'"); | ||||
|  | ||||
|     // Close standard file handles | ||||
|     THROW_ON_SYS_ERROR(close(STDIN_FILENO) == -1, FileCloseError, "unable to close stdin"); | ||||
|     THROW_ON_SYS_ERROR(close(STDOUT_FILENO) == -1, FileCloseError, "unable to close stdout"); | ||||
|   | ||||
| @@ -461,7 +461,8 @@ sub run | ||||
|                     ($self->{oTest}->{&TEST_CDEF} ? " $self->{oTest}->{&TEST_CDEF}" : '') . | ||||
|                     (vmCoverageC($self->{oTest}->{&TEST_VM}) && $self->{bCoverageUnit} ? ' -DDEBUG_COVERAGE' : '') . | ||||
|                     ($self->{bDebug} && $self->{oTest}->{&TEST_TYPE} ne TESTDEF_PERFORMANCE ? '' : ' -DNDEBUG') . | ||||
|                     ($self->{bDebugTestTrace} && $self->{bDebug} ? ' -DDEBUG_TEST_TRACE' : ''); | ||||
|                     ($self->{bDebugTestTrace} && $self->{bDebug} ? ' -DDEBUG_TEST_TRACE' : '') . | ||||
|                     ($self->{oTest}->{&TEST_VM} eq VM_CO6 ? ' -DDEBUG_EXEC_TIME' : ''); | ||||
|  | ||||
|                 # Flags used to build harness files | ||||
|                 my $strHarnessFlags = | ||||
|   | ||||
		Reference in New Issue
	
	Block a user