[tpop3d-discuss] pop locks-

Chris Lightfoot chris at ex-parrot.com
Mon, 1 Jul 2002 15:23:37 +0100


On Mon, Jul 01, 2002 at 10:00:38AM -0400, Michael Klatsky wrote:
> Chris-
> 
> No -didn't get any further, as I took the easy way out and recompiled
> without the pop- locking mechanism- just using OS level locking which
> seems to be fine.

OK. I take it that no stale locks are now occurring? If
so, this indicates that it was a tpop3d problem. Could you
try applying the following patch, which will make the main
daemon log a message if any child dies with a signal,
and see whether it reports anything useful:

diff -u -r1.17 signals.c
--- signals.c   1 Jul 2002 14:06:05 -0000       1.17
+++ signals.c   1 Jul 2002 14:20:48 -0000
@@ -65,7 +65,7 @@
         0};
     int terminate_signals[] = {SIGINT, SIGTERM, 0};
     int restart_signals[]   = {SIGHUP, 0};
-    int die_signals[]       = {SIGQUIT, SIGABRT, SIGSEGV, SIGBUS, 0};
+    int die_signals[]       = {SIGQUIT, SIGABRT, SIGSEGV, SIGBUS, SIGILL, 0};
     int *i;
     struct sigaction sa, saz = {0};
 
@@ -144,6 +144,10 @@
 extern int authchild_wr, authchild_rd, authchild_status;
 #endif /* AUTH_OTHER */
 
+/* Save information about any child which dies with a signal. */
+pid_t child_died;
+int child_died_signal;
+
 void child_signal_handler(const int i) {
     pid_t pid;
     int e, status;
@@ -163,7 +167,13 @@
                 close(authchild_rd);
             } else
 #endif /* AUTH_OTHER */
+            {
                 --num_running_children;
+                if (WIFSIGNALED(status)) {
+                    child_died = pid;
+                    child_died_signal = WTERMSIG(status);
+                }
+            }
         } else if (pid == 0 || (pid == -1 && errno != EINTR)) {
             errno = e;
             return;
diff -u -r1.72 main.c
--- main.c      1 Jul 2002 14:06:05 -0000       1.72
+++ main.c      1 Jul 2002 14:20:48 -0000
@@ -391,18 +391,19 @@
  * setuid() and fork() when appropriate. */
 volatile int foad = 0, restart = 0; /* Flags used to indicate that we should exit or should re-exec. */
 
-#ifdef AUTH_OTHER
-extern pid_t authchild_died;
-extern int authchild_status;
-#endif /* AUTH_OTHER */
 
 void net_loop(void) {
     connection *J;
 #ifdef AUTH_OTHER
+    extern pid_t authchild_died;
+    extern int authchild_status;
+#endif /* AUTH_OTHER */
+    extern pid_t child_died;
+    extern int child_died_signal;
     sigset_t chmask;
+    
     sigemptyset(&chmask);
     sigaddset(&chmask, SIGCHLD);
-#endif /* AUTH_OTHER */
 
     /* 2 * max_running_children is a reasonable ball-park figure. */
     max_connections = 2 * max_running_children;
@@ -433,17 +434,24 @@
             connections_post_select(&readfds, NULL, NULL);
         }
 
+        sigprocmask(SIG_BLOCK, &chmask, NULL);
+        
 #ifdef AUTH_OTHER
         /* It may be that the authentication child died; log the message here
-         * to avoid doing something we shouldn't in the signal handler. We
-         * block SIGCHLD while doing this. */
-        sigprocmask(SIG_BLOCK, &chmask, NULL);
+         * to avoid doing something we shouldn't in the signal handler. */
         if (authchild_died) {
             log_print(LOG_WARNING, _("net_loop: authentication child %d terminated with status %d"), (int)authchild_died, authchild_status);
             authchild_died = 0;
         }
-        sigprocmask(SIG_UNBLOCK, &chmask, NULL);
 #endif /* AUTH_OTHER */
+        
+        /* Also log a message if a child process died with a signal. */
+        if (child_died) {
+            log_print(LOG_ERR, _("net_loop: child process %d killed by signal %d (shouldn't happen)"), (int)child_died, child_died_signal);
+            child_died = 0;
+        }
+        
+        sigprocmask(SIG_UNBLOCK, &chmask, NULL);
     }
 
     /* Termination request received; we should close all connections in an
---------- end of patch ------------

-- 
``... and I live at number 5 Five Drive.''
  (`How'd The Date End?', The Mr. T Experience).