[tpop3d-discuss] Tpop 1.4.2 memory leak?: fork_child: fork: Not enough space

Chris Lightfoot chris at ex-parrot.com
Thu, 9 Jan 2003 23:09:08 +0000


On Thu, Jan 09, 2003 at 10:05:31PM +0000, Chris Lightfoot wrote:
> On Thu, Jan 09, 2003 at 01:04:01PM -0800, Rich, WhidbeyNet NOC wrote:
> > 
> > We've been using PAM, so I ran the pamtest.c (gcc -lpam -o pamtest
> > pamtest.c) for an hour on a test Solaris 9 machine. "vmstat" and
> > "/usr/ucb/ps" showed that the "pamtest" process consumed an average of
> > 15 Kb of memory per second, about 1 Mb/minute. After an hour, it was
> > using 60mb of memory, and continued to grow.
> > 
> > The memory was released back to the system when the process was killed.
> > If left to continue, would that steady consumption of memory by
> > "pamtest" constitute a memory leak in pam?
> 
> My understanding is that pamtest.c is a correct PAM
> program which should not leak memory, and that if it does
> so, that indicates a leak in PAM.
> 
> Cf. also
> 
>     http://groups.google.com/groups?q=solaris+pam+memory+leak&hl=en&lr=&ie=UTF-8&selm=3vsoaxi963.fsf%40ratbert.iconnet.net&rnum=1
> 
> -- this is a real pain in the arse. I think that some
> Linux PAM implementations are now leak-free, but the basic
> issue is that the PAM developers didn't expect their code
> to be used this way and so didn't make the effort. Typical
> applications use PAM after fork()ing, but tpop3d isn't
> built that way. I suppose it's possible that I could fork
> a special process to do each PAM authentication, but
> that's *really* ugly. Yuk.

I've put together a patch which implements this, but

    (a) it's against the CVS version, not 1.4.2, though it
    should apply against the latter;

    (b) it's only been minimally tested;

    (c) you'll have to manually alter configuration.h to
    compile it in.

Patch:


Index: auth_pam.c
===================================================================
RCS file: /home/chris/vcvs/repos/tpop3d/auth_pam.c,v
retrieving revision 1.27
diff -u -r1.27 auth_pam.c
--- auth_pam.c	8 Aug 2002 15:20:41 -0000	1.27
+++ auth_pam.c	9 Jan 2003 23:07:46 -0000
@@ -15,12 +15,14 @@
 
 #include <sys/types.h> /* BSD needs this here, it seems. */
 
+#include <errno.h>
 #include <grp.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <syslog.h>
+#include <unistd.h>
 
 #include <security/pam_appl.h>
 
@@ -52,24 +54,73 @@
     return PAM_SUCCESS;
 }
 
+/* auth_pam_do_authentication FACILITY USER PASSWORD
+ * Tries to authenticate USER with PASSWORD using the named PAM FACILITY,
+ * returning nonzero on success or zero on failure. */
+static int auth_pam_do_authentication(const char *facility, const char *user, const char *pass, const char *clienthost) {
+    struct pam_conv conv;
+    pam_handle_t *pamh = NULL;
+    int result = 0, r;
+    
+    /* This will generate a warning on Solaris; I can't see an easy fix. */
+    conv.conv = auth_pam_conversation;
+    conv.appdata_ptr = (void*)pass;
+    
+    r = pam_start(facility, user, &conv, &pamh);
+
+    if (r != PAM_SUCCESS) {
+        log_print(LOG_ERR, "auth_pam_new_user_pass: pam_start: %s", pam_strerror(pamh, r));
+        return 0;
+    }
+    
+    /* We want to be able to test against the client IP; make the remote host
+     * information available to the PAM stack. */
+    r = pam_set_item(pamh, PAM_RHOST, clienthost);
+    
+    if (r != PAM_SUCCESS) {
+        log_print(LOG_ERR, "auth_pam_new_user_pass: pam_start: %s", pam_strerror(pamh, r));
+        return 0;
+    }
+
+    /* Authenticate user. */
+    r = pam_authenticate(pamh, 0);
+
+    if (r == PAM_SUCCESS) {
+        /* OK, is the account presently allowed to log in? */
+        r = pam_acct_mgmt(pamh, PAM_SILENT);
+        if (r == PAM_SUCCESS)
+            /* Succeeded. */
+            result = 1;
+        else
+            /* Failed; account is disabled or something. */
+            log_print(LOG_ERR, "auth_pam_new_user_pass: pam_acct_mgmt(%s): %s", user, pam_strerror(pamh, r));
+    } else
+        /* User did not authenticate. */
+        log_print(LOG_ERR, "auth_pam_new_user_pass: pam_authenticate(%s): %s", user, pam_strerror(pamh, r));
+
+    r = pam_end(pamh, r);
+    if (r != PAM_SUCCESS) log_print(LOG_ERR, "auth_pam_new_user_pass: pam_end: %s", pam_strerror(pamh, r));
+
+    return result;
+}
+
 /* auth_pam_new_user_pass:
  * Attempt to authenticate user and pass using PAM. This is not a
  * virtual-domains authenticator, so it only looks at user. */
-authcontext auth_pam_new_user_pass(const char *user, const char *local_part, const char *domain, const char *pass, const char *clienthost /* unused */, const char *serverhost) {
-    pam_handle_t *pamh = NULL;
+#ifdef REALLY_UGLY_PAM_HACK
+pid_t auth_pam_child_pid;
+#endif
+authcontext auth_pam_new_user_pass(const char *user, const char *local_part, const char *domain, const char *pass, const char *clienthost, const char *serverhost) {
     struct passwd pw, *pw2;
-    int r, n = PAM_SUCCESS;
-    authcontext a = NULL;
-    struct pam_conv conv;
-    char *facility;
     char *s;
     int use_gid = 0;
     gid_t gid = 99;
+    static const char *facility;
+    int authenticated = 0;
 
     /* Check the this isn't a virtual-domain user. */
     if (local_part) return NULL;
 
-
     /* It is possible to use PAM to authenticate users who do not exist as
      * system users. We support this by defining an auth-pam-mail-user
      * configuration option which is used to obtain the user information
@@ -97,9 +148,8 @@
     /* pw now contains either the data for the real UNIX user named or the UNIX
      * user given by the auth-pam-mail-user config option. */
 
-
     /* Obtain facility name. */
-    if (!(facility = config_get_string("auth-pam-facility")))
+    if (!facility && !(facility = config_get_string("auth-pam-facility")))
         facility = AUTH_PAM_FACILITY;
 
     /* Obtain gid to use */
@@ -111,43 +161,69 @@
         use_gid = 1;
     }
 
-    /* This will generate a warning on Solaris; I can't see an easy fix. */
-    conv.conv = auth_pam_conversation;
-    conv.appdata_ptr = (void*)pass;
-    
-    r = pam_start(facility, user, &conv, &pamh);
-
-    if (r != PAM_SUCCESS) {
-        log_print(LOG_ERR, "auth_pam_new_user_pass: pam_start: %s", pam_strerror(pamh, r));
-        return NULL;
+    /* 
+     * On many systems, PAM leaks memory, which is a problem for a daemon like
+     * tpop3d which does all authentication in the main daemon. So we
+     * optionally implement a really ugly hack where we fork a process in
+     * which to interact with PAM.
+     */
+
+#ifdef REALLY_UGLY_PAM_HACK
+    {
+        int pfd[2];
+        char res = 0;
+        ssize_t n;
+        
+        /* 
+         * The child process writes a byte zero into the pipe on failure or a
+         * one on success. Don't use the exit value because we don't want to
+         * have to piss about with the SIGCHLD handler.
+         */
+        
+        if (pipe(pfd) == -1)
+            log_print(LOG_ERR, "auth_pam_new_user_pass: pipe: %m");
+        else {
+            switch (auth_pam_child_pid = fork()) {
+                case 0:
+                    close(pfd[0]);
+                    if (xwrite(pfd[1], auth_pam_do_authentication(facility, user, pass, clienthost) ? "\001" : "\0", 1) == -1)
+                        /* This is really bad. The parent may hang waiting for us. */
+                        log_print(LOG_ERR, _("auth_pam_new_user_pass: (child process): write: %m"));
+                    close(pfd[1]);
+                    _exit(0);
+
+                case -1:
+                    close(pfd[0]);
+                    close(pfd[1]);
+                    log_print(LOG_ERR, "auth_pam_new_user_pass: fork: %m");
+                    break;
+
+                default:
+                    close(pfd[1]);
+                    while ((n = read(pfd[0], &res, 1)) == -1 && errno == EINTR);
+                    close(pfd[0]);
+                    if (n <= 0) {
+                        /* Bad. Read error, child probably crashed. */
+                        if (n == -1)
+                            log_print(LOG_ERR, "auth_pam_new_user_pass: read: %m");
+                        else 
+                            log_print(LOG_ERR, _("auth_pam_new_user_pass: authentication child did not send status (shouldn't happen)"));
+                        authenticated = 0;
+                    } else
+                        /* Good. Byte returned. */
+                        authenticated = res;
+                    break;
+            }
+        }
     }
+#else
+    authenticated = auth_pam_do_authentication(facility, user, pass, clienthost);
+#endif  /* REALLY_UGLY_PAM_HACK */
     
-    /* We want to be able to test against the client IP; make the remote host
-     * information available to the PAM stack. */
-    r = pam_set_item(pamh, PAM_RHOST, clienthost);
-    
-    if (r != PAM_SUCCESS) {
-        log_print(LOG_ERR, "auth_pam_new_user_pass: pam_start: %s", pam_strerror(pamh, r));
+    if (authenticated)
+        return authcontext_new(pw.pw_uid, use_gid ? gid : pw.pw_gid, NULL, NULL, pw.pw_dir);
+    else
         return NULL;
-    }
-
-    /* Authenticate user. */
-    r = pam_authenticate(pamh, 0);
-
-    if (r == PAM_SUCCESS) {
-        /* OK, is the account presently allowed to log in? */
-        r = pam_acct_mgmt(pamh, PAM_SILENT);
-        if (r == PAM_SUCCESS) {
-            /* Succeeded; figure out the mailbox name later. */
-            a = authcontext_new(pw.pw_uid, use_gid ? gid : pw.pw_gid, NULL, NULL, pw.pw_dir);
-        } else log_print(LOG_ERR, "auth_pam_new_user_pass: pam_acct_mgmt(%s): %s", user, pam_strerror(pamh, r));
-    } else log_print(LOG_ERR, "auth_pam_new_user_pass: pam_authenticate(%s): %s", user, pam_strerror(pamh, r));
-
-    r = pam_end(pamh, n);
-
-    if (r != PAM_SUCCESS) log_print(LOG_ERR, "auth_pam_new_user_pass: pam_end: %s", pam_strerror(pamh, r));
-
-    return a;
 }
 
 #endif /* AUTH_PAM */
Index: auth_other.c
===================================================================
RCS file: /home/chris/vcvs/repos/tpop3d/auth_other.c,v
retrieving revision 1.24
diff -u -r1.24 auth_other.c
--- auth_other.c	25 Jun 2002 20:28:00 -0000	1.24
+++ auth_other.c	9 Jan 2003 23:07:46 -0000
@@ -72,15 +72,15 @@
 #define MAX_DATA_SIZE   4096
 
 char *auth_program;
-time_t authchild_start_time;
-uid_t authchild_uid;
-gid_t authchild_gid;
-volatile pid_t authchild_pid, authchild_died;
-volatile int authchild_status;
-struct timeval authchild_timeout;
+time_t auth_other_childstart_time;
+uid_t auth_other_childuid;
+gid_t auth_other_childgid;
+volatile pid_t auth_other_child_pid, auth_other_childdied;
+volatile int auth_other_childstatus;
+struct timeval auth_other_childtimeout;
 
 /* File descriptors used to talk to child. */
-volatile int authchild_wr = -1, authchild_rd = -1;
+volatile int auth_other_childwr = -1, auth_other_childrd = -1;
 
 /* dump:
  * Debugging method. */
@@ -163,13 +163,13 @@
         return 0;
     }
    
-    switch (authchild_pid = fork()) {
+    switch (auth_other_child_pid = fork()) {
         case 0:
-            if (setgid(authchild_gid) == -1) {
-                log_print(LOG_ERR, "auth_other_start_child: setgid(%d): %m", (int)authchild_gid);
+            if (setgid(auth_other_childgid) == -1) {
+                log_print(LOG_ERR, "auth_other_start_child: setgid(%d): %m", (int)auth_other_childgid);
                 _exit(1);
-            } else if (setuid(authchild_uid) == -1) {
-                log_print(LOG_ERR, "auth_other_start_child: setuid(%d): %m", (int)authchild_uid);
+            } else if (setuid(auth_other_childuid) == -1) {
+                log_print(LOG_ERR, "auth_other_start_child: setuid(%d): %m", (int)auth_other_childuid);
                 _exit(1);
             }
             
@@ -196,8 +196,8 @@
 
         default:
             /* Parent. */
-            authchild_wr = p1[1];
-            authchild_rd = p2[0];
+            auth_other_childwr = p1[1];
+            auth_other_childrd = p2[0];
  
             close(p1[0]);
             close(p2[1]);
@@ -215,23 +215,23 @@
  * SIGKILL-shaped violence if that fails. */
 void auth_other_kill_child() {
     struct timeval deadline, now;
-    if (authchild_pid == 0) return; /* Already dead. */
-    kill(authchild_pid, SIGTERM);
+    if (auth_other_child_pid == 0) return; /* Already dead. */
+    kill(auth_other_child_pid, SIGTERM);
     
     /* Wait for it to expire. */
     gettimeofday(&now, NULL);
     deadline = now;
-    tvadd(&deadline, &authchild_timeout);
+    tvadd(&deadline, &auth_other_childtimeout);
     
-    while (authchild_pid && tvcmp(&now, &deadline) < 0) {
+    while (auth_other_child_pid && tvcmp(&now, &deadline) < 0) {
         struct timespec delay = {0, 100000000}; /* 0.1s; note nano not microseconds */
         nanosleep(&delay, NULL);
         gettimeofday(&now, NULL);
     }
 
-    if (authchild_pid) {
+    if (auth_other_child_pid) {
         log_print(LOG_WARNING, _("auth_other_kill_child: child failed to die; killing with SIGKILL"));
-        kill(authchild_pid, SIGKILL);
+        kill(auth_other_child_pid, SIGKILL);
         /* Assume this works; it ought to! */
     }
 }
@@ -279,14 +279,14 @@
             f = 0.75;
     }
 
-    authchild_timeout.tv_sec  = (long)floor(f);
-    authchild_timeout.tv_usec = (long)((f - floor(f)) * 1e6);
+    auth_other_childtimeout.tv_sec  = (long)floor(f);
+    auth_other_childtimeout.tv_usec = (long)((f - floor(f)) * 1e6);
 
     /* Find out user and group under which program will run. */
     if (!(s = config_get_string("auth-other-user"))) {
         log_print(LOG_ERR, _("auth_other_init: no user specified"));
         return 0;
-    } else if (!parse_uid(s, &authchild_uid)) {
+    } else if (!parse_uid(s, &auth_other_childuid)) {
         log_print(LOG_ERR, _("auth_other_init: auth-other-user directive `%s' does not make sense"), s);
         return 0;
     }
@@ -294,12 +294,12 @@
     if (!(s = config_get_string("auth-other-group"))) {
         log_print(LOG_ERR, _("auth_other_init: no group specified"));
         return 0;
-    } else if (!parse_gid(s, &authchild_gid)) {
+    } else if (!parse_gid(s, &auth_other_childgid)) {
         log_print(LOG_ERR, _("auth_other_init: auth-other-group directive `%s' does not make sense"), s);
         return 0;
     }
 
-    log_print(LOG_INFO, "auth_other_init: %s: will run as uid %d, gid %d", auth_program, (int)authchild_uid, (int)authchild_gid);
+    log_print(LOG_INFO, "auth_other_init: %s: will run as uid %d, gid %d", auth_program, (int)auth_other_childuid, (int)auth_other_childgid);
 
     if (!auth_other_start_child()) {
         log_print(LOG_ERR, _("auth_other_init: failed to start authentication child for first time"));
@@ -312,8 +312,8 @@
 /* auth_other_postfork:
  * Post-fork cleanup: close our copies of the file descriptors. */
 void auth_other_postfork() {
-    close(authchild_wr);
-    close(authchild_rd);
+    close(auth_other_childwr);
+    close(auth_other_childrd);
 }
 
 /* auth_other_close:
@@ -332,7 +332,7 @@
     char *p;
     size_t nn;
     
-    if (!authchild_pid) return 0;
+    if (!auth_other_child_pid) return 0;
 
     va_start(ap, nvars);
 
@@ -360,7 +360,7 @@
     /* Since write operations are atomic, this will either succeed entirely or
      * fail. In the latter case, it may be with EAGAIN because the child
      * process is blocking; we interpret this as a protocol error. */
-    if (try_write(authchild_wr, buffer, nn)) ret = 1;
+    if (try_write(auth_other_childwr, buffer, nn)) ret = 1;
     else {
         if (errno == EAGAIN)
             log_print(LOG_ERR, _("auth_other_send_request: write: write on pipe blocked; killing child"));
@@ -384,10 +384,10 @@
     char *p, *q, *r, *s;
     struct timeval deadline;
 
-    if (!authchild_pid) return NULL;
+    if (!auth_other_child_pid) return NULL;
 
     gettimeofday(&deadline, 0);
-    tvadd(&deadline, &authchild_timeout);
+    tvadd(&deadline, &auth_other_childtimeout);
 
     p = buffer;
     do {
@@ -396,7 +396,7 @@
         fd_set readfds;
 
         FD_ZERO(&readfds);
-        FD_SET(authchild_rd, &readfds);
+        FD_SET(auth_other_childrd, &readfds);
         gettimeofday(&tt, NULL);
         if (tvcmp(&deadline, &tt) == -1) {
             log_print(LOG_ERR, _("auth_other_recv_response: timed out waiting for a response; killing child"));
@@ -404,9 +404,9 @@
         }
         tvsub(&timeout, &tt);
 
-        switch (select(authchild_rd + 1, &readfds, NULL, NULL, &timeout)) {
+        switch (select(auth_other_childrd + 1, &readfds, NULL, NULL, &timeout)) {
             case 1:
-                rr = read(authchild_rd, p, (buffer + sizeof(buffer) - p));
+                rr = read(auth_other_childrd, p, (buffer + sizeof(buffer) - p));
 
                 switch (rr) {
                     case 0:
@@ -482,7 +482,7 @@
     item *I;
     authcontext a = NULL;
  
-    if (!authchild_pid) auth_other_start_child();
+    if (!auth_other_child_pid) auth_other_start_child();
     
     for (p = digeststr, q = digest; q < digest + 16; p += 2, ++q)
         sprintf(p, "%02x", (unsigned int)*q);
@@ -546,7 +546,7 @@
     item *I;
     authcontext a = NULL;
 
-    if (!authchild_pid) auth_other_start_child();
+    if (!auth_other_child_pid) auth_other_start_child();
 
     if (local_part && domain) {
         if (!auth_other_send_request(7, "method", "PASS", "user", user, "local_part", local_part, "domain", domain, "pass", pass, "clienthost", clienthost, "serverhost", serverhost))
@@ -605,7 +605,7 @@
     stringmap S;
     item *I;
 
-    if (!authchild_pid) auth_other_start_child();
+    if (!auth_other_child_pid) auth_other_start_child();
 
     if (!auth_other_send_request(6, "method", "ONLOGIN", "user", A->user, "local_part", A->local_part, "domain", A->domain, "clienthost", clienthost, "serverhost", serverhost)
         || !(S = auth_other_recv_response()))
Index: signals.c
===================================================================
RCS file: /home/chris/vcvs/repos/tpop3d/signals.c,v
retrieving revision 1.21
diff -u -r1.21 signals.c
--- signals.c	13 Nov 2002 23:31:43 -0000	1.21
+++ signals.c	9 Jan 2003 23:07:46 -0000
@@ -127,8 +127,8 @@
 extern int num_running_children; /* in main.c */
 
 #ifdef AUTH_OTHER
-extern pid_t authchild_pid, authchild_died; /* in auth_other.c */
-extern int authchild_wr, authchild_rd, authchild_status;
+extern pid_t auth_other_child_pid, auth_other_childdied; /* in auth_other.c */
+extern int auth_other_childwr, auth_other_childrd, auth_other_childstatus;
 #endif /* AUTH_OTHER */
 
 /* Save information about any child which dies with a signal. */
@@ -145,13 +145,19 @@
     while (1) {
         pid = waitpid(-1, &status, WNOHANG);
         if (pid > 0) {
+#ifdef REALLY_UGLY_PAM_HACK
+            extern auth_pam_child_pid; /* in auth_pam.c */
+            if (pid == auth_pam_child_pid)
+                ; /* Do nothing; in principle we should check to see if it crashed. */
+            else
+#endif /* REALLY_UGLY_PAM_HACK */
 #ifdef AUTH_OTHER
-            if (pid == authchild_pid) {
-                authchild_pid = 0;
-                authchild_died = pid;
-                authchild_status = status;
-                close(authchild_wr);
-                close(authchild_rd);
+            if (pid == auth_other_child_pid) {
+                auth_other_child_pid = 0;
+                auth_other_childdied = pid;
+                auth_other_childstatus = status;
+                close(auth_other_childwr);
+                close(auth_other_childrd);
             } else
 #endif /* AUTH_OTHER */
             {

-- 
``Do you think it's a little dangerous handing out guns in a bank?''
  (Michael Moore, to teller in bank which gives out hand-guns to
   customers opening new accounts)