[tpop3d-discuss][PATCH] Simplify migration from qpopper to tpop3d

Martin Blapp mb at imp.ch
Wed, 2 Feb 2005 01:29:04 +0100 (CET)


This patch actually works quite well. If a migration is done
without this patch and customers have stored a lot of big
mailboxes on the server, they will get a big surprise the
next time they download their mail: all UIDLs from qpopper
are substantially different from tpop3d UIDLs, and thus the
customers' mail clients download all stored email again.

The patch tries to read the stored UIDLs once and only
computes MD5 checksums if an X-UIDL header line is missing.

--- mailbox.h	Thu Jan  9 23:59:37 2003
+++ mailbox.h	Mon Jan 31 16:17:21 2005
@@ -54,7 +54,7 @@
     size_t  offset, length, msglength;  /* Offsets, length for mailspools.  */
     time_t  mtime;                      /* Modified time used for maildirs. */
     char    deleted;
-    unsigned char hash[16];
+    unsigned char hash[32];
 };

 /* mailbox:
--- pop3.c.orig	Thu Nov  6 02:19:27 2003
+++ pop3.c	Tue Feb  1 09:11:43 2005
@@ -255,7 +255,15 @@
             connection_sendresponse(c, 0, _("That message is no more."));
         else {
             char response[64] = {0};
-            snprintf(response, 63, "%d %s", 1 + msg_num, hex_digest(curmsg->hash));
+	    if (strncmp(curmsg->hash, "QPOP", 4) == 0) {
+		    snprintf(response, 63, "%d %s", 1 + msg_num, curmsg->hash+4);
+		    if (verbose)
+			    log_print(LOG_DEBUG, _("uidlcheck_1: sent %s"), curmsg->hash);
+	    } else {
+		    snprintf(response, 63, "%d %s", 1 + msg_num, hex_digest(curmsg->hash));
+		    if (verbose)
+			    log_print(LOG_DEBUG, _("uidlcheck_1: sent hex %s"), hex_digest(curmsg->hash));
+	    }
             connection_sendresponse(c, 1, response);
         }
     } else {
@@ -266,7 +274,15 @@
         for (m = c->m->index; m < c->m->index + c->m->num; ++m) {
             if (!m->deleted) {
                 char response[64] = {0};
-                snprintf(response, 63, "%d %s", 1 + m - c->m->index, hex_digest(m->hash));
+		if (strncmp(m->hash, "QPOP", 4) == 0) {
+			snprintf(response, 63, "%d %s", 1 + m - c->m->index, m->hash);
+			if (verbose)
+				log_print(LOG_DEBUG, _("uidlcheck_2: sent %s"), m->hash+4);
+		} else {
+			snprintf(response, 63, "%d %s", 1 + m - c->m->index, hex_digest(m->hash));
+			if (verbose)
+				log_print(LOG_DEBUG, _("uidlcheck_2: sent hex %s"), hex_digest(m->hash));
+		}
                 if (!connection_sendline(c, response))
                     return;
                 ++nn;
--- mailspool.c.orig	Thu Nov  6 02:19:27 2003
+++ mailspool.c	Mon Jan 31 21:04:27 2005
@@ -51,6 +51,8 @@
 #include "stringmap.h"
 #include "util.h"

+extern int verbose;
+
 #ifdef MBOX_BSD_SAVE_INDICES
 /* Stuff to support a metadata cache. */
 int mailspool_save_indices;
@@ -129,7 +131,7 @@
     x->offset = offset;
     x->length = length;
     x->msglength = msglength;
-    if (hash) memcpy(x->hash, hash, 16);
+    if (hash) memcpy(x->hash, hash, 32);
 }

 /* mailspool_new_from_file FILENAME
@@ -307,12 +309,36 @@
         t->msglength = M->st.st_size - t->offset;

         /* We generate "unique" IDs by hashing the first 512 or so bytes of the
-         * data in each message. Only do this for newly found messages. */
+         * data in each message. Only do this for newly found messages. If an
+	 * old X-UIDL string is found, we read 16k and try to use this one.
+         */
         for (t = M->index; t < M->index + M->num; ++t) {
-            size_t n = 512;
+            size_t n = 16000;
+	    char *uidlp;
+
+            if (n > t->msglength)
+		n = t->msglength;

-            if (n > t->msglength) n = t->msglength;
-            md5_digest((void*)(filemem + t->offset), n, t->hash);
+	    uidlp = memstr(filemem + t->offset, n, "\nX-UIDL:", 8);
+	    if (uidlp != NULL) {
+		int count;
+	    	memcpy((t->hash)+4, uidlp+9, 20);
+		t->hash[0] = 'Q';
+		t->hash[1] = 'P';
+		t->hash[2] = 'O';
+		t->hash[3] = 'P';
+		t->hash[24] = '\0';
+		for (count = 3; count < 24; count++) {
+			if (t->hash[count] == '\n') {
+				t->hash[count] = '\0';
+			}
+		}
+	    } else {
+		n = 512;
+		if (n > t->msglength)
+			n = t->msglength;
+		md5_digest((void*)(filemem + t->offset), n, t->hash);
+	    }
         }
     }

@@ -644,7 +670,10 @@

         while (I < End) {
             if (!I->deleted) {
-                fprintf(fp, "%08x %08x %08x %s\n", (unsigned int)offset, (unsigned int)I->length, (unsigned int)I->msglength, hex_digest(I->hash)); /* XXX error return? */
+		if (strncmp(I->hash, "QPOP", 4) == 0)
+			fprintf(fp, "%08x %08x %08x %s\n", (unsigned int)offset, (unsigned int)I->length, (unsigned int)I->msglength, I->hash); /* XXX error return? */
+		else
+			fprintf(fp, "%08x %08x %08x %s\n", (unsigned int)offset, (unsigned int)I->length, (unsigned int)I->msglength, hex_digest(I->hash)); /* XXX error return? */
                 offset += I->msglength;
             }
             ++I;
@@ -677,6 +706,7 @@
     size_t mappedlen;
     int num, r;
     int index_missing = 0;
+    char linebuf[80];

     if (!m || m->fd == -1) goto fail;

@@ -719,31 +749,49 @@
         goto fail;
     }

-    while (fscanf(fp, "%8x %8x %8x %32[0-9a-f]", &offset, &length, &msglength, hexdigest) == 4) {
+    while (fgets(linebuf, sizeof(linebuf), fp)) {
         struct indexpoint x;
         size_t n = 512;
-        unsigned char realhash[16];
-
-        /* XXX check validity here. */
-        mailspool_make_indexpoint(&x, offset, length, msglength, NULL);
-        unhex_digest(hexdigest, x.hash);
+        unsigned char realhash[32];
+	if (verbose)
+	log_print(LOG_DEBUG, _("mailspool_load_index: analyze line (%s)"), linebuf);
+
+	if(sscanf(linebuf, "%8x %8x %8x %32[0-9a-f]", &offset, &length, &msglength, hexdigest) == 4) {
+		if (verbose)
+			log_print(LOG_DEBUG, _("mailspool_load_index: have md5_hash (%32s)"), hexdigest);
+
+        	/* XXX check validity here. */
+        	mailspool_make_indexpoint(&x, offset, length, msglength, NULL);
+        	unhex_digest(hexdigest, x.hash);
+
+        	/* Compute MD5 */
+        	md5_digest(filemem + x.offset, n, realhash);
+        	/* No match; stop. */
+        	if (memcmp(realhash, x.hash, 16) != 0) {
+			/* Get rid of any preceding record: we will have to re-index that
+			* one, too. */
+			if (m->num > 0)
+				--m->num;
+			break;
+		}
+	} else if(sscanf(linebuf, "%8x %8x %8x %s\n", &offset, &length, &msglength, hexdigest) == 4) {
+		/* Security: Do not accept long UIDL's */
+		if (strlen(hexdigest) > 24) {
+			break;
+		}
+		if (verbose)
+			log_print(LOG_DEBUG, _("mailspool_load_index: have qpopper uidl (%24s)"), hexdigest);
+        	mailspool_make_indexpoint(&x, offset, length, msglength, NULL);
+        	memcpy(x.hash, hexdigest, 24);
+	} else {
+		break;
+	}

         if (x.offset + x.msglength > m->st.st_size || memcmp(filemem + x.offset, "From ", 5) != 0)
             break;

         if (n > x.msglength) n = x.msglength;

-        /* Compute MD5 */
-        md5_digest(filemem + x.offset, n, realhash);
-
-        /* No match; stop. */
-        if (memcmp(realhash, x.hash, 16) != 0) {
-            /* Get rid of any preceding record: we will have to re-index that
-             * one, too. */
-            if (m->num > 0)
-                --m->num;
-            break;
-        }

         /* OK, this message seems to have been indexed correctly.... */
         mailbox_add_indexpoint(m, &x);


Martin Blapp, <mb@imp.ch> <mbr@FreeBSD.org>
------------------------------------------------------------------
ImproWare AG, UNIXSP & ISP, Zurlindenstrasse 29, 4133 Pratteln, CH
Phone: +41 61 826 93 00 Fax: +41 61 826 93 01
PGP: <finger -l mbr@freebsd.org>
PGP Fingerprint: B434 53FC C87C FE7B 0A18 B84C 8686 EF22 D300 551E
------------------------------------------------------------------