[tpop3d-discuss]Early proof of concept for Pgsql mailbox driver

Chris Travers chris at travelamericas.com
Sat, 13 Mar 2004 13:56:55 -0800


Disclaimer:  I am not even sure if this will compile yet.  It is merely=20
offered for feedback purposes.  My C skills are still under active=20
development, as well, and it may be some time before this is=20
production-ready.  That and my time is currently limited.

This also assumes that you move a few of the auth_pgsql utility functions=
 into=20
another module called util_pgsql.

Should be pretty straight-forward to follow.  If anyone has any feedback,=
 I=20
would love to hear it.

/*
 *   mailpgsql.c"
 *   Mailboxes from a PostgreSQL
 *
 *   Copyright (c) 2004 Chris Travers
 *   All rights reserved except as under the GNU General Public License (=
GPL)
 *
 *   chris@travelamericas.com
 *
 *  Notes:  Currently, rather than implement the complex interface found =
in=20
 *  auth_pgsql.c, I have opted to use a more flexible, if obtuse, structu=
re=20
and
 *  require that the connection string be specified in the tpop3d.conf fi=
le.
 *  The auth_pgsql interface may be supported at a later time.  Also, I w=
ill
 *  likely patch auth_pgsql to allow it to use the pg_cnx_string configur=
ation
 *  parameter as well, but this has not happened yet.
 * =20
 */

#ifdef HAVE_CONFIG_H
#include "configuration.h"
#endif /* HAVE_CONFIG_H */

#ifdef MBOX_PGSQL
#include <stdio.h>
#include <errno.h>
#include <syslog.h>
#include <unistd.h>
#include <utime.h>
#include <time.h>
#include <stdlib.h>

#include "mailbox.h"
#include "connection.h"
#include "config.h"
#include "util.h"
#include "vector.h"
#include "libpq-fe.h"

/* Internal Status Codes:
 *
 * Note that all internal functions returning an int and wrapping databas=
e ops
 * return one of these unless they are returning data.
 */

#define MB_OK=090
#define MBERR_BAD_CNX=091
#define MBERR_DB_OP_FAILED=092
#define MBERR_NO_MBOX=094


/* Definitions for Postgresql data type lengths in text */
#define PG_LEN_BIGINT=0918

/* Setting up the query templates for a dedicated schema.  Should be easy=
 to=20
 * implement using views for any database with sufficient information.
 */

char *mailpgsql_mailbox_query_template =3D=20
=09"SELECT message_id, message_header, message_body, "
=09"char_length(message_header || '\n' || message_body) as message_length=
 "
=09"FROM tpop3d.messages "
=09"WHERE deliver_to =3D $1";

char *mailpgsql_mailbox_create_temp =3D=20
=09"CREATE TEMPORARY TABLE tpop3_maildrop "
=09"(message_id BIGINT, "
=09"message_headers TEXT, "
=09"message_body TEXT, "
=09"message_length INT) "
=09"ON COMMIT DROP ";

/* pgsql_dbop_test PGRESULT
 *=20
 * tests to see if the operation was successful.  Returns MB_OK (0) or
 * MBERR_DB_OP_FAILED */
function pgsql_dbop_test (PGresult *test_result){

=09int retval;

=09if ((PGRES_TUPLES_OK =3D=3D PQresultStatus(test_result)) ||=20
=09=09=09(PGRES_COMMAND_OK =3D=3D PQresultStatus(test_result))){
=09=09retval =3D 0;
=09} else {
=09=09retval =3D MBERR_DB_OP_FAILED;
=09}

=09return retval;
}

/* pgsql_dbop_test_only PGRESULT
 *
 * Tests to see if the operation committed successfully.  If so, it frees=
 the
 * result and returns 0.  This is needed because of the large number of=20
 * database operations which do not return anything. If the op was not
 * successful, it still frees the result but returns MBERR_DB_OP_FAILED. =
*/

int pgsql_dbop_test_only (PGresult *test_result){
=09int retval;
=09retval =3D pgsql_dbop_test(test_result);
=09PQClear(test_result);

=09return retval;
}
=09
/* mailpgsql_lock PGCONNECTION
 *
 * Locks are implemented in mailpgsql using temporary tables.  This allow=
s for
 * exclusive, nonblocking access to the maildrop.  In other words, exclus=
ive
 * mailboxes are materialized on demand and do not block other attempts t=
o
 * access the same content.  This is used to simulate REPEATABLE READ=20
 * transaction isolation level (not yet implemented in PostgreSQL as of 7=
=2E4).
 *
 * Still trying to decide if this is the right way to go. */

static int mailpgsql_lock(mailbox *M){
=09char *param_values[1];
=09PGresult *pg_result;
=09int retval =3D MB_OK;

=09if (CONNECTION_BAD =3D=3D PQstatus(M->pg_cnx)){
=09=09return MBERR_BAD_CNX;
=09}

=09param_values[0] =3D M->name;

=09if (retval =3D pgsql_dbop_test_only(PGExec(M->pg_cnx, "BEGIN"i))){
=09=09goto end_func;
=09}
=09
=09if (retval =3D pgsql_dbop_test_only(
=09=09=09PGExec(M->pg_cnx, mailpgsql_mailbox_create_temp)
=09)){
=09=09goto end_func;
=09}
=09if (retval =3D pgsql_dbop_test_only(PGExecParams(M->pg_cnx,=20
=09=09sprintf("INSERT INTO tpop3_maildrop %s", =20
=09=09=09mailpgsql_mailbox_query_template),
=09=091,
=09=09NULL,
=09=09param_values,
=09=09NULL,
=09=09NULL,
=09=091
=09))){
=09=09goto end_func;
=09}

end_func:
=09return retval;
}


/* The following 2 functions are different ways of undoing a lock.  At th=
e
 * moment, only mailpgsql_commit will likely be used, but there may be ca=
ses
 * when an error occurs and if so, we should rollback the transaction.
 */

/* mailpgsql_commit PGCONNECTION
 * Ends a transaction block by issuing a COMMIT statement. */

static int mailpgsql_commit(PGconn *cnx){
=09return pgsql_dbop_test_only(PGexec(cnx, "COMMIT"));
}

/* mailpgsql_rollback PGCONNECTION
 * Ends a transaction block by issuing a ROLLBACK statement. */

static int mailpgsql_rollback(PGconn *cnx){
=09return pgsql_dbop_test_only(PGexec(cnx, "ROLLBACK"));
}

/* mailpgsql_make_indexpoint
 * makes in index point for a mailpgsql mailbox. */

static void mailpgsql_make_indexpoint(
=09=09struct indexpoint *indexp,
=09=09const char *messgid,
=09=09off_t size,
=09=09time_t mtime){
=09memset(indexp, 0, sizeof(struct indexpoint));

=09indexp->filename =3D strdup(messgid);
=09if (!m->filename){
=09=09return;
=09}
=09indexp->offset =3D 0;
=09indexp->length =3D 0;
=09indexp->deleted =3D 0;
=09indexp->msgsize =3D size;
=09indexp->mtime =3D mtime;

=09md5_digest(messgid, strcspn(messgid, ":"), indexp->hash);=20
}


/* mailpgsql_build_index MAILBOX, TIME
 *
 * Builds the index for the mailgpsql mailbox */

static int mailpgsql_build_index (mailbox M, time_t mtime){
=09int retval =3D MB_OK;
=09unsigned long mr_length;
=09char *messgid;
=09PGresult *mb_row;

=09messgid =3D xmalloc(PG_LEN_BIGINT + 2); /* TO BE SAFE */
=09if (!M){
=09=09log_print(LOG_ERR, "mailpgsql build index failed: No MBOX: %m");
=09=09return MBERR_NO_MBOX;
=09}
=09retval =3D pgsql_dbop_test_only(PGExec(
=09=09"DECLARE mb_index CURSOR FOR SELECT * FROM tpop3_maildrop"
=09));
=09if (retval !=3D MB_OK){
=09=09return retval;
=09}
=09while(1){
=09=09mb_row =3D PGexec("FETCH mb_index");
=09=09if (0 =3D=3D PQntuples(mb_row)){
=09=09=09break;
=09=09}
=09=09mr_length =3D atoi(PQgetvalue(mbrow, 0, 3));

=09=09sprintf(messgid, "%s", PQgetvalue(mb_row, 0, 0));
=09=09struct indexpoint ipt;
=09=09mailpgsql_make_indexpoint(&ipt, mr_length, NULL);
=09=09mailbox_add_indexpoint(M, &ipt);
=09=09M->totalsize +=3D mr_length;=20
=09}
=09xfree messgid;
}

/* mailpgsql_new EMAIL_ADDRESS=20
 * Constructor for mailbox object.
 *
 * For mailpgsql, the mailboxes actually consist of temporary tables whic=
h=20
 * are cleared when the database transaction is closed.  This is how=20
exclusive,
 * non-blocking locks are achieved.  However, deleting the emails is done=
=20
 * against the master relation.  This allows duplicate deletions to occur=
=20
 * without error. */
mailbox mailpgsql_new_from_addr(const char *email_addr){
=09mailbox M, FailM =3D NULL;
=09char *s, cnx_string =3D NULL;

=09if (s =3D config_get_string("pg-cnx-string")){
=09=09cnx_string =3D s;
=09}=20
=09
=09alloc_struct(_mailbox, M);

=09M->pg_cnx =3D PQconnectdb(cnx_string);
=09M->delete =3D mailpgsql_delete;
=09M->apply_changes =3D mailpgsql_apply_changes;
=09M->sendmessage =3D mailpgsql_sendmessage;

=09/* Allocate space for the index. */
=09M->index =3D (struct indexpoint*)xcalloc(32, sizeof(struct indexpoint)=
);
=09M->size =3D 32;

=09if (mailpgsql_lock(M) !=3D MB_OK){ // Try to get non-blocking exclusiv=
e
=09=09=09=09=09// lock.
=09=09goto fail;
=09}
=09return M;

fail:
=09if (M){
=09=09mailpgsql_rollback(M->pg_cnx);
=09=09if (M->name) xfree(M->name);
=09=09if (M->index) xfree(M->index);
=09=09xfree(M);
=09}
=09return failM;
}

/* mailpgsql_delete MAILBOX
 * Inserts runs a transaction rollback into the current transaction in or=
der=20
to=20
 * terminate any current transaction code.  This is will result in the=20
 * database-side mailbox being dropped, and then it can safely deallocate=
 the
 * client-side object.  Note that this means that it is the responsibilit=
y of
 * the commit_changes function to commit the transaction and begin a new =
dummy
 * transaction. */
void mailpgsql_delete (mailbox M){
=09mailpgsql_rollback(M->pg_cnx);
=09mailbox_delete(M);
}

/* mailpgsql_sendmessage MAILBOX CONNECTION MSGNUM LINES
 * writes an +OK or -ERR message followed by the headers and a specified=20
number
 * of LINES of the body.
 *
 * This is a complex problem, as we do NOT assume that the messages use t=
he=20
 * proper \r\n EOL handling (PostgreSQL uses \n only).  We assume that \n=
 may
 * be used by itself, and we discount the possibility that the EOL could =
be=20
\r.
 * We also properly escape the beginning . in any lines.  Finally, we att=
empt
 * to handle all this inside the query using regular expressions and repl=
ace()
 * calls, which results in a particularly complex SQL query. Returns 0 or=
 -1.=20
*/
int mailpgsql_sendmessage(mailbox M, connection c,=20
=09=09=09const int msgnum, const int lines){
=09struct indexpoint ipoint;
=09int status;

=09char *getmsgquery;
=09char *repeat_clause;
=09char *msg_text;
=09PGresult *query_result;

=09/* Template for query handles EOL errors (except for EOL being "\r" as
 =09 * MacOS) and escaping lines which begin with . as per rfc1939.
=09 * Finally, we leave room for the ability to limit the resulting msg
=09 * body length. */
=09char *getmsg_query_template =3D=20
=09=09"SELECT replace(replace(replace(message_header, "
=09=09=09=09=09"'\r\n', '\n'), "
=09=09=09=09"'\n.', '\n..'), "
=09=09=09"'\n', '\r\n') || \r\n || "
=09=09=09"replace(replace(%sreplace(message_body, "
=09=09=09=09=09 =09"'\r\n', '\n') "
=09=09=09=09=09"%s, "
=09=09=09=09"'\n.', '\n..'), "
=09=09=09"'\n', '\r\n') || %s AS message "
=09=09"FROM tpop3_maildrop LIMIT 1 OFFSET %d";
=09/* fill in the offset */
=09sprintf (getmsg_query_template, getmsg_query_template, msgnum - 1);

=09/* Try first limiting the lines returned. */
=09sprintf(repeat_clause, "FROM repeat('[^\n]*\n', %d))", lines);
=09sprintf(getmsgquery, getmsg_query_template,=20
=09=09=09"substring(", repeat_clause, ".");

=09query_result =3D PGexec(getmsgquery);

=09msg_text =3D PQgetvalue(query_result, 0, 0);

=09if(msg_text !=3D NULL || *msg_text !=3D ""){
=09=09/* We didn't get anything.  Try without the line limit */
=09=09sprintf(getmsgquery, getmsg_query_template, "", "", "\r\n.");
=09=09query_result =3D PGexec(getmsgquery);
=09=09msg_text =3D PQgetvalue(query_result, 0, 0);
=09}
=09/* If we still don't have a message, we have a problem */
=09if(msg_text !=3D NULL || *msg_text !=3D ""){
=09=09connection_sendresponse(c, 0,=20
=09=09=09"Sorry, the operation failed. Please try again later.");
=09=09return -1;
=09}

=09/* Now we just need to send the message to the client. */
=09connection_sendline(c, msg_text);
=09return 0;
}
=09

#endif /* MBOX_PGSQL */=20