From 7a634e48b13c5d5d295b8fed9b429e1b2109a333 Mon Sep 17 00:00:00 2001 From: "Neal H. Walfield" Date: Sun, 30 Oct 2016 19:02:36 -0700 Subject: [PATCH] g10: Avoid gratuitious SQLite aborts and starving writers. * g10/tofu.c: Include , , and . (tofu_dbs_s): Add fields want_lock_file and want_lock_file_ctime. (begin_transaction): Only yield if DBS->WANT_LOCK_FILE_CTIME has changed since we took the lock. Don't use gpgrt_yield to yield, but sleep for 100ms. After taking the batch lock, update DBS->WANT_LOCK_FILE_CTIME. Also take the batch lock the first time we take the real lock. When taking the real lock, use immediate not deferred mode to avoid gratuitious aborts. (end_transaction): When dropping the outermost real lock, drop the batch lock. (busy_handler): New function. (opendbs): Set the busy handler to it when opening the DB. Initialize CTRL->TOFU.DBS->WANT_LOCK_FILE. (tofu_closedbs): Free DBS->WANT_LOCK_FILE. -- Signed-off-by: Neal H. Walfield By default, SQLite defers transactions until they are actually needed. A consequence of this is that if we have two readers and both decide to do a write, then one has to abort. To avoid this problem, we can make the outermost transaction an immediate transaction. This has the disadvantage that we only allow a single reader at a time, but at least we don't have gratuitous aborts anymore. A second problem is that SQLite apparently doesn't actually create a queue of waiters. The result is that doing a sched_yield between dropping and retaking the batch transaction is not enough to allow the other process to make progress. Instead, we need to wait a while (emperically: 100ms seems reasonable). To avoid waiting when there is no contention, we use a new file's timestamp to signal that there is a waiter. --- g10/tofu.c | 96 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 15 deletions(-) diff --git a/g10/tofu.c b/g10/tofu.c index a0f80b3d4..8280321d7 100644 --- a/g10/tofu.c +++ b/g10/tofu.c @@ -28,6 +28,10 @@ #include #include #include +#include +#include +#include +#include #include "gpg.h" #include "types.h" @@ -65,6 +69,8 @@ struct tofu_dbs_s { sqlite3 *db; + char *want_lock_file; + time_t want_lock_file_ctime; struct { @@ -185,21 +191,39 @@ begin_transaction (ctrl_t ctrl, int only_batch) /* And some time has gone by since it was started. */ && dbs->batch_update_started != gnupg_get_time ()) { + struct stat statbuf; + struct timespec ts; + /* If we are in a batch update, then batch updates better have been enabled. */ log_assert (ctrl->tofu.batch_updated_wanted); - end_transaction (ctrl, 2); + /* Check if another process wants to run. (We just ignore any + * stat failure. A waiter might have to wait a bit longer, but + * otherwise there should be no impact.) */ + if (stat (dbs->want_lock_file, &statbuf) == 0 + && statbuf.st_ctime != dbs->want_lock_file_ctime) + { + end_transaction (ctrl, 2); - /* Yield to allow another process a chance to run. */ - gpgrt_yield (); + /* Yield to allow another process a chance to run. Note: + * testing suggests that anything less than a 100ms tends to + * not result in the other process getting the lock. */ + memset (&ts, 0, sizeof (ts)); + ts.tv_nsec = 100 * 1000 * 1000; + nanosleep (&ts, &ts); + } + else + dbs->batch_update_started = gnupg_get_time (); } - if (/* Batch mode is enabled. */ - ctrl->tofu.batch_updated_wanted - /* But we don't have an open batch transaction. */ - && !dbs->in_batch_transaction) + if (/* We don't have an open batch transaction. */ + !dbs->in_batch_transaction + && (/* Batch mode is enabled or we are starting a new transaction. */ + ctrl->tofu.batch_updated_wanted || dbs->in_transaction == 0)) { + struct stat statbuf; + /* We are in batch mode, but we don't have an open batch * transaction. Since the batch save point must be the outer * save point, it must be taken before the inner save point. */ @@ -207,7 +231,7 @@ begin_transaction (ctrl_t ctrl, int only_batch) rc = gpgsql_stepx (dbs->db, &dbs->s.savepoint_batch, NULL, NULL, &err, - "savepoint batch;", GPGSQL_ARG_END); + "begin immediate transaction;", GPGSQL_ARG_END); if (rc) { log_error (_("error beginning transaction on TOFU database: %s\n"), @@ -218,12 +242,15 @@ begin_transaction (ctrl_t ctrl, int only_batch) dbs->in_batch_transaction = 1; dbs->batch_update_started = gnupg_get_time (); + + if (stat (dbs->want_lock_file, &statbuf) == 0) + dbs->want_lock_file_ctime = statbuf.st_ctime; } if (only_batch) return 0; - log_assert(dbs->in_transaction >= 0); + log_assert (dbs->in_transaction >= 0); dbs->in_transaction ++; rc = gpgsql_exec_printf (dbs->db, NULL, NULL, &err, @@ -252,14 +279,15 @@ end_transaction (ctrl_t ctrl, int only_batch) int rc; char *err = NULL; - if (only_batch) + if (only_batch || (! only_batch && dbs->in_transaction == 1)) { if (!dbs) return 0; /* Shortcut to allow for easier cleanup code. */ /* If we are releasing the batch transaction, then we better not be in a normal transaction. */ - log_assert (dbs->in_transaction == 0); + if (only_batch) + log_assert (dbs->in_transaction == 0); if (/* Batch mode disabled? */ (!ctrl->tofu.batch_updated_wanted || only_batch == 2) @@ -269,10 +297,11 @@ end_transaction (ctrl_t ctrl, int only_batch) /* The batch transaction is still in open, but we've left * batch mode. */ dbs->in_batch_transaction = 0; + dbs->in_transaction = 0; rc = gpgsql_stepx (dbs->db, &dbs->s.savepoint_batch_commit, NULL, NULL, &err, - "release batch;", GPGSQL_ARG_END); + "commit transaction;", GPGSQL_ARG_END); if (rc) { log_error (_("error committing transaction on TOFU database: %s\n"), @@ -284,7 +313,8 @@ end_transaction (ctrl_t ctrl, int only_batch) return 0; } - return 0; + if (only_batch) + return 0; } log_assert (dbs); @@ -689,6 +719,36 @@ initdb (sqlite3 *db) } } +static int +busy_handler (void *cookie, int call_count) +{ + ctrl_t ctrl = cookie; + tofu_dbs_t dbs = ctrl->tofu.dbs; + + (void) call_count; + + /* Update the lock file time stamp so that the current owner knows + that we want the lock. */ + if (dbs) + { + /* Note: we don't fail if we can't create the lock file: this + process will have to wait a bit longer, but otherwise nothing + horrible should happen. */ + + int fd = open (dbs->want_lock_file, O_CREAT); + if (fd == -1) + log_debug ("TOFU: Error opening '%s': %s\n", + dbs->want_lock_file, strerror (errno)); + else + { + utime (dbs->want_lock_file, NULL); + close (fd); + } + } + + /* Call again. */ + return 1; +} /* Create a new DB handle. Returns NULL on error. */ /* FIXME: Change to return an error code for better reporting by the @@ -713,12 +773,14 @@ opendbs (ctrl_t ctrl) sqlite3_close (db); db = NULL; } - xfree (filename); /* If a DB is locked wait up to 5 seconds for the lock to be cleared before failing. */ if (db) - sqlite3_busy_timeout (db, 5 * 1000); + { + sqlite3_busy_timeout (db, 5 * 1000); + sqlite3_busy_handler (db, busy_handler, ctrl); + } if (db && initdb (db)) { @@ -730,7 +792,10 @@ opendbs (ctrl_t ctrl) { ctrl->tofu.dbs = xmalloc_clear (sizeof *ctrl->tofu.dbs); ctrl->tofu.dbs->db = db; + ctrl->tofu.dbs->want_lock_file = xasprintf ("%s-want-lock", filename); } + + xfree (filename); } else log_assert (ctrl->tofu.dbs->db); @@ -761,6 +826,7 @@ tofu_closedbs (ctrl_t ctrl) sqlite3_finalize (*statements); sqlite3_close (dbs->db); + xfree (dbs->want_lock_file); xfree (dbs); ctrl->tofu.dbs = NULL; }