Remove stale lockfiles if created on the same node.

This commit is contained in:
Werner Koch 2006-06-28 15:14:09 +00:00
parent 98c6970ad1
commit 6c4ae71b5d
2 changed files with 348 additions and 257 deletions

View File

@ -1,3 +1,15 @@
2006-06-28 Werner Koch <wk@g10code.com>
* dotlock.c (make_dotlock, release_dotlock, read_lockfile)
(maybe_deadlock, destroy_dotlock, create_dotlock): Re-indented.
(create_dotlock): Replaced some log_fatal by log_error as it was
not intended that they should terminate. Write the nodename to
the locking file. Code cleanups.
(read_lockfile): Reworked to read the node name.
(make_dotlock): Test for identical node name and delete stale lock
file.
(release_dotlock): Likewise.
2006-05-23 Werner Koch <wk@g10code.com>
* libjnlib-config.h (JNLIB_NEED_UTF8CONV): Fixed typo in name.

View File

@ -1,6 +1,6 @@
/* dotlock.c - dotfile locking
* Copyright (C) 1998, 2000, 2001, 2003, 2004,
* 2005 Free Software Foundation, Inc.
* 2005, 2006 Free Software Foundation, Inc.
*
* This file is part of GnuPG.
*
@ -55,32 +55,35 @@
#endif
struct dotlock_handle {
struct dotlock_handle *next;
char *tname; /* name of lockfile template */
char *lockname; /* name of the real lockfile */
int locked; /* lock status */
int disable; /* locking */
struct dotlock_handle
{
struct dotlock_handle *next;
char *tname; /* Name of lockfile template. */
size_t nodename_off; /* Offset in TNAME of the nodename part. */
size_t nodename_len; /* Length of the nodename part. */
char *lockname; /* Name of the real lockfile. */
int locked; /* Lock status. */
int disable; /* When true, locking is disabled. */
};
static volatile DOTLOCK all_lockfiles;
static int never_lock;
static int read_lockfile( const char *name );
static int read_lockfile (DOTLOCK h, int *same_node);
void
disable_dotlock(void)
{
never_lock = 1;
never_lock = 1;
}
/****************
* Create a lockfile with the given name and return an object of
* type DOTLOCK which may be used later to actually do the lock.
* A cleanup routine gets installed to cleanup left over locks
* or other files used together with the lockmechanism.
* Althoug the function is called dotlock, this does not necessarily
* or other files used together with the lock mechanism.
* Although the function is called dotlock, this does not necessarily
* mean that real lockfiles are used - the function may decide to
* use fcntl locking. Calling the function with NULL only installs
* the atexit handler and may be used to assure that the cleanup
@ -94,160 +97,165 @@ disable_dotlock(void)
DOTLOCK
create_dotlock( const char *file_to_lock )
{
static int initialized;
DOTLOCK h;
int fd = -1;
char pidstr[16];
#ifndef HAVE_DOSISH_SYSTEM
struct utsname utsbuf;
#endif
const char *nodename;
const char *dirpart;
int dirpartlen;
if( !initialized ) {
atexit( dotlock_remove_lockfiles );
initialized = 1;
}
if( !file_to_lock )
return NULL;
h = jnlib_xcalloc( 1, sizeof *h );
if( never_lock ) {
h->disable = 1;
#ifdef _REENTRANT
/* fixme: aquire mutex on all_lockfiles */
static int initialized;
DOTLOCK h;
int fd = -1;
char pidstr[16];
const char *nodename;
const char *dirpart;
int dirpartlen;
#ifndef HAVE_DOSISH_SYSTEM
struct utsname utsbuf;
#endif
h->next = all_lockfiles;
all_lockfiles = h;
return h;
if ( !initialized )
{
atexit( dotlock_remove_lockfiles );
initialized = 1;
}
if ( !file_to_lock )
return NULL; /* Only initialization was requested. */
h = jnlib_xcalloc ( 1, sizeof *h );
if( never_lock )
{
h->disable = 1;
#ifdef _REENTRANT
/* fixme: aquire mutex on all_lockfiles */
#endif
h->next = all_lockfiles;
all_lockfiles = h;
return h;
}
#ifndef HAVE_DOSISH_SYSTEM
sprintf( pidstr, "%10d\n", (int)getpid() );
/* fixme: add the hostname to the second line (FQDN or IP addr?) */
/* create a temporary file */
if( uname( &utsbuf ) )
nodename = "unknown";
else
nodename = utsbuf.nodename;
sprintf (pidstr, "%10d\n", (int)getpid() );
/* fixme: add the hostname to the second line (FQDN or IP addr?) */
/* Create a temporary file. */
if ( uname ( &utsbuf ) )
nodename = "unknown";
else
nodename = utsbuf.nodename;
#ifdef __riscos__
{
char *iter = (char *) nodename;
for (; iter[0]; iter++)
if (iter[0] == '.')
iter[0] = '/';
}
{
char *iter = (char *) nodename;
for (; iter[0]; iter++)
if (iter[0] == '.')
iter[0] = '/';
}
#endif /* __riscos__ */
if( !(dirpart = strrchr( file_to_lock, DIRSEP_C )) ) {
dirpart = EXTSEP_S;
dirpartlen = 1;
if ( !(dirpart = strrchr ( file_to_lock, DIRSEP_C )) )
{
dirpart = EXTSEP_S;
dirpartlen = 1;
}
else {
dirpartlen = dirpart - file_to_lock;
dirpart = file_to_lock;
else
{
dirpartlen = dirpart - file_to_lock;
dirpart = file_to_lock;
}
#ifdef _REENTRANT
/* fixme: aquire mutex on all_lockfiles */
#endif
h->next = all_lockfiles;
all_lockfiles = h;
h->next = all_lockfiles;
all_lockfiles = h;
h->tname = jnlib_xmalloc( dirpartlen + 6+30+ strlen(nodename) + 11 );
h->tname = jnlib_xmalloc ( dirpartlen + 6+30+ strlen(nodename) + 11 );
h->nodename_len = strlen (nodename);
#ifndef __riscos__
sprintf( h->tname, "%.*s/.#lk%p.%s.%d",
dirpartlen, dirpart, h, nodename, (int)getpid() );
sprintf (h->tname, "%.*s/.#lk%p.", dirpartlen, dirpart, h );
h->nodename_off = strlen (h->tname);
sprintf (h->tname+h->nodename_off, "%s.%d", nodename, (int)getpid ());
#else /* __riscos__ */
sprintf( h->tname, "%.*s.lk%p/%s/%d",
dirpartlen, dirpart, h, nodename, (int)getpid() );
sprintf (h->tname, "%.*s.lk%p/", dirpartlen, dirpart, h );
h->nodename_off = strlen (h->tname);
sprintf (h->tname+h->nodename_off, "%s/%d", nodename, (int)getpid () );
#endif /* __riscos__ */
do {
errno = 0;
fd = open( h->tname, O_WRONLY|O_CREAT|O_EXCL,
S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR );
} while( fd == -1 && errno == EINTR );
if( fd == -1 ) {
all_lockfiles = h->next;
log_error( "failed to create temporary file `%s': %s\n",
h->tname, strerror(errno));
jnlib_free(h->tname);
jnlib_free(h);
return NULL;
}
if( write(fd, pidstr, 11 ) != 11 ) {
all_lockfiles = h->next;
#ifdef _REENTRANT
/* release mutex */
#endif
log_fatal( "error writing to `%s': %s\n", h->tname, strerror(errno) );
close(fd);
unlink(h->tname);
jnlib_free(h->tname);
jnlib_free(h);
return NULL;
}
if( close(fd) ) {
all_lockfiles = h->next;
#ifdef _REENTRANT
/* release mutex */
#endif
log_fatal( "error writing to `%s': %s\n", h->tname, strerror(errno) );
close(fd);
unlink(h->tname);
jnlib_free(h->tname);
jnlib_free(h);
return NULL;
do
{
errno = 0;
fd = open (h->tname, O_WRONLY|O_CREAT|O_EXCL,
S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR );
}
while (fd == -1 && errno == EINTR);
if ( fd == -1 )
{
all_lockfiles = h->next;
log_error ( "failed to create temporary file `%s': %s\n",
h->tname, strerror(errno));
jnlib_free(h->tname);
jnlib_free(h);
return NULL;
}
if ( write (fd, pidstr, 11 ) != 11 )
goto write_failed;
if ( write (fd, nodename, strlen (nodename) ) != strlen (nodename) )
goto write_failed;
if ( write (fd, "\n", 1 ) != 1 )
goto write_failed;
if ( close (fd) )
goto write_failed;
# ifdef _REENTRANT
/* release mutex */
/* release mutex */
# endif
#endif /* !HAVE_DOSISH_SYSTEM */
h->lockname = jnlib_xmalloc( strlen(file_to_lock) + 6 );
strcpy(stpcpy(h->lockname, file_to_lock), EXTSEP_S "lock");
return h;
h->lockname = jnlib_xmalloc ( strlen (file_to_lock) + 6 );
strcpy (stpcpy(h->lockname, file_to_lock), EXTSEP_S "lock");
return h;
write_failed:
all_lockfiles = h->next;
# ifdef _REENTRANT
/* fixme: release mutex */
# endif
log_error ( "error writing to `%s': %s\n", h->tname, strerror(errno) );
close(fd);
unlink(h->tname);
jnlib_free(h->tname);
jnlib_free(h);
return NULL;
}
void
destroy_dotlock ( DOTLOCK h )
{
#if !defined (HAVE_DOSISH_SYSTEM)
if ( h )
{
DOTLOCK hprev, htmp;
/* First remove the handle from our global list of all locks. */
for (hprev=NULL, htmp=all_lockfiles; htmp; hprev=htmp, htmp=htmp->next)
if (htmp == h)
{
if (hprev)
hprev->next = htmp->next;
else
all_lockfiles = htmp->next;
h->next = NULL;
break;
}
/* Second destroy the lock. */
if (!h->disable)
#ifndef HAVE_DOSISH_SYSTEM
if ( h )
{
DOTLOCK hprev, htmp;
/* First remove the handle from our global list of all locks. */
for (hprev=NULL, htmp=all_lockfiles; htmp; hprev=htmp, htmp=htmp->next)
if (htmp == h)
{
if (h->locked && h->lockname)
unlink (h->lockname);
if (h->tname)
unlink (h->tname);
jnlib_free (h->tname);
jnlib_free (h->lockname);
if (hprev)
hprev->next = htmp->next;
else
all_lockfiles = htmp->next;
h->next = NULL;
break;
}
jnlib_free(h);
}
#endif
/* Second destroy the lock. */
if (!h->disable)
{
if (h->locked && h->lockname)
unlink (h->lockname);
if (h->tname)
unlink (h->tname);
jnlib_free (h->tname);
jnlib_free (h->lockname);
}
jnlib_free(h);
}
#endif /*!HAVE_DOSISH_SYSTEM*/
}
@ -255,107 +263,120 @@ destroy_dotlock ( DOTLOCK h )
static int
maybe_deadlock( DOTLOCK h )
{
DOTLOCK r;
DOTLOCK r;
for( r=all_lockfiles; r; r = r->next ) {
if( r != h && r->locked )
return 1;
for ( r=all_lockfiles; r; r = r->next )
{
if ( r != h && r->locked )
return 1;
}
return 0;
return 0;
}
/****************
* Do a lock on H. A TIMEOUT of 0 returns immediately,
* -1 waits forever (hopefully not), other
* values are timeouts in milliseconds.
* Returns: 0 on success
* Do a lock on H. A TIMEOUT of 0 returns immediately, -1 waits
* forever (hopefully not), other values are reserved (should then be
* timeouts in milliseconds). Returns: 0 on success
*/
int
make_dotlock( DOTLOCK h, long timeout )
{
#ifdef HAVE_DOSISH_SYSTEM
return 0;
return 0;
#else
int pid;
const char *maybe_dead="";
int backoff=0;
int pid;
const char *maybe_dead="";
int backoff=0;
int same_node;
if( h->disable ) {
return 0;
}
if ( h->disable )
return 0; /* Locks are completely disabled. Return success. */
if( h->locked ) {
if ( h->locked )
{
#ifndef __riscos__
log_debug("oops, `%s' is already locked\n", h->lockname );
log_debug("oops, `%s' is already locked\n", h->lockname );
#endif /* !__riscos__ */
return 0;
return 0;
}
for(;;) {
for(;;)
{
#ifndef __riscos__
if( !link(h->tname, h->lockname) ) {
/* fixme: better use stat to check the link count */
h->locked = 1;
return 0; /* okay */
if ( !link(h->tname, h->lockname) )
{
/* fixme: better use stat to check the link count */
h->locked = 1;
return 0; /* okay */
}
if( errno != EEXIST ) {
log_error( "lock not made: link() failed: %s\n", strerror(errno) );
return -1;
if ( errno != EEXIST )
{
log_error( "lock not made: link() failed: %s\n", strerror(errno) );
return -1;
}
#else /* __riscos__ */
if( !renamefile(h->tname, h->lockname) ) {
h->locked = 1;
return 0; /* okay */
if ( !renamefile(h->tname, h->lockname) )
{
h->locked = 1;
return 0; /* okay */
}
if( errno != EEXIST ) {
log_error( "lock not made: rename() failed: %s\n", strerror(errno) );
return -1;
if ( errno != EEXIST )
{
log_error( "lock not made: rename() failed: %s\n", strerror(errno) );
return -1;
}
#endif /* __riscos__ */
if( (pid = read_lockfile(h->lockname)) == -1 ) {
if( errno != ENOENT ) {
log_info("cannot read lockfile\n");
return -1;
if ( (pid = read_lockfile (h, &same_node)) == -1 )
{
if ( errno != ENOENT )
{
log_info ("cannot read lockfile\n");
return -1;
}
log_info( "lockfile disappeared\n");
continue;
log_info( "lockfile disappeared\n");
continue;
}
else if( pid == getpid() ) {
log_info( "Oops: lock already held by us\n");
h->locked = 1;
return 0; /* okay */
else if ( pid == getpid() && same_node )
{
log_info( "Oops: lock already held by us\n");
h->locked = 1;
return 0; /* okay */
}
else if( kill(pid, 0) && errno == ESRCH ) {
else if ( same_node && kill (pid, 0) && errno == ESRCH )
{
#ifndef __riscos__
maybe_dead = " - probably dead";
#if 0 /* we should not do this without checking the permissions */
/* and the hostname */
log_info( "removing stale lockfile (created by %d)", pid );
#endif
log_info ("removing stale lockfile (created by %d)", pid );
unlink (h->lockname);
continue;
#else /* __riscos__ */
/* we are *pretty* sure that the other task is dead and therefore
we remove the other lock file */
maybe_dead = " - probably dead - removing lock";
unlink(h->lockname);
/* Under RISCOS we are *pretty* sure that the other task
is dead and therefore we remove the stale lock file. */
maybe_dead = " - probably dead - removing lock";
unlink(h->lockname);
#endif /* __riscos__ */
}
if( timeout == -1 ) {
struct timeval tv;
log_info( "waiting for lock (held by %d%s) %s...\n",
pid, maybe_dead, maybe_deadlock(h)? "(deadlock?) ":"");
if ( timeout == -1 )
{
/* Wait until lock has been released. */
struct timeval tv;
log_info ("waiting for lock (held by %d%s) %s...\n",
pid, maybe_dead, maybe_deadlock(h)? "(deadlock?) ":"");
/* can't use sleep, cause signals may be blocked */
tv.tv_sec = 1 + backoff;
tv.tv_usec = 0;
select(0, NULL, NULL, NULL, &tv);
if( backoff < 10 )
backoff++ ;
/* We can't use sleep, cause signals may be blocked. */
tv.tv_sec = 1 + backoff;
tv.tv_usec = 0;
select(0, NULL, NULL, NULL, &tv);
if ( backoff < 10 )
backoff++ ;
}
else
return -1;
else
return -1;
}
/*not reached */
/*NOTREACHED*/
#endif /* !HAVE_DOSISH_SYSTEM */
}
@ -368,92 +389,150 @@ int
release_dotlock( DOTLOCK h )
{
#ifdef HAVE_DOSISH_SYSTEM
return 0;
return 0;
#else
int pid;
int pid, same_node;
/* To avoid atexit race conditions we first check whether there
are any locks left. It might happen that another atexit
handler tries to release the lock while the atexit handler of
this module already ran and thus H is undefined. */
if(!all_lockfiles)
return 0;
/* To avoid atexit race conditions we first check whether there are
any locks left. It might happen that another atexit handler
tries to release the lock while the atexit handler of this module
already ran and thus H is undefined. */
if (!all_lockfiles)
return 0;
if( h->disable ) {
return 0;
if ( h->disable )
return 0;
if ( !h->locked )
{
log_debug("oops, `%s' is not locked\n", h->lockname );
return 0;
}
if( !h->locked ) {
log_debug("oops, `%s' is not locked\n", h->lockname );
return 0;
pid = read_lockfile (h, &same_node);
if ( pid == -1 )
{
log_error( "release_dotlock: lockfile error\n");
return -1;
}
pid = read_lockfile( h->lockname );
if( pid == -1 ) {
log_error( "release_dotlock: lockfile error\n");
return -1;
}
if( pid != getpid() ) {
log_error( "release_dotlock: not our lock (pid=%d)\n", pid);
return -1;
if ( pid != getpid() || !same_node )
{
log_error( "release_dotlock: not our lock (pid=%d)\n", pid);
return -1;
}
#ifndef __riscos__
if( unlink( h->lockname ) ) {
log_error( "release_dotlock: error removing lockfile `%s'",
h->lockname);
return -1;
if ( unlink( h->lockname ) )
{
log_error( "release_dotlock: error removing lockfile `%s'",
h->lockname);
return -1;
}
#else /* __riscos__ */
if( renamefile(h->lockname, h->tname) ) {
log_error( "release_dotlock: error renaming lockfile `%s' to `%s'",
h->lockname, h->tname);
return -1;
if ( renamefile(h->lockname, h->tname) )
{
log_error( "release_dotlock: error renaming lockfile `%s' to `%s'",
h->lockname, h->tname);
return -1;
}
#endif /* __riscos__ */
/* fixme: check that the link count is now 1 */
h->locked = 0;
return 0;
/* fixme: check that the link count is now 1 */
h->locked = 0;
return 0;
#endif /* !HAVE_DOSISH_SYSTEM */
}
/****************
* Read the lock file and return the pid, returns -1 on error.
/*
Read the lock file and return the pid, returns -1 on error. True
will be stored at SAME_NODE if the lock file has been created on
the same node.
*/
static int
read_lockfile( const char *name )
read_lockfile (DOTLOCK h, int *same_node )
{
#ifdef HAVE_DOSISH_SYSTEM
return 0;
return 0;
#else
int fd, pid;
char pidstr[16];
char buffer_space[10+1+70+1]; /* 70 is just an estimated value; node
name are usually shorter. */
int fd, pid;
char *buffer, *p;
size_t expected_len;
int res, nread;
*same_node = 0;
expected_len = 10 + 1 + h->nodename_len + 1;
if ( expected_len >= sizeof buffer_space)
buffer = jnlib_xmalloc (expected_len);
else
buffer = buffer_space;
if( (fd = open(name, O_RDONLY)) == -1 ) {
int e = errno;
log_debug("error opening lockfile `%s': %s\n", name, strerror(errno) );
errno = e;
return -1;
if ( (fd = open (h->lockname, O_RDONLY)) == -1 )
{
int e = errno;
log_info ("error opening lockfile `%s': %s\n",
h->lockname, strerror(errno) );
if (buffer != buffer_space)
jnlib_free (buffer);
errno = e; /* Need to return ERRNO here. */
return -1;
}
if( read(fd, pidstr, 10 ) != 10 ) { /* Read 10 digits w/o newline */
log_debug("error reading lockfile `%s'", name );
close(fd);
errno = 0;
return -1;
p = buffer;
nread = 0;
do
{
res = read (fd, p, expected_len - nread);
if (res == -1 && errno == EINTR)
continue;
if (res < 0)
{
log_info ("error reading lockfile `%s'", h->lockname );
close (fd);
if (buffer != buffer_space)
jnlib_free (buffer);
errno = 0; /* Do not return an inappropriate ERRNO. */
return -1;
}
p += res;
nread += res;
}
pidstr[10] = 0; /* terminate pid string */
close(fd);
pid = atoi(pidstr);
while (res && nread != expected_len);
close(fd);
if (nread < 11)
{
log_info ("invalid size of lockfile `%s'", h->lockname );
if (buffer != buffer_space)
jnlib_free (buffer);
errno = 0; /* Do not return an inappropriate ERRNO. */
return -1;
}
if (buffer[10] != '\n'
|| (buffer[10] = 0, pid = atoi (buffer)) == -1
#ifndef __riscos__
if( !pid || pid == -1 ) {
|| !pid
#else /* __riscos__ */
if( (!pid && riscos_getpid()) || pid == -1 ) {
|| (!pid && riscos_getpid())
#endif /* __riscos__ */
log_error("invalid pid %d in lockfile `%s'", pid, name );
errno = 0;
return -1;
)
{
log_error ("invalid pid %d in lockfile `%s'", pid, h->lockname );
if (buffer != buffer_space)
jnlib_free (buffer);
errno = 0;
return -1;
}
return pid;
if (nread == expected_len
&& !memcmp (h->tname+h->nodename_off, buffer+11, h->nodename_len)
&& buffer[11+h->nodename_len] == '\n')
*same_node = 1;
if (buffer != buffer_space)
jnlib_free (buffer);
return pid;
#endif
}