/* stringhelp.c - standard string helper functions * Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, * 2006, 2007 Free Software Foundation, Inc. * * This file is part of JNLIB. * * JNLIB is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 3 of * the License, or (at your option) any later version. * * JNLIB is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . */ #include #include #include #include #include #ifdef HAVE_W32_SYSTEM #include #endif #include "libjnlib-config.h" #include "utf8conv.h" #include "stringhelp.h" #define tohex_lower(n) ((n) < 10 ? ((n) + '0') : (((n) - 10) + 'a')) /* * Look for the substring SUB in buffer and return a pointer to that * substring in BUFFER or NULL if not found. * Comparison is case-insensitive. */ const char * memistr (const void *buffer, size_t buflen, const char *sub) { const unsigned char *buf = buffer; const unsigned char *t = (const unsigned char *)buffer; const unsigned char *s = (const unsigned char *)sub; size_t n = buflen; for ( ; n ; t++, n-- ) { if ( toupper (*t) == toupper (*s) ) { for ( buf=t++, buflen = n--, s++; n && toupper (*t) == toupper (*s); t++, s++, n-- ) ; if (!*s) return (const char*)buf; t = buf; s = (const unsigned char *)sub ; n = buflen; } } return NULL; } const char * ascii_memistr ( const void *buffer, size_t buflen, const char *sub ) { const unsigned char *buf = buffer; const unsigned char *t = (const unsigned char *)buf; const unsigned char *s = (const unsigned char *)sub; size_t n = buflen; for ( ; n ; t++, n-- ) { if (ascii_toupper (*t) == ascii_toupper (*s) ) { for ( buf=t++, buflen = n--, s++; n && ascii_toupper (*t) == ascii_toupper (*s); t++, s++, n-- ) ; if (!*s) return (const char*)buf; t = (const unsigned char *)buf; s = (const unsigned char *)sub ; n = buflen; } } return NULL; } /* This function is similar to strncpy(). However it won't copy more than N - 1 characters and makes sure that a '\0' is appended. With N given as 0, nothing will happen. With DEST given as NULL, memory will be allocated using jnlib_xmalloc (i.e. if it runs out of core the function terminates). Returns DES or a pointer to the allocated memory. */ char * mem2str( char *dest , const void *src , size_t n ) { char *d; const char *s; if( n ) { if( !dest ) dest = jnlib_xmalloc( n ) ; d = dest; s = src ; for(n--; n && *s; n-- ) *d++ = *s++; *d = '\0' ; } return dest ; } /**************** * remove leading and trailing white spaces */ char * trim_spaces( char *str ) { char *string, *p, *mark; string = str; /* find first non space character */ for( p=string; *p && isspace( *(byte*)p ) ; p++ ) ; /* move characters */ for( (mark = NULL); (*string = *p); string++, p++ ) if( isspace( *(byte*)p ) ) { if( !mark ) mark = string ; } else mark = NULL ; if( mark ) *mark = '\0' ; /* remove trailing spaces */ return str ; } /**************** * remove trailing white spaces */ char * trim_trailing_spaces( char *string ) { char *p, *mark; for( mark = NULL, p = string; *p; p++ ) { if( isspace( *(byte*)p ) ) { if( !mark ) mark = p; } else mark = NULL; } if( mark ) *mark = '\0' ; return string ; } unsigned trim_trailing_chars( byte *line, unsigned len, const char *trimchars ) { byte *p, *mark; unsigned n; for(mark=NULL, p=line, n=0; n < len; n++, p++ ) { if( strchr(trimchars, *p ) ) { if( !mark ) mark = p; } else mark = NULL; } if( mark ) { *mark = 0; return mark - line; } return len; } /**************** * remove trailing white spaces and return the length of the buffer */ unsigned trim_trailing_ws( byte *line, unsigned len ) { return trim_trailing_chars( line, len, " \t\r\n" ); } size_t length_sans_trailing_chars (const unsigned char *line, size_t len, const char *trimchars ) { const unsigned char *p, *mark; size_t n; for( mark=NULL, p=line, n=0; n < len; n++, p++ ) { if (strchr (trimchars, *p )) { if( !mark ) mark = p; } else mark = NULL; } if (mark) return mark - line; return len; } /* * Return the length of line ignoring trailing white-space. */ size_t length_sans_trailing_ws (const unsigned char *line, size_t len) { return length_sans_trailing_chars (line, len, " \t\r\n"); } /*************** * Extract from a given path the filename component. * */ char * make_basename(const char *filepath, const char *inputpath) { char *p; #ifdef __riscos__ return riscos_make_basename(filepath, inputpath); #endif if ( !(p=strrchr(filepath, '/')) ) #ifdef HAVE_DRIVE_LETTERS if ( !(p=strrchr(filepath, '\\')) ) if ( !(p=strrchr(filepath, ':')) ) #endif { return jnlib_xstrdup(filepath); } return jnlib_xstrdup(p+1); } /*************** * Extract from a given filename the path prepended to it. * If their isn't a path prepended to the filename, a dot * is returned ('.'). * */ char * make_dirname(const char *filepath) { char *dirname; int dirname_length; char *p; if ( !(p=strrchr(filepath, '/')) ) #ifdef HAVE_DRIVE_LETTERS if ( !(p=strrchr(filepath, '\\')) ) if ( !(p=strrchr(filepath, ':')) ) #endif { return jnlib_xstrdup("."); } dirname_length = p-filepath; dirname = jnlib_xmalloc(dirname_length+1); strncpy(dirname, filepath, dirname_length); dirname[dirname_length] = 0; return dirname; } /**************** * Construct a filename from the NULL terminated list of parts. * Tilde expansion is done here. */ char * make_filename( const char *first_part, ... ) { va_list arg_ptr ; size_t n; const char *s; char *name, *home, *p; va_start (arg_ptr, first_part); n = strlen (first_part) + 1; while ( (s = va_arg (arg_ptr, const char *)) ) n += strlen(s) + 1; va_end(arg_ptr); home = NULL; if ( *first_part == '~' && first_part[1] == '/' && (home = getenv("HOME")) && *home ) n += strlen (home); name = jnlib_xmalloc (n); p = (home ? stpcpy (stpcpy (name,home), first_part + 1) : stpcpy(name, first_part)); va_start (arg_ptr, first_part) ; while ( (s = va_arg(arg_ptr, const char *)) ) p = stpcpy (stpcpy (p,"/"), s); va_end(arg_ptr); #ifdef HAVE_DRIVE_LETTERS /* We better avoid mixing slashes and backslashes and prefer backslashes. There is usual no problem with mixing them, however a very few W32 API calls can't grok plain slashes. Printing filenames with mixed slashes also looks a bit strange. */ if (strchr (name, '\\')) { for (p=name; *p; p++) if (*p == '/') *p = '\\'; } #endif /*HAVE_DRIVE_LETTERS*/ return name; } int compare_filenames (const char *a, const char *b) { /* ? check whether this is an absolute filename and resolve symlinks? */ #ifdef HAVE_DRIVE_LETTERS for ( ; *a && *b; a++, b++ ) { if (*a != *b && (toupper (*(const unsigned char*)a) != toupper (*(const unsigned char*)b) ) && !((*a == '/' && *b == '\\') || (*a == '\\' && *b == '/'))) break; } if ((*a == '/' && *b == '\\') || (*a == '\\' && *b == '/')) return 0; else return (toupper (*(const unsigned char*)a) - toupper (*(const unsigned char*)b)); #else return strcmp(a,b); #endif } /* Convert 2 hex characters at S to a byte value. Return this value or -1 if there is an error. */ int hextobyte (const char *s) { int c; if ( *s >= '0' && *s <= '9' ) c = 16 * (*s - '0'); else if ( *s >= 'A' && *s <= 'F' ) c = 16 * (10 + *s - 'A'); else if ( *s >= 'a' && *s <= 'f' ) c = 16 * (10 + *s - 'a'); else return -1; s++; if ( *s >= '0' && *s <= '9' ) c += *s - '0'; else if ( *s >= 'A' && *s <= 'F' ) c += 10 + *s - 'A'; else if ( *s >= 'a' && *s <= 'f' ) c += 10 + *s - 'a'; else return -1; return c; } /* Print a BUFFER to stream FP while replacing all control characters and the characters DELIM and DELIM2 with standard C escape sequences. Returns the number of characters printed. */ size_t print_sanitized_buffer2 (FILE *fp, const void *buffer, size_t length, int delim, int delim2) { const unsigned char *p = buffer; size_t count = 0; for (; length; length--, p++, count++) { /* Fixme: Check whether *p < 0xa0 is correct for utf8 encoding. */ if (*p < 0x20 || (*p >= 0x7f && *p < 0xa0) || *p == delim || *p == delim2 || ((delim || delim2) && *p=='\\')) { putc ('\\', fp); count++; if (*p == '\n') { putc ('n', fp); count++; } else if (*p == '\r') { putc ('r', fp); count++; } else if (*p == '\f') { putc ('f', fp); count++; } else if (*p == '\v') { putc ('v', fp); count++; } else if (*p == '\b') { putc ('b', fp); count++; } else if (!*p) { putc('0', fp); count++; } else { fprintf (fp, "x%02x", *p); count += 3; } } else { putc (*p, fp); count++; } } return count; } /* Same as print_sanitized_buffer2 but with just one delimiter. */ size_t print_sanitized_buffer (FILE *fp, const void *buffer, size_t length, int delim) { return print_sanitized_buffer2 (fp, buffer, length, delim, 0); } size_t print_sanitized_utf8_buffer (FILE *fp, const void *buffer, size_t length, int delim) { const char *p = buffer; size_t i; /* We can handle plain ascii simpler, so check for it first. */ for (i=0; i < length; i++ ) { if ( (p[i] & 0x80) ) break; } if (i < length) { char *buf = utf8_to_native (p, length, delim); /*(utf8 conversion already does the control character quoting)*/ i = strlen (buf); fputs (buf, fp); jnlib_free (buf); return i; } else return print_sanitized_buffer (fp, p, length, delim); } size_t print_sanitized_string2 (FILE *fp, const char *string, int delim, int delim2) { return string? print_sanitized_buffer2 (fp, string, strlen (string), delim, delim2):0; } size_t print_sanitized_string (FILE *fp, const char *string, int delim) { return string? print_sanitized_buffer (fp, string, strlen (string), delim):0; } size_t print_sanitized_utf8_string (FILE *fp, const char *string, int delim) { return string? print_sanitized_utf8_buffer (fp, string, strlen (string), delim) : 0; } /* Create a string from the buffer P_ARG of length N which is suitable for printing. Caller must release the created string using xfree. */ char * sanitize_buffer (const void *p_arg, size_t n, int delim) { const unsigned char *p = p_arg; size_t save_n, buflen; const unsigned char *save_p; char *buffer, *d; /* First count length. */ for (save_n = n, save_p = p, buflen=1 ; n; n--, p++ ) { if ( *p < 0x20 || *p == 0x7f || *p == delim || (delim && *p=='\\')) { if ( *p=='\n' || *p=='\r' || *p=='\f' || *p=='\v' || *p=='\b' || !*p ) buflen += 2; else buflen += 5; } else buflen++; } p = save_p; n = save_n; /* And now make the string */ d = buffer = jnlib_xmalloc( buflen ); for ( ; n; n--, p++ ) { if (*p < 0x20 || *p == 0x7f || *p == delim || (delim && *p=='\\')) { *d++ = '\\'; if( *p == '\n' ) *d++ = 'n'; else if( *p == '\r' ) *d++ = 'r'; else if( *p == '\f' ) *d++ = 'f'; else if( *p == '\v' ) *d++ = 'v'; else if( *p == '\b' ) *d++ = 'b'; else if( !*p ) *d++ = '0'; else { sprintf(d, "x%02x", *p ); d += 3; } } else *d++ = *p; } *d = 0; return buffer; } /* Given a string containing an UTF-8 encoded text, return the number of characters in this string. It differs from strlen in that it only counts complete UTF-8 characters. Note, that this function does not take combined characters into account. */ size_t utf8_charcount (const char *s) { size_t n; for (n=0; *s; s++) if ( (*s&0xc0) != 0x80 ) /* Exclude continuation bytes: 10xxxxxx */ n++; return n; } /**************************************************** ********** W32 specific functions **************** ****************************************************/ #ifdef HAVE_W32_SYSTEM const char * w32_strerror (int ec) { static char strerr[256]; if (ec == -1) ec = (int)GetLastError (); FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM, NULL, ec, MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT), strerr, DIM (strerr)-1, NULL); return strerr; } #endif /*HAVE_W32_SYSTEM*/ /**************************************************** ******** Locale insensitive ctype functions ******** ****************************************************/ /* FIXME: replace them by a table lookup and macros */ int ascii_isupper (int c) { return c >= 'A' && c <= 'Z'; } int ascii_islower (int c) { return c >= 'a' && c <= 'z'; } int ascii_toupper (int c) { if (c >= 'a' && c <= 'z') c &= ~0x20; return c; } int ascii_tolower (int c) { if (c >= 'A' && c <= 'Z') c |= 0x20; return c; } int ascii_strcasecmp( const char *a, const char *b ) { if (a == b) return 0; for (; *a && *b; a++, b++) { if (*a != *b && ascii_toupper(*a) != ascii_toupper(*b)) break; } return *a == *b? 0 : (ascii_toupper (*a) - ascii_toupper (*b)); } int ascii_strncasecmp (const char *a, const char *b, size_t n) { const unsigned char *p1 = (const unsigned char *)a; const unsigned char *p2 = (const unsigned char *)b; unsigned char c1, c2; if (p1 == p2 || !n ) return 0; do { c1 = ascii_tolower (*p1); c2 = ascii_tolower (*p2); if ( !--n || c1 == '\0') break; ++p1; ++p2; } while (c1 == c2); return c1 - c2; } int ascii_memcasecmp (const void *a_arg, const void *b_arg, size_t n ) { const char *a = a_arg; const char *b = b_arg; if (a == b) return 0; for ( ; n; n--, a++, b++ ) { if( *a != *b && ascii_toupper (*a) != ascii_toupper (*b) ) return *a == *b? 0 : (ascii_toupper (*a) - ascii_toupper (*b)); } return 0; } int ascii_strcmp( const char *a, const char *b ) { if (a == b) return 0; for (; *a && *b; a++, b++) { if (*a != *b ) break; } return *a == *b? 0 : (*(signed char *)a - *(signed char *)b); } void * ascii_memcasemem (const void *haystack, size_t nhaystack, const void *needle, size_t nneedle) { if (!nneedle) return (void*)haystack; /* finding an empty needle is really easy */ if (nneedle <= nhaystack) { const char *a = haystack; const char *b = a + nhaystack - nneedle; for (; a <= b; a++) { if ( !ascii_memcasecmp (a, needle, nneedle) ) return (void *)a; } } return NULL; } /********************************************* ********** missing string functions ********* *********************************************/ #ifndef HAVE_STPCPY char * stpcpy(char *a,const char *b) { while( *b ) *a++ = *b++; *a = 0; return (char*)a; } #endif #ifndef HAVE_STRSEP /* Code taken from glibc-2.2.1/sysdeps/generic/strsep.c. */ char * strsep (char **stringp, const char *delim) { char *begin, *end; begin = *stringp; if (begin == NULL) return NULL; /* A frequent case is when the delimiter string contains only one character. Here we don't need to call the expensive `strpbrk' function and instead work using `strchr'. */ if (delim[0] == '\0' || delim[1] == '\0') { char ch = delim[0]; if (ch == '\0') end = NULL; else { if (*begin == ch) end = begin; else if (*begin == '\0') end = NULL; else end = strchr (begin + 1, ch); } } else /* Find the end of the token. */ end = strpbrk (begin, delim); if (end) { /* Terminate the token and set *STRINGP past NUL character. */ *end++ = '\0'; *stringp = end; } else /* No more delimiters; this is the last token. */ *stringp = NULL; return begin; } #endif /*HAVE_STRSEP*/ #ifndef HAVE_STRLWR char * strlwr(char *s) { char *p; for(p=s; *p; p++ ) *p = tolower(*p); return s; } #endif #ifndef HAVE_STRCASECMP int strcasecmp( const char *a, const char *b ) { for( ; *a && *b; a++, b++ ) { if( *a != *b && toupper(*a) != toupper(*b) ) break; } return *(const byte*)a - *(const byte*)b; } #endif /**************** * mingw32/cpd has a memicmp() */ #ifndef HAVE_MEMICMP int memicmp( const char *a, const char *b, size_t n ) { for( ; n; n--, a++, b++ ) if( *a != *b && toupper(*(const byte*)a) != toupper(*(const byte*)b) ) return *(const byte *)a - *(const byte*)b; return 0; } #endif #ifndef HAVE_MEMRCHR void * memrchr (const void *buffer, int c, size_t n) { const unsigned char *p = buffer; for (p += n; n ; n--) if (*--p == c) return (void *)p; return NULL; } #endif /*HAVE_MEMRCHR*/ /* Percent-escape the string STR by replacing colons with '%3a'. If EXTRA is not NULL all characters in it are also escaped. */ char * percent_escape (const char *str, const char *extra) { int i, j; char *ptr; if (!str) return NULL; for (i=j=0; str[i]; i++) if (str[i] == ':' || str[i] == '%' || (extra && strchr (extra, str[i]))) j++; ptr = jnlib_xmalloc (i + 2 * j + 1); i = 0; while (*str) { if (*str == ':') { ptr[i++] = '%'; ptr[i++] = '3'; ptr[i++] = 'a'; } else if (*str == '%') { ptr[i++] = '%'; ptr[i++] = '2'; ptr[i++] = '5'; } else if (extra && strchr (extra, *str)) { ptr[i++] = '%'; ptr[i++] = tohex_lower ((*str>>4)&15); ptr[i++] = tohex_lower (*str&15); } else ptr[i++] = *str; str++; } ptr[i] = '\0'; return ptr; }