string.cpp

Go to the documentation of this file.
00001 /* $Id: string.cpp 15718 2009-03-15 00:32:18Z rubidium $ */
00002 
00005 #include "stdafx.h"
00006 #include "debug.h"
00007 #include "core/alloc_func.hpp"
00008 #include "core/math_func.hpp"
00009 #include "string_func.h"
00010 
00011 #include "table/control_codes.h"
00012 
00013 #include <stdarg.h>
00014 #include <ctype.h> /* required for tolower() */
00015 
00026 static int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
00027 {
00028   if (str >= last) return 0;
00029   size_t size = last - str;
00030   return min((int)size, vsnprintf(str, size, format, ap));
00031 }
00032 
00033 void ttd_strlcat(char *dst, const char *src, size_t size)
00034 {
00035   assert(size > 0);
00036   while (size > 0 && *dst != '\0') {
00037     size--;
00038     dst++;
00039   }
00040 
00041   ttd_strlcpy(dst, src, size);
00042 }
00043 
00044 
/**
 * Copy a string into a buffer of limited size, always terminating the result.
 *
 * @param dst  Destination buffer.
 * @param src  String to copy.
 * @param size Size of the destination buffer in bytes; must be > 0.
 *
 * At most size - 1 characters are copied; the remainder of src is dropped.
 */
void ttd_strlcpy(char *dst, const char *src, size_t size)
{
  assert(size > 0);

  /* One byte is reserved for the terminator. */
  size_t room = size - 1;
  for (; room > 0 && *src != '\0'; room--) {
    *dst++ = *src++;
  }
  *dst = '\0';
}
00053 
00054 
00055 char *strecat(char *dst, const char *src, const char *last)
00056 {
00057   assert(dst <= last);
00058   while (*dst != '\0') {
00059     if (dst == last) return dst;
00060     dst++;
00061   }
00062 
00063   return strecpy(dst, src, last);
00064 }
00065 
00066 
00067 char *strecpy(char *dst, const char *src, const char *last)
00068 {
00069   assert(dst <= last);
00070   while (dst != last && *src != '\0') {
00071     *dst++ = *src++;
00072   }
00073   *dst = '\0';
00074 
00075   if (dst == last && *src != '\0') {
00076 #ifdef STRGEN
00077     error("String too long for destination buffer");
00078 #else /* STRGEN */
00079     DEBUG(misc, 0, "String too long for destination buffer");
00080 #endif /* STRGEN */
00081   }
00082   return dst;
00083 }
00084 
00085 
00086 char *CDECL str_fmt(const char *str, ...)
00087 {
00088   char buf[4096];
00089   va_list va;
00090 
00091   va_start(va, str);
00092   int len = vseprintf(buf, lastof(buf), str, va);
00093   va_end(va);
00094   char *p = MallocT<char>(len + 1);
00095   memcpy(p, buf, len + 1);
00096   return p;
00097 }
00098 
00099 
00100 void str_validate(char *str, const char *last, bool allow_newlines, bool ignore)
00101 {
00102   /* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
00103 
00104   char *dst = str;
00105   while (*str != '\0') {
00106     size_t len = Utf8EncodedCharLen(*str);
00107     /* If the character is unknown, i.e. encoded length is 0
00108      * we assume worst case for the length check.
00109      * The length check is needed to prevent Utf8Decode to read
00110      * over the terminating '\0' if that happens to be placed
00111      * within the encoding of an UTF8 character. */
00112     if ((len == 0 && str + 4 > last) || str + len > last) break;
00113 
00114     WChar c;
00115     len = Utf8Decode(&c, str);
00116     /* It's possible to encode the string termination character
00117      * into a multiple bytes. This prevents those termination
00118      * characters to be skipped */
00119     if (c == '\0') break;
00120 
00121     if (IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) {
00122       /* Copy the character back. Even if dst is current the same as str
00123        * (i.e. no characters have been changed) this is quicker than
00124        * moving the pointers ahead by len */
00125       do {
00126         *dst++ = *str++;
00127       } while (--len != 0);
00128     } else if (allow_newlines && c == '\n') {
00129       *dst++ = *str++;
00130     } else {
00131       if (allow_newlines && c == '\r' && str[1] == '\n') {
00132         str += len;
00133         continue;
00134       }
00135       /* Replace the undesirable character with a question mark */
00136       str += len;
00137       if (!ignore) *dst++ = '?';
00138     }
00139   }
00140 
00141   *dst = '\0';
00142 }
00143 
00144 
00145 void str_strip_colours(char *str)
00146 {
00147   char *dst = str;
00148   WChar c;
00149   size_t len;
00150 
00151   for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
00152     if (c < SCC_BLUE || c > SCC_BLACK) {
00153       /* Copy the character back. Even if dst is current the same as str
00154        * (i.e. no characters have been changed) this is quicker than
00155        * moving the pointers ahead by len */
00156       do {
00157         *dst++ = *str++;
00158       } while (--len != 0);
00159     } else {
00160       /* Just skip (strip) the colour codes */
00161       str += len;
00162     }
00163   }
00164   *dst = '\0';
00165 }
00166 
/**
 * Convert a string to lower case, in place, one byte at a time using tolower().
 *
 * @param str String to convert.
 */
void strtolower(char *str)
{
  for (; *str != '\0'; str++) {
    /* tolower() only accepts EOF or values representable as unsigned char;
     * a plain char may be negative for bytes >= 0x80, so convert first. */
    *str = (char)tolower((unsigned char)*str);
  }
}
00179 
00187 bool IsValidChar(WChar key, CharSetFilter afilter)
00188 {
00189   switch (afilter) {
00190     case CS_ALPHANUMERAL: return IsPrintable(key);
00191     case CS_NUMERAL:      return (key >= '0' && key <= '9');
00192     case CS_ALPHA:        return IsPrintable(key) && !(key >= '0' && key <= '9');
00193   }
00194 
00195   return false;
00196 }
00197 
00198 #ifdef WIN32
00199 /* Since version 3.14, MinGW Runtime has snprintf() and vsnprintf() conform to C99 but it's not the case for older versions */
00200 #if (__MINGW32_MAJOR_VERSION < 3) || ((__MINGW32_MAJOR_VERSION == 3) && (__MINGW32_MINOR_VERSION < 14))
00201 int CDECL snprintf(char *str, size_t size, const char *format, ...)
00202 {
00203   va_list ap;
00204   int ret;
00205 
00206   va_start(ap, format);
00207   ret = vsnprintf(str, size, format, ap);
00208   va_end(ap);
00209   return ret;
00210 }
00211 #endif /* MinGW Runtime < 3.14 */
00212 
00213 #ifdef _MSC_VER
00214 /* *nprintf broken, not POSIX compliant, MSDN description
00215  * - If len < count, then len characters are stored in buffer, a null-terminator is appended, and len is returned.
00216  * - If len = count, then len characters are stored in buffer, no null-terminator is appended, and len is returned.
00217  * - If len > count, then count characters are stored in buffer, no null-terminator is appended, and a negative value is returned
00218  */
00219 int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap)
00220 {
00221   int ret;
00222   ret = _vsnprintf(str, size, format, ap);
00223   if (ret < 0 || ret == size) str[size - 1] = '\0';
00224   return ret;
00225 }
00226 #endif /* _MSC_VER */
00227 
00228 #endif /* WIN32 */
00229 
00239 int CDECL seprintf(char *str, const char *last, const char *format, ...)
00240 {
00241   va_list ap;
00242 
00243   va_start(ap, format);
00244   int ret = vseprintf(str, last, format, ap);
00245   va_end(ap);
00246   return ret;
00247 }
00248 
00249 
00255 char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
00256 {
00257   char *p = buf;
00258 
00259   for (uint i = 0; i < 16; i++) {
00260     p += seprintf(p, last, "%02X", md5sum[i]);
00261   }
00262 
00263   return p;
00264 }
00265 
00266 
00267 /* UTF-8 handling routines */
00268 
00269 
00270 /* Decode and consume the next UTF-8 encoded character
00271  * @param c Buffer to place decoded character.
00272  * @param s Character stream to retrieve character from.
00273  * @return Number of characters in the sequence.
00274  */
00275 size_t Utf8Decode(WChar *c, const char *s)
00276 {
00277   assert(c != NULL);
00278 
00279   if (!HasBit(s[0], 7)) {
00280     /* Single byte character: 0xxxxxxx */
00281     *c = s[0];
00282     return 1;
00283   } else if (GB(s[0], 5, 3) == 6) {
00284     if (IsUtf8Part(s[1])) {
00285       /* Double byte character: 110xxxxx 10xxxxxx */
00286       *c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00287       if (*c >= 0x80) return 2;
00288     }
00289   } else if (GB(s[0], 4, 4) == 14) {
00290     if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00291       /* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */
00292       *c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00293       if (*c >= 0x800) return 3;
00294     }
00295   } else if (GB(s[0], 3, 5) == 30) {
00296     if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00297       /* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
00298       *c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00299       if (*c >= 0x10000 && *c <= 0x10FFFF) return 4;
00300     }
00301   }
00302 
00303   /* DEBUG(misc, 1, "[utf8] invalid UTF-8 sequence"); */
00304   *c = '?';
00305   return 1;
00306 }
00307 
00308 
00309 /* Encode a unicode character and place it in the buffer
00310  * @param buf Buffer to place character.
00311  * @param c   Unicode character to encode.
00312  * @return Number of characters in the encoded sequence.
00313  */
00314 size_t Utf8Encode(char *buf, WChar c)
00315 {
00316   if (c < 0x80) {
00317     *buf = c;
00318     return 1;
00319   } else if (c < 0x800) {
00320     *buf++ = 0xC0 + GB(c,  6, 5);
00321     *buf   = 0x80 + GB(c,  0, 6);
00322     return 2;
00323   } else if (c < 0x10000) {
00324     *buf++ = 0xE0 + GB(c, 12, 4);
00325     *buf++ = 0x80 + GB(c,  6, 6);
00326     *buf   = 0x80 + GB(c,  0, 6);
00327     return 3;
00328   } else if (c < 0x110000) {
00329     *buf++ = 0xF0 + GB(c, 18, 3);
00330     *buf++ = 0x80 + GB(c, 12, 6);
00331     *buf++ = 0x80 + GB(c,  6, 6);
00332     *buf   = 0x80 + GB(c,  0, 6);
00333     return 4;
00334   }
00335 
00336   /* DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c); */
00337   *buf = '?';
00338   return 1;
00339 }
00340 
00348 size_t Utf8TrimString(char *s, size_t maxlen)
00349 {
00350   size_t length = 0;
00351 
00352   for (const char *ptr = strchr(s, '\0'); *s != '\0';) {
00353     size_t len = Utf8EncodedCharLen(*s);
00354     /* Silently ignore invalid UTF8 sequences, our only concern trimming */
00355     if (len == 0) len = 1;
00356 
00357     /* Take care when a hard cutoff was made for the string and
00358      * the last UTF8 sequence is invalid */
00359     if (length + len >= maxlen || (s + len > ptr)) break;
00360     s += len;
00361     length += len;
00362   }
00363 
00364   *s = '\0';
00365   return length;
00366 }
00367 
00368 #ifndef _GNU_SOURCE
00369 #include "core/math_func.hpp"
00370 char *strndup(const char *s, size_t len)
00371 {
00372   len = min(strlen(s), len);
00373   char *tmp = CallocT<char>(len + 1);
00374   memcpy(tmp, s, len);
00375   return tmp;
00376 }
00377 #endif /* !_GNU_SOURCE */
00378 
00379 #ifdef DEFINE_STRCASESTR
/**
 * Find the first occurrence of a string within another one, ignoring case.
 *
 * @param haystack String to search in.
 * @param needle   String to search for.
 * @return Pointer to the first match within haystack, or NULL when there is
 *         none. An empty needle matches at the start of haystack.
 */
const char *strcasestr(const char *haystack, const char *needle)
{
  const size_t needle_len = strlen(needle);
  const size_t hay_len = strlen(haystack);

  /* A needle longer than the haystack can never match. */
  if (needle_len > hay_len) return NULL;

  /* Last position at which the needle still fits into the haystack. */
  const char *last_start = haystack + (hay_len - needle_len);
  for (const char *pos = haystack; pos <= last_start; pos++) {
    if (strncasecmp(pos, needle, needle_len) == 0) return pos;
  }

  return NULL;
}
00393 #endif /* DEFINE_STRCASESTR */

Generated on Wed Jun 3 19:05:15 2009 for OpenTTD by  doxygen 1.5.6