simgear/misc/strutils.cxx

   1 // String utilities.
   2 //
   3 // Written by Bernie Bright, started 1998
   4 //
   5 // Copyright (C) 1998  Bernie Bright - bbright@bigpond.net.au
   6 //
   7 // This library is free software; you can redistribute it and/or
   8 // modify it under the terms of the GNU Library General Public
   9 // License as published by the Free Software Foundation; either
  10 // version 2 of the License, or (at your option) any later version.
  11 //
  12 // This library is distributed in the hope that it will be useful,
  13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 // Library General Public License for more details.
  16 //
  17 // You should have received a copy of the GNU General Public License
  18 // along with this program; if not, write to the Free Software
  19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  20 //
  21 // $Id$
  22
  23 #include <ctype.h>
  24 #include <cstring>
  25 #include <sstream>
  26
  27 #include "strutils.hxx"
  28
  29 #include <simgear/debug/logstream.hxx>
  30
  31 using std::string;
  32 using std::vector;
  33 using std::stringstream;
  34
  35 namespace simgear {
  36     namespace strutils {
  37
  38         /*
  39          * utf8ToLatin1() convert utf8 to latin, useful for accent character (i.e éâàîè...)
  40          */
  41         template <typename Iterator> size_t get_length (Iterator p) {
  42                 unsigned char c = static_cast<unsigned char> (*p);
  43                 if (c < 0x80) return 1;
  44                 else if (!(c & 0x20)) return 2;
  45                 else if (!(c & 0x10)) return 3;
  46                 else if (!(c & 0x08)) return 4;
  47                 else if (!(c & 0x04)) return 5;
  48                 else return 6;
  49         }
  50
  51         typedef unsigned int value_type;
  52         template <typename Iterator> value_type get_value (Iterator p) {
  53                 size_t len = get_length (p);
  54                 if (len == 1) return *p;
  55                 value_type res = static_cast<unsigned char> ( *p & (0xff >> (len + 1))) << ((len - 1) * 6 );
  56                 for (--len; len; --len) {
  57                         value_type next_byte = static_cast<unsigned char> (*(++p)) - 0x80;
  58                         if (next_byte & 0xC0) return 0x00ffffff; // invalid UTF-8
  59                         res |= next_byte << ((len - 1) * 6);
  60                         }
  61                 return res;
  62         }
  63
  64         string utf8ToLatin1( string& s_utf8 ) {
  65                 string s_latin1;
  66                 for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) {
  67                         value_type value = get_value<string::iterator&>(p);
  68                         if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1
  69                         if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value);
  70                         s_latin1 += static_cast<char>(value);
  71                 }
  72                 return s_latin1;
  73         }
  74
  75         /**
  76          *
  77          */
  78         static vector<string>
  79         split_whitespace( const string& str, int maxsplit )
  80         {
  81             vector<string> result;
  82             string::size_type len = str.length();
  83             string::size_type i = 0;
  84             string::size_type j;
  85             int countsplit = 0;
  86
  87             while (i < len)
  88             {
  89                 while (i < len && isspace((unsigned char)str[i]))
  90                 {
  91                     ++i;
  92                 }
  93
  94                 j = i;
  95
  96                 while (i < len && !isspace((unsigned char)str[i]))
  97                 {
  98                     ++i;
  99                 }
 100
 101                 if (j < i)
 102                 {
 103                     result.push_back( str.substr(j, i-j) );
 104                     ++countsplit;
 105                     while (i < len && isspace((unsigned char)str[i]))
 106                     {
 107                         ++i;
 108                     }
 109
 110                     if (maxsplit && (countsplit >= maxsplit) && i < len)
 111                     {
 112                         result.push_back( str.substr( i, len-i ) );
 113                         i = len;
 114                     }
 115                 }
 116             }
 117
 118             return result;
 119         }
 120
 121         /**
 122          *
 123          */
 124         vector<string>
 125         split( const string& str, const char* sep, int maxsplit )
 126         {
 127             if (sep == 0)
 128                 return split_whitespace( str, maxsplit );
 129
 130             vector<string> result;
 131             int n = std::strlen( sep );
 132             if (n == 0)
 133             {
 134                 // Error: empty separator string
 135                 return result;
 136             }
 137             const char* s = str.c_str();
 138             string::size_type len = str.length();
 139             string::size_type i = 0;
 140             string::size_type j = 0;
 141             int splitcount = 0;
 142
 143             while (i+n <= len)
 144             {
 145                 if (s[i] == sep[0] && (n == 1 || std::memcmp(s+i, sep, n) == 0))
 146                 {
 147                     result.push_back( str.substr(j,i-j) );
 148                     i = j = i + n;
 149                     ++splitcount;
 150                     if (maxsplit && (splitcount >= maxsplit))
 151                         break;
 152                 }
 153                 else
 154                 {
 155                     ++i;
 156                 }
 157             }
 158
 159             result.push_back( str.substr(j,len-j) );
 160             return result;
 161         }
 162
 163         /**
 164          * The lstrip(), rstrip() and strip() functions are implemented
 165          * in do_strip() which uses an additional parameter to indicate what
 166          * type of strip should occur.
 167          */
 168         const int LEFTSTRIP = 0;
 169         const int RIGHTSTRIP = 1;
 170         const int BOTHSTRIP = 2;
 171
 172         static string
 173         do_strip( const string& s, int striptype )
 174         {
 175             string::size_type len = s.length();
 176             if( len == 0 ) // empty string is trivial
 177                 return s;
 178             string::size_type i = 0;
 179             if (striptype != RIGHTSTRIP)
 180             {
 181                 while (i < len && isspace(s[i]))
 182                 {
 183                     ++i;
 184                 }
 185             }
 186
 187             string::size_type j = len;
 188             if (striptype != LEFTSTRIP)
 189             {
 190                 do
 191                 {
 192                     --j;
 193                 }
 194                 while (j >= 1 && isspace(s[j]));
 195                 ++j;
 196             }
 197
 198             if (i == 0 && j == len)
 199             {
 200                 return s;
 201             }
 202             else
 203             {
 204                 return s.substr( i, j - i );
 205             }
 206         }
 207
 208         string
 209         lstrip( const string& s )
 210         {
 211             return do_strip( s, LEFTSTRIP );
 212         }
 213
 214         string
 215         rstrip( const string& s )
 216         {
 217             return do_strip( s, RIGHTSTRIP );
 218         }
 219
 220         string
 221         strip( const string& s )
 222         {
 223             return do_strip( s, BOTHSTRIP );
 224         }
 225
 226         string
 227         rpad( const string & s, string::size_type length, char c )
 228         {
 229             string::size_type l = s.length();
 230             if( l >= length ) return s;
 231             string reply = s;
 232             return reply.append( length-l, c );
 233         }
 234
 235         string
 236         lpad( const string & s, size_t length, char c )
 237         {
 238             string::size_type l = s.length();
 239             if( l >= length ) return s;
 240             string reply = s;
 241             return reply.insert( 0, length-l, c );
 242         }
 243
 244         bool
 245         starts_with( const string & s, const string & substr )
 246         {
 247           return s.compare(0, substr.length(), substr) == 0;
 248         }
 249
 250         bool
 251         ends_with( const string & s, const string & substr )
 252         {
 253           if( substr.length() > s.length() )
 254             return false;
 255           return s.compare( s.length() - substr.length(),
 256                             substr.length(),
 257                             substr ) == 0;
 258         }
 259
 260     string simplify(const string& s)
 261     {
 262         string result; // reserve size of 's'?
 263         string::const_iterator it = s.begin(),
 264             end = s.end();
 265
 266     // advance to first non-space char - simplifes logic in main loop,
 267     // since we can always prepend a single space when we see a
 268     // space -> non-space transition
 269         for (; (it != end) && isspace(*it); ++it) { /* nothing */ }
 270
 271         bool lastWasSpace = false;
 272         for (; it != end; ++it) {
 273             char c = *it;
 274             if (isspace(c)) {
 275                 lastWasSpace = true;
 276                 continue;
 277             }
 278
 279             if (lastWasSpace) {
 280                 result.push_back(' ');
 281             }
 282
 283             lastWasSpace = false;
 284             result.push_back(c);
 285         }
 286
 287         return result;
 288     }
 289
 290     int to_int(const std::string& s, int base)
 291     {
 292         stringstream ss(s);
 293         switch (base) {
 294         case 8:      ss >> std::oct; break;
 295         case 16:     ss >> std::hex; break;
 296         default: break;
 297         }
 298
 299         int result;
 300         ss >> result;
 301         return result;
 302     }
 303
 304     int compare_versions(const string& v1, const string& v2)
 305     {
 306         vector<string> v1parts(split(v1, "."));
 307         vector<string> v2parts(split(v2, "."));
 308
 309         int lastPart = std::min(v1parts.size(), v2parts.size());
 310         for (int part=0; part < lastPart; ++part) {
 311             int part1 = to_int(v1parts[part]);
 312             int part2 = to_int(v2parts[part]);
 313
 314             if (part1 != part2) {
 315                 return part1 - part2;
 316             }
 317         } // of parts iteration
 318
 319         // reached end - longer wins
 320         return v1parts.size() - v2parts.size();
 321     }
 322
 323     string join(const string_list& l, const string& joinWith)
 324     {
 325         string result;
 326         unsigned int count = l.size();
 327         for (unsigned int i=0; i < count; ++i) {
 328             result += l[i];
 329             if (i < (count - 1)) {
 330                 result += joinWith;
 331             }
 332         }
 333
 334         return result;
 335     }
 336
 337     string uppercase(const string &s) {
 338       string rslt(s);
 339       for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
 340         *p = toupper(*p);
 341       }
 342       return rslt;
 343     }
 344
 345     string lowercase(const string &s) {
 346       string rslt(s);
 347       for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
 348         *p = tolower(*p);
 349       }
 350       return rslt;
 351     }
 352
 353     void lowercase(string &s) {
 354       for(string::iterator p = s.begin(); p != s.end(); p++){
 355         *p = tolower(*p);
 356       }
 357     }
 358
 359 #if defined(SG_WINDOWS)
 360
 361 #include <windows.h>
 362
 363 static WCharVec convertMultiByteToWString(DWORD encoding, const std::string& a)
 364 {
 365     WCharVec result;
 366     DWORD flags = 0;
 367     int requiredWideChars = MultiByteToWideChar(encoding, flags,
 368                         a.c_str(), a.size(),
 369                         NULL, 0);
 370     result.resize(requiredWideChars);
 371     MultiByteToWideChar(encoding, flags, a.c_str(), a.size(),
 372                         result.data(), result.size());
 373     return result;
 374 }
 375
 376 WCharVec convertUtf8ToWString(const std::string& a)
 377 {
 378     return convertMultiByteToWString(CP_UTF8, a);
 379 }
 380
 381 #endif
 382
 383 std::string convertWindowsLocal8BitToUtf8(const std::string& a)
 384 {
 385 #ifdef SG_WINDOWS
 386     DWORD flags = 0;
 387     WCharVec wideString = convertMultiByteToWString(CP_ACP, a);
 388
 389     // convert down to UTF-8
 390     std::vector<char> result;
 391     int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
 392                                                 wideString.data(), wideString.size(),
 393                                                 NULL, 0, NULL, NULL);
 394     result.resize(requiredUTF8Chars);
 395     WideCharToMultiByte(CP_UTF8, flags,
 396                         wideString.data(), wideString.size(),
 397                         result.data(), result.size(), NULL, NULL);
 398     return std::string(result.data(), result.size());
 399 #else
 400     return a;
 401 #endif
 402 }
 403
 404
 405
 406 static const std::string base64_chars =
 407 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 408 "abcdefghijklmnopqrstuvwxyz"
 409 "0123456789+/";
 410
 411 static const unsigned char base64_decode_map[128] =
 412 {
 413     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 414     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 415     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 416     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 417     127, 127, 127,  62, 127, 127, 127,  63,  52,  53,
 418     54,  55,  56,  57,  58,  59,  60,  61, 127, 127,
 419     127,  64, 127, 127, 127,   0,   1,   2,   3,   4,
 420     5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
 421     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
 422     25, 127, 127, 127, 127, 127, 127,  26,  27,  28,
 423     29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
 424     39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
 425     49,  50,  51, 127, 127, 127, 127, 127
 426 };
 427
 428
 429 static inline bool is_base64(unsigned char c) {
 430   return (isalnum(c) || (c == '+') || (c == '/'));
 431 }
 432
 433 static bool is_whitespace(unsigned char c) {
 434     return ((c == ' ') || (c == '\r') || (c == '\n'));
 435 }
 436
 437 void decodeBase64(const std::string& encoded_string, std::vector<unsigned char>& ret)
 438 {
 439   int in_len = encoded_string.size();
 440   int i = 0;
 441   int j = 0;
 442   int in_ = 0;
 443   unsigned char char_array_4[4], char_array_3[3];
 444
 445   while (in_len-- && ( encoded_string[in_] != '=')) {
 446     if (is_whitespace( encoded_string[in_])) {
 447         in_++;
 448         continue;
 449     }
 450
 451     if (!is_base64(encoded_string[in_])) {
 452         break;
 453     }
 454
 455     char_array_4[i++] = encoded_string[in_]; in_++;
 456     if (i ==4) {
 457       for (i = 0; i <4; i++)
 458         char_array_4[i] = base64_decode_map[char_array_4[i]];
 459
 460       char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
 461       char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
 462       char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
 463
 464       for (i = 0; (i < 3); i++)
 465         ret.push_back(char_array_3[i]);
 466       i = 0;
 467     }
 468   }
 469
 470   if (i) {
 471     for (j = i; j <4; j++)
 472       char_array_4[j] = 0;
 473
 474     for (j = 0; j <4; j++)
 475       char_array_4[j] = base64_decode_map[char_array_4[j]];
 476
 477     char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
 478     char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
 479     char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
 480
 481     for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
 482   }
 483 }
 484
 485 const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
 486
 487 std::string encodeHex(const std::string& bytes)
 488 {
 489   std::string hex;
 490   size_t count = bytes.size();
 491   for (unsigned int i=0; i<count;++i) {
 492       unsigned char c = bytes[i];
 493       hex.push_back(hexChar[c >> 4]);
 494       hex.push_back(hexChar[c & 0x0f]);
 495   }
 496
 497   return hex;
 498 }
 499
 500 std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
 501 {
 502   std::string hex;
 503   for (unsigned int i=0; i<length;++i) {
 504       unsigned char c = *rawBytes++;
 505       hex.push_back(hexChar[c >> 4]);
 506       hex.push_back(hexChar[c & 0x0f]);
 507   }
 508
 509   return hex;
 510 }
 511
 512 //------------------------------------------------------------------------------
 513 std::string unescape(const char* s)
 514 {
 515   std::string r;
 516   while( *s )
 517   {
 518     if( *s != '\\' )
 519     {
 520       r += *s++;
 521       continue;
 522     }
 523
 524     if( !*++s )
 525       break;
 526
 527     if (*s == '\\') {
 528         r += '\\';
 529     } else if (*s == 'n') {
 530         r += '\n';
 531     } else if (*s == 'r') {
 532         r += '\r';
 533     } else if (*s == 't') {
 534         r += '\t';
 535     } else if (*s == 'v') {
 536         r += '\v';
 537     } else if (*s == 'f') {
 538         r += '\f';
 539     } else if (*s == 'a') {
 540         r += '\a';
 541     } else if (*s == 'b') {
 542         r += '\b';
 543     } else if (*s == 'x') {
 544         if (!*++s)
 545             break;
 546         int v = 0;
 547         for (int i = 0; i < 2 && isxdigit(*s); i++, s++)
 548             v = v * 16 + (isdigit(*s) ? *s - '0' : 10 + tolower(*s) - 'a');
 549         r += v;
 550         continue;
 551
 552     } else if (*s >= '0' && *s <= '7') {
 553         int v = *s++ - '0';
 554         for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++)
 555             v = v * 8 + *s - '0';
 556         r += v;
 557         continue;
 558
 559     } else {
 560         r += *s;
 561     }
 562     s++;
 563   }
 564   return r;
 565 }
 566
 567 string sanitizePrintfFormat(const string& input)
 568 {
 569     string::size_type i = input.find("%n");
 570     if (i != string::npos) {
 571         SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
 572         return string();
 573     }
 574
 575     return input;
 576 }
 577
 578 } // end namespace strutils
 579
 580 } // end namespace simgear