simgear/misc/strutils.cxx

   1 // String utilities.
   2 //
   3 // Written by Bernie Bright, started 1998
   4 //
   5 // Copyright (C) 1998  Bernie Bright - bbright@bigpond.net.au
   6 //
   7 // This library is free software; you can redistribute it and/or
   8 // modify it under the terms of the GNU Library General Public
   9 // License as published by the Free Software Foundation; either
  10 // version 2 of the License, or (at your option) any later version.
  11 //
  12 // This library is distributed in the hope that it will be useful,
  13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 // Library General Public License for more details.
  16 //
  17 // You should have received a copy of the GNU General Public License
  18 // along with this program; if not, write to the Free Software
  19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  20 //
  21 // $Id$
  22
  23 #include <ctype.h>
  24 #include <cstring>
  25 #include <sstream>
  26
  27 #include "strutils.hxx"
  28
  29 #include <simgear/debug/logstream.hxx>
  30
  31 using std::string;
  32 using std::vector;
  33 using std::stringstream;
  34
  35 namespace simgear {
  36     namespace strutils {
  37
  38         /**
  39          *
  40          */
  41         static vector<string>
  42         split_whitespace( const string& str, int maxsplit )
  43         {
  44             vector<string> result;
  45             string::size_type len = str.length();
  46             string::size_type i = 0;
  47             string::size_type j;
  48             int countsplit = 0;
  49
  50             while (i < len)
  51             {
  52                 while (i < len && isspace((unsigned char)str[i]))
  53                 {
  54                     ++i;
  55                 }
  56
  57                 j = i;
  58
  59                 while (i < len && !isspace((unsigned char)str[i]))
  60                 {
  61                     ++i;
  62                 }
  63
  64                 if (j < i)
  65                 {
  66                     result.push_back( str.substr(j, i-j) );
  67                     ++countsplit;
  68                     while (i < len && isspace((unsigned char)str[i]))
  69                     {
  70                         ++i;
  71                     }
  72
  73                     if (maxsplit && (countsplit >= maxsplit) && i < len)
  74                     {
  75                         result.push_back( str.substr( i, len-i ) );
  76                         i = len;
  77                     }
  78                 }
  79             }
  80
  81             return result;
  82         }
  83
  84         /**
  85          *
  86          */
  87         vector<string>
  88         split( const string& str, const char* sep, int maxsplit )
  89         {
  90             if (sep == 0)
  91                 return split_whitespace( str, maxsplit );
  92
  93             vector<string> result;
  94             int n = std::strlen( sep );
  95             if (n == 0)
  96             {
  97                 // Error: empty separator string
  98                 return result;
  99             }
 100             const char* s = str.c_str();
 101             string::size_type len = str.length();
 102             string::size_type i = 0;
 103             string::size_type j = 0;
 104             int splitcount = 0;
 105
 106             while (i+n <= len)
 107             {
 108                 if (s[i] == sep[0] && (n == 1 || std::memcmp(s+i, sep, n) == 0))
 109                 {
 110                     result.push_back( str.substr(j,i-j) );
 111                     i = j = i + n;
 112                     ++splitcount;
 113                     if (maxsplit && (splitcount >= maxsplit))
 114                         break;
 115                 }
 116                 else
 117                 {
 118                     ++i;
 119                 }
 120             }
 121
 122             result.push_back( str.substr(j,len-j) );
 123             return result;
 124         }
 125
 126         /**
 127          * The lstrip(), rstrip() and strip() functions are implemented
 128          * in do_strip() which uses an additional parameter to indicate what
 129          * type of strip should occur.
 130          */
 131         const int LEFTSTRIP = 0;
 132         const int RIGHTSTRIP = 1;
 133         const int BOTHSTRIP = 2;
 134
 135         static string
 136         do_strip( const string& s, int striptype )
 137         {
 138             string::size_type len = s.length();
 139             if( len == 0 ) // empty string is trivial
 140                 return s;
 141             string::size_type i = 0;
 142             if (striptype != RIGHTSTRIP)
 143             {
 144                 while (i < len && isspace(s[i]))
 145                 {
 146                     ++i;
 147                 }
 148             }
 149
 150             string::size_type j = len;
 151             if (striptype != LEFTSTRIP)
 152             {
 153                 do
 154                 {
 155                     --j;
 156                 }
 157                 while (j >= 1 && isspace(s[j]));
 158                 ++j;
 159             }
 160
 161             if (i == 0 && j == len)
 162             {
 163                 return s;
 164             }
 165             else
 166             {
 167                 return s.substr( i, j - i );
 168             }
 169         }
 170
 171         string
 172         lstrip( const string& s )
 173         {
 174             return do_strip( s, LEFTSTRIP );
 175         }
 176
 177         string
 178         rstrip( const string& s )
 179         {
 180             return do_strip( s, RIGHTSTRIP );
 181         }
 182
 183         string
 184         strip( const string& s )
 185         {
 186             return do_strip( s, BOTHSTRIP );
 187         }
 188
 189         string
 190         rpad( const string & s, string::size_type length, char c )
 191         {
 192             string::size_type l = s.length();
 193             if( l >= length ) return s;
 194             string reply = s;
 195             return reply.append( length-l, c );
 196         }
 197
 198         string
 199         lpad( const string & s, size_t length, char c )
 200         {
 201             string::size_type l = s.length();
 202             if( l >= length ) return s;
 203             string reply = s;
 204             return reply.insert( 0, length-l, c );
 205         }
 206
 207         bool
 208         starts_with( const string & s, const string & substr )
 209         {
 210           return s.compare(0, substr.length(), substr) == 0;
 211         }
 212
 213         bool
 214         ends_with( const string & s, const string & substr )
 215         {
 216           if( substr.length() > s.length() )
 217             return false;
 218           return s.compare( s.length() - substr.length(),
 219                             substr.length(),
 220                             substr ) == 0;
 221         }
 222
 223     string simplify(const string& s)
 224     {
 225         string result; // reserve size of 's'?
 226         string::const_iterator it = s.begin(),
 227             end = s.end();
 228
 229     // advance to first non-space char - simplifes logic in main loop,
 230     // since we can always prepend a single space when we see a
 231     // space -> non-space transition
 232         for (; (it != end) && isspace(*it); ++it) { /* nothing */ }
 233
 234         bool lastWasSpace = false;
 235         for (; it != end; ++it) {
 236             char c = *it;
 237             if (isspace(c)) {
 238                 lastWasSpace = true;
 239                 continue;
 240             }
 241
 242             if (lastWasSpace) {
 243                 result.push_back(' ');
 244             }
 245
 246             lastWasSpace = false;
 247             result.push_back(c);
 248         }
 249
 250         return result;
 251     }
 252
 253     int to_int(const std::string& s, int base)
 254     {
 255         stringstream ss(s);
 256         switch (base) {
 257         case 8:      ss >> std::oct; break;
 258         case 16:     ss >> std::hex; break;
 259         default: break;
 260         }
 261
 262         int result;
 263         ss >> result;
 264         return result;
 265     }
 266
 267     int compare_versions(const string& v1, const string& v2)
 268     {
 269         vector<string> v1parts(split(v1, "."));
 270         vector<string> v2parts(split(v2, "."));
 271
 272         int lastPart = std::min(v1parts.size(), v2parts.size());
 273         for (int part=0; part < lastPart; ++part) {
 274             int part1 = to_int(v1parts[part]);
 275             int part2 = to_int(v2parts[part]);
 276
 277             if (part1 != part2) {
 278                 return part1 - part2;
 279             }
 280         } // of parts iteration
 281
 282         // reached end - longer wins
 283         return v1parts.size() - v2parts.size();
 284     }
 285
 286     string join(const string_list& l, const string& joinWith)
 287     {
 288         string result;
 289         unsigned int count = l.size();
 290         for (unsigned int i=0; i < count; ++i) {
 291             result += l[i];
 292             if (i < (count - 1)) {
 293                 result += joinWith;
 294             }
 295         }
 296
 297         return result;
 298     }
 299
 300     string uppercase(const string &s) {
 301       string rslt(s);
 302       for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
 303         *p = toupper(*p);
 304       }
 305       return rslt;
 306     }
 307
 308
 309 #ifdef SG_WINDOWS
 310     #include <windows.h>
 311 #endif
 312
 313 std::string convertWindowsLocal8BitToUtf8(const std::string& a)
 314 {
 315 #ifdef SG_WINDOWS
 316     DWORD flags = 0;
 317     std::vector<wchar_t> wideString;
 318
 319     // call to query transform size
 320     int requiredWideChars = MultiByteToWideChar(CP_ACP, flags, a.c_str(), a.size(),
 321                         NULL, 0);
 322     // allocate storage and call for real
 323     wideString.resize(requiredWideChars);
 324     MultiByteToWideChar(CP_ACP, flags, a.c_str(), a.size(),
 325                         wideString.data(), wideString.size());
 326
 327     // now convert back down to UTF-8
 328     std::vector<char> result;
 329     int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
 330                                                 wideString.data(), wideString.size(),
 331                                                 NULL, 0, NULL, NULL);
 332     result.resize(requiredUTF8Chars);
 333     WideCharToMultiByte(CP_UTF8, flags,
 334                         wideString.data(), wideString.size(),
 335                         result.data(), result.size(), NULL, NULL);
 336     return std::string(result.data(), result.size());
 337 #else
 338     return a;
 339 #endif
 340 }
 341
 342 static const std::string base64_chars =
 343 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 344 "abcdefghijklmnopqrstuvwxyz"
 345 "0123456789+/";
 346
 347 static const unsigned char base64_decode_map[128] =
 348 {
 349     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 350     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 351     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 352     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 353     127, 127, 127,  62, 127, 127, 127,  63,  52,  53,
 354     54,  55,  56,  57,  58,  59,  60,  61, 127, 127,
 355     127,  64, 127, 127, 127,   0,   1,   2,   3,   4,
 356     5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
 357     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
 358     25, 127, 127, 127, 127, 127, 127,  26,  27,  28,
 359     29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
 360     39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
 361     49,  50,  51, 127, 127, 127, 127, 127
 362 };
 363
 364
 365 static inline bool is_base64(unsigned char c) {
 366   return (isalnum(c) || (c == '+') || (c == '/'));
 367 }
 368
 369 static bool is_whitespace(unsigned char c) {
 370     return ((c == ' ') || (c == '\r') || (c == '\n'));
 371 }
 372
 373 std::string decodeBase64(const std::string& encoded_string)
 374 {
 375   int in_len = encoded_string.size();
 376   int i = 0;
 377   int j = 0;
 378   int in_ = 0;
 379   unsigned char char_array_4[4], char_array_3[3];
 380   std::string ret;
 381
 382   while (in_len-- && ( encoded_string[in_] != '=')) {
 383     if (is_whitespace( encoded_string[in_])) {
 384         in_++;
 385         continue;
 386     }
 387
 388     if (!is_base64(encoded_string[in_])) {
 389         break;
 390     }
 391
 392     char_array_4[i++] = encoded_string[in_]; in_++;
 393     if (i ==4) {
 394       for (i = 0; i <4; i++)
 395         char_array_4[i] = base64_decode_map[char_array_4[i]];
 396
 397       char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
 398       char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
 399       char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
 400
 401       for (i = 0; (i < 3); i++)
 402         ret += char_array_3[i];
 403       i = 0;
 404     }
 405   }
 406
 407   if (i) {
 408     for (j = i; j <4; j++)
 409       char_array_4[j] = 0;
 410
 411     for (j = 0; j <4; j++)
 412       char_array_4[j] = base64_decode_map[char_array_4[j]];
 413
 414     char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
 415     char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
 416     char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
 417
 418     for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
 419   }
 420
 421   return ret;
 422 }
 423
 424 const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
 425
 426 std::string encodeHex(const std::string& bytes)
 427 {
 428   std::string hex;
 429   size_t count = bytes.size();
 430   for (unsigned int i=0; i<count;++i) {
 431       unsigned char c = bytes[i];
 432       hex.push_back(hexChar[c >> 4]);
 433       hex.push_back(hexChar[c & 0x0f]);
 434   }
 435
 436   return hex;
 437 }
 438
 439 std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
 440 {
 441   std::string hex;
 442   for (unsigned int i=0; i<length;++i) {
 443       unsigned char c = *rawBytes++;
 444       hex.push_back(hexChar[c >> 4]);
 445       hex.push_back(hexChar[c & 0x0f]);
 446   }
 447
 448   return hex;
 449 }
 450
 451 //------------------------------------------------------------------------------
 452 std::string unescape(const char* s)
 453 {
 454   std::string r;
 455   while( *s )
 456   {
 457     if( *s != '\\' )
 458     {
 459       r += *s++;
 460       continue;
 461     }
 462
 463     if( !*++s )
 464       break;
 465
 466     if (*s == '\\') {
 467         r += '\\';
 468     } else if (*s == 'n') {
 469         r += '\n';
 470     } else if (*s == 'r') {
 471         r += '\r';
 472     } else if (*s == 't') {
 473         r += '\t';
 474     } else if (*s == 'v') {
 475         r += '\v';
 476     } else if (*s == 'f') {
 477         r += '\f';
 478     } else if (*s == 'a') {
 479         r += '\a';
 480     } else if (*s == 'b') {
 481         r += '\b';
 482     } else if (*s == 'x') {
 483         if (!*++s)
 484             break;
 485         int v = 0;
 486         for (int i = 0; i < 2 && isxdigit(*s); i++, s++)
 487             v = v * 16 + (isdigit(*s) ? *s - '0' : 10 + tolower(*s) - 'a');
 488         r += v;
 489         continue;
 490
 491     } else if (*s >= '0' && *s <= '7') {
 492         int v = *s++ - '0';
 493         for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++)
 494             v = v * 8 + *s - '0';
 495         r += v;
 496         continue;
 497
 498     } else {
 499         r += *s;
 500     }
 501     s++;
 502   }
 503   return r;
 504 }
 505
 506 string sanitizePrintfFormat(const string& input)
 507 {
 508     string::size_type i = input.find("%n");
 509     if (i != string::npos) {
 510         SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
 511         return string();
 512     }
 513
 514     return input;
 515 }
 516
 517 } // end namespace strutils
 518
 519 } // end namespace simgear