simgear/misc/strutils.cxx

   1 // String utilities.
   2 //
   3 // Written by Bernie Bright, started 1998
   4 //
   5 // Copyright (C) 1998  Bernie Bright - bbright@bigpond.net.au
   6 //
   7 // This library is free software; you can redistribute it and/or
   8 // modify it under the terms of the GNU Library General Public
   9 // License as published by the Free Software Foundation; either
  10 // version 2 of the License, or (at your option) any later version.
  11 //
  12 // This library is distributed in the hope that it will be useful,
  13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 // Library General Public License for more details.
  16 //
  17 // You should have received a copy of the GNU General Public License
  18 // along with this program; if not, write to the Free Software
  19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  20 //
  21 // $Id$
  22
  23 #include <ctype.h>
  24 #include <cstring>
  25 #include <sstream>
  26
  27 #include "strutils.hxx"
  28
  29 #include <simgear/debug/logstream.hxx>
  30
  31 using std::string;
  32 using std::vector;
  33 using std::stringstream;
  34
  35 namespace simgear {
  36     namespace strutils {
  37
  38         /**
  39          *
  40          */
  41         static vector<string>
  42         split_whitespace( const string& str, int maxsplit )
  43         {
  44             vector<string> result;
  45             string::size_type len = str.length();
  46             string::size_type i = 0;
  47             string::size_type j;
  48             int countsplit = 0;
  49
  50             while (i < len)
  51             {
  52                 while (i < len && isspace((unsigned char)str[i]))
  53                 {
  54                     ++i;
  55                 }
  56
  57                 j = i;
  58
  59                 while (i < len && !isspace((unsigned char)str[i]))
  60                 {
  61                     ++i;
  62                 }
  63
  64                 if (j < i)
  65                 {
  66                     result.push_back( str.substr(j, i-j) );
  67                     ++countsplit;
  68                     while (i < len && isspace((unsigned char)str[i]))
  69                     {
  70                         ++i;
  71                     }
  72
  73                     if (maxsplit && (countsplit >= maxsplit) && i < len)
  74                     {
  75                         result.push_back( str.substr( i, len-i ) );
  76                         i = len;
  77                     }
  78                 }
  79             }
  80
  81             return result;
  82         }
  83
  84         /**
  85          *
  86          */
  87         vector<string>
  88         split( const string& str, const char* sep, int maxsplit )
  89         {
  90             if (sep == 0)
  91                 return split_whitespace( str, maxsplit );
  92
  93             vector<string> result;
  94             int n = std::strlen( sep );
  95             if (n == 0)
  96             {
  97                 // Error: empty separator string
  98                 return result;
  99             }
 100             const char* s = str.c_str();
 101             string::size_type len = str.length();
 102             string::size_type i = 0;
 103             string::size_type j = 0;
 104             int splitcount = 0;
 105
 106             while (i+n <= len)
 107             {
 108                 if (s[i] == sep[0] && (n == 1 || std::memcmp(s+i, sep, n) == 0))
 109                 {
 110                     result.push_back( str.substr(j,i-j) );
 111                     i = j = i + n;
 112                     ++splitcount;
 113                     if (maxsplit && (splitcount >= maxsplit))
 114                         break;
 115                 }
 116                 else
 117                 {
 118                     ++i;
 119                 }
 120             }
 121
 122             result.push_back( str.substr(j,len-j) );
 123             return result;
 124         }
 125
 126         /**
 127          * The lstrip(), rstrip() and strip() functions are implemented
 128          * in do_strip() which uses an additional parameter to indicate what
 129          * type of strip should occur.
 130          */
 131         const int LEFTSTRIP = 0;
 132         const int RIGHTSTRIP = 1;
 133         const int BOTHSTRIP = 2;
 134
 135         static string
 136         do_strip( const string& s, int striptype )
 137         {
 138             string::size_type len = s.length();
 139             if( len == 0 ) // empty string is trivial
 140                 return s;
 141             string::size_type i = 0;
 142             if (striptype != RIGHTSTRIP)
 143             {
 144                 while (i < len && isspace(s[i]))
 145                 {
 146                     ++i;
 147                 }
 148             }
 149
 150             string::size_type j = len;
 151             if (striptype != LEFTSTRIP)
 152             {
 153                 do
 154                 {
 155                     --j;
 156                 }
 157                 while (j >= 1 && isspace(s[j]));
 158                 ++j;
 159             }
 160
 161             if (i == 0 && j == len)
 162             {
 163                 return s;
 164             }
 165             else
 166             {
 167                 return s.substr( i, j - i );
 168             }
 169         }
 170
 171         string
 172         lstrip( const string& s )
 173         {
 174             return do_strip( s, LEFTSTRIP );
 175         }
 176
 177         string
 178         rstrip( const string& s )
 179         {
 180             return do_strip( s, RIGHTSTRIP );
 181         }
 182
 183         string
 184         strip( const string& s )
 185         {
 186             return do_strip( s, BOTHSTRIP );
 187         }
 188
 189         string
 190         rpad( const string & s, string::size_type length, char c )
 191         {
 192             string::size_type l = s.length();
 193             if( l >= length ) return s;
 194             string reply = s;
 195             return reply.append( length-l, c );
 196         }
 197
 198         string
 199         lpad( const string & s, size_t length, char c )
 200         {
 201             string::size_type l = s.length();
 202             if( l >= length ) return s;
 203             string reply = s;
 204             return reply.insert( 0, length-l, c );
 205         }
 206
 207         bool
 208         starts_with( const string & s, const string & substr )
 209         {
 210           return s.compare(0, substr.length(), substr) == 0;
 211         }
 212
 213         bool
 214         ends_with( const string & s, const string & substr )
 215         {
 216           if( substr.length() > s.length() )
 217             return false;
 218           return s.compare( s.length() - substr.length(),
 219                             substr.length(),
 220                             substr ) == 0;
 221         }
 222
 223     string simplify(const string& s)
 224     {
 225         string result; // reserve size of 's'?
 226         string::const_iterator it = s.begin(),
 227             end = s.end();
 228
 229     // advance to first non-space char - simplifes logic in main loop,
 230     // since we can always prepend a single space when we see a
 231     // space -> non-space transition
 232         for (; (it != end) && isspace(*it); ++it) { /* nothing */ }
 233
 234         bool lastWasSpace = false;
 235         for (; it != end; ++it) {
 236             char c = *it;
 237             if (isspace(c)) {
 238                 lastWasSpace = true;
 239                 continue;
 240             }
 241
 242             if (lastWasSpace) {
 243                 result.push_back(' ');
 244             }
 245
 246             lastWasSpace = false;
 247             result.push_back(c);
 248         }
 249
 250         return result;
 251     }
 252
 253     int to_int(const std::string& s, int base)
 254     {
 255         stringstream ss(s);
 256         switch (base) {
 257         case 8:      ss >> std::oct; break;
 258         case 16:     ss >> std::hex; break;
 259         default: break;
 260         }
 261
 262         int result;
 263         ss >> result;
 264         return result;
 265     }
 266
 267     int compare_versions(const string& v1, const string& v2)
 268     {
 269         vector<string> v1parts(split(v1, "."));
 270         vector<string> v2parts(split(v2, "."));
 271
 272         int lastPart = std::min(v1parts.size(), v2parts.size());
 273         for (int part=0; part < lastPart; ++part) {
 274             int part1 = to_int(v1parts[part]);
 275             int part2 = to_int(v2parts[part]);
 276
 277             if (part1 != part2) {
 278                 return part1 - part2;
 279             }
 280         } // of parts iteration
 281
 282         // reached end - longer wins
 283         return v1parts.size() - v2parts.size();
 284     }
 285
 286     string join(const string_list& l, const string& joinWith)
 287     {
 288         string result;
 289         unsigned int count = l.size();
 290         for (unsigned int i=0; i < count; ++i) {
 291             result += l[i];
 292             if (i < (count - 1)) {
 293                 result += joinWith;
 294             }
 295         }
 296
 297         return result;
 298     }
 299
 300     string uppercase(const string &s) {
 301       string rslt(s);
 302       for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
 303         *p = toupper(*p);
 304       }
 305       return rslt;
 306     }
 307
 308
 309 #ifdef SG_WINDOWS
 310     #include <windows.h>
 311 #endif
 312
 313 std::string convertWindowsLocal8BitToUtf8(const std::string& a)
 314 {
 315 #ifdef SG_WINDOWS
 316     DWORD flags = 0;
 317     std::vector<wchar_t> wideString;
 318
 319     // call to query transform size
 320     int requiredWideChars = MultiByteToWideChar(CP_ACP, flags, a.c_str(), a.size(),
 321                         NULL, 0);
 322     // allocate storage and call for real
 323     wideString.resize(requiredWideChars);
 324     MultiByteToWideChar(CP_ACP, flags, a.c_str(), a.size(),
 325                         wideString.data(), wideString.size());
 326
 327     // now convert back down to UTF-8
 328     std::vector<char> result;
 329     int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
 330                                                 wideString.data(), wideString.size(),
 331                                                 NULL, 0, NULL, NULL);
 332     result.resize(requiredUTF8Chars);
 333     WideCharToMultiByte(CP_UTF8, flags,
 334                         wideString.data(), wideString.size(),
 335                         result.data(), result.size(), NULL, NULL);
 336     return std::string(result.data(), result.size());
 337 #else
 338     return a;
 339 #endif
 340 }
 341
 342 static const std::string base64_chars =
 343 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 344 "abcdefghijklmnopqrstuvwxyz"
 345 "0123456789+/";
 346
 347 static const unsigned char base64_decode_map[128] =
 348 {
 349     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 350     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 351     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 352     127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
 353     127, 127, 127,  62, 127, 127, 127,  63,  52,  53,
 354     54,  55,  56,  57,  58,  59,  60,  61, 127, 127,
 355     127,  64, 127, 127, 127,   0,   1,   2,   3,   4,
 356     5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
 357     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
 358     25, 127, 127, 127, 127, 127, 127,  26,  27,  28,
 359     29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
 360     39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
 361     49,  50,  51, 127, 127, 127, 127, 127
 362 };
 363
 364
 365 static inline bool is_base64(unsigned char c) {
 366   return (isalnum(c) || (c == '+') || (c == '/'));
 367 }
 368
 369 static bool is_whitespace(unsigned char c) {
 370     return ((c == ' ') || (c == '\r') || (c == '\n'));
 371 }
 372
 373 void decodeBase64(const std::string& encoded_string, std::vector<unsigned char>& ret)
 374 {
 375   int in_len = encoded_string.size();
 376   int i = 0;
 377   int j = 0;
 378   int in_ = 0;
 379   unsigned char char_array_4[4], char_array_3[3];
 380
 381   while (in_len-- && ( encoded_string[in_] != '=')) {
 382     if (is_whitespace( encoded_string[in_])) {
 383         in_++;
 384         continue;
 385     }
 386
 387     if (!is_base64(encoded_string[in_])) {
 388         break;
 389     }
 390
 391     char_array_4[i++] = encoded_string[in_]; in_++;
 392     if (i ==4) {
 393       for (i = 0; i <4; i++)
 394         char_array_4[i] = base64_decode_map[char_array_4[i]];
 395
 396       char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
 397       char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
 398       char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
 399
 400       for (i = 0; (i < 3); i++)
 401         ret.push_back(char_array_3[i]);
 402       i = 0;
 403     }
 404   }
 405
 406   if (i) {
 407     for (j = i; j <4; j++)
 408       char_array_4[j] = 0;
 409
 410     for (j = 0; j <4; j++)
 411       char_array_4[j] = base64_decode_map[char_array_4[j]];
 412
 413     char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
 414     char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
 415     char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
 416
 417     for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
 418   }
 419 }
 420
 421 const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
 422
 423 std::string encodeHex(const std::string& bytes)
 424 {
 425   std::string hex;
 426   size_t count = bytes.size();
 427   for (unsigned int i=0; i<count;++i) {
 428       unsigned char c = bytes[i];
 429       hex.push_back(hexChar[c >> 4]);
 430       hex.push_back(hexChar[c & 0x0f]);
 431   }
 432
 433   return hex;
 434 }
 435
 436 std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
 437 {
 438   std::string hex;
 439   for (unsigned int i=0; i<length;++i) {
 440       unsigned char c = *rawBytes++;
 441       hex.push_back(hexChar[c >> 4]);
 442       hex.push_back(hexChar[c & 0x0f]);
 443   }
 444
 445   return hex;
 446 }
 447
 448 //------------------------------------------------------------------------------
 449 std::string unescape(const char* s)
 450 {
 451   std::string r;
 452   while( *s )
 453   {
 454     if( *s != '\\' )
 455     {
 456       r += *s++;
 457       continue;
 458     }
 459
 460     if( !*++s )
 461       break;
 462
 463     if (*s == '\\') {
 464         r += '\\';
 465     } else if (*s == 'n') {
 466         r += '\n';
 467     } else if (*s == 'r') {
 468         r += '\r';
 469     } else if (*s == 't') {
 470         r += '\t';
 471     } else if (*s == 'v') {
 472         r += '\v';
 473     } else if (*s == 'f') {
 474         r += '\f';
 475     } else if (*s == 'a') {
 476         r += '\a';
 477     } else if (*s == 'b') {
 478         r += '\b';
 479     } else if (*s == 'x') {
 480         if (!*++s)
 481             break;
 482         int v = 0;
 483         for (int i = 0; i < 2 && isxdigit(*s); i++, s++)
 484             v = v * 16 + (isdigit(*s) ? *s - '0' : 10 + tolower(*s) - 'a');
 485         r += v;
 486         continue;
 487
 488     } else if (*s >= '0' && *s <= '7') {
 489         int v = *s++ - '0';
 490         for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++)
 491             v = v * 8 + *s - '0';
 492         r += v;
 493         continue;
 494
 495     } else {
 496         r += *s;
 497     }
 498     s++;
 499   }
 500   return r;
 501 }
 502
 503 string sanitizePrintfFormat(const string& input)
 504 {
 505     string::size_type i = input.find("%n");
 506     if (i != string::npos) {
 507         SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
 508         return string();
 509     }
 510
 511     return input;
 512 }
 513
 514 } // end namespace strutils
 515
 516 } // end namespace simgear