Fix missing include in simgear/misc/strutils.cxx

[simgear.git] / simgear / misc / strutils.cxx
diff --git a/simgear/misc/strutils.cxx b/simgear/misc/strutils.cxx

index 17e39c95eaf4ca29a027f0ad0836ee42f918ad52..7a2ffa30dff43a7e6251b603e4a3e7824bc9862d 100644 (file)
--- a/simgear/misc/strutils.cxx
+++ b/simgear/misc/strutils.cxx
@@ -1,71 +1,649 @@
  // String utilities.
  //
-// Written by Bernie Bright, 1998
+// Written by Bernie Bright, started 1998
  //
-// Copyright (C) 1998  Bernie Bright - bbright@c031.aone.net.au
+// Copyright (C) 1998  Bernie Bright - bbright@bigpond.net.au
  //
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of the
-// License, or (at your option) any later version.
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Library General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
  //
-// This program is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-// General Public License for more details.
+// Library General Public License for more details.
  //
  // You should have received a copy of the GNU General Public License
  // along with this program; if not, write to the Free Software
-// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  //
  // $Id$
  
-#ifdef HAVE_CONFIG_H
-#  include <config.h>
-#endif
+#include <ctype.h>
+#include <cstring>
+#include <sstream>
+#include <algorithm>
+#include <string.h>             // strerror_r() and strerror_s()
+#include <errno.h>
  
  #include "strutils.hxx"
  
-const string whitespace = " \n\r\t";
+#include <simgear/debug/logstream.hxx>
+#include <simgear/package/md5.h>
+#include <simgear/compiler.h>   // SG_WINDOWS
+#include <simgear/structure/exception.hxx>
  
-//
-string
-trimleft( const string& s, const string& trimmings )
-{
-    string result;
-    string::size_type pos = s.find_first_not_of( trimmings );
-    if ( pos != string::npos )
+using std::string;
+using std::vector;
+using std::stringstream;
+
+namespace simgear {
+    namespace strutils {
+
+       /*
+        * utf8ToLatin1() converts UTF-8 to Latin-1; useful for accented characters (e.g. éâàîè...)
+        */
+       /**
+        * Number of bytes in the UTF-8 sequence announced by the lead byte at p.
+        * Only the byte at p is inspected: values below 0x80 give 1, otherwise
+        * the first clear bit among 0x20, 0x10, 0x08, 0x04 selects 2..5, and 6
+        * is returned when all of them are set. Continuation bytes
+        * (0x80..0xBF) are not rejected here.
+        */
+       template <typename Iterator> size_t get_length (Iterator p) {
+               const unsigned char lead = static_cast<unsigned char> (*p);
+               if (lead < 0x80) return 1;  // single-byte (ASCII) character
+               // Walk the marker bits from 0x20 down to 0x04; each one that is
+               // set means the sequence is one byte longer.
+               size_t len = 2;
+               for (unsigned int mask = 0x20; mask >= 0x04; mask >>= 1, ++len) {
+                       if (!(lead & mask)) return len;
+               }
+               return len;  // all marker bits set: 6
+       }
+
+       typedef unsigned int value_type;
+
+       /**
+        * Decode the UTF-8 sequence starting at p into a code point value.
+        * The iterator is advanced once per continuation byte read; when
+        * Iterator is a reference type the caller's iterator therefore ends up
+        * on the last byte consumed. Returns 0x00ffffff as soon as a
+        * continuation byte is not in the range 0x80..0xBF.
+        */
+       template <typename Iterator> value_type get_value (Iterator p) {
+               const size_t total = get_length (p);
+               if (total == 1) return *p;
+
+               // Payload bits of the lead byte, shifted to their final position
+               value_type res = static_cast<unsigned char> ( *p & (0xff >> (total + 1))) << ((total - 1) * 6 );
+
+               // 'remaining' counts the continuation bytes still to be merged
+               for (size_t remaining = total - 1; remaining != 0; --remaining) {
+                       ++p;
+                       const value_type next_byte = static_cast<unsigned char> (*p) - 0x80;
+                       if (next_byte & 0xC0) return 0x00ffffff; // invalid UTF-8
+                       res |= next_byte << ((remaining - 1) * 6);
+               }
+               return res;
+       }
+
+       /**
+        * Convert a UTF-8 string to Latin-1, one output byte per decoded
+        * character. If the input is not valid UTF-8 (including a multi-byte
+        * sequence cut short by the end of the string) it is returned
+        * unchanged, on the guess that it already was Latin-1. Characters above
+        * 0xff are logged and truncated to their low byte.
+        */
+       string utf8ToLatin1( string& s_utf8 ) {
+               string s_latin1;
+               for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) {
+                       // get_value() advances p over the continuation bytes; make sure
+                       // they all exist so it never steps onto or past end().
+                       if (get_length(p) > static_cast<size_t>(s_utf8.end() - p)) return s_utf8;
+                       value_type value = get_value<string::iterator&>(p);
+                       if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1
+                       if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value);
+                       s_latin1 += static_cast<char>(value);
+               }
+               return s_latin1;
+       }
+
+       /**
+        * Split str on runs of whitespace (as classified by isspace()).
+        * Leading and trailing whitespace never produce empty tokens. When
+        * maxsplit is non-zero and that many tokens have been produced, the
+        * rest of the string (after skipping whitespace) is appended as one
+        * final element, if it is not empty.
+        */
+       static vector<string>
+       split_whitespace( const string& str, int maxsplit )
+       {
+           vector<string> tokens;
+           const string::size_type total = str.length();
+           string::size_type pos = 0;
+           int produced = 0;
+
+           for (;;)
+           {
+               // skip the separator run in front of the next token
+               while (pos < total && isspace((unsigned char)str[pos]))
+                   ++pos;
+               if (pos >= total)
+                   break;
+
+               // consume the token itself
+               const string::size_type start = pos;
+               while (pos < total && !isspace((unsigned char)str[pos]))
+                   ++pos;
+               tokens.push_back( str.substr(start, pos - start) );
+               ++produced;
+
+               // skip the whitespace that follows so the remainder starts clean
+               while (pos < total && isspace((unsigned char)str[pos]))
+                   ++pos;
+
+               if (maxsplit && (produced >= maxsplit) && pos < total)
+               {
+                   tokens.push_back( str.substr( pos, total - pos ) );
+                   break;
+               }
+           }
+
+           return tokens;
+       }
+
+       /**
+        * Split str at every occurrence of the separator string sep.
+        * A null sep selects whitespace splitting via split_whitespace(); an
+        * empty sep is treated as an error and yields an empty vector.
+        * When maxsplit is non-zero, splitting stops once that many separators
+        * have been consumed; the remainder of the string is always appended
+        * as the last element (possibly empty).
+        */
+       vector<string>
+       split( const string& str, const char* sep, int maxsplit )
+       {
+           if (sep == 0)
+               return split_whitespace( str, maxsplit );
+
+           vector<string> pieces;
+           const int sepLen = std::strlen( sep );
+           if (sepLen == 0)
+           {
+               // Error: empty separator string
+               return pieces;
+           }
+
+           string::size_type start = 0;   // beginning of the current piece
+           int consumed = 0;              // separators handled so far
+           string::size_type hit;
+           while ((hit = str.find( sep, start )) != string::npos)
+           {
+               pieces.push_back( str.substr(start, hit - start) );
+               start = hit + sepLen;
+               ++consumed;
+               if (maxsplit && (consumed >= maxsplit))
+                   break;
+           }
+
+           pieces.push_back( str.substr(start) );
+           return pieces;
+       }
+
+       /**
+        * The lstrip(), rstrip() and strip() functions are implemented
+        * in do_strip() which uses an additional parameter to indicate what
+        * type of strip should occur.
+        */
+       const int LEFTSTRIP = 0;
+       const int RIGHTSTRIP = 1;
+       const int BOTHSTRIP = 2;
+
+       /**
+        * Return s without leading and/or trailing whitespace.
+        * striptype selects the side(s): LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP.
+        * A string consisting only of whitespace becomes empty for every
+        * striptype.
+        */
+       static string
+       do_strip( const string& s, int striptype )
+       {
+           const string::size_type len = s.length();
+           if( len == 0 ) // empty string is trivial
+               return s;
+
+           // i: index of the first character to keep
+           string::size_type i = 0;
+           if (striptype != RIGHTSTRIP)
+           {
+               // isspace() needs an unsigned char value; plain char may be negative
+               while (i < len && isspace(static_cast<unsigned char>(s[i])))
+                   ++i;
+           }
+
+           // j: one past the last character to keep; never moves below i, so
+           // an all-whitespace string is stripped completely
+           string::size_type j = len;
+           if (striptype != LEFTSTRIP)
+           {
+               while (j > i && isspace(static_cast<unsigned char>(s[j - 1])))
+                   --j;
+           }
+
+           if (i == 0 && j == len)
+               return s;   // nothing to remove
+
+           return s.substr( i, j - i );
+       }
+
+       /** Copy of s with leading whitespace removed (do_strip, LEFTSTRIP). */
+       string
+       lstrip( const string& s )
+       {
+           const int side = LEFTSTRIP;
+           return do_strip( s, side );
+       }
+
+       /** Copy of s with trailing whitespace removed (do_strip, RIGHTSTRIP). */
+       string
+       rstrip( const string& s )
+       {
+           const int side = RIGHTSTRIP;
+           return do_strip( s, side );
+       }
+
+       /** Copy of s with whitespace removed at both ends (do_strip, BOTHSTRIP). */
+       string
+       strip( const string& s )
+       {
+           const int side = BOTHSTRIP;
+           return do_strip( s, side );
+       }
+
+       /**
+        * Pad s on the right with copies of c until it is 'length' characters
+        * long. Strings that are already that long (or longer) are returned
+        * unchanged.
+        */
+       string
+       rpad( const string & s, string::size_type length, char c )
+       {
+           const string::size_type have = s.length();
+           if( have < length )
+               return s + string( length - have, c );
+           return s;
+       }
+
+       /**
+        * Pad s on the left with copies of c until it is 'length' characters
+        * long. Strings that are already that long (or longer) are returned
+        * unchanged.
+        */
+       string
+       lpad( const string & s, size_t length, char c )
+       {
+           const string::size_type have = s.length();
+           if( have < length )
+               return string( length - have, c ) + s;
+           return s;
+       }
+
+       /** True when s begins with substr (always true for an empty substr). */
+       bool
+       starts_with( const string & s, const string & substr )
+       {
+         // A reverse search anchored at index 0 can only match at index 0
+         return s.rfind( substr, 0 ) == 0;
+       }
+
+       /** True when s ends with substr (always true for an empty substr). */
+       bool
+       ends_with( const string & s, const string & substr )
+       {
+         const string::size_type tail = substr.length();
+         if( s.length() < tail )
+           return false;
+         // Compare both strings back to front over the length of substr
+         return std::equal( substr.rbegin(), substr.rend(), s.rbegin() );
+       }
+
+    /**
+     * Return s with leading and trailing whitespace removed and every
+     * internal run of whitespace replaced by a single space character.
+     */
+    string simplify(const string& s)
      {
-        result.assign( s.substr( pos ) );
+        string result; // reserve size of 's'?
+        string::const_iterator it = s.begin(),
+            end = s.end();
+
+        // Advance to the first non-space char - simplifies the logic in the
+        // main loop, since we can always prepend a single space when we see a
+        // space -> non-space transition.
+        // isspace() requires an unsigned char value: plain char may be
+        // negative for non-ASCII bytes, which is undefined behaviour.
+        for (; (it != end) && isspace(static_cast<unsigned char>(*it)); ++it) { /* nothing */ }
+
+        bool lastWasSpace = false;
+        for (; it != end; ++it) {
+            const char c = *it;
+            if (isspace(static_cast<unsigned char>(c))) {
+                // remember the gap; it is only emitted if more text follows
+                lastWasSpace = true;
+                continue;
+            }
+
+            if (lastWasSpace) {
+                result.push_back(' ');
+            }
+
+            lastWasSpace = false;
+            result.push_back(c);
+        }
+
+        return result;
      }
+    
+    /**
+     * Parse s as an integer using stream extraction.
+     * base 8 and 16 select octal and hexadecimal parsing; any other value
+     * leaves the stream in its default (decimal) mode. Returns 0 when no
+     * number can be extracted (for example, for an empty string).
+     */
+    int to_int(const std::string& s, int base)
+    {
+        stringstream ss(s);
+        switch (base) {
+        case 8:      ss >> std::oct; break;
+        case 16:     ss >> std::hex; break;
+        default: break;
+        }
+
+        // Must be initialised: if the stream is already at EOF the extraction
+        // does not touch the target, and returning it would read an
+        // indeterminate value.
+        int result = 0;
+        ss >> result;
+        return result;
+    }
+    
+    /**
+     * Compare two dotted version strings component by component.
+     * Each component is converted with to_int(); the difference of the first
+     * pair that differs is returned. If all shared components are equal, the
+     * result is the difference in the number of components (the longer
+     * version compares greater).
+     */
+    int compare_versions(const string& v1, const string& v2)
+    {
+        const vector<string> lhs = split(v1, ".");
+        const vector<string> rhs = split(v2, ".");
  
+        const int shared = std::min(lhs.size(), rhs.size());
+        int idx = 0;
+        while (idx < shared) {
+            const int a = to_int(lhs[idx]);
+            const int b = to_int(rhs[idx]);
+            if (a != b) {
+                return a - b;
+            }
+            ++idx;
+        }
+
+        // every shared component matched - longer wins
+        return lhs.size() - rhs.size();
+    }
+    
+    /**
+     * Concatenate the elements of l, inserting joinWith between consecutive
+     * elements (not before the first or after the last).
+     */
+    string join(const string_list& l, const string& joinWith)
+    {
+        string joined;
+        const unsigned int total = l.size();
+        for (unsigned int idx = 0; idx < total; ++idx) {
+            // the separator goes in front of every element except the first
+            if (idx != 0) {
+                joined += joinWith;
+            }
+            joined += l[idx];
+        }
+
+        return joined;
+    }
+    
+    /**
+     * Return a copy of s with every character converted by toupper()
+     * (byte-wise, according to the current C locale).
+     */
+    string uppercase(const string &s) {
+      string rslt(s);
+      for(string::iterator p = rslt.begin(); p != rslt.end(); ++p){
+        // toupper() requires a value representable as unsigned char;
+        // passing a negative plain char is undefined behaviour.
+        *p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
+      }
+      return rslt;
+    }
+
+    /**
+     * Return a copy of s with every character converted by tolower()
+     * (byte-wise, according to the current C locale).
+     */
+    string lowercase(const string &s) {
+      string rslt(s);
+      for(string::iterator p = rslt.begin(); p != rslt.end(); ++p){
+        // tolower() requires a value representable as unsigned char;
+        // passing a negative plain char is undefined behaviour.
+        *p = static_cast<char>(tolower(static_cast<unsigned char>(*p)));
+      }
+      return rslt;
+    }
+
+    /**
+     * Convert s to lower case in place, character by character, using
+     * tolower() (byte-wise, according to the current C locale).
+     */
+    void lowercase(string &s) {
+      for(string::iterator p = s.begin(); p != s.end(); ++p){
+        // tolower() requires a value representable as unsigned char;
+        // passing a negative plain char is undefined behaviour.
+        *p = static_cast<char>(tolower(static_cast<unsigned char>(*p)));
+      }
+    }
+    
+#if defined(SG_WINDOWS)
+
+#include <windows.h>
+    
+// Convert the multi-byte string 'a' to wide characters using the Windows
+// code page 'encoding' (passed straight through to MultiByteToWideChar).
+// The input length handed to the API is a.size(), i.e. the terminating NUL
+// of 'a' is not part of the conversion.
+static WCharVec convertMultiByteToWString(DWORD encoding, const std::string& a)
+{
+    WCharVec result;
+    DWORD flags = 0;
+    // First call has no output buffer: it only reports the required size
+    int requiredWideChars = MultiByteToWideChar(encoding, flags, 
+                        a.c_str(), a.size(),
+                        NULL, 0);
+    result.resize(requiredWideChars);
+    // Second call performs the conversion into the buffer sized above
+    MultiByteToWideChar(encoding, flags, a.c_str(), a.size(),
+                        result.data(), result.size());
      return result;
  }
  
-//
-string
-trimright( const string& s, const string& trimmings )
+// Convert the UTF-8 encoded string 'a' to wide characters (Windows only);
+// thin wrapper around convertMultiByteToWString() with CP_UTF8.
+WCharVec convertUtf8ToWString(const std::string& a)
  {
-    string result;
+    return convertMultiByteToWString(CP_UTF8, a);
+}
  
-    string::size_type pos = s.find_last_not_of( trimmings );
-    if ( pos == string::npos )
-    {
-       // Not found, return the original string.
-       result = s;
+#endif
+
+// On Windows (SG_WINDOWS), re-encode 'a' from the code page CP_ACP to UTF-8
+// by going through a wide-character intermediate. On every other platform
+// the input is returned unchanged.
+std::string convertWindowsLocal8BitToUtf8(const std::string& a)
+{
+#ifdef SG_WINDOWS
+    DWORD flags = 0;
+    // Step 1: CP_ACP bytes -> wide characters
+    WCharVec wideString = convertMultiByteToWString(CP_ACP, a);
+   
+    // convert down to UTF-8
+    std::vector<char> result;
+    // Step 2a: no output buffer given - only asks for the required byte count
+    int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
+                                                wideString.data(), wideString.size(),
+                                                NULL, 0, NULL, NULL);
+    result.resize(requiredUTF8Chars);
+    // Step 2b: the actual conversion into the buffer sized above
+    WideCharToMultiByte(CP_UTF8, flags,
+                        wideString.data(), wideString.size(),
+                        result.data(), result.size(), NULL, NULL);
+    return std::string(result.data(), result.size());
+#else
+    return a;
+#endif
+}
+
+//------------------------------------------------------------------------------
+// Compute the MD5 digest of the 'num' bytes starting at 'data' and return
+// it as a hexadecimal string produced by encodeHex().
+std::string md5(const unsigned char* data, size_t num)
+{
+  unsigned char digest[MD5_DIGEST_LENGTH];
+
+  // init -> update -> final: the whole buffer is hashed in one update
+  SG_MD5_CTX ctx;
+  SG_MD5Init(&ctx);
+  SG_MD5Update(&ctx, data, num);
+  SG_MD5Final(digest, &ctx);
+
+  return encodeHex(digest, MD5_DIGEST_LENGTH);
+}
+
+//------------------------------------------------------------------------------
+// Convenience overload: hash 'num' bytes given as plain chars.
+std::string md5(const char* data, size_t num)
+{
+  const unsigned char* const bytes =
+    reinterpret_cast<const unsigned char*>(data);
+  return md5(bytes, num);
+}
+
+//------------------------------------------------------------------------------
+// Convenience overload: hash the full contents of 'str' (str.size() bytes).
+std::string md5(const std::string& str)
+{
+  const unsigned char* const bytes =
+    reinterpret_cast<const unsigned char*>(str.c_str());
+  return md5(bytes, str.size());
+}
+
+//------------------------------------------------------------------------------
+// The 64 symbols of the base64 alphabet, ordered by the 6-bit value they
+// encode ('A' = 0 ... '/' = 63).
+static const std::string base64_chars =
+"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+"abcdefghijklmnopqrstuvwxyz"
+"0123456789+/";
+
+// Reverse lookup used by decodeBase64(): indexed by character code (0..127),
+// gives the 6-bit value of a base64 symbol. '=' (61) maps to 64 and every
+// other character maps to 127.
+static const unsigned char base64_decode_map[128] =
+{
+    127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+    127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+    127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+    127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
+    127, 127, 127,  62, 127, 127, 127,  63,  52,  53,
+    54,  55,  56,  57,  58,  59,  60,  61, 127, 127,
+    127,  64, 127, 127, 127,   0,   1,   2,   3,   4,
+    5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+    15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
+    25, 127, 127, 127, 127, 127, 127,  26,  27,  28,
+    29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
+    39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
+    49,  50,  51, 127, 127, 127, 127, 127
+};
+ 
+        
+// True for characters accepted as base64 symbols: anything isalnum()
+// accepts, plus '+' and '/'.
+static inline bool is_base64(unsigned char c) {
+  if (c == '+' || c == '/')
+    return true;
+  return isalnum(c) != 0;
+}
+
+// True for the characters skipped inside base64 input: space, carriage
+// return and line feed (tab is not included).
+static bool is_whitespace(unsigned char c) {
+    switch (c) {
+    case ' ':
+    case '\r':
+    case '\n':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Decode base64 text and append the resulting bytes to 'ret' (existing
+// contents of 'ret' are kept). Spaces, CR and LF in the input are skipped.
+// Decoding stops at the first '=' or at the first character that is neither
+// whitespace (per is_whitespace) nor a base64 symbol (per is_base64).
+void decodeBase64(const std::string& encoded_string, std::vector<unsigned char>& ret)
+{
+  int in_len = encoded_string.size();
+  int i = 0;      // number of symbols collected in char_array_4
+  int j = 0;
+  int in_ = 0;    // read position in encoded_string
+  unsigned char char_array_4[4], char_array_3[3];
+  
+  while (in_len-- && ( encoded_string[in_] != '=')) {
+    if (is_whitespace( encoded_string[in_])) {
+        in_++; 
+        continue;
+    }
+    
+    if (!is_base64(encoded_string[in_])) {
+        break;
      }
-    else
+    
+    char_array_4[i++] = encoded_string[in_]; in_++;
+    if (i ==4) {
+      // A full group: translate the 4 symbols to their 6-bit values ...
+      for (i = 0; i <4; i++)
+        char_array_4[i] = base64_decode_map[char_array_4[i]];
+      
+      // ... and repack 4 x 6 bits into 3 x 8 bits
+      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+      
+      for (i = 0; (i < 3); i++)
+        ret.push_back(char_array_3[i]);
+      i = 0;
+    }
+  }
+  
+  // Trailing partial group of i symbols (1..3): fill the unused slots, decode
+  // as above, and emit only the i - 1 bytes that are fully determined by the
+  // symbols actually read.
+  if (i) {
+    for (j = i; j <4; j++)
+      char_array_4[j] = 0;
+    
+    for (j = 0; j <4; j++)
+      char_array_4[j] = base64_decode_map[char_array_4[j]];
+    
+    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+    
+    for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
+  }
+}  
+
+//------------------------------------------------------------------------------
+// Lower-case hexadecimal digits, indexed by nibble value (0..15); used by encodeHex().
+const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
+
+// Hex-encode every byte of 'bytes' (bytes.size() of them); forwards to the
+// pointer/length overload.
+std::string encodeHex(const std::string& bytes)
+{
+  const unsigned char* const raw =
+    reinterpret_cast<const unsigned char*>(bytes.c_str());
+  return encodeHex(raw, bytes.size());
+}
+
+// Return the lower-case hexadecimal representation of 'length' bytes
+// starting at rawBytes: two characters per byte, high nibble first.
+std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
+{
+  std::string hex;
+  hex.reserve(length * 2);
+  const unsigned char* const stop = rawBytes + length;
+  for (const unsigned char* cur = rawBytes; cur != stop; ++cur) {
+      hex.push_back(hexChar[*cur >> 4]);      // high nibble
+      hex.push_back(hexChar[*cur & 0x0f]);    // low nibble
+  }
+
+  return hex;
+}
+
+//------------------------------------------------------------------------------
+// Expand C-style backslash escapes in the NUL-terminated string 's'.
+// Recognised: \\ \n \r \t \v \f \a \b, \x followed by up to two hex digits,
+// and octal escapes starting with a digit 0-7. Any other escaped character
+// is copied as-is (the backslash is dropped). A backslash at the very end of
+// the input is dropped.
+// NOTE(review): the octal branch consumes the first digit plus up to three
+// more (four in total), whereas C allows at most three - confirm whether
+// callers rely on this before changing it.
+std::string unescape(const char* s)
+{
+  std::string r;
+  while( *s )
+  {
+    if( *s != '\\' )
      {
-        result.assign( s.substr( 0, pos+1 ) );
+      r += *s++;
+      continue;
      }
  
-    return result;
+    if( !*++s )
+      break;
+
+    if (*s == '\\') {
+        r += '\\';
+    } else if (*s == 'n') {
+        r += '\n';
+    } else if (*s == 'r') {
+        r += '\r';
+    } else if (*s == 't') {
+        r += '\t';
+    } else if (*s == 'v') {
+        r += '\v';
+    } else if (*s == 'f') {
+        r += '\f';
+    } else if (*s == 'a') {
+        r += '\a';
+    } else if (*s == 'b') {
+        r += '\b';
+    } else if (*s == 'x') {
+        if (!*++s)
+            break;
+        int v = 0;
+        // The <ctype.h> functions require an unsigned char value; a plain
+        // char may be negative, which would be undefined behaviour.
+        for (int i = 0; i < 2 && isxdigit(static_cast<unsigned char>(*s)); i++, s++) {
+            const unsigned char c = static_cast<unsigned char>(*s);
+            v = v * 16 + (isdigit(c) ? c - '0' : 10 + tolower(c) - 'a');
+        }
+        r += v;
+        continue;   // s already points past the escape
+
+    } else if (*s >= '0' && *s <= '7') {
+        int v = *s++ - '0';
+        for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++)
+            v = v * 8 + *s - '0';
+        r += v;
+        continue;   // s already points past the escape
+
+    } else {
+        r += *s;
+    }
+    s++;
+  }
+  return r;
  }
  
-//
-string
-trim( const string& s, const string& trimmings )
+// Reject printf-style format strings that contain the sequence "%n":
+// such input is logged as a warning and replaced by an empty string; any
+// other input is returned unchanged.
+string sanitizePrintfFormat(const string& input)
  {
-    return trimright( trimleft( s, trimmings ), trimmings );
+    const bool containsPercentN = (input.find("%n") != string::npos);
+    if (!containsPercentN) {
+        return input;
+    }
+
+    SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
+    return string();
+}
+
+// Return the message text for the error number 'errnum' using a
+// thread-safe alternative to strerror(), selected at compile time:
+//   - SG_WINDOWS:            strerror_s()
+//   - _GNU_SOURCE:           the strerror_r() variant whose return value is
+//                            the message itself
+//   - POSIX.1-2001 or later: the strerror_r() variant returning an int status
+// In the Windows and POSIX branches a non-zero status makes the function
+// throw sg_error, with 'errnum' (as text) passed as the second constructor
+// argument.
+std::string error_string(int errnum)
+{
+  char buf[512];                // somewhat arbitrary...
+  // This could be simplified with C11 (annex K, optional...), which offers:
+  //
+  //   errno_t strerror_s( char *buf, rsize_t bufsz, errno_t errnum );
+  //   size_t strerrorlen_s( errno_t errnum );
+
+#if defined(SG_WINDOWS)
+  errno_t retcode;
+  // Always makes the string in 'buf' null-terminated
+  retcode = strerror_s(buf, sizeof(buf), errnum);
+#elif defined(_GNU_SOURCE)
+  // The returned pointer is used directly; 'buf' itself is not read afterwards
+  return std::string(strerror_r(errnum, buf, sizeof(buf)));
+#elif _POSIX_C_SOURCE >= 200112L
+  int retcode;
+  // POSIX.1-2001 and POSIX.1-2008
+  retcode = strerror_r(errnum, buf, sizeof(buf));
+#else
+#error "Could not find a thread-safe alternative to strerror()."
+#endif
+
+  // Everything below is only compiled for the status-returning variants
+#if !defined(_GNU_SOURCE)
+  if (retcode) {
+    std::string msg = "unable to get error message for a given error number";
+    // C++11 would make this shorter with std::to_string()
+    std::ostringstream ostr;
+    ostr << errnum;
+
+#if !defined(SG_WINDOWS)
+    if (retcode == ERANGE) {    // more specific error message in this case
+      msg = std::string("buffer too small to hold the error message for "
+                        "the specified error number");
+    }
+#endif
+
+    throw sg_error(msg, ostr.str());
+  }
+
+  return std::string(buf);
+#endif  // !defined(_GNU_SOURCE)
  }
  
+} // end namespace strutils
+    
+} // end namespace simgear