3 // Written by Bernie Bright, started 1998
5 // Copyright (C) 1998 Bernie Bright - bbright@bigpond.net.au
7 // This library is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Library General Public
9 // License as published by the Free Software Foundation; either
10 // version 2 of the License, or (at your option) any later version.
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // Library General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with this program; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
28 #include "strutils.hxx"
30 #include <simgear/debug/logstream.hxx>
31 #include <simgear/package/md5.h>
35 using std::stringstream;
41 * utf8ToLatin1() converts UTF-8 to Latin-1; useful for accented characters (e.g. éâàîè...)
// Returns the length in bytes of the UTF-8 sequence announced by the lead
// byte at *p. Only that one byte is inspected: values below 0x80 are a
// single-byte sequence; otherwise the first clear bit among 0x20, 0x10,
// 0x08, 0x04 selects a length of 2, 3, 4 or 5 respectively.
// NOTE(review): the result for a lead byte with all four bits set is decided
// on a line that is not visible in this view.
43 template <typename Iterator> size_t get_length (Iterator p) {
44 unsigned char c = static_cast<unsigned char> (*p);
45 if (c < 0x80) return 1;
46 else if (!(c & 0x20)) return 2;
47 else if (!(c & 0x10)) return 3;
48 else if (!(c & 0x08)) return 4;
49 else if (!(c & 0x04)) return 5;
// Integer type used for decoded character values.
53 typedef unsigned int value_type;
// Decodes the UTF-8 sequence starting at p into its numeric value.
// p is advanced (++p) once per continuation byte, so when Iterator is a
// reference type the caller's iterator ends up on the last byte consumed.
// Returns 0x00ffffff when a continuation byte is malformed.
54 template <typename Iterator> value_type get_value (Iterator p) {
55 size_t len = get_length (p);
// Single-byte sequence: the byte is the value.
56 if (len == 1) return *p;
// Keep only the payload bits of the lead byte (the top len+1 bits are the
// length marker) and shift them above the 6 bits each following byte adds.
57 value_type res = static_cast<unsigned char> ( *p & (0xff >> (len + 1))) << ((len - 1) * 6 );
58 for (--len; len; --len) {
// A valid continuation byte is 0x80..0xBF, i.e. fits in 6 bits once 0x80 is
// subtracted; anything else leaves bits set in 0xC0 (or wraps around).
59 value_type next_byte = static_cast<unsigned char> (*(++p)) - 0x80;
60 if (next_byte & 0xC0) return 0x00ffffff; // invalid UTF-8
61 res |= next_byte << ((len - 1) * 6);
// Converts a UTF-8 encoded string to Latin-1, producing one output char per
// decoded value. If any sequence decodes to a value above 0x10ffff (which
// includes get_value()'s 0x00ffffff error marker) the input is returned
// unchanged. Values above 0xff are reported with a warning and are still
// appended after narrowing to char.
66 string utf8ToLatin1( string& s_utf8 ) {
68 for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) {
// p is passed by reference (Iterator = string::iterator&): get_value leaves
// it on the last byte of the sequence and the loop's ++p steps to the next.
69 value_type value = get_value<string::iterator&>(p);
70 if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1
71 if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value);
72 s_latin1 += static_cast<char>(value);
// Splits str into words separated by runs of whitespace (as classified by
// isspace). When maxsplit is non-zero and that many words have been
// produced, whatever remains of the string is appended as one final element.
81 split_whitespace( const string& str, int maxsplit )
83 vector<string> result;
84 string::size_type len = str.length();
85 string::size_type i = 0;
// Skip whitespace in front of the next word. The unsigned char cast keeps
// bytes >= 0x80 from reaching isspace as negative values.
91 while (i < len && isspace((unsigned char)str[i]))
// Scan to the end of the current word.
98 while (i < len && !isspace((unsigned char)str[i]))
// The word occupies [j, i).
105 result.push_back( str.substr(j, i-j) );
// Skip the whitespace that follows the word so a remainder starts on text.
107 while (i < len && isspace((unsigned char)str[i]))
// Split limit reached with input left over: emit the rest as a single piece.
112 if (maxsplit && (countsplit >= maxsplit) && i < len)
114 result.push_back( str.substr( i, len-i ) );
// Splits str at each occurrence of the separator string sep and returns the
// pieces. A non-zero maxsplit limits the number of splits performed; the text
// after the last split is always appended as the final element.
127 split( const string& str, const char* sep, int maxsplit )
// NOTE(review): the condition guarding this delegation is not visible here;
// presumably it is taken when no separator is supplied - confirm.
130 return split_whitespace( str, maxsplit );
132 vector<string> result;
133 int n = std::strlen( sep );
136 // Error: empty separator string
139 const char* s = str.c_str();
140 string::size_type len = str.length();
// i scans the input; j marks the start of the piece currently being collected.
141 string::size_type i = 0;
142 string::size_type j = 0;
// Cheap first-character test before the full comparison; a one-character
// separator needs no memcmp at all.
147 if (s[i] == sep[0] && (n == 1 || std::memcmp(s+i, sep, n) == 0))
149 result.push_back( str.substr(j,i-j) );
// maxsplit == 0 means "no limit".
152 if (maxsplit && (splitcount >= maxsplit))
// Text after the last separator (possibly empty).
161 result.push_back( str.substr(j,len-j) );
166 * The lstrip(), rstrip() and strip() functions are implemented
167 * in do_strip() which uses an additional parameter to indicate what
168 * type of strip should occur.
// Selectors for do_strip(): which end(s) of the string to trim.
const int LEFTSTRIP = 0;
const int RIGHTSTRIP = 1;
const int BOTHSTRIP = 2;

/**
 * Remove whitespace (as classified by isspace) from one or both ends of a
 * string.
 *
 * @param s          string to trim
 * @param striptype  LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP
 * @return copy of s without the selected leading and/or trailing whitespace;
 *         an empty or all-whitespace input yields an empty string
 */
static std::string
do_strip( const std::string& s, int striptype )
{
    const std::string::size_type len = s.length();

    // First character to keep.
    std::string::size_type i = 0;
    if (striptype != RIGHTSTRIP) {
        // isspace() is only defined for unsigned char values (and EOF), so
        // bytes >= 0x80 must not be passed as negative plain chars.
        while (i < len && isspace(static_cast<unsigned char>(s[i]))) {
            ++i;
        }
    }

    // One past the last character to keep.
    std::string::size_type j = len;
    if (striptype != LEFTSTRIP) {
        // Stop at i: an all-whitespace string then trims to empty (the
        // previous loop always left one character behind), and j can never
        // drop below i, which keeps j - i from wrapping around.
        while (j > i && isspace(static_cast<unsigned char>(s[j - 1]))) {
            --j;
        }
    }

    return s.substr( i, j - i );
}
// Returns s with leading whitespace removed (do_strip in LEFTSTRIP mode).
211 lstrip( const string& s )
213 return do_strip( s, LEFTSTRIP );
// Returns s with trailing whitespace removed (do_strip in RIGHTSTRIP mode).
217 rstrip( const string& s )
219 return do_strip( s, RIGHTSTRIP );
// Returns s with whitespace removed from both ends (do_strip in BOTHSTRIP mode).
223 strip( const string& s )
225 return do_strip( s, BOTHSTRIP );
// Pads on the right with the character c up to a total of `length`
// characters. A string that is already at least that long is returned as is.
229 rpad( const string & s, string::size_type length, char c )
231 string::size_type l = s.length();
232 if( l >= length ) return s;
// Append the missing length-l fill characters.
// NOTE(review): the declaration of `reply` is not visible here; presumably a copy of s.
234 return reply.append( length-l, c );
// Pads on the left with the character c up to a total of `length`
// characters. A string that is already at least that long is returned as is.
238 lpad( const string & s, size_t length, char c )
240 string::size_type l = s.length();
241 if( l >= length ) return s;
// Insert the missing length-l fill characters at the front.
// NOTE(review): the declaration of `reply` is not visible here; presumably a copy of s.
243 return reply.insert( 0, length-l, c );
// Returns true when s begins with substr.
247 starts_with( const string & s, const string & substr )
// compare() clamps the compared range to the end of s, so a substr longer
// than s compares unequal instead of reading out of range.
249 return s.compare(0, substr.length(), substr) == 0;
// Tests whether s ends with substr by comparing the tail of s against it.
253 ends_with( const string & s, const string & substr )
// Guard for a suffix longer than s: without it the unsigned subtraction
// below would wrap around. NOTE(review): the guarded statement is not
// visible here; presumably it returns false.
255 if( substr.length() > s.length() )
// Compare starting at the position where a matching suffix would begin.
257 return s.compare( s.length() - substr.length(),
/**
 * Normalise the whitespace in a string: leading and trailing whitespace is
 * removed and every internal run of whitespace characters (as classified by
 * isspace) is replaced by a single space.
 *
 * @param s  input string
 * @return   the simplified copy; empty if s is empty or all whitespace
 */
std::string simplify(const std::string& s)
{
    std::string result;
    result.reserve(s.size()); // the output is never longer than the input

    // True while a whitespace run has been seen after some output and a
    // single separating space is still owed before the next character.
    bool pendingSpace = false;

    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
        const char c = *it;

        // isspace() is undefined for negative arguments, so bytes >= 0x80
        // (e.g. UTF-8 or Latin-1 text) must be passed as unsigned char.
        if (isspace(static_cast<unsigned char>(c))) {
            // Leading whitespace is dropped: a space is only owed once
            // something has been written. Trailing whitespace is dropped
            // because the owed space is emitted lazily, right before the
            // next non-space character.
            pendingSpace = !result.empty();
            continue;
        }

        if (pendingSpace) {
            result.push_back(' ');
            pendingSpace = false;
        }
        result.push_back(c);
    }

    return result;
}
// Parses an integer from s in the given numeric base.
// NOTE(review): `ss` is declared on a line not visible here; from its use it
// is an input stream, presumably built from s - confirm.
292 int to_int(const std::string& s, int base)
// Bases 8 and 16 switch the stream to octal / hexadecimal parsing; no other
// base is handled on the lines visible here.
296 case 8: ss >> std::oct; break;
297 case 16: ss >> std::hex; break;
306 int compare_versions(const string& v1, const string& v2)
308 vector<string> v1parts(split(v1, "."));
309 vector<string> v2parts(split(v2, "."));
311 int lastPart = std::min(v1parts.size(), v2parts.size());
312 for (int part=0; part < lastPart; ++part) {
313 int part1 = to_int(v1parts[part]);
314 int part2 = to_int(v2parts[part]);
316 if (part1 != part2) {
317 return part1 - part2;
319 } // of parts iteration
321 // reached end - longer wins
322 return v1parts.size() - v2parts.size();
// Builds a single string from the elements of l with joinWith as separator.
// NOTE(review): the statements that append to the result are not visible
// here; the description follows from the loop structure below.
325 string join(const string_list& l, const string& joinWith)
328 unsigned int count = l.size();
329 for (unsigned int i=0; i < count; ++i) {
// True for every element except the last, i.e. the separator goes between
// elements and not after the final one. (count is non-zero inside the loop,
// so count - 1 cannot wrap.)
331 if (i < (count - 1)) {
// Returns a converted copy of s; the argument itself is const and untouched.
// NOTE(review): the declaration of `rslt` and the loop body are not visible
// here; presumably rslt is a copy of s and each character is upper-cased.
339 string uppercase(const string &s) {
// Visit every character of the result string in place.
341 for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
// Returns a converted copy of s; the argument itself is const and untouched.
// NOTE(review): the declaration of `rslt` and the loop body are not visible
// here; presumably rslt is a copy of s and each character is lower-cased.
347 string lowercase(const string &s) {
// Visit every character of the result string in place.
349 for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
// In-place overload: operates directly on the caller's string.
// NOTE(review): the loop body is not visible here; presumably each character
// is replaced by its lower-case equivalent.
355 void lowercase(string &s) {
// Visit every character of s.
356 for(string::iterator p = s.begin(); p != s.end(); p++){
361 #if defined(SG_WINDOWS)
// Converts the byte string a, interpreted in the Windows code page
// `encoding`, to wide characters using MultiByteToWideChar.
// NOTE(review): `flags` and `result` are declared on lines not visible here.
365 static WCharVec convertMultiByteToWString(DWORD encoding, const std::string& a)
// First call: obtain the number of wide characters needed.
369 int requiredWideChars = MultiByteToWideChar(encoding, flags,
372 result.resize(requiredWideChars);
// Second call: perform the conversion into the buffer sized above. The
// length a.size() is passed explicitly, so no terminator is relied upon.
373 MultiByteToWideChar(encoding, flags, a.c_str(), a.size(),
374 result.data(), result.size());
// Converts a UTF-8 encoded string to wide characters (code page CP_UTF8).
378 WCharVec convertUtf8ToWString(const std::string& a)
380 return convertMultiByteToWString(CP_UTF8, a);
// Converts a string in the Windows ANSI code page (CP_ACP) to UTF-8, going
// through a wide-character intermediate.
// NOTE(review): `flags` is declared on a line not visible here.
385 std::string convertWindowsLocal8BitToUtf8(const std::string& a)
// Step 1: local 8-bit -> wide characters.
389 WCharVec wideString = convertMultiByteToWString(CP_ACP, a);
391 // convert down to UTF-8
392 std::vector<char> result;
// Sizing call: with a NULL output buffer of size 0, WideCharToMultiByte only
// reports how many bytes the UTF-8 form requires.
393 int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
394 wideString.data(), wideString.size(),
395 NULL, 0, NULL, NULL);
396 result.resize(requiredUTF8Chars);
// Conversion call into the buffer sized above.
397 WideCharToMultiByte(CP_UTF8, flags,
398 wideString.data(), wideString.size(),
399 result.data(), result.size(), NULL, NULL);
// Build the string from an explicit length; the buffer is not NUL-terminated.
400 return std::string(result.data(), result.size());
406 //------------------------------------------------------------------------------
// Computes the MD5 digest of the num bytes starting at data and returns it
// as text produced by encodeHex().
// NOTE(review): `md5_ctx` is declared on a line not visible here.
407 std::string md5(const unsigned char* data, size_t num)
410 SG_MD5Init(&md5_ctx);
411 SG_MD5Update(&md5_ctx, data, num);
// Raw digest bytes, MD5_DIGEST_LENGTH of them.
413 unsigned char digest[MD5_DIGEST_LENGTH];
414 SG_MD5Final(digest, &md5_ctx);
416 return encodeHex(digest, MD5_DIGEST_LENGTH);
419 //------------------------------------------------------------------------------
// Convenience overload for char data: forwards to the unsigned char version.
420 std::string md5(const char* data, size_t num)
422 return md5(reinterpret_cast<const unsigned char*>(data), num);
425 //------------------------------------------------------------------------------
// Convenience overload: digest of the bytes of str (all str.size() of them,
// so embedded NUL bytes are included).
426 std::string md5(const std::string& str)
428 return md5(reinterpret_cast<const unsigned char*>(str.c_str()), str.size());
431 //------------------------------------------------------------------------------
// Base64 alphabet string. The portion visible here lists the upper-case and
// lower-case letters; the remainder of the literal is outside this view.
432 static const std::string base64_chars =
433 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
434 "abcdefghijklmnopqrstuvwxyz"
// Lookup table from ASCII code (0..127) to 6-bit base64 value, ten entries
// per row. 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62
// and '/' to 63. '=' (code 61) maps to 64; every other code maps to 127.
// The table has only 128 entries, so it must not be indexed with bytes >= 0x80.
437 static const unsigned char base64_decode_map[128] =
439 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
440 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
441 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
442 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
443 127, 127, 127, 62, 127, 127, 127, 63, 52, 53,
444 54, 55, 56, 57, 58, 59, 60, 61, 127, 127,
445 127, 64, 127, 127, 127, 0, 1, 2, 3, 4,
446 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
447 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
448 25, 127, 127, 127, 127, 127, 127, 26, 27, 28,
449 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
450 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
451 49, 50, 51, 127, 127, 127, 127, 127
// True when c is accepted as a base64 digit: anything isalnum() classifies
// as alphanumeric, plus '+' and '/'. The padding character '=' is not included.
455 static inline bool is_base64(unsigned char c) {
456 return (isalnum(c) || (c == '+') || (c == '/'));
// True for the three characters treated as whitespace in base64 input:
// space, carriage return and line feed. Tab is not included.
459 static bool is_whitespace(unsigned char c) {
460 return ((c == ' ') || (c == '\r') || (c == '\n'));
// Decodes base64 text from encoded_string and appends the resulting bytes to
// ret (existing contents of ret are kept). Input is consumed in groups of
// four characters, each group yielding three bytes; a shorter final group is
// handled separately at the end.
// NOTE(review): several lines (declarations of i, j, in_ and the bodies of
// the branches below) are not visible in this view.
463 void decodeBase64(const std::string& encoded_string, std::vector<unsigned char>& ret)
465 int in_len = encoded_string.size();
// Buffers for one group: four input characters / three output bytes.
469 unsigned char char_array_4[4], char_array_3[3];
// Consume input until it is exhausted or the first '=' padding is reached.
471 while (in_len-- && ( encoded_string[in_] != '=')) {
// Whitespace test; the handling of a match is not visible here.
472 if (is_whitespace( encoded_string[in_])) {
// Non-base64 character test; the handling of a match is not visible here.
477 if (!is_base64(encoded_string[in_])) {
// Collect the character into the current group.
481 char_array_4[i++] = encoded_string[in_]; in_++;
// Replace each collected character by its 6-bit value.
483 for (i = 0; i <4; i++)
484 char_array_4[i] = base64_decode_map[char_array_4[i]];
// Repack four 6-bit values into three 8-bit bytes.
486 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
487 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
488 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
490 for (i = 0; (i < 3); i++)
491 ret.push_back(char_array_3[i]);
// Final partial group: fill the unused slots from index i upward (the fill
// statement itself is not visible here), then decode as above.
497 for (j = i; j <4; j++)
500 for (j = 0; j <4; j++)
501 char_array_4[j] = base64_decode_map[char_array_4[j]];
503 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
504 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
505 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
// i leftover characters carry only i - 1 complete bytes.
507 for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
511 //------------------------------------------------------------------------------
// Lower-case hexadecimal digits indexed by nibble value (0..15).
512 const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
// Hex-encodes the bytes held in a std::string.
// NOTE(review): only one argument line of the body is visible here;
// presumably it forwards to the (pointer, length) overload - confirm.
514 std::string encodeHex(const std::string& bytes)
// The string's characters reinterpreted as raw unsigned bytes.
517 reinterpret_cast<const unsigned char*>(bytes.c_str()),
// Hex-encodes `length` raw bytes starting at rawBytes: each byte becomes two
// lower-case hexadecimal digits, high nibble first.
522 std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
// Pre-size the output: exactly two characters per input byte.
524 std::string hex(length * 2, '\0');
525 for (unsigned int i=0; i<length;++i) {
526 unsigned char c = *rawBytes++;
// High nibble, then low nibble.
527 hex[i * 2] = hexChar[c >> 4];
528 hex[i * 2 + 1] = hexChar[c & 0x0f];
534 //------------------------------------------------------------------------------
/**
 * Expand C-style backslash escape sequences in a NUL-terminated string.
 *
 * Recognised escapes:
 *  - \\ \n \r \t \v \f \a \b  the usual single-character escapes
 *  - \xH, \xHH                 a byte given by up to two hexadecimal digits
 *                              (\x followed by no hex digit yields a NUL byte)
 *  - \O ... \OOOO              a byte given by one octal digit followed by up
 *                              to three more; the value is narrowed to char
 *  - \<other>                  the character itself, without the backslash
 *
 * A backslash at the very end of the input is dropped, as is a "\x" at the
 * very end.
 *
 * @param s  NUL-terminated input; must not be a null pointer
 * @return   the string with all escape sequences expanded
 */
std::string unescape(const char* s)
{
    std::string r;
    while (*s) {
        if (*s != '\\') {
            r += *s++;
            continue;
        }

        // Step over the backslash; nothing follows a trailing one.
        if (!*++s)
            break;

        switch (*s) {
        case '\\': r += '\\'; break;
        case 'n':  r += '\n'; break;
        case 'r':  r += '\r'; break;
        case 't':  r += '\t'; break;
        case 'v':  r += '\v'; break;
        case 'f':  r += '\f'; break;
        case 'a':  r += '\a'; break;
        case 'b':  r += '\b'; break;

        case 'x': {
            if (!*++s)
                return r;

            int v = 0;
            for (int i = 0; i < 2; ++i, ++s) {
                // The <ctype.h> classifiers are undefined for negative
                // arguments, so bytes >= 0x80 are widened via unsigned char.
                const int ch = static_cast<unsigned char>(*s);
                if (!isxdigit(ch))
                    break;
                v = v * 16 + (isdigit(ch) ? ch - '0' : 10 + tolower(ch) - 'a');
            }
            r += static_cast<char>(v);
            continue; // s already points past the digits
        }

        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7': {
            int v = *s++ - '0';
            for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; ++i, ++s)
                v = v * 8 + (*s - '0');
            r += static_cast<char>(v);
            continue; // s already points past the digits
        }

        default:
            // Unknown escape: keep the character, drop the backslash.
            r += *s;
            break;
        }

        ++s;
    }

    return r;
}
589 string sanitizePrintfFormat(const string& input)
591 string::size_type i = input.find("%n");
592 if (i != string::npos) {
593 SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
600 } // end namespace strutils
602 } // end namespace simgear