3 // Written by Bernie Bright, started 1998
5 // Copyright (C) 1998 Bernie Bright - bbright@bigpond.net.au
7 // This library is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Library General Public
9 // License as published by the Free Software Foundation; either
10 // version 2 of the License, or (at your option) any later version.
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // Library General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with this program; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 #include "strutils.hxx"
29 #include <simgear/debug/logstream.hxx>
30 #include <simgear/package/md5.h>
34 using std::stringstream;
40 * utf8ToLatin1() converts UTF-8 to Latin-1; useful for accented characters (e.g. éâàîè...)
// Return the number of bytes in the UTF-8 sequence whose lead byte is *p.
// The answer comes from the lead byte's high bits only; continuation bytes
// are not inspected here.
42 template <typename Iterator> size_t get_length (Iterator p) {
43 unsigned char c = static_cast<unsigned char> (*p);
// 0xxxxxxx: plain ASCII, a single byte.
44 if (c < 0x80) return 1;
// The top bit is set from here on; each test looks for the first clear bit
// below 0x40. NOTE(review): bit 0x40 itself is never tested, so a stray
// continuation byte (10xxxxxx) is sized by its lower bits as if it were a
// lead byte.
45 else if (!(c & 0x20)) return 2;
46 else if (!(c & 0x10)) return 3;
47 else if (!(c & 0x08)) return 4;
48 else if (!(c & 0x04)) return 5;
// NOTE(review): the final fallback branch and the closing brace are not
// visible in this listing.
// Integer type used to carry a decoded character value.
52 typedef unsigned int value_type;
// Decode the UTF-8 sequence that starts at *p and return its value, or
// 0x00ffffff as soon as a following byte is not a continuation byte.
// p is advanced with ++p once per continuation byte, so when Iterator is a
// reference type the caller's iterator moves as well.
// NOTE(review): nothing here checks for the end of the input before ++p.
53 template <typename Iterator> value_type get_value (Iterator p) {
54 size_t len = get_length (p);
// Single-byte sequence: the byte is the value.
55 if (len == 1) return *p;
// Keep the payload bits of the lead byte (mask 0xff >> (len + 1)) and shift
// them above the 6-bit groups contributed by the bytes that follow.
56 value_type res = static_cast<unsigned char> ( *p & (0xff >> (len + 1))) << ((len - 1) * 6 );
57 for (--len; len; --len) {
// A continuation byte is 10xxxxxx. After subtracting 0x80 only the low six
// bits may be set; for any other byte the (unsigned) result has bits left
// in 0xC0.
58 value_type next_byte = static_cast<unsigned char> (*(++p)) - 0x80;
59 if (next_byte & 0xC0) return 0x00ffffff; // invalid UTF-8
// Drop this 6-bit group into place; the last group lands at shift 0.
60 res |= next_byte << ((len - 1) * 6);
// NOTE(review): the end of the loop and the final return are not visible in
// this listing.
// Convert a UTF-8 string to Latin-1, producing one output byte per decoded
// character. If any decoded value exceeds 0x10ffff the input is returned
// unchanged, on the guess that it was Latin-1 already.
65 string utf8ToLatin1( string& s_utf8 ) {
// get_value is instantiated with string::iterator&, so it leaves p on the
// last byte of the sequence it decoded and the loop's ++p steps to the next
// character.
67 for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) {
68 value_type value = get_value<string::iterator&>(p);
69 if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1
// Values that do not fit in Latin-1 are only logged; the cast on the next
// line still appends the truncated value.
70 if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value);
71 s_latin1 += static_cast<char>(value);
// NOTE(review): the declaration of s_latin1 and the final return are not
// visible in this listing.
// Split str on runs of whitespace (as classified by isspace) and collect the
// words in result. When maxsplit is non-zero and that many words have been
// stored while input remains, the rest of the string is stored as one final
// piece.
// NOTE(review): the declarations of j and countsplit, the loop bodies and the
// return statement are not visible in this listing.
80 split_whitespace( const string& str, int maxsplit )
82 vector<string> result;
83 string::size_type len = str.length();
84 string::size_type i = 0;
// Skip whitespace before a word. The unsigned char cast keeps isspace well
// defined for bytes >= 0x80.
90 while (i < len && isspace((unsigned char)str[i]))
// Scan to the end of the current word.
97 while (i < len && !isspace((unsigned char)str[i]))
// Store the word occupying [j, i).
104 result.push_back( str.substr(j, i-j) );
// Skip the whitespace that follows the word.
106 while (i < len && isspace((unsigned char)str[i]))
// Split limit reached with input left over: keep the remainder verbatim.
111 if (maxsplit && (countsplit >= maxsplit) && i < len)
113 result.push_back( str.substr( i, len-i ) );
// Split str at every occurrence of the separator string sep and return the
// pieces, including empty ones between adjacent separators. A non-zero
// maxsplit limits the number of splits; the unsplit remainder becomes the
// last piece.
126 split( const string& str, const char* sep, int maxsplit )
// Delegation to whitespace splitting. NOTE(review): the guarding condition is
// not visible in this listing - presumably a null sep; confirm.
129 return split_whitespace( str, maxsplit );
131 vector<string> result;
132 int n = std::strlen( sep );
135 // Error: empty separator string
138 const char* s = str.c_str();
139 string::size_type len = str.length();
// i scans the input; j marks the start of the piece being built.
140 string::size_type i = 0;
141 string::size_type j = 0;
// Cheap first-character test, then a full comparison for multi-character
// separators.
146 if (s[i] == sep[0] && (n == 1 || std::memcmp(s+i, sep, n) == 0))
// Store the piece that ends where the separator starts.
148 result.push_back( str.substr(j,i-j) );
151 if (maxsplit && (splitcount >= maxsplit))
// Whatever follows the last separator (possibly empty) is the final piece.
160 result.push_back( str.substr(j,len-j) );
165 * The lstrip(), rstrip() and strip() functions are implemented
166 * in do_strip() which uses an additional parameter to indicate what
167 * type of strip should occur.
const int LEFTSTRIP = 0;
const int RIGHTSTRIP = 1;
const int BOTHSTRIP = 2;

/**
 * Remove whitespace (as classified by isspace) from one or both ends of a
 * string.
 *
 * @param s          string to strip
 * @param striptype  LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP
 * @return the stripped copy; a string made only of whitespace yields an
 *         empty string for every strip type
 */
static string
do_strip( const string& s, int striptype )
{
    const string::size_type len = s.length();

    // [i, j) is the part of s that survives.
    string::size_type i = 0;
    if (striptype != RIGHTSTRIP)
    {
        // The unsigned char cast keeps isspace well defined for bytes >= 0x80.
        while (i < len && isspace(static_cast<unsigned char>(s[i])))
        {
            ++i;
        }
    }

    string::size_type j = len;
    if (striptype != LEFTSTRIP)
    {
        // Look at s[j - 1] so that index 0 is examined too; stopping at i
        // keeps j from crossing the left boundary on all-whitespace input.
        while (j > i && isspace(static_cast<unsigned char>(s[j - 1])))
        {
            --j;
        }
    }

    if (i == 0 && j == len)
    {
        // Nothing to remove.
        return s;
    }

    return s.substr( i, j - i );
}
// Strip the left end of s: delegates to do_strip with LEFTSTRIP.
210 lstrip( const string& s )
212 return do_strip( s, LEFTSTRIP );
// Strip the right end of s: delegates to do_strip with RIGHTSTRIP.
216 rstrip( const string& s )
218 return do_strip( s, RIGHTSTRIP );
// Strip both ends of s: delegates to do_strip with BOTHSTRIP.
222 strip( const string& s )
224 return do_strip( s, BOTHSTRIP );
// Pad s on the right with copies of c until it is `length` characters long.
// A string that is already at least that long is returned unchanged.
228 rpad( const string & s, string::size_type length, char c )
230 string::size_type l = s.length();
231 if( l >= length ) return s;
// NOTE(review): reply is declared on a line not visible in this listing -
// presumably a copy of s; confirm.
233 return reply.append( length-l, c );
// Pad s on the left with copies of c until it is `length` characters long.
// A string that is already at least that long is returned unchanged.
237 lpad( const string & s, size_t length, char c )
239 string::size_type l = s.length();
240 if( l >= length ) return s;
// NOTE(review): reply is declared on a line not visible in this listing -
// presumably a copy of s; confirm. The padding goes in at position 0.
242 return reply.insert( 0, length-l, c );
// True when s begins with substr. compare() clamps the requested length to
// what s actually holds, so a substr longer than s compares unequal rather
// than reading out of range; an empty substr always matches.
246 starts_with( const string & s, const string & substr )
248 return s.compare(0, substr.length(), substr) == 0;
// Test whether s ends with substr by comparing the tail of s against it.
252 ends_with( const string & s, const string & substr )
// A substr longer than s is handled first, which also keeps the subtraction
// below from wrapping. NOTE(review): the statement this guard controls is
// not visible in this listing - presumably `return false`.
254 if( substr.length() > s.length() )
// Compare starting at the offset where substr would have to begin.
// NOTE(review): the remaining arguments of this call continue on lines not
// visible in this listing.
256 return s.compare( s.length() - substr.length(),
/**
 * Normalise the whitespace in a string.
 *
 * Leading and trailing whitespace is dropped and every internal run of
 * whitespace (as classified by isspace) is replaced by a single space.
 *
 * @param s  input string
 * @return the simplified copy; empty if s holds only whitespace
 */
string simplify(const string& s)
{
    string result;
    result.reserve(s.size()); // the output is never longer than the input

    // Set when whitespace has been seen after at least one output character;
    // the single replacement space is emitted only once a further
    // non-space character arrives, so trailing whitespace never reaches
    // the output.
    bool lastWasSpace = false;
    for (string::const_iterator it = s.begin(), end = s.end(); it != end; ++it) {
        const char c = *it;

        // The unsigned char cast keeps isspace well defined for bytes >= 0x80.
        if (isspace(static_cast<unsigned char>(c))) {
            // Whitespace before the first output character is simply dropped.
            lastWasSpace = !result.empty();
            continue;
        }

        if (lastWasSpace) {
            result.push_back(' ');
        }

        lastWasSpace = false;
        result.push_back(c);
    }

    return result;
}
// Convert the text in s to an int, reading it in the given numeric base.
// Of the base handling, only the two cases below are visible: 8 switches the
// stream ss to octal and 16 to hexadecimal.
// NOTE(review): the declaration of ss, any default case, the extraction and
// the return statement are not visible in this listing.
291 int to_int(const std::string& s, int base)
295 case 8: ss >> std::oct; break;
296 case 16: ss >> std::hex; break;
// Compare two dot-separated version strings component by component, using
// the integer value of each component.
// Returns the difference of the first pair of components that differ. If all
// shared components are equal, returns the difference in component counts,
// so "1.0" ranks above "1".
305 int compare_versions(const string& v1, const string& v2)
307 vector<string> v1parts(split(v1, "."));
308 vector<string> v2parts(split(v2, "."));
// Only the components present in both versions can be compared pairwise.
310 int lastPart = std::min(v1parts.size(), v2parts.size());
311 for (int part=0; part < lastPart; ++part) {
312 int part1 = to_int(v1parts[part]);
313 int part2 = to_int(v2parts[part]);
315 if (part1 != part2) {
316 return part1 - part2;
318 } // of parts iteration
320 // reached end - longer wins
// The subtraction is done on unsigned sizes and then converted to the int
// return type.
321 return v1parts.size() - v2parts.size();
// Concatenate the strings in l with joinWith as the separator.
// NOTE(review): the statements that append to the result and return it are
// not visible in this listing; the description is inferred from the loop
// shape below - confirm.
324 string join(const string_list& l, const string& joinWith)
327 unsigned int count = l.size();
328 for (unsigned int i=0; i < count; ++i) {
// True for every element except the last. count is non-zero whenever the
// loop body runs, so count - 1 cannot wrap here.
330 if (i < (count - 1)) {
// Return an upper-cased version of s, built by walking over a string named
// rslt character by character.
// NOTE(review): the declaration of rslt, the per-character statement and the
// return are not visible in this listing - presumably rslt is a copy of s
// converted in place; confirm.
338 string uppercase(const string &s) {
340 for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
// Return a lower-cased version of s, built by walking over a string named
// rslt character by character.
// NOTE(review): the declaration of rslt, the per-character statement and the
// return are not visible in this listing - presumably rslt is a copy of s
// converted in place; confirm.
346 string lowercase(const string &s) {
348 for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
// In-place overload: walks over the characters of the caller's string s
// through a mutable iterator.
// NOTE(review): the per-character statement is not visible in this listing -
// presumably it lower-cases *p; confirm.
354 void lowercase(string &s) {
355 for(string::iterator p = s.begin(); p != s.end(); p++){
360 #if defined(SG_WINDOWS)
// Convert the bytes of `a`, interpreted in the given Windows code page, to
// wide characters using MultiByteToWideChar.
// NOTE(review): the declarations of flags and result, the remaining
// arguments of the first call and the return statement are not visible in
// this listing.
364 static WCharVec convertMultiByteToWString(DWORD encoding, const std::string& a)
// First call: its result is used as the number of wide characters needed.
368 int requiredWideChars = MultiByteToWideChar(encoding, flags,
371 result.resize(requiredWideChars);
// Second call: convert into the buffer that was just sized.
// NOTE(review): neither call's return value is checked for failure.
372 MultiByteToWideChar(encoding, flags, a.c_str(), a.size(),
373 result.data(), result.size());
// Convert a UTF-8 encoded string to wide characters (code page CP_UTF8).
377 WCharVec convertUtf8ToWString(const std::string& a)
379 return convertMultiByteToWString(CP_UTF8, a);
// Re-encode a string from the Windows ANSI code page (CP_ACP) to UTF-8 by
// going through wide characters.
// NOTE(review): the declaration of flags is not visible in this listing.
384 std::string convertWindowsLocal8BitToUtf8(const std::string& a)
// Step 1: local 8-bit -> wide characters.
388 WCharVec wideString = convertMultiByteToWString(CP_ACP, a);
390 // convert down to UTF-8
391 std::vector<char> result;
// Step 2a: with no output buffer (NULL, 0) the call reports the number of
// bytes the UTF-8 form needs.
392 int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
393 wideString.data(), wideString.size(),
394 NULL, 0, NULL, NULL);
395 result.resize(requiredUTF8Chars);
// Step 2b: convert into the sized buffer.
// NOTE(review): neither call's return value is checked for failure.
396 WideCharToMultiByte(CP_UTF8, flags,
397 wideString.data(), wideString.size(),
398 result.data(), result.size(), NULL, NULL);
// Build the string from an explicit length: the buffer is not relied on to
// be NUL-terminated.
399 return std::string(result.data(), result.size());
405 //------------------------------------------------------------------------------
// Compute the MD5 digest of the num bytes at data and return it as a
// lower-case hexadecimal string (via encodeHex).
// NOTE(review): the declaration of md5_ctx is not visible in this listing.
406 std::string md5(const unsigned char* data, size_t num)
409 SG_MD5Init(&md5_ctx);
410 SG_MD5Update(&md5_ctx, data, num);
412 unsigned char digest[MD5_DIGEST_LENGTH];
413 SG_MD5Final(digest, &md5_ctx);
415 return encodeHex(digest, MD5_DIGEST_LENGTH);
418 //------------------------------------------------------------------------------
// Convenience overload for char data: reinterprets the pointer and forwards
// to the unsigned char overload.
419 std::string md5(const char* data, size_t num)
421 return md5(reinterpret_cast<const unsigned char*>(data), num);
424 //------------------------------------------------------------------------------
// Convenience overload for std::string: hashes exactly str.size() bytes, so
// embedded NUL characters are included.
425 std::string md5(const std::string& str)
427 return md5(reinterpret_cast<const unsigned char*>(str.c_str()), str.size());
430 //------------------------------------------------------------------------------
// Base64 alphabet as a string: upper-case letters, then lower-case letters.
// NOTE(review): the rest of the literal continues on a line not visible in
// this listing.
431 static const std::string base64_chars =
432 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
433 "abcdefghijklmnopqrstuvwxyz"
// Decode table indexed by 7-bit character code (0..127), ten entries per row.
// The value is the character's 6-bit Base64 value:
//   'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62, '/' -> 63
// '=' (code 61) maps to 64 and every other code maps to 127.
436 static const unsigned char base64_decode_map[128] =
438 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
439 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
440 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
441 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
442 127, 127, 127, 62, 127, 127, 127, 63, 52, 53,
443 54, 55, 56, 57, 58, 59, 60, 61, 127, 127,
444 127, 64, 127, 127, 127, 0, 1, 2, 3, 4,
445 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
446 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
447 25, 127, 127, 127, 127, 127, 127, 26, 27, 28,
448 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
449 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
450 49, 50, 51, 127, 127, 127, 127, 127
/**
 * Tell whether @a c is one of the 64 characters of the standard Base64
 * alphabet (A-Z, a-z, 0-9, '+', '/'). The padding character '=' is not
 * accepted.
 *
 * The test uses explicit ASCII ranges rather than isalnum(): isalnum()
 * depends on the current locale and may accept bytes >= 0x80, which callers
 * would then use as an index into the 128-entry decode table.
 */
static inline bool is_base64(unsigned char c) {
    return (c >= 'A' && c <= 'Z') ||
           (c >= 'a' && c <= 'z') ||
           (c >= '0' && c <= '9') ||
           (c == '+') || (c == '/');
}
// Whitespace as far as the Base64 decoder is concerned: space, carriage
// return and line feed only (tab is not included).
458 static bool is_whitespace(unsigned char c) {
459 return ((c == ' ') || (c == '\r') || (c == '\n'));
// Decode Base64 text from encoded_string and append the decoded bytes to ret.
// Reading stops at the first '=' or when the input is used up.
// NOTE(review): several declarations (i, j, in_) and the bodies of the
// whitespace / invalid-character branches are not visible in this listing.
462 void decodeBase64(const std::string& encoded_string, std::vector<unsigned char>& ret)
464 int in_len = encoded_string.size();
// char_array_4 collects up to four input characters; char_array_3 receives
// the three bytes they decode to.
468 unsigned char char_array_4[4], char_array_3[3];
470 while (in_len-- && ( encoded_string[in_] != '=')) {
// Whitespace inside the encoded text (handling not visible here).
471 if (is_whitespace( encoded_string[in_])) {
// A character outside the Base64 alphabet (handling not visible here).
476 if (!is_base64(encoded_string[in_])) {
// Collect the character and advance the read position.
480 char_array_4[i++] = encoded_string[in_]; in_++;
// Translate the four collected characters to their 6-bit values.
482 for (i = 0; i <4; i++)
483 char_array_4[i] = base64_decode_map[char_array_4[i]];
// Repack 4 x 6 bits into 3 x 8 bits.
485 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
486 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
487 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
489 for (i = 0; (i < 3); i++)
490 ret.push_back(char_array_3[i]);
// Tail handling for a final group of fewer than four characters: the unused
// slots from i onwards are filled first (statement not visible here).
496 for (j = i; j <4; j++)
499 for (j = 0; j <4; j++)
500 char_array_4[j] = base64_decode_map[char_array_4[j]];
502 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
503 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
504 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
// i leftover characters carry i - 1 complete bytes; only those are emitted.
506 for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
510 //------------------------------------------------------------------------------
// Lower-case hexadecimal digits, indexed by nibble value (0..15).
511 const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
// Hex-encode the bytes of a std::string. The string's data is passed on as
// unsigned char.
// NOTE(review): the call this argument belongs to and its remaining
// arguments are not visible in this listing - presumably the raw-pointer
// encodeHex overload with bytes.size(); confirm.
513 std::string encodeHex(const std::string& bytes)
516 reinterpret_cast<const unsigned char*>(bytes.c_str()),
// Hex-encode `length` bytes starting at rawBytes: each byte becomes two
// lower-case hexadecimal digits, high nibble first.
// NOTE(review): the return statement is not visible in this listing.
521 std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
// Pre-size the output (two characters per byte) so it can be filled by index.
523 std::string hex(length * 2, '\0');
524 for (unsigned int i=0; i<length;++i) {
525 unsigned char c = *rawBytes++;
526 hex[i * 2] = hexChar[c >> 4];
527 hex[i * 2 + 1] = hexChar[c & 0x0f];
533 //------------------------------------------------------------------------------
// Process escape sequences in the C string s and return the result.
// NOTE(review): most of this function is not visible in this listing (the
// scanning loop, what each branch appends, the return). The notes below
// cover only what the visible conditions recognise.
534 std::string unescape(const char* s)
// Single-letter escapes that are recognised: n, r, t, v, f, a, b.
550 } else if (*s == 'n') {
552 } else if (*s == 'r') {
554 } else if (*s == 't') {
556 } else if (*s == 'v') {
558 } else if (*s == 'f') {
560 } else if (*s == 'a') {
562 } else if (*s == 'b') {
// Hexadecimal escape: 'x' followed by at most two hex digits, accumulated
// into v.
564 } else if (*s == 'x') {
// NOTE(review): isxdigit/isdigit/tolower receive a plain char here.
568 for (int i = 0; i < 2 && isxdigit(*s); i++, s++)
569 v = v * 16 + (isdigit(*s) ? *s - '0' : 10 + tolower(*s) - 'a');
// Octal escape: at most three digits in the range 0-7, accumulated into v.
573 } else if (*s >= '0' && *s <= '7') {
575 for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++)
576 v = v * 8 + *s - '0';
// Check a printf-style format string for the "%n" conversion and log a
// warning when it is present.
// NOTE(review): what is returned in either case is not visible in this
// listing.
588 string sanitizePrintfFormat(const string& input)
590 string::size_type i = input.find("%n");
591 if (i != string::npos) {
592 SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
599 } // end namespace strutils
601 } // end namespace simgear