3 // Written by Bernie Bright, started 1998
5 // Copyright (C) 1998 Bernie Bright - bbright@bigpond.net.au
7 // This library is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Library General Public
9 // License as published by the Free Software Foundation; either
10 // version 2 of the License, or (at your option) any later version.
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // Library General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with this program; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 #include "strutils.hxx"
29 #include <simgear/debug/logstream.hxx>
33 using std::stringstream;
39 * utf8ToLatin1() converts UTF-8 to Latin-1; useful for accented characters (e.g. éâàîè...)
// Returns the length in bytes of the UTF-8 sequence whose lead byte is *p.
// Only the lead byte is inspected; p itself is taken by value and not advanced here.
41 template <typename Iterator> size_t get_length (Iterator p) {
42 unsigned char c = static_cast<unsigned char> (*p);
// Below 0x80 the byte is a complete one-byte character.
43 if (c < 0x80) return 1;
// From here on the first clear bit, scanning downwards from 0x20, decides the length.
// NOTE(review): bit 0x40 is never tested, so a stray continuation byte (0x80..0xBF)
// is assigned a length instead of being rejected.
44 else if (!(c & 0x20)) return 2;
45 else if (!(c & 0x10)) return 3;
46 else if (!(c & 0x08)) return 4;
47 else if (!(c & 0x04)) return 5;
// Integer type used for a decoded character value.
51 typedef unsigned int value_type;
// Decodes the UTF-8 sequence starting at p and returns its numeric value.
// Returns 0x00ffffff when a byte after the lead byte is not a continuation byte.
// If Iterator is instantiated as a reference type, the ++p below advances the
// caller's iterator to the last byte consumed.
// NOTE(review): there is no end-of-input check; a truncated sequence reads past
// the bytes that belong to it.
52 template <typename Iterator> value_type get_value (Iterator p) {
53 size_t len = get_length (p);
// One-byte sequence: the value is the byte itself.
54 if (len == 1) return *p;
// Mask off the length marker bits of the lead byte and move the remaining
// payload bits to the most significant position of the result.
55 value_type res = static_cast<unsigned char> ( *p & (0xff >> (len + 1))) << ((len - 1) * 6 );
// One iteration per remaining byte of the sequence.
56 for (--len; len; --len) {
// A continuation byte has the form 10xxxxxx, so subtracting 0x80 leaves 0..0x3F.
57 value_type next_byte = static_cast<unsigned char> (*(++p)) - 0x80;
58 if (next_byte & 0xC0) return 0x00ffffff; // invalid UTF-8
// Merge the six payload bits at the position belonging to this byte.
59 res |= next_byte << ((len - 1) * 6);
// Converts s_utf8 from UTF-8 to Latin-1 by decoding each character and
// appending its value as a single char.
// If an invalid sequence is met, the input is returned unchanged.
64 string utf8ToLatin1( string& s_utf8 ) {
66 for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) {
// get_value is instantiated with an iterator reference, so it leaves p on the
// last byte of the decoded sequence; the loop's ++p then moves to the next one.
67 value_type value = get_value<string::iterator&>(p);
68 if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1
// Values above 0xff have no Latin-1 representation: a warning is logged and the
// value is still narrowed to one char on the next line.
69 if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value);
70 s_latin1 += static_cast<char>(value);
// Splits str into whitespace-separated words and collects them in a vector.
// A non-zero maxsplit limits the number of splits; the unsplit remainder of
// the string is then appended as the final element.
79 split_whitespace( const string& str, int maxsplit )
81 vector<string> result;
82 string::size_type len = str.length();
83 string::size_type i = 0;
// Skip whitespace (the cast keeps isspace() defined for bytes >= 0x80).
89 while (i < len && isspace((unsigned char)str[i]))
// Scan to the end of the current word.
96 while (i < len && !isspace((unsigned char)str[i]))
// Store the word spanning [j, i).
103 result.push_back( str.substr(j, i-j) );
// Skip the whitespace that follows the word.
105 while (i < len && isspace((unsigned char)str[i]))
// Split limit reached with input left over: keep the remainder as one piece.
110 if (maxsplit && (countsplit >= maxsplit) && i < len)
112 result.push_back( str.substr( i, len-i ) );
// Splits str at every occurrence of the separator string sep.
// A non-zero maxsplit limits the number of splits performed; whatever
// follows the last split is appended as the final element.
125 split( const string& str, const char* sep, int maxsplit )
// Whitespace splitting is delegated to split_whitespace()
// (presumably when no separator is supplied - TODO confirm the guard).
128 return split_whitespace( str, maxsplit );
130 vector<string> result;
// Length of the separator in bytes.
131 int n = std::strlen( sep );
134 // Error: empty separator string
137 const char* s = str.c_str();
138 string::size_type len = str.length();
// i is the scan position, j the start of the piece currently being collected.
139 string::size_type i = 0;
140 string::size_type j = 0;
// Cheap first-character test before comparing all n bytes of the separator.
145 if (s[i] == sep[0] && (n == 1 || std::memcmp(s+i, sep, n) == 0))
// Separator found at i: emit the piece [j, i).
147 result.push_back( str.substr(j,i-j) );
// Stop splitting once the requested number of splits has been made.
150 if (maxsplit && (splitcount >= maxsplit))
// Emit whatever follows the last separator.
159 result.push_back( str.substr(j,len-j) );
/**
 * The lstrip(), rstrip() and strip() functions are implemented
 * in do_strip(), which takes an additional parameter selecting
 * which end(s) of the string are stripped.
 */
const int LEFTSTRIP = 0;
const int RIGHTSTRIP = 1;
const int BOTHSTRIP = 2;

/**
 * Remove whitespace (as classified by isspace()) from one or both ends of s.
 *
 * @param s          string to strip
 * @param striptype  LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP
 * @return the stripped string; an unmodified copy of s when there was
 *         nothing to remove
 */
static std::string
do_strip( const std::string& s, int striptype )
{
    const std::string::size_type len = s.length();
    if( len == 0 ) // empty string is trivial
        return s;

    // i: index of the first character that is kept.
    std::string::size_type i = 0;
    if (striptype != RIGHTSTRIP)
    {
        // isspace() is only defined for values representable as unsigned char,
        // so cast to keep bytes >= 0x80 (UTF-8, Latin-1) well defined.
        while (i < len && isspace(static_cast<unsigned char>(s[i])))
        {
            ++i;
        }
    }

    // j: one past the last character that is kept.
    std::string::size_type j = len;
    if (striptype != LEFTSTRIP)
    {
        // Test s[j - 1] so that index 0 is examined too; stopping at i keeps
        // the unsigned index from wrapping on all-whitespace input.
        while (j > i && isspace(static_cast<unsigned char>(s[j - 1])))
        {
            --j;
        }
    }

    if (i == 0 && j == len)
    {
        return s;
    }

    return s.substr( i, j - i );
}

/** Return a copy of s with leading whitespace removed. */
std::string
lstrip( const std::string& s )
{
    return do_strip( s, LEFTSTRIP );
}

/** Return a copy of s with trailing whitespace removed. */
std::string
rstrip( const std::string& s )
{
    return do_strip( s, RIGHTSTRIP );
}

/** Return a copy of s with leading and trailing whitespace removed. */
std::string
strip( const std::string& s )
{
    return do_strip( s, BOTHSTRIP );
}
// Pads s on the right with the character c until it is `length` characters long.
// A string that is already at least that long is returned unchanged.
227 rpad( const string & s, string::size_type length, char c )
229 string::size_type l = s.length();
230 if( l >= length ) return s;
// Append the missing (length - l) fill characters.
232 return reply.append( length-l, c );
// Pads s on the left with the character c until it is `length` characters long.
// A string that is already at least that long is returned unchanged.
236 lpad( const string & s, size_t length, char c )
238 string::size_type l = s.length();
239 if( l >= length ) return s;
// Insert the missing (length - l) fill characters at the front.
241 return reply.insert( 0, length-l, c );
// Tests whether s begins with substr.
245 starts_with( const string & s, const string & substr )
// Compare the first substr.length() characters of s with substr; when s is
// shorter, compare() clamps the range and the result is non-zero.
247 return s.compare(0, substr.length(), substr) == 0;
// Tests whether s ends with substr.
251 ends_with( const string & s, const string & substr )
// A suffix longer than the string is handled separately; this also keeps the
// offset subtraction below from wrapping around.
253 if( substr.length() > s.length() )
// Compare the tail of s, starting substr.length() characters before its end.
255 return s.compare( s.length() - substr.length(),
/**
 * Collapse the whitespace in s: leading and trailing whitespace is dropped
 * and every interior run of whitespace is replaced by a single space.
 *
 * @param s  input string
 * @return the simplified copy of s
 */
std::string simplify(const std::string& s)
{
    std::string result;
    result.reserve(s.size()); // the output is never longer than the input

    std::string::const_iterator it = s.begin(),
        end = s.end();

    // advance to first non-space char - simplifies logic in main loop,
    // since we can always prepend a single space when we see a
    // space -> non-space transition.
    // isspace() is only defined for values representable as unsigned char,
    // hence the cast (bytes >= 0x80 would otherwise be passed as negatives).
    for (; (it != end) && isspace(static_cast<unsigned char>(*it)); ++it) { /* nothing */ }

    bool lastWasSpace = false;
    for (; it != end; ++it) {
        const char c = *it;
        if (isspace(static_cast<unsigned char>(c))) {
            // remember the run; it is only emitted if a non-space follows,
            // which is what discards trailing whitespace
            lastWasSpace = true;
            continue;
        }

        if (lastWasSpace) {
            result.push_back(' ');
        }

        lastWasSpace = false;
        result.push_back(c);
    }

    return result;
}
// Parses s as an integer in the given base.
290 int to_int(const std::string& s, int base)
// Bases 8 and 16 switch the stream to octal / hexadecimal parsing; no other
// base is handled by the visible cases.
294 case 8: ss >> std::oct; break;
295 case 16: ss >> std::hex; break;
// Compares two dot-separated version strings numerically, part by part.
// Returns the difference of the first pair of parts that differ (negative
// when v1 is the lower version), otherwise the difference in part counts.
304 int compare_versions(const string& v1, const string& v2)
306 vector<string> v1parts(split(v1, "."));
307 vector<string> v2parts(split(v2, "."));
// Only the parts present in both versions can be compared pairwise.
309 int lastPart = std::min(v1parts.size(), v2parts.size());
310 for (int part=0; part < lastPart; ++part) {
311 int part1 = to_int(v1parts[part]);
312 int part2 = to_int(v2parts[part]);
// The first differing part decides the result.
314 if (part1 != part2) {
315 return part1 - part2;
317 } // of parts iteration
319 // reached end - longer wins
320 return v1parts.size() - v2parts.size();
// Joins the strings in l into a single string, using joinWith as the separator.
323 string join(const string_list& l, const string& joinWith)
326 unsigned int count = l.size();
327 for (unsigned int i=0; i < count; ++i) {
// True for every element except the last - presumably where the separator is
// appended (TODO confirm). count - 1 cannot wrap here because the loop body
// only runs when count > 0.
329 if (i < (count - 1)) {
// Returns an upper-case version of s; the argument itself is const and is not modified.
337 string uppercase(const string &s) {
// Visit every character of the working copy rslt
// (presumably converting each with toupper() - TODO confirm).
339 for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
// Returns a lower-case version of s; the argument itself is const and is not modified.
345 string lowercase(const string &s) {
// Visit every character of the working copy rslt
// (presumably converting each with tolower() - TODO confirm).
347 for(string::iterator p = rslt.begin(); p != rslt.end(); p++){
// In-place overload: operates directly on the characters of s.
353 void lowercase(string &s) {
// Visit every character of s (presumably converting each with tolower() - TODO confirm).
354 for(string::iterator p = s.begin(); p != s.end(); p++){
359 #if defined(SG_WINDOWS)
// Converts the bytes of a, interpreted in the given Windows code page, into
// wide characters using MultiByteToWideChar().
363 static WCharVec convertMultiByteToWString(DWORD encoding, const std::string& a)
// First call: obtain the number of wide characters the conversion needs.
367 int requiredWideChars = MultiByteToWideChar(encoding, flags,
370 result.resize(requiredWideChars);
// Second call: perform the conversion into the buffer sized above. The input
// length is passed explicitly (a.size()).
371 MultiByteToWideChar(encoding, flags, a.c_str(), a.size(),
372 result.data(), result.size());
// Converts the UTF-8 encoded string a to wide characters.
376 WCharVec convertUtf8ToWString(const std::string& a)
// CP_UTF8 selects UTF-8 as the source encoding.
378 return convertMultiByteToWString(CP_UTF8, a);
// Re-encodes a from the system ANSI code page (CP_ACP) to UTF-8, going
// through an intermediate wide-character string.
383 std::string convertWindowsLocal8BitToUtf8(const std::string& a)
// Step 1: local 8-bit -> wide characters.
387 WCharVec wideString = convertMultiByteToWString(CP_ACP, a);
389 // convert down to UTF-8
390 std::vector<char> result;
// Sizing call: with a NULL output buffer of size 0 the function reports the
// number of bytes the UTF-8 output needs.
391 int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
392 wideString.data(), wideString.size(),
393 NULL, 0, NULL, NULL);
394 result.resize(requiredUTF8Chars);
// Conversion call: fill the buffer sized above.
395 WideCharToMultiByte(CP_UTF8, flags,
396 wideString.data(), wideString.size(),
397 result.data(), result.size(), NULL, NULL);
// Build the string from an explicit length; the buffer is not NUL-terminated.
398 return std::string(result.data(), result.size());
// Base64 alphabet as one string (upper-case letters, then lower-case letters, ...).
406 static const std::string base64_chars =
407 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
408 "abcdefghijklmnopqrstuvwxyz"
// Lookup table from 7-bit ASCII code to 6-bit Base64 value, ten entries per row.
// 127 marks characters outside the alphabet; '=' (code 61) maps to 64.
411 static const unsigned char base64_decode_map[128] =
413 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
414 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
415 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
416 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
// codes 40-49: '+' (43) -> 62, '/' (47) -> 63, '0' (48) -> 52, '1' (49) -> 53
417 127, 127, 127, 62, 127, 127, 127, 63, 52, 53,
// codes 50-59: '2'..'9' -> 54..61
418 54, 55, 56, 57, 58, 59, 60, 61, 127, 127,
// codes 60-69: '=' (61) -> 64, 'A' (65) onwards -> 0..
419 127, 64, 127, 127, 127, 0, 1, 2, 3, 4,
420 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
421 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
// codes 90-99: 'Z' (90) -> 25, 'a' (97) onwards -> 26..
422 25, 127, 127, 127, 127, 127, 127, 26, 27, 28,
423 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
424 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
// codes 120-127: 'x', 'y', 'z' -> 49, 50, 51
425 49, 50, 51, 127, 127, 127, 127, 127
// True when c belongs to the Base64 alphabet: alphanumeric, '+' or '/'.
// NOTE(review): isalnum() is locale dependent; in a locale where it accepts
// bytes >= 128 such a byte would pass this test - confirm this cannot happen.
429 static inline bool is_base64(unsigned char c) {
430 return (isalnum(c) || (c == '+') || (c == '/'));
// True for space, carriage return and line feed only (tab is not included).
433 static bool is_whitespace(unsigned char c) {
434 return ((c == ' ') || (c == '\r') || (c == '\n'));
// Decodes the Base64 text in encoded_string and appends the decoded bytes to ret.
// Decoding proceeds in groups of four input characters yielding three bytes each.
437 void decodeBase64(const std::string& encoded_string, std::vector<unsigned char>& ret)
439 int in_len = encoded_string.size();
// char_array_4 collects one group of input characters, char_array_3 the decoded bytes.
443 unsigned char char_array_4[4], char_array_3[3];
// Consume input until it is exhausted or the first '=' padding character is reached.
445 while (in_len-- && ( encoded_string[in_] != '=')) {
// Whitespace inside the encoded text gets special handling (presumably skipped - TODO confirm).
446 if (is_whitespace( encoded_string[in_])) {
// Any other character outside the Base64 alphabet is also treated specially
// (presumably it ends decoding - TODO confirm).
451 if (!is_base64(encoded_string[in_])) {
455 char_array_4[i++] = encoded_string[in_]; in_++;
// Translate the four collected characters to their 6-bit values.
// NOTE(review): the table has 128 entries; this relies on is_base64() only
// admitting 7-bit characters.
457 for (i = 0; i <4; i++)
458 char_array_4[i] = base64_decode_map[char_array_4[i]];
// Repack 4 x 6 bits into 3 x 8 bits.
460 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
461 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
462 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
464 for (i = 0; (i < 3); i++)
465 ret.push_back(char_array_3[i]);
// Trailing partial group: slots i..3 are filled in first (presumably with zero - TODO confirm).
471 for (j = i; j <4; j++)
474 for (j = 0; j <4; j++)
475 char_array_4[j] = base64_decode_map[char_array_4[j]];
477 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
478 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
479 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
// i leftover characters carry i - 1 complete bytes.
481 for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
/// Lower-case hexadecimal digits, indexed by nibble value (0..15).
const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

/**
 * Encode every byte of a string as two lower-case hexadecimal digits.
 *
 * @param bytes  raw bytes to encode; embedded NUL bytes are encoded like any other
 * @return string of length 2 * bytes.size(), high nibble first
 */
std::string encodeHex(const std::string& bytes)
{
    const std::string::size_type count = bytes.size();

    std::string hex;
    hex.reserve(count * 2); // output size is known up front

    // The index has the same type as the size so it cannot wrap before
    // reaching count.
    for (std::string::size_type i = 0; i < count; ++i) {
        // go through unsigned char so bytes >= 0x80 shift and mask correctly
        const unsigned char c = static_cast<unsigned char>(bytes[i]);
        hex.push_back(hexChar[c >> 4]);
        hex.push_back(hexChar[c & 0x0f]);
    }

    return hex;
}

/**
 * Encode a raw byte buffer as lower-case hexadecimal digits.
 *
 * @param rawBytes  pointer to the first byte; must reference at least
 *                  length readable bytes
 * @param length    number of bytes to encode
 * @return string of length 2 * length, high nibble first
 */
std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
{
    std::string hex;
    hex.reserve(static_cast<std::string::size_type>(length) * 2);

    for (unsigned int i = 0; i < length; ++i) {
        const unsigned char c = rawBytes[i];
        hex.push_back(hexChar[c >> 4]);
        hex.push_back(hexChar[c & 0x0f]);
    }

    return hex;
}
512 //------------------------------------------------------------------------------
// Expands backslash escape sequences in the C string s and returns the result.
// Handled here: the single-letter escapes n r t v f a b, \x followed by up to
// two hex digits, and up to three octal digits.
// NOTE(review): isxdigit()/isdigit()/tolower() are given a plain char; bytes
// >= 0x80 should be cast to unsigned char first.
513 std::string unescape(const char* s)
// Single-letter escapes: each branch selects the matching control character.
529 } else if (*s == 'n') {
531 } else if (*s == 'r') {
533 } else if (*s == 't') {
535 } else if (*s == 'v') {
537 } else if (*s == 'f') {
539 } else if (*s == 'a') {
541 } else if (*s == 'b') {
// Hexadecimal escape: accumulate at most two hex digits into v.
543 } else if (*s == 'x') {
547 for (int i = 0; i < 2 && isxdigit(*s); i++, s++)
// Decimal digits map to 0..9, letters (case-insensitively) to 10..15.
548 v = v * 16 + (isdigit(*s) ? *s - '0' : 10 + tolower(*s) - 'a');
// Octal escape: accumulate at most three octal digits into v.
552 } else if (*s >= '0' && *s <= '7') {
554 for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++)
555 v = v * 8 + *s - '0';
// Checks a printf-style format string for the "%n" conversion, which makes
// printf write to memory, and logs a warning when it is present.
// NOTE(review): this is a plain substring search, so an escaped "%%n" also
// matches, while variants with flags or a width between '%' and 'n' do not.
567 string sanitizePrintfFormat(const string& input)
569 string::size_type i = input.find("%n");
570 if (i != string::npos) {
571 SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
578 } // end namespace strutils
580 } // end namespace simgear