3 // Written by Bernie Bright, started 1998
5 // Copyright (C) 1998 Bernie Bright - bbright@bigpond.net.au
7 // This library is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Library General Public
9 // License as published by the Free Software Foundation; either
10 // version 2 of the License, or (at your option) any later version.
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // Library General Public License for more details.
17 // You should have received a copy of the GNU General Public License
18 // along with this program; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 #include <string.h> // strerror_r() and strerror_s()
30 #include "strutils.hxx"
32 #include <simgear/debug/logstream.hxx>
33 #include <simgear/package/md5.h>
34 #include <simgear/compiler.h> // SG_WINDOWS
38 using std::stringstream;
44 * utf8ToLatin1() converts UTF-8 to Latin-1; useful for accented characters (e.g. éâàîè...)
/**
 * Number of bytes in the UTF-8 sequence introduced by the byte at @a p.
 *
 * Only the lead byte is inspected: values below 0x80 give 1, otherwise the
 * bits 0x20, 0x10, 0x08 and 0x04 are tested in turn and the first clear one
 * selects a length of 2, 3, 4 or 5; if all are set the result is 6.
 * The lead byte is not validated in any other way.
 */
template <typename Iterator> size_t get_length (Iterator p) {
    const unsigned char lead = static_cast<unsigned char>(*p);
    if (lead < 0x80)
        return 1;

    size_t length = 2;
    for (unsigned char bit = 0x20; bit >= 0x04; bit >>= 1, ++length) {
        if (!(lead & bit))
            return length;
    }
    return 6;
}
typedef unsigned int value_type;

/**
 * Decode the UTF-8 sequence that starts at @a p.
 *
 * The sequence length comes from get_length(). A one-byte sequence is
 * returned as is. Otherwise the payload bits of the lead byte and of each
 * following byte are assembled, six bits per continuation byte. If a
 * following byte is not of the form 10xxxxxx, 0x00ffffff is returned as an
 * "invalid UTF-8" marker.
 *
 * @a p is pre-incremented once per continuation byte, so when Iterator is a
 * reference type the caller's iterator ends up on the last byte consumed.
 * No end-of-input check is made here.
 */
template <typename Iterator> value_type get_value (Iterator p) {
    size_t remaining = get_length(p);
    if (remaining == 1)
        return *p;

    // Payload bits of the lead byte, moved to the top of the code point.
    const unsigned char leadBits =
        static_cast<unsigned char>(*p & (0xff >> (remaining + 1)));
    value_type result = leadBits << ((remaining - 1) * 6);

    while (--remaining) {
        const value_type continuation =
            static_cast<unsigned char>(*(++p)) - 0x80;
        if (continuation & 0xC0)
            return 0x00ffffff; // invalid UTF-8
        result |= continuation << ((remaining - 1) * 6);
    }
    return result;
}
69 string utf8ToLatin1( string& s_utf8 ) {
71 for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) {
72 value_type value = get_value<string::iterator&>(p);
73 if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1
74 if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value);
75 s_latin1 += static_cast<char>(value);
/**
 * Split @a str into whitespace-separated words.
 *
 * Runs of whitespace act as a single separator; leading and trailing
 * whitespace produce no empty entries. When @a maxsplit is non-zero and that
 * many words have been extracted while text remains, the remainder of the
 * string (after skipping the separating whitespace) is appended unsplit as
 * the final entry.
 */
static vector<string>
split_whitespace( const string& str, int maxsplit )
{
    vector<string> words;
    const string::size_type length = str.length();
    string::size_type pos = 0;
    int wordsTaken = 0;

    while (pos < length) {
        // skip the separator run in front of the next word
        while (pos < length && isspace(static_cast<unsigned char>(str[pos])))
            ++pos;

        const string::size_type wordStart = pos;
        while (pos < length && !isspace(static_cast<unsigned char>(str[pos])))
            ++pos;

        if (wordStart == pos)
            continue; // only reachable at end of input

        words.push_back( str.substr(wordStart, pos - wordStart) );
        ++wordsTaken;

        while (pos < length && isspace(static_cast<unsigned char>(str[pos])))
            ++pos;

        if (maxsplit && (wordsTaken >= maxsplit) && pos < length) {
            // split limit reached: keep the rest in one piece
            words.push_back( str.substr( pos, length - pos ) );
            pos = length;
        }
    }

    return words;
}
130 split( const string& str, const char* sep, int maxsplit )
133 return split_whitespace( str, maxsplit );
135 vector<string> result;
136 int n = std::strlen( sep );
139 // Error: empty separator string
142 const char* s = str.c_str();
143 string::size_type len = str.length();
144 string::size_type i = 0;
145 string::size_type j = 0;
150 if (s[i] == sep[0] && (n == 1 || std::memcmp(s+i, sep, n) == 0))
152 result.push_back( str.substr(j,i-j) );
155 if (maxsplit && (splitcount >= maxsplit))
164 result.push_back( str.substr(j,len-j) );
169 * The lstrip(), rstrip() and strip() functions are implemented
170 * in do_strip() which uses an additional parameter to indicate what
171 * type of strip should occur.
const int LEFTSTRIP = 0;
const int RIGHTSTRIP = 1;
const int BOTHSTRIP = 2;

/**
 * Remove whitespace from one or both ends of @a s.
 *
 * @param s         String to strip.
 * @param striptype LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP.
 * @return The stripped copy; a string consisting only of whitespace yields
 *         an empty string on whichever sides are being stripped.
 */
static string
do_strip( const string& s, int striptype )
{
    const string::size_type len = s.length();
    if( len == 0 ) // empty string is trivial
        return s;

    // isspace() requires a value representable as unsigned char, hence the
    // casts: plain char may be negative for non-ASCII bytes.
    string::size_type i = 0;
    if (striptype != RIGHTSTRIP)
    {
        while (i < len && isspace(static_cast<unsigned char>(s[i])))
            ++i;
    }

    // j is one past the last character kept. Testing s[j - 1] with j > i
    // also examines the first character, so an all-whitespace string is
    // stripped completely instead of keeping one blank.
    string::size_type j = len;
    if (striptype != LEFTSTRIP)
    {
        while (j > i && isspace(static_cast<unsigned char>(s[j - 1])))
            --j;
    }

    if (i == 0 && j == len)
        return s; // nothing to remove

    return s.substr( i, j - i );
}
214 lstrip( const string& s )
216 return do_strip( s, LEFTSTRIP );
220 rstrip( const string& s )
222 return do_strip( s, RIGHTSTRIP );
226 strip( const string& s )
228 return do_strip( s, BOTHSTRIP );
/**
 * Pad @a s on the right with @a c until it is @a length characters long.
 * Strings that are already at least @a length long are returned unchanged.
 */
string
rpad( const string & s, string::size_type length, char c )
{
    const string::size_type have = s.length();
    if (have < length) {
        string padded(s);
        padded.append( length - have, c );
        return padded;
    }
    return s;
}
/**
 * Pad @a s on the left with @a c until it is @a length characters long.
 * Strings that are already at least @a length long are returned unchanged.
 */
string
lpad( const string & s, size_t length, char c )
{
    const string::size_type have = s.length();
    if (have < length) {
        string padded( length - have, c );
        padded += s;
        return padded;
    }
    return s;
}
/**
 * Test whether @a s begins with @a substr.
 * An empty @a substr matches every string.
 */
bool
starts_with( const string & s, const string & substr )
{
    const string::size_type prefixLen = substr.length();
    // compare() clamps the range to the end of s, so a too-short s simply
    // compares unequal.
    const int order = s.compare( 0, prefixLen, substr );
    return order == 0;
}
/**
 * Test whether @a s ends with @a substr.
 * An empty @a substr matches every string.
 */
bool
ends_with( const string & s, const string & substr )
{
    const string::size_type whole = s.length();
    const string::size_type tail = substr.length();

    // A suffix longer than the string itself can never match.
    return tail <= whole
        && s.compare( whole - tail, tail, substr ) == 0;
}
/**
 * Normalise the whitespace in @a s.
 *
 * Leading and trailing whitespace is removed and every internal run of
 * whitespace characters is replaced by a single space.
 */
string simplify(const string& s)
{
    string result; // reserve size of 's'?
    string::const_iterator it = s.begin(),
        end = s.end();

    // advance to first non-space char - simplifies logic in main loop,
    // since we can always prepend a single space when we see a
    // space -> non-space transition
    // (isspace() needs an unsigned char value: plain char may be negative
    // for non-ASCII bytes, which would be undefined behaviour)
    for (; (it != end) && isspace(static_cast<unsigned char>(*it)); ++it) { /* nothing */ }

    bool lastWasSpace = false;
    for (; it != end; ++it) {
        const char c = *it;
        if (isspace(static_cast<unsigned char>(c))) {
            // remember the run; it is only emitted if more text follows
            lastWasSpace = true;
            continue;
        }

        if (lastWasSpace) {
            result.push_back(' ');
        }

        lastWasSpace = false;
        result.push_back(c);
    }

    return result;
}
/**
 * Parse an integer from the start of @a s.
 *
 * @param s    Text to parse; leading whitespace is skipped by the stream.
 * @param base 8 selects octal and 16 hexadecimal parsing; any other value
 *             leaves the stream in its default (decimal) mode.
 * @return The parsed value, or 0 if no number could be extracted.
 */
int to_int(const std::string& s, int base)
{
    stringstream ss(s);
    switch (base) {
    case 8:      ss >> std::oct; break;
    case 16:     ss >> std::hex; break;
    default: break;
    }

    // Must be initialised: for empty or all-blank input the stream sentry
    // fails and the extraction operator never writes to its argument.
    int result = 0;
    ss >> result;
    return result;
}
309 int compare_versions(const string& v1, const string& v2)
311 vector<string> v1parts(split(v1, "."));
312 vector<string> v2parts(split(v2, "."));
314 int lastPart = std::min(v1parts.size(), v2parts.size());
315 for (int part=0; part < lastPart; ++part) {
316 int part1 = to_int(v1parts[part]);
317 int part2 = to_int(v2parts[part]);
319 if (part1 != part2) {
320 return part1 - part2;
322 } // of parts iteration
324 // reached end - longer wins
325 return v1parts.size() - v2parts.size();
328 string join(const string_list& l, const string& joinWith)
331 unsigned int count = l.size();
332 for (unsigned int i=0; i < count; ++i) {
334 if (i < (count - 1)) {
/**
 * Return a copy of @a s with every character passed through toupper().
 */
string uppercase(const string &s) {
    string rslt(s);
    for (string::iterator p = rslt.begin(); p != rslt.end(); ++p) {
        // toupper() is only defined for values representable as unsigned
        // char (or EOF); plain char may be negative for non-ASCII bytes.
        *p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
    }
    return rslt;
}
/**
 * Return a copy of @a s with every character passed through tolower().
 */
string lowercase(const string &s) {
    string rslt(s);
    for (string::iterator p = rslt.begin(); p != rslt.end(); ++p) {
        // tolower() is only defined for values representable as unsigned
        // char (or EOF); plain char may be negative for non-ASCII bytes.
        *p = static_cast<char>(tolower(static_cast<unsigned char>(*p)));
    }
    return rslt;
}
/**
 * Convert @a s to lower case in place by passing every character through
 * tolower().
 */
void lowercase(string &s) {
    for (string::iterator p = s.begin(); p != s.end(); ++p) {
        // tolower() is only defined for values representable as unsigned
        // char (or EOF); plain char may be negative for non-ASCII bytes.
        *p = static_cast<char>(tolower(static_cast<unsigned char>(*p)));
    }
}
364 #if defined(SG_WINDOWS)
// Convert the narrow string 'a', interpreted in the Windows code page
// 'encoding', into a WCharVec using MultiByteToWideChar().
// NOTE(review): the declarations of 'result' and 'flags' are not visible in
// this excerpt.
368 static WCharVec convertMultiByteToWString(DWORD encoding, const std::string& a)
// First call: its return value is used as the number of wide characters to
// allocate (the trailing arguments are not visible in this excerpt).
372 int requiredWideChars = MultiByteToWideChar(encoding, flags,
375 result.resize(requiredWideChars);
// Second call: convert into the resized buffer. The input length is given
// explicitly as a.size().
// NOTE(review): the return value of this call is ignored, and the first
// call's result is passed to resize() without a failure check.
376 MultiByteToWideChar(encoding, flags, a.c_str(), a.size(),
377 result.data(), result.size());
381 WCharVec convertUtf8ToWString(const std::string& a)
383 return convertMultiByteToWString(CP_UTF8, a);
// Re-encode 'a' from the Windows ANSI code page (CP_ACP) to UTF-8 by going
// through an intermediate wide-character string.
// NOTE(review): the declaration of 'flags' and any platform conditionals
// around this body are not visible in this excerpt.
388 std::string convertWindowsLocal8BitToUtf8(const std::string& a)
// Step 1: local 8-bit -> wide characters.
392 WCharVec wideString = convertMultiByteToWString(CP_ACP, a);
394 // convert down to UTF-8
395 std::vector<char> result;
// Step 2a: call with a NULL output buffer of size 0; the return value is used
// as the number of bytes to allocate.
396 int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags,
397 wideString.data(), wideString.size(),
398 NULL, 0, NULL, NULL);
399 result.resize(requiredUTF8Chars);
// Step 2b: perform the conversion into the resized buffer.
// NOTE(review): neither WideCharToMultiByte() result is checked for failure.
400 WideCharToMultiByte(CP_UTF8, flags,
401 wideString.data(), wideString.size(),
402 result.data(), result.size(), NULL, NULL);
// Build the string from an explicit (pointer, length) pair.
403 return std::string(result.data(), result.size());
409 //------------------------------------------------------------------------------
// Compute the MD5 digest of the 'num' bytes starting at 'data' and return it
// as a hexadecimal string produced by encodeHex().
// NOTE(review): the declaration of 'md5_ctx' is not visible in this excerpt.
410 std::string md5(const unsigned char* data, size_t num)
// Initialise the context, then feed the whole buffer in a single update.
413 SG_MD5Init(&md5_ctx);
414 SG_MD5Update(&md5_ctx, data, num);
// Receive the raw digest bytes, then hex-encode them for the caller.
416 unsigned char digest[MD5_DIGEST_LENGTH];
417 SG_MD5Final(digest, &md5_ctx);
419 return encodeHex(digest, MD5_DIGEST_LENGTH);
422 //------------------------------------------------------------------------------
423 std::string md5(const char* data, size_t num)
425 return md5(reinterpret_cast<const unsigned char*>(data), num);
428 //------------------------------------------------------------------------------
429 std::string md5(const std::string& str)
431 return md5(reinterpret_cast<const unsigned char*>(str.c_str()), str.size());
434 //------------------------------------------------------------------------------
// The Base64 alphabet in value order: 'A'-'Z' are 0-25, 'a'-'z' are 26-51,
// '0'-'9' are 52-61, '+' is 62 and '/' is 63.
static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Maps a 7-bit character code to its Base64 value.
//   0..63 : value of a Base64 alphabet character
//   64    : the padding character '='
//   127   : character is not part of the Base64 alphabet
// Laid out 16 entries per row; the row comment gives the first index.
static const unsigned char base64_decode_map[128] =
{
    /* 0x00 */ 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    /* 0x10 */ 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    /* 0x20 */ 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,  62, 127, 127, 127,  63,
    /* 0x30 */  52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 127, 127, 127,  64, 127, 127,
    /* 0x40 */ 127,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
    /* 0x50 */  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 127, 127, 127, 127, 127,
    /* 0x60 */ 127,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
    /* 0x70 */  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 127, 127, 127, 127, 127
};
// True if 'c' belongs to the Base64 alphabet (A-Z, a-z, 0-9, '+', '/').
// The ranges are tested explicitly rather than with isalnum(), whose result
// depends on the current C locale: a byte >= 0x80 accepted there would later
// be used as an index into the 128-entry decode table.
static inline bool is_base64(unsigned char c) {
    return (c >= 'A' && c <= 'Z')
        || (c >= 'a' && c <= 'z')
        || (c >= '0' && c <= '9')
        || (c == '+') || (c == '/');
}
// True for the characters treated as ignorable whitespace by the Base64
// decoder: space, carriage return and line feed (tab is not included).
static bool is_whitespace(unsigned char c) {
    switch (c) {
    case ' ':
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
// Decode the Base64 text in 'encoded_string', appending the decoded bytes to
// 'ret' (existing contents of 'ret' are kept; only push_back() is used).
// NOTE(review): several statements of this function (counter declarations,
// the bodies of the whitespace / invalid-character branches, the group-size
// test) are not visible in this excerpt.
466 void decodeBase64(const std::string& encoded_string, std::vector<unsigned char>& ret)
468 int in_len = encoded_string.size();
// char_array_4 collects one group of four input characters; char_array_3
// receives the three bytes they decode to.
472 unsigned char char_array_4[4], char_array_3[3];
// Consume input until it is exhausted or the padding character '=' is met.
474 while (in_len-- && ( encoded_string[in_] != '=')) {
// Whitespace (space, CR, LF) gets special handling here.
475 if (is_whitespace( encoded_string[in_])) {
// Characters outside the Base64 alphabet get special handling here.
480 if (!is_base64(encoded_string[in_])) {
484 char_array_4[i++] = encoded_string[in_]; in_++;
// Translate the four collected characters to their 6-bit values ...
486 for (i = 0; i <4; i++)
487 char_array_4[i] = base64_decode_map[char_array_4[i]];
// ... and repack 4 x 6 bits into 3 x 8 bits.
489 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
490 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
491 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
493 for (i = 0; (i < 3); i++)
494 ret.push_back(char_array_3[i]);
// Tail handling: the slots from index i up to 3 are filled in (statement not
// visible), then the group is translated and repacked as above.
500 for (j = i; j <4; j++)
503 for (j = 0; j <4; j++)
504 char_array_4[j] = base64_decode_map[char_array_4[j]];
506 char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
507 char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
508 char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
// Only the first i - 1 bytes are emitted for a partial group of i characters.
510 for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
514 //------------------------------------------------------------------------------
// Lower-case hexadecimal digits indexed by nibble value (0-15).
// Deliberately a plain 16-element array without a terminating NUL.
const char hexChar[] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
517 std::string encodeHex(const std::string& bytes)
520 reinterpret_cast<const unsigned char*>(bytes.c_str()),
525 std::string encodeHex(const unsigned char* rawBytes, unsigned int length)
527 std::string hex(length * 2, '\0');
528 for (unsigned int i=0; i<length;++i) {
529 unsigned char c = *rawBytes++;
530 hex[i * 2] = hexChar[c >> 4];
531 hex[i * 2 + 1] = hexChar[c & 0x0f];
537 //------------------------------------------------------------------------------
// Translate escape sequences in the C string 's' and return the result.
// NOTE(review): only the dispatch on the escape character is visible in this
// excerpt; the handling of the introducing character (presumably a
// backslash) and the text appended by each branch are not visible.
538 std::string unescape(const char* s)
// Single-letter escapes: n, r, t, v, f, a, b.
554 } else if (*s == 'n') {
556 } else if (*s == 'r') {
558 } else if (*s == 't') {
560 } else if (*s == 'v') {
562 } else if (*s == 'f') {
564 } else if (*s == 'a') {
566 } else if (*s == 'b') {
// Hexadecimal escape introduced by 'x': this loop accumulates at most two
// hex digits into v.
// NOTE(review): isxdigit(), isdigit() and tolower() are given a plain char;
// a negative value (byte >= 0x80 where char is signed) is undefined
// behaviour - consider casting to unsigned char.
568 } else if (*s == 'x') {
572 for (int i = 0; i < 2 && isxdigit(*s); i++, s++)
573 v = v * 16 + (isdigit(*s) ? *s - '0' : 10 + tolower(*s) - 'a');
// Octal escape introduced by a digit 0-7: this loop accumulates at most three
// octal digits into v.
577 } else if (*s >= '0' && *s <= '7') {
579 for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++)
580 v = v * 8 + *s - '0';
// Check the printf-style format string 'input' for the "%n" conversion and
// log a warning when it is present.
// NOTE(review): the values returned in the two cases are not visible in this
// excerpt.
592 string sanitizePrintfFormat(const string& input)
// Plain substring search for "%n".
594 string::size_type i = input.find("%n");
595 if (i != string::npos) {
596 SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input);
// Return the message text for the error number 'errnum', using a thread-safe
// strerror variant selected at compile time: strerror_s() on SG_WINDOWS,
// the GNU strerror_r() when _GNU_SOURCE is defined, the POSIX strerror_r()
// otherwise. Compilation fails if none of these is available.
// In the non-GNU variants an sg_error is thrown on failure.
// NOTE(review): the declaration of 'retcode' and the condition guarding the
// throw are not visible in this excerpt.
603 std::string error_string(int errnum)
605 char buf[512]; // somewhat arbitrary...
606 // This could be simplified with C11 (annex K, optional...), which offers:
608 // errno_t strerror_s( char *buf, rsize_t bufsz, errno_t errnum );
609 // size_t strerrorlen_s( errno_t errnum );
611 #if defined(SG_WINDOWS)
613 // Always makes the string in 'buf' null-terminated
614 retcode = strerror_s(buf, sizeof(buf), errnum);
615 #elif defined(_GNU_SOURCE)
// The GNU variant's return value is used directly as the message, so this
// branch returns immediately and needs no retcode handling.
616 return std::string(strerror_r(errnum, buf, sizeof(buf)));
617 #elif _POSIX_C_SOURCE >= 200112L
619 // POSIX.1-2001 and POSIX.1-2008
620 retcode = strerror_r(errnum, buf, sizeof(buf));
622 #error "Could not find a thread-safe alternative to strerror()."
// Everything below applies only to the variants that report through retcode.
625 #if !defined(_GNU_SOURCE)
627 std::string msg = "unable to get error message for a given error number";
628 // C++11 would make this shorter with std::to_string()
629 std::ostringstream ostr;
// The ERANGE refinement is compiled only for non-Windows builds.
632 #if !defined(SG_WINDOWS)
633 if (retcode == ERANGE) { // more specific error message in this case
634 msg = std::string("buffer too small to hold the error message for "
635 "the specified error number");
// msg is the exception message, the contents of ostr its second argument.
639 throw sg_error(msg, ostr.str());
// Success path: 'buf' holds the message text.
642 return std::string(buf);
643 #endif // !defined(_GNU_SOURCE)
646 } // end namespace strutils
648 } // end namespace simgear