X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=simgear%2Fmisc%2Fstrutils.cxx;h=7a2ffa30dff43a7e6251b603e4a3e7824bc9862d;hb=0d5781aee74071419abccea73fc2fa3f3f5b121b;hp=d33b0a9dcdbd3a72febcf0cf965b4ef46938c2f6;hpb=11479cd8c386d8bf7e1fee7bed60ab4abefc5fad;p=simgear.git diff --git a/simgear/misc/strutils.cxx b/simgear/misc/strutils.cxx index d33b0a9d..7a2ffa30 100644 --- a/simgear/misc/strutils.cxx +++ b/simgear/misc/strutils.cxx @@ -22,15 +22,62 @@ #include #include +#include +#include +#include // strerror_r() and strerror_s() +#include #include "strutils.hxx" +#include +#include +#include // SG_WINDOWS +#include + using std::string; using std::vector; +using std::stringstream; namespace simgear { namespace strutils { + /* + * utf8ToLatin1() convert utf8 to latin, useful for accent character (i.e éâàîè...) + */ + template size_t get_length (Iterator p) { + unsigned char c = static_cast (*p); + if (c < 0x80) return 1; + else if (!(c & 0x20)) return 2; + else if (!(c & 0x10)) return 3; + else if (!(c & 0x08)) return 4; + else if (!(c & 0x04)) return 5; + else return 6; + } + + typedef unsigned int value_type; + template value_type get_value (Iterator p) { + size_t len = get_length (p); + if (len == 1) return *p; + value_type res = static_cast ( *p & (0xff >> (len + 1))) << ((len - 1) * 6 ); + for (--len; len; --len) { + value_type next_byte = static_cast (*(++p)) - 0x80; + if (next_byte & 0xC0) return 0x00ffffff; // invalid UTF-8 + res |= next_byte << ((len - 1) * 6); + } + return res; + } + + string utf8ToLatin1( string& s_utf8 ) { + string s_latin1; + for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) { + value_type value = get_value(p); + if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1 + if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value); + s_latin1 += static_cast(value); + } + return s_latin1; + } + /** * */ @@ -131,10 +178,9 @@ namespace simgear { static string do_strip( const string& s, int striptype ) { - // if (s.empty()) - // return s; - string::size_type len = s.length(); + if( len == 0 ) // empty string is trivial + return s; string::size_type i = 0; if (striptype != RIGHTSTRIP) { @@ -183,5 +229,421 @@ namespace simgear { return do_strip( s, BOTHSTRIP ); } - } // end namespace strutils + string + rpad( const string & s, string::size_type length, char c ) + { + string::size_type l = s.length(); + if( l >= length ) return s; + string reply = s; + return reply.append( length-l, c ); + } + + string + lpad( const string & s, size_t length, char c ) + { + string::size_type l = s.length(); + if( l >= length ) return s; + string reply = s; + return reply.insert( 0, length-l, c ); + } + + bool + starts_with( const string & s, const string & substr ) + { + return s.compare(0, substr.length(), substr) == 0; + } + + bool + ends_with( const string & s, const string & substr ) + { + if( substr.length() > s.length() ) + return false; + return s.compare( s.length() - substr.length(), + substr.length(), + substr ) == 0; + } + + string simplify(const string& s) + { + string result; // reserve size of 's'? + string::const_iterator it = s.begin(), + end = s.end(); + + // advance to first non-space char - simplifes logic in main loop, + // since we can always prepend a single space when we see a + // space -> non-space transition + for (; (it != end) && isspace(*it); ++it) { /* nothing */ } + + bool lastWasSpace = false; + for (; it != end; ++it) { + char c = *it; + if (isspace(c)) { + lastWasSpace = true; + continue; + } + + if (lastWasSpace) { + result.push_back(' '); + } + + lastWasSpace = false; + result.push_back(c); + } + + return result; + } + + int to_int(const std::string& s, int base) + { + stringstream ss(s); + switch (base) { + case 8: ss >> std::oct; break; + case 16: ss >> std::hex; break; + default: break; + } + + int result; + ss >> result; + return result; + } + + int compare_versions(const string& v1, const string& v2) + { + vector v1parts(split(v1, ".")); + vector v2parts(split(v2, ".")); + + int lastPart = std::min(v1parts.size(), v2parts.size()); + for (int part=0; part < lastPart; ++part) { + int part1 = to_int(v1parts[part]); + int part2 = to_int(v2parts[part]); + + if (part1 != part2) { + return part1 - part2; + } + } // of parts iteration + + // reached end - longer wins + return v1parts.size() - v2parts.size(); + } + + string join(const string_list& l, const string& joinWith) + { + string result; + unsigned int count = l.size(); + for (unsigned int i=0; i < count; ++i) { + result += l[i]; + if (i < (count - 1)) { + result += joinWith; + } + } + + return result; + } + + string uppercase(const string &s) { + string rslt(s); + for(string::iterator p = rslt.begin(); p != rslt.end(); p++){ + *p = toupper(*p); + } + return rslt; + } + + string lowercase(const string &s) { + string rslt(s); + for(string::iterator p = rslt.begin(); p != rslt.end(); p++){ + *p = tolower(*p); + } + return rslt; + } + + void lowercase(string &s) { + for(string::iterator p = s.begin(); p != s.end(); p++){ + *p = tolower(*p); + } + } + +#if defined(SG_WINDOWS) + +#include + +static WCharVec convertMultiByteToWString(DWORD encoding, const std::string& a) +{ + WCharVec result; + DWORD flags = 0; + int requiredWideChars = MultiByteToWideChar(encoding, flags, + a.c_str(), a.size(), + NULL, 0); + result.resize(requiredWideChars); + MultiByteToWideChar(encoding, flags, a.c_str(), a.size(), + result.data(), result.size()); + return result; +} + +WCharVec convertUtf8ToWString(const std::string& a) +{ + return convertMultiByteToWString(CP_UTF8, a); +} + +#endif + +std::string convertWindowsLocal8BitToUtf8(const std::string& a) +{ +#ifdef SG_WINDOWS + DWORD flags = 0; + WCharVec wideString = convertMultiByteToWString(CP_ACP, a); + + // convert down to UTF-8 + std::vector result; + int requiredUTF8Chars = WideCharToMultiByte(CP_UTF8, flags, + wideString.data(), wideString.size(), + NULL, 0, NULL, NULL); + result.resize(requiredUTF8Chars); + WideCharToMultiByte(CP_UTF8, flags, + wideString.data(), wideString.size(), + result.data(), result.size(), NULL, NULL); + return std::string(result.data(), result.size()); +#else + return a; +#endif +} + +//------------------------------------------------------------------------------ +std::string md5(const unsigned char* data, size_t num) +{ + SG_MD5_CTX md5_ctx; + SG_MD5Init(&md5_ctx); + SG_MD5Update(&md5_ctx, data, num); + + unsigned char digest[MD5_DIGEST_LENGTH]; + SG_MD5Final(digest, &md5_ctx); + + return encodeHex(digest, MD5_DIGEST_LENGTH); +} + +//------------------------------------------------------------------------------ +std::string md5(const char* data, size_t num) +{ + return md5(reinterpret_cast(data), num); +} + +//------------------------------------------------------------------------------ +std::string md5(const std::string& str) +{ + return md5(reinterpret_cast(str.c_str()), str.size()); +} + +//------------------------------------------------------------------------------ +static const std::string base64_chars = +"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +"abcdefghijklmnopqrstuvwxyz" +"0123456789+/"; + +static const unsigned char base64_decode_map[128] = +{ + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 62, 127, 127, 127, 63, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 127, 127, + 127, 64, 127, 127, 127, 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 127, 127, 127, 127, 127, 127, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 127, 127, 127, 127, 127 +}; + + +static inline bool is_base64(unsigned char c) { + return (isalnum(c) || (c == '+') || (c == '/')); +} + +static bool is_whitespace(unsigned char c) { + return ((c == ' ') || (c == '\r') || (c == '\n')); +} + +void decodeBase64(const std::string& encoded_string, std::vector& ret) +{ + int in_len = encoded_string.size(); + int i = 0; + int j = 0; + int in_ = 0; + unsigned char char_array_4[4], char_array_3[3]; + + while (in_len-- && ( encoded_string[in_] != '=')) { + if (is_whitespace( encoded_string[in_])) { + in_++; + continue; + } + + if (!is_base64(encoded_string[in_])) { + break; + } + + char_array_4[i++] = encoded_string[in_]; in_++; + if (i ==4) { + for (i = 0; i <4; i++) + char_array_4[i] = base64_decode_map[char_array_4[i]]; + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; (i < 3); i++) + ret.push_back(char_array_3[i]); + i = 0; + } + } + + if (i) { + for (j = i; j <4; j++) + char_array_4[j] = 0; + + for (j = 0; j <4; j++) + char_array_4[j] = base64_decode_map[char_array_4[j]]; + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]); + } +} + +//------------------------------------------------------------------------------ +const char hexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + +std::string encodeHex(const std::string& bytes) +{ + return encodeHex( + reinterpret_cast(bytes.c_str()), + bytes.size() + ); +} + +std::string encodeHex(const unsigned char* rawBytes, unsigned int length) +{ + std::string hex(length * 2, '\0'); + for (unsigned int i=0; i> 4]; + hex[i * 2 + 1] = hexChar[c & 0x0f]; + } + + return hex; +} + +//------------------------------------------------------------------------------ +std::string unescape(const char* s) +{ + std::string r; + while( *s ) + { + if( *s != '\\' ) + { + r += *s++; + continue; + } + + if( !*++s ) + break; + + if (*s == '\\') { + r += '\\'; + } else if (*s == 'n') { + r += '\n'; + } else if (*s == 'r') { + r += '\r'; + } else if (*s == 't') { + r += '\t'; + } else if (*s == 'v') { + r += '\v'; + } else if (*s == 'f') { + r += '\f'; + } else if (*s == 'a') { + r += '\a'; + } else if (*s == 'b') { + r += '\b'; + } else if (*s == 'x') { + if (!*++s) + break; + int v = 0; + for (int i = 0; i < 2 && isxdigit(*s); i++, s++) + v = v * 16 + (isdigit(*s) ? *s - '0' : 10 + tolower(*s) - 'a'); + r += v; + continue; + + } else if (*s >= '0' && *s <= '7') { + int v = *s++ - '0'; + for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; i++, s++) + v = v * 8 + *s - '0'; + r += v; + continue; + + } else { + r += *s; + } + s++; + } + return r; +} + +string sanitizePrintfFormat(const string& input) +{ + string::size_type i = input.find("%n"); + if (i != string::npos) { + SG_LOG(SG_IO, SG_WARN, "sanitizePrintfFormat: bad format string:" << input); + return string(); + } + + return input; +} + +std::string error_string(int errnum) +{ + char buf[512]; // somewhat arbitrary... + // This could be simplified with C11 (annex K, optional...), which offers: + // + // errno_t strerror_s( char *buf, rsize_t bufsz, errno_t errnum ); + // size_t strerrorlen_s( errno_t errnum ); + +#if defined(SG_WINDOWS) + errno_t retcode; + // Always makes the string in 'buf' null-terminated + retcode = strerror_s(buf, sizeof(buf), errnum); +#elif defined(_GNU_SOURCE) + return std::string(strerror_r(errnum, buf, sizeof(buf))); +#elif _POSIX_C_SOURCE >= 200112L + int retcode; + // POSIX.1-2001 and POSIX.1-2008 + retcode = strerror_r(errnum, buf, sizeof(buf)); +#else +#error "Could not find a thread-safe alternative to strerror()." +#endif + +#if !defined(_GNU_SOURCE) + if (retcode) { + std::string msg = "unable to get error message for a given error number"; + // C++11 would make this shorter with std::to_string() + std::ostringstream ostr; + ostr << errnum; + +#if !defined(SG_WINDOWS) + if (retcode == ERANGE) { // more specific error message in this case + msg = std::string("buffer too small to hold the error message for " + "the specified error number"); + } +#endif + + throw sg_error(msg, ostr.str()); + } + + return std::string(buf); +#endif // !defined(_GNU_SOURCE) +} + +} // end namespace strutils + } // end namespace simgear