git.mxchange.org Git - simgear.git/commitdiff
utf8ToLatin1: return original instead of crashing on non-UTF-8 input
author Rebecca Palmer <R.Palmer@bham.ac.uk>
Tue, 10 Jun 2014 18:30:09 +0000 (19:30 +0100)
committer Rebecca Palmer <R.Palmer@bham.ac.uk>
Tue, 10 Jun 2014 18:30:09 +0000 (19:30 +0100)
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=750859
(In the long run we should probably fix the underlying
inconsistent-text-encodings problem, but probably not in time for 3.2)

simgear/misc/strutils.cxx

index 25480271069b75128d4f4261faadf3e37fb9255b..bc56100abb3db4e2942135bf2efa62506a9f3ba3 100644 (file)
@@ -53,8 +53,11 @@ namespace simgear {
                size_t len = get_length (p);
                if (len == 1) return *p;
                value_type res = static_cast<unsigned char> ( *p & (0xff >> (len + 1))) << ((len - 1) * 6 );
-               for (--len; len; --len)
-                       res |= (static_cast<unsigned char> (*(++p)) - 0x80) << ((len - 1) * 6);
+               for (--len; len; --len) {
+                       value_type next_byte = static_cast<unsigned char> (*(++p)) - 0x80;
+                       if (next_byte & 0xC0) return 0x00ffffff; // invalid UTF-8
+                       res |= next_byte << ((len - 1) * 6);
+                       }
                return res;
        }
 
@@ -62,6 +65,7 @@ namespace simgear {
                string s_latin1;
                for (string::iterator p = s_utf8.begin(); p != s_utf8.end(); ++p) {
                        value_type value = get_value<string::iterator&>(p);
+                       if (value > 0x10ffff) return s_utf8; // invalid UTF-8: guess that the input was already Latin-1
                        if (value > 0xff) SG_LOG(SG_IO, SG_WARN, "utf8ToLatin1: wrong char value: " << value);
                        s_latin1 += static_cast<char>(value);
                }