From: Brion Vibber Date: Wed, 6 Oct 2010 20:00:30 +0000 (-0700) Subject: Basic validation of UTF-8 input via GET/POST vars: invalid UTF-8 sequences will cause... X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=ebfa8bce27d341a3cd10c53d5673b75b60c212bb;p=quix0rs-gnu-social.git Basic validation of UTF-8 input via GET/POST vars: invalid UTF-8 sequences will cause the string to drop. Not necessarily super-thorough; should be improved in future to drop individual bad sequences, do normalization of combining forms, etc. General input validation (for ints, types of strings, etc) still would be good to have! --- diff --git a/lib/util.php b/lib/util.php index dc853f657b..35fcfdb090 100644 --- a/lib/util.php +++ b/lib/util.php @@ -906,6 +906,28 @@ function common_shorten_links($text, $always = false) return common_replace_urls_callback($text, array('File_redirection', 'makeShort')); } +/** + * Very basic stripping of invalid UTF-8 input text. + * + * @param string $str + * @return mixed string or null if invalid input + * + * @todo ideally we should drop bad chars, and maybe do some of the checks + * from common_xml_safe_str. But we can't strip newlines, etc. + * @todo Unicode normalization might also be useful, but not needed now. + */ +function common_validate_utf8($str) +{ + // preg_replace will return NULL on invalid UTF-8 input. + return preg_replace('//u', '', $str); +} + +/** + * Make sure an arbitrary string is safe for output in XML as a single line. + * + * @param string $str + * @return string + */ function common_xml_safe_str($str) { // Replace common eol and extra whitespace input chars @@ -1663,19 +1685,25 @@ function common_config($main, $sub) array_key_exists($sub, $config[$main])) ? $config[$main][$sub] : false; } +/** + * Pull arguments from a GET/POST/REQUEST array with first-level input checks: + * strips "magic quotes" slashes if necessary, and kills invalid UTF-8 strings. + * + * @param array $from + * @return array + */ function common_copy_args($from) { $to = array(); $strip = get_magic_quotes_gpc(); foreach ($from as $k => $v) { - if($strip) { - if(is_array($v)) { - $to[$k] = common_copy_args($v); - } else { - $to[$k] = stripslashes($v); - } + if(is_array($v)) { + $to[$k] = common_copy_args($v); } else { - $to[$k] = $v; + if ($strip) { + $v = stripslashes($v); + } + $to[$k] = strval(common_validate_utf8($v)); } } return $to;