From 77f1fa8cd38db6a89be86c51b9aef70915457ed8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 9 Jan 2013 21:09:18 +0000 Subject: [PATCH] RDF/RSS parser fixed: - Added possiblity of a call-back function for finish() method - Fixed many other things (sorry) - TODOs.txt updated --- DOCS/TODOs.txt | 1 - inc/classes/rdf.class.php | 201 +++++++++++++---------- inc/modules/guest/what-wernis_portal.php | 1 + inc/template-functions.php | 6 +- 4 files changed, 118 insertions(+), 91 deletions(-) diff --git a/DOCS/TODOs.txt b/DOCS/TODOs.txt index 1ee1bacba4..71b11c870b 100644 --- a/DOCS/TODOs.txt +++ b/DOCS/TODOs.txt @@ -196,7 +196,6 @@ ./inc/template-functions.php:1658:// @TODO Lame description for this function ./inc/template-functions.php:1680: // @TODO Move this in a filter ./inc/template-functions.php:209: * @TODO On some pages this is buggy -./inc/template-functions.php:2320: // @TODO Is this needed for e.g. $GLOBALS['template_content'] ? $this->setRawTemplateData($compactedContent); ./inc/template-functions.php:298: // @TODO Remove these sanity checks if all is fine ./inc/template-functions.php:747: // @TODO $userid is deprecated and should be removed from loadEmailTemplate() and replaced with $content[userid] in all templates ./inc/wrapper-functions.php:3197: // @TODO Find a way to not use direct module comparison diff --git a/inc/classes/rdf.class.php b/inc/classes/rdf.class.php index 23df025b28..fd0ff99b1b 100644 --- a/inc/classes/rdf.class.php +++ b/inc/classes/rdf.class.php @@ -69,7 +69,7 @@ class fase4_rdf { * * "htmlentities" - Use the function htmlentities() * "utf8_decode" - Use the function ut8_decode() when you have UTF8 encoded text - * - Use non of both + * - Use none of both * * @access private * @var string @@ -355,16 +355,25 @@ class fase4_rdf { var $_output = ''; /** - * @var string + * @var string */ var $_parse_mode = ''; - // Output variable + /** + * Output variable + */ var $out = ''; - // Salt for hashing + /** + * Salt for hashing + */ var $salt = ''; + /** + * Callback function for processing content in finish() method. + */ + var $_finishCallback = NULL; + /** * Constructor of our Class * @@ -375,8 +384,7 @@ class fase4_rdf { * @author Stefan Saasen * @see _refresh */ - function fase4_rdf() - { + function fase4_rdf () { // default Value, to be overwritten in set_refresh() $this->_refresh = (time() - 1200); $this->_clear_cItems(); @@ -392,8 +400,7 @@ class fase4_rdf { * @return string Displays RDF Content (using _display()) * @see _remote_file, cache() */ - function parse_RDF($rdf) - { + function parse_RDF ($rdf) { unset($this->_array_item); $this->_remote_file = $rdf; $this->out .= ""; @@ -414,15 +421,25 @@ class fase4_rdf { * @return string Displays RDF Content (using _display()) * @see _remote_file, cache() */ - function finish($return = FALSE) { + function finish ($return = FALSE) { + // Replace dollar chars as they may cause problems $this->out = str_replace('$', '$', $this->out); - if (!$return) { - echo $this->out; + // Is the call-back enabled? + if ((!empty($this->out)) && (!is_null($this->_finishCallback)) && (is_callable($this->_finishCallback))) { + // Then call it + $this->out = call_user_func($this->_finishCallback, $this->out); + } // END - if + + // Do garbage collection + $this->_garbage_collection(); + + // Return or output? + if ($return === FALSE) { + print($this->out); } else { return $this->out; } - $this->_garbage_collection(); } /** @@ -440,7 +457,7 @@ class fase4_rdf { * @return string Displays RDF Content (using _display()) * @see _remote_file, cache() */ - function use_dynamic_display($bool) { + function use_dynamic_display ($bool) { $this->_use_dynamic_display = $bool; return TRUE; } @@ -483,7 +500,7 @@ class fase4_rdf { return TRUE; } - function set_salt($saltPara) { + function set_salt ($saltPara) { $this->salt = $saltPara; return TRUE; } @@ -497,8 +514,7 @@ class fase4_rdf { * @return boolean * @see _max_count, _endElement() */ - function set_max_item($int) - { + function set_max_item ($int) { $this->_max_count = $int; return TRUE; } @@ -512,8 +528,7 @@ class fase4_rdf { * @return boolean * @see _cache_dir */ - function set_CacheDir($dir) - { + function set_CacheDir ($dir) { if (substr($dir, -1) != '/') { $dir = $dir.'/'; } @@ -527,8 +542,7 @@ class fase4_rdf { * @param string $msg Message to display on failure * @author Stefan Saasen */ - function _throw_exception($msg) - { + function _throw_exception ($msg) { $this->out .= "
fase4 RDF Error: ".$msg."
"; return TRUE; } @@ -597,7 +611,7 @@ class fase4_rdf { * @param string $attrs contains an associative array with the element's attributes (if any). * @see _get_ChannelData(), _clear_Items(), _type, _parse_mode, _depth, _tags, _cdepth, _ctags */ - function _startElement($parser, $name, $attrs) { + function _startElement ($parser, $name, $attrs) { // We have to determine, which type of xml data we have to parse if ($name == 'rss') { $this->_type = 'rss'; @@ -639,8 +653,7 @@ class fase4_rdf { * @param mixed $parser a reference to the XML parser calling the handler. * @see _output, _display_opt, _citem */ - function _get_ChannelData($parser) - { + function _get_ChannelData ($parser) { $this->_citem['link'] = trim($this->_citem['link']); if (($this->_display_opt['sitelink'] == $this->_citem['link']) && (!empty($this->_display_opt['reflink'])) && (!empty($this->_display_opt['refid']))) { @@ -684,7 +697,7 @@ class fase4_rdf { * @param string $name contains the name of the element for which this handler is called. * @see _clear_Items(), _type, _parse_mode, _depth, _tags, _cdepth, _ctags, _item, _output, _display_opt */ - function _endElement($parser, $name) { + function _endElement ($parser, $name) { array_pop($this->_tags); $this->_depth[$this->get_parser_id($parser)]--; array_pop($this->_ctags); @@ -832,8 +845,7 @@ class fase4_rdf { * @return array * @see _array_channel */ - function get_array_channel() - { + function get_array_channel () { return $this->_array_channel; } @@ -845,8 +857,7 @@ class fase4_rdf { * @return array * @see _array_item */ - function get_array_item() - { + function get_array_item () { return $this->_array_item; } @@ -858,8 +869,7 @@ class fase4_rdf { * @return array * @see _array_textinput */ - function get_array_textinput() - { + function get_array_textinput () { return $this->_array_textinput; } @@ -892,8 +902,7 @@ class fase4_rdf { * @return array * @see _array_image */ - function get_array_image() - { + function get_array_image() { return $this->_array_image; } @@ -906,11 +915,9 @@ class fase4_rdf { * @param string $text contains the character data as a string. * @see _parse_mode, _item, _tags, _depth, _citem, _ctags, _cdepth */ - function _parseData($parser, $text) - { + function _parseData($parser, $text) { // Deocing mode added by Roland Haeder - switch ($this->_decoding_mode) - { + switch ($this->_decoding_mode) { case 'utf8_decode': $text = utf8_decode($text); break; @@ -920,9 +927,9 @@ class fase4_rdf { break; } - $clean = preg_replace("/\s/", "", $text); + $clean = preg_replace("/\s/", '', $text); if ($clean) { - $text = preg_replace("/^\s+/", "", $text)."\n"; + $text = preg_replace("/^\s+/", '', $text)."\n"; if ($this->_parse_mode == 'all') { if (isset($this->_item[$this->_tags[$this->_depth[$this->get_parser_id($parser)]]]) && $this->_item[$this->_tags[$this->_depth[$this->get_parser_id($parser)]]]) { @@ -949,8 +956,7 @@ class fase4_rdf { * @param array $options * @see _display_opt */ - function set_Options($options = '') - { + function set_Options ($options = NULL) { if (is_array($options)) { $this->_display_opt = $options; return TRUE; @@ -968,8 +974,7 @@ class fase4_rdf { * @param int $width attribute width in tag * @see _table_width */ - function set_table_width($width = 400) - { + function set_table_width ($width = 400) { $this->_table_width = $width; return TRUE; } @@ -985,13 +990,13 @@ class fase4_rdf { * @return array $options * @see _display_opt */ - function get_Options() { + function get_Options () { $options = array( - 'image' => "If 'image' is set to \"hidden\" no image provided by the RDF Publisher will be displayed.", - 'channel' => "If 'channel' is set to \"hidden\" the Channel Meta Data (i.e the Title and the short description regarding the RDF Publisher will not be displayed", - 'textinput' => "If set to \"hidden\" no Input Form will be displayed", - 'build' => "If set to \"hidden\" the Build Date (if provided) of the RDF File will not be displayed", - 'cache_update' => "If set to \"hidden\" the Update Date/Time of the cached Rdf File will not be displayed" + 'image' => "If 'image' is set to "hidden" no image provided by the RDF Publisher will be displayed.", + 'channel' => "If 'channel' is set to "hidden" the Channel Meta Data (i.e the Title and the short description regarding the RDF Publisher will not be displayed", + 'textinput' => "If set to "hidden" no Input Form will be displayed", + 'build' => "If set to "hidden" the Build Date (if provided) of the RDF File will not be displayed", + 'cache_update' => "If set to "hidden" the Update Date/Time of the cached Rdf File will not be displayed" ); return $options; } @@ -1005,18 +1010,18 @@ class fase4_rdf { * @return string XML Presentation of parsed RDF File * @see _cached_file, _remote_file, _cache_dir, _refresh, _update_cache() */ - function cache() - { + function cache () { // checks if the cache directory already exists // if not, the cache directory will be created if (!$this->_cache_dir_ok) { $this->_create_cache_dir(); } + if ($this->_use_dynamic_display == TRUE) { - $this->_cached_file = md5('dynamic'.$this->salt.$this->_remote_file) . '.cache'; + $this->_cached_file = md5('dynamic' . $this->salt.$this->_remote_file) . '.rss'; $this->_cache_type = 'normal'; } else { - $this->_cached_file = md5($this->salt.$this->_remote_file) . '.cache'; + $this->_cached_file = md5($this->salt . $this->_remote_file) . '.rss'; $this->_cache_type = 'fast'; } @@ -1064,6 +1069,8 @@ class fase4_rdf { $data = implode(' ', file($_cache_f)); } } + + // Return trimmed data return trim($data); } // END cache() @@ -1112,21 +1119,23 @@ class fase4_rdf { * @param string $rdf RDF File (Location) * @see _cache_dir, _cached_file, _throw_exception() */ - function _update_cache($content = '') - { + function _update_cache ($content = '') { if (defined('__SECURITY') && function_exists('writeToFile')) { // Use mailer-project function - return writeToFile($this->_cache_dir.$this->_cached_file, compactContent($content)); + return writeToFile($this->_cache_dir.$this->_cached_file, $content); } + $_local = @fopen($this->_cache_dir.$this->_cached_file, 'w'); if (!$_local) { $this->_throw_exception('Cannot open '.$this->_cached_file.'

Exception at Line: '.__LINE__); return FALSE; } + if (fwrite($_local, $content) === FALSE) { $this->_throw_exception('Cannot write to '.$this->_cached_file.'
Exeception at Line: '.__LINE__); return FALSE; } + fclose($_local); @chmod($this->_cache_dir.$this->_cached_file, 0666); return TRUE; @@ -1140,8 +1149,7 @@ class fase4_rdf { * @return string Date/Time of last Update * @see _cache_dir, _cached_file */ - function get_cache_update_time() - { + function get_cache_update_time () { return (file_exists($this->_cache_dir.$this->_cached_file))?date('d.m.Y H:i:s', filemtime($this->_cache_dir.$this->_cached_file)):'Cachemiss'; } // END get_cache_update_time() @@ -1154,8 +1162,7 @@ class fase4_rdf { * @return string Displays RDF Content (using _display()) * @see _remote_file, cache() */ - function get_CacheType() - { + function get_CacheType() { return $this->_cache_type; } @@ -1167,8 +1174,7 @@ class fase4_rdf { * @return array $options * @see _use_cached_file */ - function is_cachedFile() - { + function is_cachedFile () { return $this->_use_cached_file; } @@ -1183,17 +1189,16 @@ class fase4_rdf { * @author Stefan Saasen * @see _cache_dir */ - function clear_cache() - { + function clear_cache () { $dir = dir($this->_cache_dir); - while($file=$dir->read()) { + while($file = $dir->read()) { // Exclude directories - if (is_file($dir->path.$file) && substr($file, -6, 6) != '.cache' && substr($file, -4, 4) != '.log') { - if ((defined('__SECURITY') && function_exists('removeFile')) && (!removeFile($dir->path.$file))) { - $this->_throw_exception("removeFile() was unable to unlink ".$dir->path.$file."
\n
\nException at Line: ".__LINE__); + if (is_file($dir->path . $file) && substr($file, -4, 4) == '.rss') { + if ((defined('__SECURITY') && function_exists('removeFile')) && (!removeFile($dir->path . $file))) { + $this->_throw_exception("removeFile() was unable to unlink ".$dir->path . $file."
\n
\nException at Line: ".__LINE__); return FALSE; - } elseif (!unlink($dir->path.$file)) { - $this->_throw_exception("Unable to unlink ".$dir->path.$file."
\n
\nException at Line: ".__LINE__); + } elseif (!unlink($dir->path . $file)) { + $this->_throw_exception("Unable to unlink ".$dir->path . $file."
\n
\nException at Line: ".__LINE__); return FALSE; } // END - if } // END - if @@ -1231,9 +1236,15 @@ class fase4_rdf { srand((double) microtime() * 1000000); if (mt_rand(1, 100) <= $this->gc_probability) { $dir = dir($this->_cache_dir); - while($file=$dir->read()) { - if (is_file($dir->path.$file) && substr($file, -6, 6) != '.cache' && substr($file, -4, 4) != '.log' && filemtime($dir->path.$file) <= time() - $this->_refresh) { - @unlink($dir->path.$file); + while($file = $dir->read()) { + if ((is_file($dir->path . $file)) && (substr($file, -4, 4) == '.rss') && (filemtime($dir->path . $file) <= time() - $this->_refresh)) { + if (defined('__SECURITY') && function_exists('removeFile')) { + // Use mailer-project's function + removeFile($dir->path . $file); + } else { + // Use PHP's function + unlink($dir->path . $file); + } } // END - if } $dir->close(); @@ -1292,11 +1303,14 @@ class fase4_rdf { * @return array * @see _use_proxy, cache() */ - function _rdf_data() - { + function _rdf_data () { + // Init output + $output = array(); + + // Use mailer-project's function or own code? if (defined('__SECURITY') && function_exists('sendHttpGetRequest')) { // Use mailer-project instead (see http://mxchange.org) - return sendHttpGetRequest($this->_remote_file, array(), TRUE); + $output = sendHttpGetRequest($this->_remote_file, array(), TRUE); } elseif ($this->_use_proxy == TRUE) { // we need a raw socket here to connect to proxy $fp = fsockopen($this->_phost,$this->_pport); @@ -1314,7 +1328,7 @@ class fase4_rdf { for ($i = '0'; !feof ($fp) ; $i++) { - $usable_data[$i] = ""; + $usable_data[$i] = ''; $usable_data[$i] = fgets($fp,4096); // PARSE HEADER ---- first line has to be _remote_file, 0, 7) != 'http://') { $this->_throw_exception('Cannot find http:// in '.$this->_remote_file.'!'); - return ''; + return array(); } else { // Extract host information $host = substr($this->_remote_file, 7); + // Extract the GET part $get = '/'; if (strpos($host, '/') > 0) { $get = substr($host, strpos($host, '/')); $host = substr($host, 0, strpos($host, '/')); - } + } // END - if + // Extract port $port = '80'; if (strpos($host, ':') > 0) { $port = substr($host, (strpos($host, ':') + 1)); $host = substr($host, 0, (strpos($host, ':') - 1)); - } + } // END - if // Start connection to server $fp = fsockopen($host, $port); if (!$fp) { $this->_throw_exception($this->_remote_file.' is maybe not available.'); - return ''; - } + return array(); + } // END - if + // Repare request line $request = sprintf("GET %s HTTP/1.0\r\nHost: %s\r\nUser-Agent: Fase4 RDF-Reader/1.40 modified by Quix0r\r\n\r\n", $get, $host); + // Send request out fputs($fp, $request); $reply = ''; $isContent = FALSE; $dummy = ''; + // Read reply $i=0; while (!feof($fp)) { @@ -1397,17 +1416,25 @@ class fase4_rdf { // Put in dummy $dummy[] = $read; } + $i++; - } + } // END - while + if ((count($dummy) > 0) && (count($reply) == 0) && (!$isContent)) { // Transfer content from dummy $reply = $content; - } + } // END - if + fclose($fp); //die(htmlentities($reply)); - return $reply; + $output = $reply; } } - } // END _rdf_data() -} // END class + + // Exit here + return $output; + } // END _rdf_data() +} // END class + +// [EOF] ?> diff --git a/inc/modules/guest/what-wernis_portal.php b/inc/modules/guest/what-wernis_portal.php index 273dd23362..9a680e8bf5 100644 --- a/inc/modules/guest/what-wernis_portal.php +++ b/inc/modules/guest/what-wernis_portal.php @@ -54,6 +54,7 @@ if ((!isExtensionActive('wernis')) && (!isAdmin())) { // Init PDF/RDF parser class $rdf = new fase4_rdf(); $rdf->_use_nl2br = FALSE; +$rdf->_finishCallback = 'compactContent'; $rdf->use_dynamic_display(FALSE); $rdf->set_CacheDir(getCachePath() . ''); $rdf->set_salt(md5(getSiteKey())); diff --git a/inc/template-functions.php b/inc/template-functions.php index 9b07f6b3a0..aa8678f376 100644 --- a/inc/template-functions.php +++ b/inc/template-functions.php @@ -2304,6 +2304,9 @@ function compactContent ($uncompactedContent) { // First, remove all tab/new-line/revert characters $compactedContent = str_replace(chr(9), '', str_replace(PHP_EOL, '', str_replace(chr(13), '', $uncompactedContent))); + // Make a space after > + $compactedContent = str_replace(array('>', ' '), array('> ', ' '), $compactedContent); + // Then regex all comments like away preg_match_all('//', $compactedContent, $matches); @@ -2316,9 +2319,6 @@ function compactContent ($uncompactedContent) { } // END - foreach } // END - if - // Set the content again - // @TODO Is this needed for e.g. $GLOBALS['template_content'] ? $this->setRawTemplateData($compactedContent); - // Return compacted content return $compactedContent; } -- 2.39.5