3 * Name: Retrieve Feed Content
4 * Description: Follow the permalink of RSS/Atom feed items and replace the summary with the full content.
6 * Author: Matthew Exon <http://mat.exon.name>
9 require_once('include/html2bbcode.php');
10 require_once('include/Photo.php');
/**
 * Install hook for the retriever addon.
 *
 * Registers the addon's five hooks, then walks the stored
 * 'retriever'/'dbversion' config value through each schema migration in turn
 * until it reaches '0.10'. If the version is still not '0.10' afterwards, the
 * bundled database.sql is read and split into statements.
 *
 * NOTE(review): the version checks use loose `==` on numeric-looking strings.
 * In PHP '0.10' == '0.1' is true, so an installation already at '0.10' would
 * satisfy the first check below and re-run every migration — confirm and
 * consider strict comparison.
 */
12 function retriever_install() {
13 register_hook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings');
14 register_hook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post');
15 register_hook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook');
16 register_hook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu');
17 register_hook('cron', 'addon/retriever/retriever.php', 'retriever_cron');
// 0.1 -> 0.2: legacy settings lived in pconfig under categories beginning with
// 'retriever'; they are folded into retriever_rule rows, one JSON blob per contact.
19 $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
20 if (count($r) || (get_config('retriever', 'dbversion') == '0.1')) {
21 $retrievers = array();
// SUBSTRING(`cat`, 10) keeps what follows the 9-character 'retriever' prefix;
// that remainder is used below as the contact id.
22 $r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'");
24 $retrievers[$rr['contact']][$rr['k']] = $rr['v'];
26 foreach ($retrievers as $k => $v) {
27 $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k));
// NOTE(review): the assignment of $uid is not visible here; presumably it is
// taken from the contact row fetched just above.
30 q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')",
31 intval($uid), intval($k), dbesc(json_encode($v)));
33 q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'");
34 set_config('retriever', 'dbversion', '0.2');
// 0.2 -> 0.3: drop the `retriever` column from retriever_resource.
36 if (get_config('retriever', 'dbversion') == '0.2') {
37 q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`");
38 set_config('retriever', 'dbversion', '0.3');
// 0.3 -> 0.4: widen the URI/URL columns to 800 ASCII characters.
40 if (get_config('retriever', 'dbversion') == '0.3') {
41 q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL");
42 q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL");
43 set_config('retriever', 'dbversion', '0.4');
// 0.4 -> 0.5: add the `finished` flag to retriever_item.
45 if (get_config('retriever', 'dbversion') == '0.4') {
46 q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'");
47 set_config('retriever', 'dbversion', '0.5');
// 0.5 -> 0.6: change timestamp column defaults and rebuild the `all` key.
49 if (get_config('retriever', 'dbversion') == '0.5') {
50 q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()');
51 q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL');
52 q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL');
53 q('ALTER TABLE `retriever_item` DROP KEY `all`');
54 q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)');
55 set_config('retriever', 'dbversion', '0.6');
// 0.6 -> 0.7: convert the tables to utf8/utf8_bin, keeping the URI/URL columns
// as ascii/ascii_bin.
57 if (get_config('retriever', 'dbversion') == '0.6') {
58 q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
59 q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
60 q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
61 q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL');
62 q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin');
63 set_config('retriever', 'dbversion', '0.7');
// 0.7 -> 0.8: rewrite each rule's JSON, replacing the old 'match'/'remove'
// keys with 'include'/'exclude' clause lists.
65 if (get_config('retriever', 'dbversion') == '0.7') {
66 $r = q("SELECT `id`, `data` FROM `retriever_rule`");
68 logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . $rr['data'], LOGGER_DATA);
69 $data = json_decode($rr['data'], true);
70 if ($data['pattern']) {
// Strip the enclosing slashes from the stored pattern.
72 if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) {
73 $data['pattern'] = $matches[1];
// Each '|'-separated component of 'match' becomes an element/attribute/value clause.
78 foreach (explode('|', $data['match']) as $component) {
80 if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
82 'element' => $matches[1],
83 'attribute' => $matches[2],
84 'value' => $matches[3]);
// NOTE(review): the parentheses in this pattern are not escaped, so PCRE treats
// them as groups rather than literal XPath punctuation — confirm this branch
// can ever match, and that $matches[2]/[3] are the intended groups.
86 if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) {
88 'element' => $matches[1],
89 'attribute' => $matches[2],
90 'value' => $matches[3]);
93 $data['include'] = $include;
94 unset($data['match']);
// Same conversion for 'remove', producing the 'exclude' clauses.
96 if ($data['remove']) {
98 foreach (explode('|', $data['remove']) as $component) {
100 if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) {
102 'element' => $matches[1],
103 'attribute' => $matches[2],
104 'value' => $matches[3]);
// NOTE(review): same unescaped-parentheses concern as in the 'match' loop above.
106 if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) {
108 'element' => $matches[1],
109 'attribute' => $matches[2],
110 'value' => $matches[3]);
113 $data['exclude'] = $exclude;
114 unset($data['remove']);
// Write the converted configuration back to the rule row.
116 $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', dbesc(json_encode($data)), $rr['id']);
117 logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . json_encode($data), LOGGER_DATA);
119 set_config('retriever', 'dbversion', '0.8');
// 0.8 -> 0.9: add the `http-code` column to retriever_resource.
121 if (get_config('retriever', 'dbversion') == '0.8') {
122 q("ALTER TABLE `retriever_resource` ADD COLUMN `http-code` smallint(1) unsigned NULL DEFAULT NULL");
123 set_config('retriever', 'dbversion', '0.9');
// 0.9 -> 0.10: drop retriever_item.parent and add retriever_resource.redirect-url.
125 if (get_config('retriever', 'dbversion') == '0.9') {
126 q("ALTER TABLE `retriever_item` DROP COLUMN `parent`");
127 q("ALTER TABLE `retriever_resource` ADD COLUMN `redirect-url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NULL DEFAULT NULL");
128 set_config('retriever', 'dbversion', '0.10');
// Any other version (e.g. none stored yet): load database.sql from this
// directory and split it on ';'.
// NOTE(review): the loop body is not visible here; presumably each statement
// is executed — confirm.
131 if (get_config('retriever', 'dbversion') != '0.10') {
132 $schema = file_get_contents(dirname(__file__).'/database.sql');
133 $arr = explode(';', $schema);
134 foreach ($arr as $a) {
137 set_config('retriever', 'dbversion', '0.10');
/**
 * Uninstall hook: removes every hook that retriever_install() registers.
 *
 * Each of the five hooks is unregistered exactly once. The previous version
 * unregistered 'plugin_settings' and 'plugin_settings_post' twice.
 */
function retriever_uninstall() {
    $file = 'addon/retriever/retriever.php';

    // hook name => callback, in the same order as the register_hook() calls
    $hooks = array(
        'plugin_settings'      => 'retriever_plugin_settings',
        'plugin_settings_post' => 'retriever_plugin_settings_post',
        'post_remote'          => 'retriever_post_remote_hook',
        'contact_photo_menu'   => 'retriever_contact_photo_menu',
        'cron'                 => 'retriever_cron',
    );

    foreach ($hooks as $hook => $callback) {
        unregister_hook($hook, $file, $callback);
    }
}
/**
 * Deliberately empty: takes no arguments, does nothing and returns nothing.
 *
 * NOTE(review): presumably its existence is what tells the platform that this
 * addon provides a page module — confirm against the addon loader.
 */
function retriever_module()
{
}
/**
 * Handler registered for the 'cron' hook: processes a batch of at most 100
 * pending retrievals.
 *
 * Neither parameter is used on the lines visible here.
 */
153 function retriever_cron($a, $b) {
154 // 100 is a nice sane number. Maybe this should be configurable.
155 retriever_retrieve_items(100);
// Running count of resources fetched by retriever_retrieve_items(). It is a
// global, so the count carries over between calls made in the same PHP process.
159 $retriever_item_count = 0;
/**
 * Fetches queued resources, then applies already-completed resources to items
 * that are still marked unfinished.
 *
 * @param int $max_items  upper bound on resources to fetch; the global counter
 *                        of resources already fetched is subtracted from it
 */
161 function retriever_retrieve_items($max_items) {
162 global $retriever_item_count;
// Retry schedule as (amount, unit) pairs. Entry i is the wait required after
// i previous tries, as expressed by the `num-tries` clause built below.
164 $retriever_schedule = array(array(1,'minute'),
172 $schedule_clauses = array();
173 for ($i = 0; $i < count($retriever_schedule); $i++) {
174 $num = $retriever_schedule[$i][0];
175 $unit = $retriever_schedule[$i][1];
176 array_push($schedule_clauses,
177 '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . dbesc($unit) .
178 ', ' . intval($num) . ', `last-try`) < now())');
181 $retrieve_items = $max_items - $retriever_item_count;
182 logger('retriever_retrieve_items: asked for maximum ' . $max_items . ', already retrieved ' . $retriever_item_count . ', retrieve ' . $retrieve_items, LOGGER_DEBUG);
// Pick resources never tried, or whose retry delay has elapsed, oldest try first.
// NOTE(review): implode() is called with the legacy (array, separator) argument
// order, which PHP 8.0 no longer accepts; the joined SQL fragment is also passed
// through dbesc() — confirm both.
184 $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d",
185 dbesc(implode($schedule_clauses, ' OR ')),
186 intval($retrieve_items));
// NOTE(review): the body of this check is not visible; presumably it leaves
// the loop when nothing is waiting.
190 if (count($r) == 0) {
193 logger('retriever_retrieve_items: found ' . count($r) . ' waiting resources in database', LOGGER_DEBUG);
194 foreach ($r as $rr) {
195 retrieve_resource($rr);
196 $retriever_item_count++;
198 $retrieve_items = $max_items - $retriever_item_count;
200 while ($retrieve_items > 0);
202 /* Look for items that are waiting even though the resource has
203 * completed. This usually happens because we've been asked to
204 * retrospectively apply a config change. It could also happen
205 * due to a cron job dying or something. */
206 $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d",
207 intval($retrieve_items));
211 logger('retriever_retrieve_items: items waiting even though resource has completed: ' . count($r), LOGGER_DEBUG);
212 foreach ($r as $rr) {
213 $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']);
214 $retriever_item = retriever_get_retriever_item($rr['item']);
215 if (!$retriever_item) {
216 logger('retriever_retrieve_items: no retriever item with id ' . $rr['item'], LOGGER_NORMAL);
219 $item = retriever_get_item($retriever_item);
221 logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri'], LOGGER_NORMAL);
224 $retriever = get_retriever($item['contact-id'], $item['uid']);
// NOTE(review): this message reads $retriever_item['uid'], while the queries in
// this file read the column as 'item-uid' — confirm the key.
226 logger('retriever_retrieve_items: no retriever for item ' .
227 $retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . $item['contact-id'],
// Apply the stored resource, mark the retriever_item finished, then re-evaluate
// the item's visibility.
231 retriever_apply_completed_resource_to_item($retriever, $item, $resource[0]);
232 q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
233 intval($retriever_item['id']));
234 retriever_check_item_completed($item);
/**
 * Housekeeping for the retriever tables.
 *
 * Deletes resources completed more than a week ago and resources still
 * uncompleted three months after creation, then deletes every retriever_item
 * whose resource row no longer exists.
 */
function retriever_tidy() {
    q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)");
    q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)");

    $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null");
    // count() and foreach need an array; treat any non-array result (presumably
    // returned when the query fails) as "no orphans" instead of erroring.
    $orphans = is_array($r) ? $r : array();
    logger('retriever_tidy: found ' . count($orphans) . ' retriever_items with no retriever_resource');
    foreach ($orphans as $rr) {
        q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id']));
    }
}
/**
 * Makes one fetch attempt for a queued resource and records the outcome.
 *
 * The try counter, last-try time, HTTP code and redirect URL are always
 * written back to `retriever_resource`. The row is marked completed only when
 * the fetch returned a non-empty body, after which
 * retriever_resource_completed() is called.
 *
 * @param array $resource  row from `retriever_resource`
 */
249 function retrieve_resource($resource) {
// NOTE(review): $a (used below for the curl code and content type) and
// $redirects are not assigned on any line visible here — confirm where they
// come from.
252 logger('retrieve_resource: ' . ($resource['num-tries'] + 1) .
253 ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG);
// A fresh temporary file is handed to the fetch as its cookie jar.
// NOTE(review): confirm the file is removed once the fetch is done.
255 $cookiejar = tempnam(get_temppath(), 'cookiejar-retriever-');
256 $fetch_result = z_fetch_url($resource['url'], $resource['binary'], $redirects, array('cookiejar' => $cookiejar));
258 $resource['data'] = $fetch_result['body'];
259 $resource['http-code'] = $a->get_curl_code();
260 $resource['type'] = $a->get_curl_content_type();
261 $resource['redirect-url'] = $fetch_result['redirect_url'];
262 logger('retrieve_resource: got code ' . $resource['http-code'] .
263 ' retrieving resource ' . $resource['id'] .
264 ' final url ' . $resource['redirect-url'], LOGGER_DEBUG);
// Record the attempt whether or not any data came back.
265 q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1, `http-code` = %d, `redirect-url` = '%s' WHERE id = %d",
266 intval($resource['http-code']),
267 dbesc($resource['redirect-url']),
268 intval($resource['id']));
// Only a non-empty body counts as success; otherwise the row stays
// uncompleted.
269 if ($resource['data']) {
270 q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d",
271 dbesc($resource['data']),
272 dbesc($resource['type']),
273 intval($resource['id']));
274 retriever_resource_completed($resource);
/**
 * Looks up the retriever rule for a contact/user pair.
 *
 * @param int  $contact_id  contact the rule belongs to
 * @param int  $uid         owning user
 * @param bool $create      NOTE(review): presumably controls whether a missing
 *                          rule is inserted; the branching lines are not
 *                          visible here — confirm
 */
278 function get_retriever($contact_id, $uid, $create = false) {
279 $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
280 intval($contact_id), intval($uid));
// The `data` column holds JSON; decode it into an associative array.
282 $r[0]['data'] = json_decode($r[0]['data'], true);
// Insert an empty rule for this pair, then read the new row back.
286 q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)",
287 intval($uid), intval($contact_id));
288 $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d",
289 intval($contact_id), intval($uid));
/**
 * Loads a single `retriever_item` row by its id.
 *
 * @param int $id  retriever_item id
 * @return array|null  the row when exactly one matches; otherwise the miss is
 *                     logged and nothing is returned
 */
function retriever_get_retriever_item($id) {
    $rows = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id));
    if (count($rows) == 1) {
        return $rows[0];
    }

    logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_NORMAL);
    return;
}
/**
 * Finds the `item` row that a retriever_item refers to.
 *
 * @param array $retriever_item  row from `retriever_item`; its 'item-uri',
 *                               'item-uid' and 'contact-id' fields are read
 * @return array|null  the item row when exactly one matches; otherwise the
 *                     mismatch is logged and nothing is returned
 */
function retriever_get_item($retriever_item) {
    // Name the lookup keys once: the log message below interpolates them, and
    // they were previously undefined at that point.
    $uri = $retriever_item['item-uri'];
    $uid = intval($retriever_item['item-uid']);
    $cid = intval($retriever_item['contact-id']);

    $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d",
               dbesc($uri),
               $uid,
               $cid);
    if (count($items) != 1) {
        logger('retriever_get_item: unexpected number of results ' .
               count($items) . " when searching for item $uri $uid $cid", LOGGER_NORMAL);
        return;
    }
    return $items[0];
}
/**
 * Applies a completed resource to one waiting retriever_item.
 *
 * Loads the retriever_item, its rule and its item, applies the resource to
 * the item, marks the retriever_item finished and re-evaluates the item's
 * visibility.
 *
 * @param int   $retriever_item_id  id of the `retriever_item` row
 * @param array $resource           completed `retriever_resource` row
 */
316 function retriever_item_completed($retriever_item_id, $resource) {
317 logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG);
319 $retriever_item = retriever_get_retriever_item($retriever_item_id);
320 if (!$retriever_item) {
323 // Note: the retriever might be null. Doesn't matter.
324 $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']);
325 $item = retriever_get_item($retriever_item);
// NOTE(review): no check on $item is visible before this call — confirm a
// missing item is handled on the lines not shown.
330 retriever_apply_completed_resource_to_item($retriever, $item, $resource);
332 q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d",
333 intval($retriever_item['id']));
334 retriever_check_item_completed($item);
/**
 * Notifies every retriever_item that is attached to a resource that the
 * resource has been fetched.
 *
 * @param array $resource  `retriever_resource` row; 'id' and 'url' are read
 */
function retriever_resource_completed($resource) {
    $resource_id = $resource['id'];
    logger('retriever_resource_completed: id ' . $resource_id . ' url ' . $resource['url'], LOGGER_DEBUG);

    $waiting_items = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource_id);
    foreach ($waiting_items as $waiting_item) {
        retriever_item_completed($waiting_item['id'], $resource);
    }
}
/**
 * Re-runs a retriever rule over a contact's most recently received items.
 *
 * Each selected item (and its thread row) is hidden first, then handed to
 * retriever_on_item_insert() as though it had just arrived.
 *
 * @param array $retriever  rule row; 'contact-id' selects the items
 * @param int   $num        how many of the newest items to reprocess
 */
function apply_retrospective($retriever, $num) {
    $contact_id = intval($retriever['contact-id']);
    $limit = intval($num);

    $recent_items = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d",
                      $contact_id, $limit);
    foreach ($recent_items as $item) {
        $item_id = $item['id'];
        q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item_id);
        q('UPDATE `thread` SET `visible` = 0 WHERE `iid` = %d', $item_id);
        retriever_on_item_insert($retriever, $item);
    }
}
/**
 * Queues retrieval of the full content for a newly stored item.
 *
 * Does nothing unless a rule with an id is supplied and the rule is enabled.
 * When the rule defines a pattern, the item's permalink is rewritten with it
 * before being queued; otherwise the permalink is queued as it is.
 *
 * @param array|null $retriever  rule row with decoded 'data', or empty
 * @param array      $item       item row, passed by reference on to
 *                               add_retriever_item()
 */
function retriever_on_item_insert($retriever, &$item) {
    if (!$retriever || !$retriever['id']) {
        logger('retriever_on_item_insert: No retriever supplied', LOGGER_NORMAL);
        return;
    }

    // The previous test was `!$enable == "on"`. PHP parses that as
    // `(!$enable) == "on"`, which is true exactly when $enable is falsy, so a
    // plain emptiness check is the same decision without the misleading
    // comparison (and without a notice when the key is absent).
    if (empty($retriever['data']['enable'])) {
        return;
    }

    $url = $item['plink'];
    if (!empty($retriever['data']['pattern'])) {
        $replacement = isset($retriever['data']['replace']) ? $retriever['data']['replace'] : '';
        $url = preg_replace('/' . $retriever['data']['pattern'] . '/', $replacement, $item['plink']);
        // preg_replace() returns null when the stored pattern is not a valid
        // regular expression; do not queue a null URL.
        if ($url === null) {
            logger('retriever_on_item_insert: invalid pattern ' . $retriever['data']['pattern'], LOGGER_NORMAL);
            return;
        }
        logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA);
    }

    $resource = add_retriever_resource($url);
    // Without a resource row there is nothing for the item to wait on.
    if (!$resource) {
        logger('retriever_on_item_insert: no resource for ' . $url, LOGGER_NORMAL);
        return;
    }
    add_retriever_item($item, $resource);
}
/**
 * Registers a URL in `retriever_resource` unless a row for it already exists.
 *
 * `data:` URLs are decoded immediately and stored as an already-completed
 * resource, keyed by 'md5://' plus the MD5 hash of the original URL. Any other
 * URL is inserted as a pending resource.
 *
 * @param string $url     URL to fetch
 * @param bool   $binary  stored as 1/0 in the `binary` column
 */
375 function add_retriever_resource($url, $binary = false) {
376 logger('add_retriever_resource: ' . $url, LOGGER_DEBUG);
378 $scheme = parse_url($url, PHP_URL_SCHEME);
379 if ($scheme == 'data') {
// Read the media type and payload straight out of the data: URL.
// NOTE(review): no fclose($fp) is visible here — confirm the handle is closed.
380 $fp = fopen($url, 'r');
381 $meta = stream_get_meta_data($fp);
382 $type = $meta['mediatype'];
383 $data = stream_get_contents($fp);
// The data: URL itself is replaced by a short surrogate key.
386 $url = 'md5://' . hash('md5', $url);
387 $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url));
390 logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
// NOTE(review): this logs $resource['type'], but the media type just read is
// held in $type — confirm which was intended.
394 logger('retrieve_resource: got data URL type ' . $resource['type'], LOGGER_DEBUG);
395 q("INSERT INTO `retriever_resource` (`type`, `binary`, `url`, `completed`, `data`) " .
396 "VALUES ('%s', %d, '%s', now(), '%s')",
398 intval($binary ? 1 : 0),
401 $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url));
404 retriever_resource_completed($resource);
// The length check matches the varchar(800) `url` column.
// NOTE(review): what happens after the log line is not visible — confirm the
// function stops here.
409 if (strlen($url) > 800) {
410 logger('add_retriever_resource: URL is longer than 800 characters', LOGGER_NORMAL);
413 $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url));
416 logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG);
// Not seen before: insert a pending row and read it back.
420 q("INSERT INTO `retriever_resource` (`binary`, `url`) " .
421 "VALUES (%d, '%s')", intval($binary ? 1 : 0), dbesc($url));
422 $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url));
/**
 * Records that an item is waiting on a resource by inserting a
 * `retriever_item` row, then reads back the id of the newest matching row.
 *
 * @param array $item      item row; 'uri', 'uid' and 'contact-id' are read
 * @param array $resource  `retriever_resource` row; 'id' and 'url' are read
 */
426 function add_retriever_item(&$item, $resource) {
427 logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
429 q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " .
430 "VALUES ('%s', %d, %d, %d)",
431 dbesc($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"]));
// ORDER BY id DESC puts the row just inserted first.
432 $r = q("SELECT id FROM `retriever_item` WHERE " .
433 "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC",
434 dbesc($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id']));
// NOTE(review): the condition guarding this failure log and the return values
// are not visible here.
436 logger("add_retriever_item: couldn't create retriever item for " .
437 $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'],
441 logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
/**
 * Extracts the character set named in a resource's content type.
 *
 * @param array $resource  resource row; $resource['type'] is read
 * @return string  when the type contains "charset=", everything after it,
 *                 trimmed. NOTE(review): the value returned when there is no
 *                 charset is not visible here.
 */
445 function retriever_get_encoding($resource) {
// `(.*)` is greedy, so any parameters following the charset are captured too.
447 if (preg_match('/charset=(.*)/', $resource['type'], $matches)) {
448 return trim(array_pop($matches));
/**
 * Runs an XSLT stylesheet, supplied as text, over a DOM document.
 *
 * @param string      $xslt_text  stylesheet source
 * @param DOMDocument $doc        document to transform
 * NOTE(review): the return statements are not visible here; callers assign the
 * result back to their document variable.
 */
453 function retriever_apply_xslt_text($xslt_text, $doc) {
455 logger('retriever_apply_xslt_text: empty XSLT text', LOGGER_NORMAL);
458 $xslt_doc = new DOMDocument();
459 if (!$xslt_doc->loadXML($xslt_text)) {
460 logger('retriever_apply_xslt_text: could not load XML', LOGGER_NORMAL);
463 $xp = new XsltProcessor();
464 $xp->importStylesheet($xslt_doc);
465 $result = $xp->transformToDoc($doc);
/**
 * Replaces an item's body with content extracted from a fetched page.
 *
 * The resource data is parsed as HTML or XML, filtered through the rule's
 * extract template and/or custom XSLT, passed through a URL-fixing template,
 * converted to BBCode, given a "Retrieved" footer linking to the permalink,
 * and saved to the `item` table.
 *
 * @param array $retriever  rule row with decoded 'data'
 * @param array $item       item row; 'body' is overwritten
 * @param array $resource   completed resource; 'data', 'type' and
 *                          'redirect-url' are read
 */
469 function retriever_apply_dom_filter($retriever, &$item, $resource) {
470 logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);
// Nothing to do when the rule has neither include clauses nor custom XSLT.
472 if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) {
475 if (!$resource['data']) {
476 logger('retriever_apply_dom_filter: no text to work with', LOGGER_NORMAL);
// Convert to HTML entities from the declared charset before parsing.
// NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding() is
// deprecated as of PHP 8.2 — confirm against the supported PHP versions.
480 $encoding = retriever_get_encoding($resource);
481 $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);
482 $doc = new DOMDocument('1.0', 'UTF-8');
483 if (strpos($resource['type'], 'html') !== false) {
// The @ silences the parser warnings that loadHTML() emits.
484 @$doc->loadHTML($content);
487 $doc->loadXML($content);
// Build the extraction stylesheet from the rule's settings.
490 $params = array('$spec' => $retriever['data']);
491 $extract_template = get_markup_template('extract.tpl', 'addon/retriever/');
492 $extract_xslt = replace_macros($extract_template, $params);
493 if ($retriever['data']['include']) {
494 $doc = retriever_apply_xslt_text($extract_xslt, $doc);
496 if ($retriever['data']['customxslt']) {
497 $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
500 logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_NORMAL);
// Root and directory URLs of the final (post-redirect) address are handed to
// the URL-fixing template.
504 $components = parse_url($resource['redirect-url']);
505 $rooturl = $components['scheme'] . "://" . $components['host'];
506 $dirurl = $rooturl . dirname($components['path']) . "/";
507 $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
508 $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/');
509 $fix_urls_xslt = replace_macros($fix_urls_template, $params);
510 $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
512 logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_NORMAL);
516 $item['body'] = html2bbcode($doc->saveXML());
517 if (!strlen($item['body'])) {
518 logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_NORMAL);
// Append a dated footer linking back to the original permalink, then save.
521 $item['body'] .= "\n\n" . t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
522 $item['body'] .= $item['plink'];
523 $item['body'] .= ']' . $item['plink'] . '[/url]';
524 q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d",
525 dbesc($item['body']), intval($item['id']));
/**
 * Finds the images referenced in an item's BBCode body and arranges for each
 * off-site one to be retrieved.
 *
 * @param array $item  item row; 'body' is scanned for [img] tags
 */
528 function retrieve_images(&$item) {
// [img=WxH]url[/img] form: the URL is the third capture group.
530 preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1);
// Plain [img]url[/img] form.
532 preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2);
533 $matches = array_merge($matches1[3], $matches2[1]);
534 logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
535 foreach ($matches as $url) {
// URLs that contain this site's base URL are skipped.
536 if (strpos($url, get_app()->get_baseurl()) === FALSE) {
537 $resource = add_retriever_resource($url, true);
// A resource that is not yet completed gets a retriever_item to wait on it.
// NOTE(review): the lines between the two calls below are not visible;
// presumably the transform runs only for a completed resource.
538 if (!$resource['completed']) {
539 add_retriever_item($item, $resource);
542 retriever_transform_images($item, $resource);
/**
 * Brings an item's `visible` flag in line with its outstanding retrievals.
 *
 * The item is hidden while any of its retriever_item rows is unfinished and
 * shown once none remain. The database is written only when the item has a
 * positive id and the flag actually changed.
 *
 * @param array $item  item row; 'visible' is updated in place
 */
function retriever_check_item_completed(&$item)
{
    $count_rows = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' .
                    'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0',
                    dbesc($item['uri']),
                    intval($item['uid']),
                    intval($item['contact-id']));
    $pending = $count_rows[0]['count(*)'];
    logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid']
           . ' '. $item['contact-id'] . ' waiting for ' . $pending . ' resources', LOGGER_DEBUG);

    $was_visible = $item['visible'];
    if ($pending) {
        $item['visible'] = 0;
    }
    else {
        $item['visible'] = 1;
    }

    // Unsaved items and unchanged flags need no write.
    if (!($item['id'] > 0) || ($was_visible == $item['visible'])) {
        return;
    }

    logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . ')', LOGGER_DEBUG);
    $visible = intval($item['visible']);
    $item_id = intval($item['id']);
    q("UPDATE `item` SET `visible` = %d WHERE `id` = %d", $visible, $item_id);
    q("UPDATE `thread` SET `visible` = %d WHERE `iid` = %d", $visible, $item_id);
}
/**
 * Applies a fetched resource to an item according to the resource's content
 * type: image resources go to retriever_transform_images(), HTML or XML
 * resources go to retriever_apply_dom_filter().
 *
 * @param array|null $retriever  rule row; the log line allows it to be empty
 * @param array      $item       item row, modified in place by the helpers
 * @param array      $resource   completed resource row
 */
570 function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) {
571 logger('retriever_apply_completed_resource_to_item: retriever ' .
572 ($retriever ? $retriever['id'] : 'none') .
573 ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG);
574 if (strpos($resource['type'], 'image') !== false) {
575 retriever_transform_images($item, $resource);
// NOTE(review): the lines between the image branch and this check are not
// visible; confirm how an empty $retriever is handled before its settings are
// read below.
580 if ((strpos($resource['type'], 'html') !== false) ||
581 (strpos($resource['type'], 'xml') !== false)) {
582 retriever_apply_dom_filter($retriever, $item, $resource);
// With the rule's 'images' setting on, queue the images of the new body too.
583 if ($retriever["data"]['images'] ) {
584 retrieve_images($item);
/**
 * Stores a fetched image in the `photo` table.
 *
 * The image dimensions are read with Imagick when that class exists, and
 * otherwise (or if Imagick did not set a width) with GD.
 *
 * @param array $item      item the image belongs to
 * @param array $resource  image resource; 'width' and 'height' are added to it
 * NOTE(review): the return statements are not visible here. The caller in this
 * file compares the result against false and otherwise uses it as the photo
 * hash.
 */
589 function retriever_store_photo($item, &$resource) {
590 $hash = photo_new_resource();
592 if (class_exists('Imagick')) {
594 $image = new Imagick();
595 $image->readImageBlob($resource['data']);
596 $resource['width'] = $image->getImageWidth();
597 $resource['height'] = $image->getImageHeight();
599 catch (Exception $e) {
600 logger("ImageMagick couldn't process image " . $resource['id'] . " " . $resource['url'] . ' length ' . strlen($resource['data']) . ': ' . $e->getMessage(), LOGGER_DEBUG);
// Fall back to GD when no width has been recorded yet.
604 if (!array_key_exists('width', $resource)) {
605 $image = @imagecreatefromstring($resource['data']);
606 if ($image === false) {
607 logger("Couldn't process image " . $resource['id'] . " " . $resource['url'], LOGGER_DEBUG);
610 $resource['width'] = imagesx($image);
611 $resource['height'] = imagesy($image);
612 imagedestroy($image);
// The stored filename is the last path segment of the resource URL.
615 $url_components = parse_url($resource['url']);
616 $filename = basename($url_components['path']);
617 if (!strlen($filename)) {
// NOTE(review): the uid below is read from $item['item-uid'], while the caller
// in this file passes an item whose user id is read elsewhere as $item['uid']
// — confirm the key.
620 $r = q("INSERT INTO `photo`
621 ( `uid`, `contact-id`, `guid`, `resource-id`, `created`, `edited`, `filename`, `type`, `album`, `height`, `width`, `datasize`, `data` )
622 VALUES ( %d, %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, %d, '%s' )",
623 intval($item['item-uid']),
624 intval($item['contact-id']),
627 dbesc(datetime_convert()),
628 dbesc(datetime_convert()),
630 dbesc($resource['type']),
631 dbesc('Retrieved Images'),
632 intval($resource['height']),
633 intval($resource['width']),
634 intval(strlen($resource['data'])),
635 dbesc($resource['data'])
/**
 * Stores a fetched image locally and points the item's body at the local copy.
 *
 * Every occurrence of the resource URL in the body is replaced with this
 * site's /photo/<hash> URL. The item row is written only when the body
 * actually changed.
 *
 * @param array $item      item row; 'body' may be rewritten in place
 * @param array $resource  fetched image resource
 */
function retriever_transform_images(&$item, $resource) {
    if (!$resource["data"]) {
        logger('retriever_transform_images: no data available for '
               . $resource['id'] . ' ' . $resource['url'], LOGGER_NORMAL);
        return;
    }

    $photo_hash = retriever_store_photo($item, $resource);
    if ($photo_hash === false) {
        logger('retriever_transform_images: unable to store photo '
               . $resource['id'] . ' ' . $resource['url'], LOGGER_NORMAL);
        return;
    }

    $local_url = get_app()->get_baseurl() . '/photo/' . $photo_hash;
    logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' .
           $local_url . ' in item ' . $item['plink'], LOGGER_DEBUG);

    $rewritten_body = str_replace($resource["url"], $local_url, $item['body']);
    if ($rewritten_body !== $item['body']) {
        $item['body'] = $rewritten_body;
        q("UPDATE `item` SET `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d",
          dbesc($item['body']),
          dbesc($item['plink']),
          intval($item['uid']),
          intval($item['contact-id']));
    }
}
/**
 * Renders the addon's pages into $a->page['content'].
 *
 * /retriever/help lists the user's feed contacts alongside the help text.
 * /retriever/<contact id> shows the rule editor for that contact and, when the
 * form has been posted, saves the rule first.
 *
 * @param object $a  application object; argv, page and get_baseurl() are used
 */
671 function retriever_content($a) {
// NOTE(review): the condition guarding this message is not visible; presumably
// it is shown when no local user is logged in.
673 $a->page['content'] .= "<p>Please log in</p>";
676 if ($a->argv[1] === 'help') {
677 $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'",
// Give every feed contact a link to its own rule page.
679 foreach ($feeds as $k=>$v) {
680 $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id'];
682 $template = get_markup_template('/help.tpl', 'addon/retriever/');
683 $a->page['content'] .= replace_macros($template, array(
684 '$config' => $a->get_baseurl() . '/settings/addon',
685 '$feeds' => $feeds));
689 $retriever = get_retriever($a->argv[1], local_user(), false);
// Form submitted: make sure a rule row exists, then rebuild its data from
// scratch out of the posted fields.
691 if (x($_POST["id"])) {
692 $retriever = get_retriever($a->argv[1], local_user(), true);
693 $retriever["data"] = array();
694 foreach (array('pattern', 'replace', 'enable', 'images', 'customxslt') as $setting) {
695 if (x($_POST['retriever_' . $setting])) {
696 $retriever["data"][$setting] = $_POST['retriever_' . $setting];
// Fields named retriever-(include|exclude)-<n>-(element|attribute|value) are
// collected into the matching clause of the matching list.
699 foreach ($_POST as $k=>$v) {
700 if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) {
701 $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v;
// NOTE(review): 'include' and 'exclude' exist only if such fields were posted;
// confirm these loops are safe when a list is absent.
704 // You've gotta have an element, even if it's just "*"
705 foreach ($retriever['data']['include'] as $k=>$clause) {
706 if (!$clause['element']) {
707 unset($retriever['data']['include'][$k]);
710 foreach ($retriever['data']['exclude'] as $k=>$clause) {
711 if (!$clause['element']) {
712 unset($retriever['data']['exclude'][$k]);
715 q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d",
716 dbesc(json_encode($retriever["data"])), intval($retriever["id"]));
717 $a->page['content'] .= "<p><b>Settings Updated";
// NOTE(review): the count applied comes from $_POST["retriever_retrospective"],
// but the message prints $_POST["apply"], and prints it unescaped — confirm.
718 if (x($_POST["retriever_retrospective"])) {
719 apply_retrospective($retriever, $_POST["retriever_retrospective"]);
720 $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts";
// NOTE(review): the tags were opened as <p><b> but are closed here as </p></b>.
722 $a->page['content'] .= ".</p></b>";
// Render the rule editor with the current (possibly just saved) settings.
725 $template = get_markup_template('/rule-config.tpl', 'addon/retriever/');
726 $a->page['content'] .= replace_macros($template, array(
730 $retriever['data']['enable']),
734 $retriever["data"]['pattern'],
735 t('Regular expression matching part of the URL to replace')),
739 $retriever["data"]['replace'],
740 t('Text to replace matching part of above regular expression')),
743 t('Download Images'),
744 $retriever['data']['images']),
745 '$retrospective' => array(
746 'retriever_retrospective',
747 t('Retrospectively Apply'),
749 t('Reapply the rules to this number of posts')),
750 '$customxslt' => array(
751 'retriever_customxslt',
753 $retriever['data']['customxslt'],
754 t("When standard rules aren't enough, apply custom XSLT to the article")),
755 '$title' => t('Retrieve Feed Content'),
756 '$help' => $a->get_baseurl() . '/retriever/help',
757 '$help_t' => t('Get Help'),
758 '$submit_t' => t('Submit'),
759 '$submit' => t('Save Settings'),
760 '$id' => ($retriever["id"] ? $retriever["id"] : "create"),
761 '$tag_t' => t('Tag'),
762 '$attribute_t' => t('Attribute'),
763 '$value_t' => t('Value'),
764 '$add_t' => t('Add'),
765 '$remove_t' => t('Remove'),
766 '$include_t' => t('Include'),
767 '$include' => $retriever['data']['include'],
768 '$exclude_t' => t('Exclude'),
769 '$exclude' => $retriever["data"]['exclude']));
/**
 * Handler registered for the 'contact_photo_menu' hook: adds a "Retriever"
 * entry, linking to the contact's rule page, to the menu of feed contacts.
 *
 * @param object $a     application object; get_baseurl() is used
 * @param array  $args  hook data; 'contact' is read and 'menu' is extended
 */
774 function retriever_contact_photo_menu($a, &$args) {
// Only contacts whose network is "feed" get the entry.
778 if ($args["contact"]["network"] == "feed") {
779 $args["menu"][ 'retriever' ] = array(t('Retriever'), $a->get_baseurl() . '/retriever/' . $args["contact"]['id']);
/**
 * Handler registered for the 'post_remote' hook, run on an incoming item.
 *
 * Applies the contact's retriever rule, optionally re-renders the body to
 * resolve oEmbeds, optionally queues all of the item's images, and finally
 * sets the item's visibility according to what is still outstanding.
 *
 * @param object $a     application object (not used on the visible lines)
 * @param array  $item  the incoming item, modified in place
 */
783 function retriever_post_remote_hook(&$a, &$item) {
784 logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG);
786 $retriever = get_retriever($item['contact-id'], $item["uid"], false);
// NOTE(review): the condition around this call is not visible here.
788 retriever_on_item_insert($retriever, $item);
// Per-user 'oembed' setting.
791 if (get_pconfig($item["uid"], 'retriever', 'oembed')) {
792 // Convert to HTML and back to take advantage of bbcode's resolution of oembeds.
793 $body = html2bbcode(bbcode($item['body']));
795 $item['body'] = $body;
// Per-user 'all_photos' setting.
// NOTE(review): retrieve_images() is declared with a single parameter; the
// extra null argument is ignored.
798 if (get_pconfig($item["uid"], 'retriever', 'all_photos')) {
799 retrieve_images($item, null);
802 retriever_check_item_completed($item);
/**
 * Handler registered for the 'plugin_settings' hook: appends the addon's
 * per-user settings form to the settings page markup.
 *
 * @param object $a  application object; get_baseurl() is used
 * @param string $s  settings page HTML, appended to in place
 */
805 function retriever_plugin_settings(&$a,&$s) {
// Current values of the two per-user options.
806 $all_photos = get_pconfig(local_user(), 'retriever', 'all_photos');
807 $oembed = get_pconfig(local_user(), 'retriever', 'oembed');
808 $template = get_markup_template('/settings.tpl', 'addon/retriever/');
809 $s .= replace_macros($template, array(
810 '$allphotos' => array(
811 'retriever_all_photos',
814 t('Check this to retrieve photos for all posts')),
819 t('Check this to attempt to retrieve embedded content for all posts - useful e.g. for Facebook posts')),
820 '$submit' => t('Save Settings'),
821 '$title' => t('Retriever Settings'),
822 '$help' => $a->get_baseurl() . '/retriever/help'));
/**
 * Handler registered for the 'plugin_settings_post' hook: saves the addon's
 * two per-user options from the submitted settings form.
 *
 * A truthy posted value is stored; a missing or falsy one deletes the stored
 * option. Neither parameter is used; the values are read from $_POST.
 *
 * @param object $a     application object
 * @param array  $post  posted data as handed over by the hook
 */
function retriever_plugin_settings_post($a,$post) {
    foreach (array('all_photos', 'oembed') as $setting) {
        $field = 'retriever_' . $setting;
        // An unchecked checkbox is simply absent from $_POST, so test with
        // empty() rather than reading a key that may not exist.
        if (!empty($_POST[$field])) {
            set_pconfig(local_user(), 'retriever', $setting, $_POST[$field]);
        }
        else {
            del_pconfig(local_user(), 'retriever', $setting);
        }
    }
}