]> git.mxchange.org Git - friendica.git/blob - src/Content/OEmbed.php
Add check for allowed URL in OEmbed
[friendica.git] / src / Content / OEmbed.php
1 <?php\r
2 \r
3 /**\r
4  * @file src/Content/OEmbed.php\r
5  */\r
6 \r
7 namespace Friendica\Content;\r
8 \r
9 use Friendica\Core\Cache;\r
10 use Friendica\Core\System;\r
11 use Friendica\Core\Config;\r
12 use Friendica\Database\DBM;\r
13 use Friendica\ParseUrl;\r
14 use dba;\r
15 use DOMDocument;\r
16 use DOMXPath;\r
17 use DOMNode;\r
18 \r
19 require_once 'include/dba.php';\r
20 require_once 'mod/proxy.php';\r
21 \r
22 /**\r
23  * Handles all OEmbed content fetching and replacement\r
24  *\r
25  * OEmbed is a standard used to allow an embedded representation of a URL on\r
26  * third party sites\r
27  *\r
28  * @see https://oembed.com\r
29  *\r
30  * @author Hypolite Petovan <mrpetovan@gmail.com>\r
31  */\r
class OEmbed
{
    /**
     * Callback for preg_replace_callback() used by BBCode2HTML().
     *
     * @param array $matches Regex matches; index 0 is the whole [embed] tag,
     *                       index 1 the URL between the tags.
     *
     * @return string HTML for the embedded object, or the untouched [embed]
     *                tag when no usable OEmbed data could be obtained.
     */
    public static function replaceCallback($matches)
    {
        $embedurl = $matches[1];
        $j = self::fetchURL($embedurl);

        // fetchURL() returns false when the payload is not a JSON object;
        // formatObject() needs an object, so keep the original markup instead.
        if (!is_object($j)) {
            return $matches[0];
        }

        return self::formatObject($j);
    }

    /**
     * @brief Get data from an URL to embed its content.
     *
     * Looks the URL up in the 'oembed' table, then in the cache, and only then
     * tries OEmbed autodiscovery on the remote page. When the remote answer is
     * an error (or a "rich" type while $no_rich_type is set) the object is
     * completed with data from ParseUrl::getSiteinfoCached().
     *
     * @param string $embedurl The URL from which the data should be fetched.
     * @param bool $no_rich_type If set to true rich type content won't be fetched.
     *
     * @return bool|object Returns object with embed content or false if no embedable
     *       content exists
     */
    public static function fetchURL($embedurl, $no_rich_type = false)
    {
        $embedurl = trim($embedurl, "'");
        $embedurl = trim($embedurl, '"');

        $a = get_app();

        $condition = array('url' => normalise_link($embedurl));
        $r = dba::select('oembed', array('content'), $condition, array('limit' => 1));

        if (DBM::is_result($r)) {
            $txt = $r["content"];
        } else {
            $txt = Cache::get($a->videowidth . $embedurl);
        }

        if (is_null($txt)) {
            $txt = "";

            // These media files should now be caught in bbcode.php
            // left here as a fallback in case this is called from another source
            $noexts = array("mp3", "mp4", "ogg", "ogv", "oga", "ogm", "webm");
            $ext = pathinfo(strtolower($embedurl), PATHINFO_EXTENSION);

            if (!in_array($ext, $noexts, true)) {
                // try oembed autodiscovery
                $txt = self::discoverJson($embedurl, $a->videowidth);
            }

            $txt = trim((string)$txt);

            if ($txt === "" || $txt[0] != "{") {
                $txt = '{"type":"error"}';
            } else { //save in cache
                $decoded = json_decode($txt);

                // Only persist payloads that really decode to an object and
                // are not an error answer.
                if (is_object($decoded) && (!isset($decoded->type) || $decoded->type != "error")) {
                    dba::insert('oembed', array('url' => normalise_link($embedurl),
                        'content' => $txt, 'created' => datetime_convert()), true);
                }

                Cache::set($a->videowidth . $embedurl, $txt, CACHE_DAY);
            }
        }

        $j = json_decode($txt);

        if (!is_object($j)) {
            return false;
        }

        // Always embed the SSL version
        if (isset($j->html)) {
            $j->html = str_replace(array("http://www.youtube.com/", "http://player.vimeo.com/"), array("https://www.youtube.com/", "https://player.vimeo.com/"), $j->html);
        }

        $j->embedurl = $embedurl;

        // An answer without a type cannot be rendered; handle it like an error
        // so that the fallback below fills in what it can.
        if (!isset($j->type)) {
            $j->type = "error";
        }

        // If fetching information doesn't work, then improve via internal functions
        if (($j->type == "error") || ($no_rich_type && ($j->type == "rich"))) {
            $data = ParseUrl::getSiteinfoCached($embedurl, true, false);
            $j->type = $data["type"];

            if ($j->type == "photo") {
                $j->url = $data["url"];
            }

            if (isset($data["title"])) {
                $j->title = $data["title"];
            }

            if (isset($data["text"])) {
                $j->description = $data["text"];
            }

            // Use the first image as thumbnail, but only the fields it really has
            if (isset($data["images"][0]) && is_array($data["images"][0])) {
                $image = $data["images"][0];
                $fields = array('src' => 'thumbnail_url', 'width' => 'thumbnail_width', 'height' => 'thumbnail_height');
                foreach ($fields as $key => $property) {
                    if (isset($image[$key])) {
                        $j->$property = $image[$key];
                    }
                }
            }
        }

        // NOTE(review): arguments kept exactly as before; confirm against the
        // call_hooks() signature that $j is really handed to the hook.
        call_hooks('oembed_fetch_url', $embedurl, $j);

        return $j;
    }

    /**
     * @brief Renders an OEmbed object (as returned by fetchURL()) as HTML.
     *
     * All values taken from the OEmbed object originate from a remote server
     * and are escaped before they are placed into the markup.
     *
     * @param object $j OEmbed object; must carry "embedurl", should carry "type"
     *
     * @return string HTML with non ASCII characters converted to HTML entities
     */
    public static function formatObject($j)
    {
        $embedurl = $j->embedurl;
        $safe_url = self::escape($embedurl);
        $type = isset($j->type) ? $j->type : '';

        $jhtml = self::iframe($j->embedurl, (isset($j->width) ? $j->width : null), (isset($j->height) ? $j->height : null));
        $ret = "<span class='oembed " . self::escape($type) . "'>";
        switch ($type) {
            case "video":
                if (isset($j->thumbnail_url)) {
                    $tw = (isset($j->thumbnail_width) && intval($j->thumbnail_width)) ? intval($j->thumbnail_width) : 200;
                    $th = (isset($j->thumbnail_height) && intval($j->thumbnail_height)) ? intval($j->thumbnail_height) : 180;
                    // make sure we don't attempt divide by zero, fallback is a 1:1 ratio
                    $tr = (($th) ? $tw / $th : 1);

                    // The preview is always 120 high, the width follows the ratio
                    $th = 120;
                    $tw = $th * $tr;
                    $tpl = get_markup_template('oembed_video.tpl');
                    $ret .= replace_macros($tpl, array(
                        '$baseurl' => System::baseUrl(),
                        '$embedurl' => $embedurl,
                        '$escapedhtml' => base64_encode($jhtml),
                        '$tw' => $tw,
                        '$th' => $th,
                        '$turl' => $j->thumbnail_url,
                    ));
                } else {
                    // Append (not assign) so that the opening <span> stays
                    // balanced with the closing tag added below.
                    $ret .= $jhtml;
                }
                break;
            case "photo":
                if (isset($j->url)) {
                    $width_attr = isset($j->width) ? " width='" . self::escape($j->width) . "'" : "";
                    $ret .= "<img" . $width_attr . " src='" . self::escape(proxy_url($j->url)) . "'>";
                }
                break;
            case "link":
                break;
            case "rich":
                // not so safe..
                if (self::isAllowedURL($embedurl)) {
                    $ret .= proxy_parse_html($jhtml);
                }
                break;
        }

        // Does the remote HTML already contain a reference to the source URL?
        $has_source_link = ($embedurl != '') && isset($j->html) && (strpos($j->html, $embedurl) !== false);

        // add link to source if not present in "rich" type
        if ($type != 'rich' || !$has_source_link) {
            $ret .= "<h4>";
            if (isset($j->title)) {
                if (isset($j->provider_name)) {
                    $ret .= self::escape($j->provider_name) . ": ";
                }

                $ret .= "<a href='" . $safe_url . "' rel='oembed'>" . self::escape($j->title) . "</a>";
                if (isset($j->author_name)) {
                    $ret .= " (" . self::escape($j->author_name) . ")";
                }
            } else {
                $embedlink = "";
                if (isset($j->provider_name)) {
                    $embedlink .= $j->provider_name;
                }

                if (isset($j->author_name)) {
                    if ($embedlink != "") {
                        $embedlink .= ": ";
                    }

                    $embedlink .= $j->author_name;
                }

                // Without any usable metadata the URL itself is the link text,
                // so that the source link is never lost.
                if (trim($embedlink) == "") {
                    $embedlink = $embedurl;
                }

                $ret .= "<a href='" . $safe_url . "' rel='oembed'>" . self::escape($embedlink) . "</a>";
            }
            $ret .= "</h4>";
        } else {
            // add <a> for html2bbcode conversion
            $ret .= "<a href='" . $safe_url . "' rel='oembed'>" . $safe_url . "</a>";
        }
        $ret .= "</span>";
        $ret = str_replace("\n", "", $ret);
        return self::toHtmlEntities($ret);
    }

    /**
     * @brief Replaces every [embed]url[/embed] tag in the text with its HTML.
     *
     * When the "system.no_oembed" setting is enabled the tags are replaced by
     * a placeholder text instead of being fetched.
     *
     * @param string $text BBCode text
     *
     * @return string
     */
    public static function BBCode2HTML($text)
    {
        $stopoembed = Config::get("system", "no_oembed");
        if ($stopoembed == true) {
            return preg_replace("/\[embed\](.+?)\[\/embed\]/is", "<!-- oembed $1 --><i>" . t('Embedding disabled') . " : $1</i><!-- /oembed $1 -->", $text);
        }
        return preg_replace_callback("/\[embed\](.+?)\[\/embed\]/is", [__CLASS__, 'replaceCallback'], $text);
    }

    /**
     * Find <span class='oembed'>..<a href='url' rel='oembed'>..</a></span>
     * and replace it with [embed]url[/embed]
     *
     * @param string $text HTML text
     *
     * @return string The converted text, or the unchanged input when it does
     *                not mention "oembed" or cannot be parsed.
     */
    public static function HTML2BBCode($text)
    {
        // start parser only if 'oembed' is in text
        if (strpos($text, "oembed") === false) {
            return $text;
        }

        // convert non ascii chars to html entities
        $html_text = self::toHtmlEntities($text);

        // If it doesn't parse at all, just return the text.
        $dom = new DOMDocument();
        if (!@$dom->loadHTML($html_text)) {
            return $text;
        }

        $body = $dom->getElementsByTagName("body")->item(0);
        if (!$body) {
            return $text;
        }

        $xpath = new DOMXPath($dom);
        $entries = $xpath->query("//span[" . self::buildXPath("class", "oembed") . "]");

        foreach ($entries as $e) {
            $hrefs = $xpath->evaluate("a[@rel='oembed']/@href", $e);
            $href = $hrefs ? $hrefs->item(0) : null;

            // Spans without an OEmbed link are left as they are
            if (!is_null($href) && $e->parentNode) {
                $e->parentNode->replaceChild($dom->createTextNode("[embed]" . $href->nodeValue . "[/embed]"), $e);
            }
        }

        return self::getInnerHTML($body);
    }

    /**
     * @brief Tries OEmbed autodiscovery on a page and fetches the advertised JSON.
     *
     * The page is searched for <link> elements of the types
     * "application/json+oembed" and "text/json+oembed"; for each type the first
     * element is used. If both are present the answer of the latter one wins.
     *
     * @param string $embedurl URL of the page to inspect
     * @param mixed $maxwidth Value sent as "maxwidth" to the OEmbed endpoint
     *
     * @return string Raw answer of the OEmbed endpoint, empty if none was found
     */
    private static function discoverJson($embedurl, $maxwidth)
    {
        $redirects = 0;
        $html_text = fetch_url($embedurl, false, $redirects, 15, "text/*");
        if (!$html_text) {
            return "";
        }

        $dom = new DOMDocument();
        if (!@$dom->loadHTML($html_text)) {
            return "";
        }

        $xpath = new DOMXPath($dom);
        $txt = "";

        foreach (array('application/json+oembed', 'text/json+oembed') as $link_type) {
            $entries = $xpath->query("//link[@type='" . $link_type . "']");
            if (!$entries || !$entries->length) {
                continue;
            }

            $href = $entries->item(0)->getAttribute("href");
            if ($href == "") {
                continue;
            }

            // The endpoint URL doesn't necessarily carry a query string already
            $separator = (strpos($href, '?') === false) ? '?' : '&';
            $txt = fetch_url($href . $separator . 'maxwidth=' . $maxwidth);
        }

        return (string)$txt;
    }

    /**
     * @brief Generates the iframe HTML for an oembed attachment.
     *
     * Width and height are given by the remote, and are regularly too small for
     * the generated iframe.
     *
     * The width is entirely discarded for the actual width of the post, while fixed
     * height is used as a starting point before the inevitable resizing.
     *
     * Since the iframe is automatically resized on load, there are no need for ugly
     * and impractical scrollbars.
     *
     * @param string $src Original remote URL to embed
     * @param string $width Ignored, the iframe always uses 100%
     * @param string $height Falls back to 200 when empty or given in percent
     * @return string formatted HTML
     *
     * @see OEmbed::formatObject()
     */
    private static function iframe($src, $width, $height)
    {
        if (!$height || strpos((string)$height, '%') !== false) {
            $height = '200';
        }
        $width = '100%';

        // Only proxy OEmbed URLs to avoid mixed-content errors
        if (Config::get('system', 'ssl_policy') == SSL_POLICY_FULL && parse_url($src, PHP_URL_SCHEME) !== 'https') {
            $src = System::baseUrl() . '/oembed/' . base64url_encode($src);
        }

        return '<iframe onload="resizeIframe(this);" class="embed_rich" height="' . self::escape($height) . '" width="' . $width . '" src="' . self::escape($src) . '" allowfullscreen scrolling="no" frameborder="no">' . t('Embedded content') . '</iframe>';
    }

    /**
     * Generates an XPath predicate to select elements whose provided attribute
     * contains the provided value in a space-separated list.
     *
     * Padding the normalized attribute with spaces on both sides lets a single
     * contains() cover a value at the start, in the middle, at the end or on
     * its own, regardless of surrounding whitespace.
     *
     * @brief Generates attribute search XPath string
     *
     * @param string $attr Name of the attribute to seach
     * @param string $value Value to search in a space-separated list
     * @return string
     */
    private static function buildXPath($attr, $value)
    {
        // https://www.westhoffswelt.de/blog/2009/6/9/select-html-elements-with-more-than-one-css-class-using-xpath
        return "contains(concat(' ', normalize-space(@$attr), ' '), ' $value ')";
    }

    /**
     * Returns the inner XML string of a provided DOMNode
     *
     * @brief Returns the inner XML string of a provided DOMNode
     *
     * @param DOMNode $node
     * @return string
     */
    private static function getInnerHTML(DOMNode $node)
    {
        $innerHTML = '';
        foreach ($node->childNodes as $child) {
            $innerHTML .= $child->ownerDocument->saveXML($child);
        }
        return $innerHTML;
    }

    /**
     * Determines if rich content OEmbed is allowed for the provided URL
     *
     * Everything is allowed unless "system.no_oembed_rich_content" is set; in
     * that case the host has to match the comma separated list stored in
     * "system.allowed_oembed".
     *
     * @brief Determines if rich content OEmbed is allowed for the provided URL
     * @param string $url
     * @return boolean
     */
    private static function isAllowedURL($url)
    {
        if (!Config::get('system', 'no_oembed_rich_content')) {
            return true;
        }

        $domain = parse_url($url, PHP_URL_HOST);

        $str_allowed = Config::get('system', 'allowed_oembed', '');
        $allowed = explode(',', $str_allowed);

        return allowed_domain($domain, $allowed, true);
    }

    /**
     * @brief Escapes a remote supplied value for use in HTML text or attributes.
     *
     * Existing entities are not encoded a second time.
     *
     * @param mixed $value Scalar value; anything else yields an empty string
     * @return string
     */
    private static function escape($value)
    {
        if (!is_scalar($value)) {
            return '';
        }

        return htmlspecialchars((string)$value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    }

    /**
     * @brief Converts non ASCII characters of a text to HTML entities.
     *
     * @param string $text
     * @return string
     */
    private static function toHtmlEntities($text)
    {
        // mb_detect_encoding() returns false when it cannot decide
        $encoding = mb_detect_encoding($text);
        if (!$encoding) {
            $encoding = 'UTF-8';
        }

        return mb_convert_encoding($text, 'HTML-ENTITIES', $encoding);
    }
}