{
static public function fromBBCode(string $bbcode, int $uri_id): array
{
- $npf = [];
-
$bbcode = self::prepareBody($bbcode);
$html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
}
$doc = new DOMDocument();
+ $doc->formatOutput = true;
if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
return [];
}
$element = $doc->getElementsByTagName('body')->item(0);
- $npf = self::routeChildren($element, $uri_id, 0, $npf);
+ echo $element->ownerDocument->saveHTML($element) . "\n";
+
+ $npf = [];
+ $text = '';
+ $formatting = [];
+
+ self::routeChildren($element, $uri_id, true, [], $npf, $text, $formatting);
- return self::addLinkBlock($uri_id, 0, $npf);
+ return self::addLinkBlockForUriId($uri_id, 0, $npf);
}
static private function prepareBody(string $body): string
return trim($body);
}
- static private function routeChildren(DOMElement $element, int $uri_id, int $level, array $npf): array
+ static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array &$npf, string &$text, array &$formatting)
{
- $text = '';
- $formatting = [];
+ if ($parse_structure && $text) {
+ self::addBlock($text, $formatting, $npf, $callstack);
+ }
+
+ $callstack[] = $element->nodeName;
+ $level = self::getLevelByCallstack($callstack);
foreach ($element->childNodes as $child) {
switch ($child->nodeName) {
- case 'blockquote':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addQuoteBlock($child, $uri_id, $level, $npf);
+ case 'b':
+ case 'strong':
+ self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
break;
- case 'h1':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1');
+ case 'i':
+ case 'em':
+ self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
break;
- case 'h2':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1');
+ case 's':
+ self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
break;
-
- case 'h3':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading1');
+
+ case 'u':
+ case 'span':
+ self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
break;
-
- case 'h4':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2');
+
+ case 'hr':
+ case 'br':
+ if (!empty($text)) {
+ $text .= "\n";
+ }
break;
-
- case 'h5':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2');
+
+ case '#text':
+ $text .= $child->textContent;
break;
-
- case 'h6':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'heading2');
+
+ case 'table':
+ case 'summary':
+ // Ignore tables and spoilers
break;
-
+
+ case 'a':
+ if ($text) {
+ self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
+ } else {
+ $npf = self::addLinkBlock($child, $uri_id, $level, $npf);
+ }
+ break;
+
+ case 'img':
+ $npf = self::addImageBlock($child, $uri_id, $level, $npf);
+ break;
+
+ case 'ol':
+ case 'div':
+ case 'h1':
+ case 'h2':
+ case 'h3':
+ case 'h4':
+ case 'h5':
+ case 'h6':
+ case 'blockquote':
+ case 'p':
+ case 'pre':
+ case 'code':
case 'ul':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addListBlock($child, $uri_id, $level, $npf, false, 0);
+ case 'li':
+ case 'details':
+ self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
break;
-
+
+ default:
+ print_r($npf);
+ print_r($callstack);
+ die($child . "\n");
+ }
+ }
+
+ if ($parse_structure && $text) {
+ self::addBlock($text, $formatting, $npf, $callstack);
+ }
+ }
+
+ static private function getLevelByCallstack($callstack): int
+ {
+ $level = 0;
+ foreach ($callstack as $entry) {
+ if (in_array($entry, ['ol', 'ul', 'blockquote'])) {
+ ++$level;
+ }
+ }
+ return max(0, $level - 1);
+ }
+
+ static private function getSubTypeByCallstack($callstack): string
+ {
+ $subtype = '';
+ foreach ($callstack as $entry) {
+ switch ($entry) {
case 'ol':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addListBlock($child, $uri_id, $level, $npf, true, 0);
+ $subtype = 'ordered-list-item';
break;
-
- case 'hr':
- case 'br':
- $text .= "\n";
+
+ case 'ul':
+ $subtype = 'unordered-list-item';
+ break;
+
+ case 'h1':
+ $subtype = 'heading1';
break;
- case 'pre':
- case 'code':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf, 'indented');
+ case 'h2':
+ $subtype = 'heading1';
break;
- case 'a':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addMediaBlock($child, $uri_id, $level, $npf);
+ case 'h3':
+ $subtype = 'heading1';
break;
- case 'table':
- // Unsupported
- // $child->ownerDocument->saveHTML($child)
+ case 'h4':
+ $subtype = 'heading2';
break;
- case 'img':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addImageBlock($child, $uri_id, $level, $npf);
+ case 'h5':
+ $subtype = 'heading2';
break;
-
- case 'p':
- case 'div':
- $npf = self::addText($text, $formatting, $npf);
- $npf = self::addTextBlock($child, $uri_id, $level, $npf);
+
+ case 'h6':
+ $subtype = 'heading2';
break;
-
- default:
- $text .= $child->textContent;
+
+ case 'blockquote':
+ case 'pre':
+ case 'code':
+ $subtype = 'indented';
break;
}
}
- return $npf;
+ return $subtype;
}
- static private function addText(string $text, array $formatting, array $npf): array
+ static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array &$npf, string &$text, array &$formatting)
{
- if (empty($text)) {
- return $npf;
+ $start = mb_strlen($text);
+ self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
+
+ if (!empty($type)) {
+ $formatting[] = [
+ 'start' => $start,
+ 'end' => mb_strlen($text),
+ 'type' => $type
+ ];
}
+ }
+
+ static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array &$npf, string &$text, array &$formatting)
+ {
+ $start = mb_strlen($text);
+ self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
+
+ $attributes = [];
+ foreach ($element->attributes as $key => $attribute) {
+ $attributes[$key] = trim($attribute->value);
+ }
+ if (!empty($attributes['href'])) {
+ $formatting[] = [
+ 'start' => $start,
+ 'end' => mb_strlen($text),
+ 'type' => 'link',
+ 'url' => $attributes['href']
+ ];
+ }
+ }
+
+ static private function addBlock(string &$text, array &$formatting, array &$npf, array $callstack)
+ {
$block = [
- 'type' => 'text',
- 'text' => $text,
+ 'callstack' => $callstack,
+ 'type' => 'text',
+ 'text' => $text,
];
if (!empty($formatting)) {
$block['formatting'] = $formatting;
}
+ $level = self::getLevelByCallstack($callstack);
+ if ($level > 0) {
+ $block['indent_level'] = $level;
+ }
+
+ $subtype = self::getSubTypeByCallstack($callstack);
+ if ($subtype) {
+ $block['subtype'] = $subtype;
+ }
+
$npf[] = $block;
+ $text = '';
+ $formatting = [];
+ }
- return $npf;
+ static private function addPoster(array $media, array $block): array
+ {
+ $poster = [];
+ if (!empty($media['preview'])) {
+ $poster['url'] = $media['preview'];
+ }
+ if (!empty($media['preview-width'])) {
+ $poster['width'] = $media['preview-width'];
+ }
+ if (!empty($media['preview-height'])) {
+ $poster['height'] = $media['preview-height'];
+ }
+ if (!empty($poster)) {
+ $block['poster'] = $poster;
+ }
+ return $block;
}
- static private function routeElement(DOMElement $element, int $uri_id, int $level, array $npf): array
+ static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
{
- switch ($element->nodeName) {
- case 'blockquote':
- $npf = self::addQuoteBlock($element, $uri_id, $level, $npf);
- break;
-
- case 'h1':
- $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1');
- break;
-
- case 'h2':
- $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1');
- break;
-
- case 'h3':
- $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading1');
- break;
-
- case 'h4':
- $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2');
- break;
-
- case 'h5':
- $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2');
- break;
-
- case 'h6':
- $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'heading2');
- break;
-
- case 'ul':
- $npf = self::addListBlock($element, $uri_id, $level, $npf, false, 0);
- break;
-
- case 'ol':
- $npf = self::addListBlock($element, $uri_id, $level, $npf, true, 0);
- break;
-
- case 'hr':
- case 'br':
- break;
-
- case 'pre':
- case 'code':
- $npf = self::addTextBlock($element, $uri_id, $level, $npf, 'indented');
- break;
-
- case 'a':
- $npf = self::addMediaBlock($element, $uri_id, $level, $npf);
- break;
-
- case 'table':
- // Unsupported
- // $element->ownerDocument->saveHTML($element)
- break;
-
- case 'img':
- $npf = self::addImageBlock($element, $uri_id, $level, $npf);
- break;
-
- default:
- $npf = self::addTextBlock($element, $uri_id, $level, $npf);
- break;
+ foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
+ $host = parse_url($link['url'], PHP_URL_HOST);
+ if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
+ $block = [
+ 'type' => 'video',
+ 'provider' => 'youtube',
+ 'url' => $link['url'],
+ ];
+ } elseif (in_array($host, ['vimeo.com'])) {
+ $block = [
+ 'type' => 'video',
+ 'provider' => 'vimeo',
+ 'url' => $link['url'],
+ ];
+ } elseif (in_array($host, ['open.spotify.com'])) {
+ $block = [
+ 'type' => 'audio',
+ 'provider' => 'spotify',
+ 'url' => $link['url'],
+ ];
+ } else {
+ $block = [
+ 'type' => 'link',
+ 'url' => $link['url'],
+ ];
+ if (!empty($link['name'])) {
+ $block['title'] = $link['name'];
+ }
+ if (!empty($link['description'])) {
+ $block['description'] = $link['description'];
+ }
+ if (!empty($link['author-name'])) {
+ $block['author'] = $link['author-name'];
+ }
+ if (!empty($link['publisher-name'])) {
+ $block['site_name'] = $link['publisher-name'];
+ }
+ }
+
+ if ($level > 0) {
+ $block['indent_level'] = $level;
+ }
+
+ $npf[] = self::addPoster($link, $block);
}
return $npf;
}
{
$attributes = [];
foreach ($element->attributes as $key => $attribute) {
- $attributes[$key] = $attribute->value;
+ $attributes[$key] = trim($attribute->value);
}
if (empty($attributes['src'])) {
return $npf;
$block['alt_text'] = $attributes['alt'];
}
- if (!empty($attributes['title']) && ($attributes['alt'] ?? '' != $attributes['title'])) {
+ if (!empty($attributes['title']) && (($attributes['alt'] ?? '') != $attributes['title'])) {
$block['caption'] = $attributes['title'];
}
return $npf;
}
- static private function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
+ static private function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
{
$attributes = [];
foreach ($element->attributes as $key => $attribute) {
- $attributes[$key] = $attribute->value;
+ $attributes[$key] = trim($attribute->value);
}
if (empty($attributes['href'])) {
return $npf;
return $npf;
}
-
- static private function addPoster(array $media, array $block): array
- {
- $poster = [];
- if (!empty($media['preview'])) {
- $poster['url'] = $media['preview'];
- }
- if (!empty($media['preview-width'])) {
- $poster['width'] = $media['preview-width'];
- }
- if (!empty($media['preview-height'])) {
- $poster['height'] = $media['preview-height'];
- }
- if (!empty($poster)) {
- $block['poster'] = $poster;
- }
- return $block;
- }
-
- static private function getTypeForNodeName(string $nodename): string
- {
- switch ($nodename) {
- case 'b':
- case 'strong':
- return 'bold';
-
- case 'i':
- case 'em':
- return 'italic';
-
- case 's':
- return 'strikethrough';
- }
- return '';
- }
-
- static private function fetchText(DOMElement $element, array $text = ['text' => '', 'formatting' => []]): array
- {
- foreach ($element->childNodes as $child) {
- $start = strlen($text['text']);
-
- $type = self::getTypeForNodeName($child->nodeName);
-
- if ($child->nodeName == 'br') {
- $text['text'] .= "\n";
- } elseif (($type != '') || in_array($child->nodeName, ['#text', 'code', 'a', 'p', 'span', 'u', 'img', 'summary', 'ul', 'blockquote', 'h3', 'ol'])) {
- $text['text'] .= $child->textContent;
- } else {
- echo $element->ownerDocument->saveHTML($element) . "\n";
- die($child->nodeName . "\n");
- }
- if (!empty($type)) {
- $text['formatting'][] = ['start' => $start, 'end' => strlen($text['text']), 'type' => $type];
- }
- }
- return $text;
- }
-
- static private function addQuoteBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
- {
- $block = ['type' => 'text', 'subtype' => 'indented'];
-
- if ($level > 0) {
- $block['indent_level'] = $level;
- }
-
- $npf[] = $block;
-
- $npf = self::routeChildren($element, $uri_id, 0, $npf);
-
- return $npf;
- }
-
- static private function addTextBlock(DOMElement $element, int $uri_id, int $level, array $npf, string $subtype = ''): array
- {
- if (empty($subtype) && ($element->textContent == $element->firstChild->textContent) && ($element->firstChild->nodeName != '#text')) {
- return self::routeElement($element->firstChild, $uri_id, $level, $npf);
- }
-
- $block = ['type' => 'text'];
-
- if (!empty($subtype)) {
- $block['subtype'] = $subtype;
- }
-
- $text = self::fetchText($element);
-
- $block['text'] = $text['text'];
- $block['formatting'] = $text['formatting'];
-
- if (empty($subtype)) {
- $type = self::getTypeForNodeName($element->nodeName);
- if (!empty($type)) {
- $block['formatting'][] = ['start' => 0, 'end' => strlen($block['text']), 'type' => $type];
- }
- }
-
- if (empty($block['formatting'])) {
- unset($block['formatting']);
- }
-
- if ($level > 0) {
- $block['indent_level'] = $level;
- }
-
- $npf[] = $block;
-
- return $npf;
- }
-
- static private function addListBlock(DOMElement $element, int $uri_id, int $level, array $npf, bool $ordered): array
- {
- foreach ($element->childNodes as $child) {
- switch ($child->nodeName) {
- case 'ul':
- $npf = self::addListBlock($child, $uri_id, $level++, $npf, false);
- case 'ol':
- $npf = self::addListBlock($child, $uri_id, $level++, $npf, true);
- case 'li':
- $text = self::fetchText($child);
-
- $block = [
- 'type' => 'text',
- 'subtype' => $ordered ? 'ordered-list-item' : 'unordered-list-item',
- 'text' => $text['text']
- ];
- if ($level > 0) {
- $block['indent_level'] = $level;
- }
- if (!empty($text['formatting'])) {
- $block['formatting'] = $text['formatting'];
- }
- $npf[] = $block;
- }
- }
-
- return $npf;
- }
-
- static private function addLinkBlock(int $uri_id, int $level, array $npf): array
- {
- foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
- $host = parse_url($link['url'], PHP_URL_HOST);
- if (in_array($host, ['www.youtube.com', 'youtu.be'])) {
- $block = [
- 'type' => 'video',
- 'provider' => 'youtube',
- 'url' => $link['url'],
- ];
- } elseif (in_array($host, ['vimeo.com'])) {
- $block = [
- 'type' => 'video',
- 'provider' => 'vimeo',
- 'url' => $link['url'],
- ];
- } elseif (in_array($host, ['open.spotify.com'])) {
- $block = [
- 'type' => 'audio',
- 'provider' => 'spotify',
- 'url' => $link['url'],
- ];
- } else {
- $block = [
- 'type' => 'link',
- 'url' => $link['url'],
- ];
- if (!empty($link['name'])) {
- $block['title'] = $link['name'];
- }
- if (!empty($link['description'])) {
- $block['description'] = $link['description'];
- }
- if (!empty($link['author-name'])) {
- $block['author'] = $link['author-name'];
- }
- if (!empty($link['publisher-name'])) {
- $block['site_name'] = $link['publisher-name'];
- }
- }
-
- if ($level > 0) {
- $block['indent_level'] = $level;
- }
-
- $npf[] = self::addPoster($link, $block);
- }
- return $npf;
- }
}