*/
class NPF
{
- static public function fromBBCode(string $bbcode, int $uri_id): array
+ private static $heading_subtype = [];
+
+ /**
+ * Convert BBCode into NPF (Tumblr Neue Post Format)
+ *
+ * @param string $bbcode
+ * @param integer $uri_id
+ * @return array NPF
+ */
+ public static function fromBBCode(string $bbcode, int $uri_id): array
{
$bbcode = self::prepareBody($bbcode);
- $html = BBCode::convert($bbcode, false, BBCode::CONNECTORS);
+ $html = BBCode::convertForUriId($uri_id, $bbcode, BBCode::NPF);
if (empty($html)) {
return [];
}
$doc = new DOMDocument();
+
$doc->formatOutput = true;
if (!@$doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'))) {
return [];
}
- $element = $doc->getElementsByTagName('body')->item(0);
- echo $element->ownerDocument->saveHTML($element) . "\n";
+ self::setHeadingSubStyles($doc);
- $npf = [];
- $text = '';
- $formatting = [];
+ $element = $doc->getElementsByTagName('body')->item(0);
- self::routeChildren($element, $uri_id, true, [], $npf, $text, $formatting);
+ list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, true, []);
return self::addLinkBlockForUriId($uri_id, 0, $npf);
}
- static private function prepareBody(string $body): string
+ /**
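+ * Map each heading tag (h1-h6) found in the document to an NPF subtype.
+ * The highest-ranking tag that occurs becomes "heading1", every other
+ * occurring tag becomes "heading2". The result is stored in self::$heading_subtype.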
+ *
+ * @param DOMDocument $doc
+ * @return void
+ */
+ private static function setHeadingSubStyles(DOMDocument $doc)
{
- $shared = BBCode::fetchShareAttributes($body);
- if (!empty($shared)) {
- $body = $shared['shared'];
+ self::$heading_subtype = [];
+ foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $element) {
+ if ($doc->getElementsByTagName($element)->count() > 0) {
+ if (empty(self::$heading_subtype)) {
+ self::$heading_subtype[$element] = 'heading1';
+ } else {
+ self::$heading_subtype[$element] = 'heading2';
+ }
+ }
}
+ }
- $body = BBCode::removeAttachment($body);
+ /**
+ * Prepare the BBCode for the NPF conversion
+ *
+ * @param string $bbcode
+ * @return string
+ */
+ private static function prepareBody(string $bbcode): string
+ {
+ $shared = BBCode::fetchShareAttributes($bbcode);
+ if (!empty($shared)) {
+ $bbcode = $shared['shared'];
+ }
- $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
+ $bbcode = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $bbcode);
- if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
+ if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img=([^\[\]]*)\]([^\[\]]*)\[\/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) {
foreach ($pictures as $picture) {
if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
- $body = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $body);
+ $bbcode = str_replace($picture[0], "\n\n[img=" . str_replace('-1.', '-0.', $picture[2]) . "]" . $picture[3] . "[/img]\n\n", $bbcode);
}
}
}
- $body = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $body);
+ $bbcode = preg_replace("/\[img\=(.*?)\](.*?)\[\/img\]/ism", "\n\n[img=$1]$2[/img]\n\n", $bbcode);
- if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $body, $pictures, PREG_SET_ORDER)) {
+ if (preg_match_all("#\[url=([^\]]+?)\]\s*\[img\]([^\[]+?)\[/img\]\s*\[/url\]#ism", $bbcode, $pictures, PREG_SET_ORDER)) {
foreach ($pictures as $picture) {
if (preg_match('#/photo/.*-[01]\.#ism', $picture[2]) && (preg_match('#/photo/.*-0\.#ism', $picture[1]) || preg_match('#/photos/.*/image/#ism', $picture[1]))) {
- $body = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $body);
+ $bbcode = str_replace($picture[0], "\n\n[img]" . str_replace('-1.', '-0.', $picture[2]) . "[/img]\n\n", $bbcode);
}
}
}
- $body = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $body);
- $body = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", "\n\n[audio]$1[/audio]\n\n", $body);
- $body = preg_replace("/\[video\](.*?)\[\/video\]/ism", "\n\n[video]$1[/video]\n\n", $body);
+ $bbcode = preg_replace("/\[img\](.*?)\[\/img\]/ism", "\n\n[img]$1[/img]\n\n", $bbcode);
do {
- $oldbody = $body;
- $body = str_replace(["\n\n\n"], ["\n\n"], $body);
- } while ($oldbody != $body);
+ $oldbbcode = $bbcode;
+ $bbcode = str_replace(["\n\n\n"], ["\n\n"], $bbcode);
+ } while ($oldbbcode != $bbcode);
- return trim($body);
+ return trim($bbcode);
}
- static private function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array &$npf, string &$text, array &$formatting)
+ /**
+ * Walk recursively through the HTML
+ *
+ * @param DOMElement $element
+ * @param integer $uri_id
+ * @param boolean $parse_structure
+ * @param array $callstack
+ * @param array $npf
+ * @param string $text
+ * @param array $formatting
+ * @return array [$npf, $text, $formatting] after processing the child nodes
+ */
+ private static function routeChildren(DOMElement $element, int $uri_id, bool $parse_structure, array $callstack, array $npf = [], string $text = '', array $formatting = []): array
{
if ($parse_structure && $text) {
- self::addBlock($text, $formatting, $npf, $callstack);
+ list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
}
$callstack[] = $element->nodeName;
- $level = self::getLevelByCallstack($callstack);
+ $level = self::getLevelByCallstack($callstack);
foreach ($element->childNodes as $child) {
switch ($child->nodeName) {
case 'b':
case 'strong':
- self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
+ list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'bold', $callstack, $npf, $text, $formatting);
break;
-
+
case 'i':
case 'em':
- self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
+ list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'italic', $callstack, $npf, $text, $formatting);
break;
-
+
case 's':
- self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
+ list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, 'strikethrough', $callstack, $npf, $text, $formatting);
break;
case 'u':
case 'span':
- self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
+ list($npf, $text, $formatting) = self::addFormatting($child, $uri_id, '', $callstack, $npf, $text, $formatting);
break;
case 'hr':
$text .= "\n";
}
break;
-
+
case '#text':
$text .= $child->textContent;
break;
break;
case 'a':
- if ($text) {
- self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
- } else {
- $npf = self::addLinkBlock($child, $uri_id, $level, $npf);
- }
+ list($npf, $text, $formatting) = self::addInlineLink($child, $uri_id, $callstack, $npf, $text, $formatting);
break;
case 'img':
$npf = self::addImageBlock($child, $uri_id, $level, $npf);
break;
- case 'ol':
- case 'div':
- case 'h1':
- case 'h2':
- case 'h3':
- case 'h4':
- case 'h5':
- case 'h6':
- case 'blockquote':
- case 'p':
- case 'pre':
- case 'code':
- case 'ul':
- case 'li':
- case 'details':
- self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
+ case 'audio':
+ case 'video':
+ $npf = self::addMediaBlock($child, $uri_id, $level, $npf);
break;
default:
- print_r($npf);
- print_r($callstack);
- die($child . "\n");
+ list($npf, $text, $formatting) = self::routeChildren($child, $uri_id, true, $callstack, $npf, $text, $formatting);
+ break;
}
}
if ($parse_structure && $text) {
- self::addBlock($text, $formatting, $npf, $callstack);
+ list($npf, $text, $formatting) = self::addBlock($text, $formatting, $npf, $callstack);
}
+ return [$npf, $text, $formatting];
}
- static private function getLevelByCallstack($callstack): int
+ /**
+ * Return the correct indent level
+ *
+ * @param array $callstack
+ * @return integer
+ */
+ private static function getLevelByCallstack(array $callstack): int
{
$level = 0;
foreach ($callstack as $entry) {
return max(0, $level - 1);
}
- static private function getSubTypeByCallstack($callstack): string
+ /**
+ * Detect the subtype via the HTML element callstack
+ *
+ * @param array $callstack
+ * @param string $text Text of the block; a blockquote shorter than 100 characters becomes "quote", otherwise "indented"
+ * @return string
+ */
+ private static function getSubTypeByCallstack(array $callstack, string $text): string
{
$subtype = '';
foreach ($callstack as $entry) {
break;
case 'h1':
- $subtype = 'heading1';
+ $subtype = self::$heading_subtype[$entry];
break;
-
+
case 'h2':
- $subtype = 'heading1';
+ $subtype = self::$heading_subtype[$entry];
break;
-
+
case 'h3':
- $subtype = 'heading1';
+ $subtype = self::$heading_subtype[$entry];
break;
-
+
case 'h4':
- $subtype = 'heading2';
+ $subtype = self::$heading_subtype[$entry];
break;
-
+
case 'h5':
- $subtype = 'heading2';
+ $subtype = self::$heading_subtype[$entry];
break;
-
+
case 'h6':
- $subtype = 'heading2';
+ $subtype = self::$heading_subtype[$entry];
break;
-
+
case 'blockquote':
+ $subtype = mb_strlen($text) < 100 ? 'quote' : 'indented';
+ break;
+
case 'pre':
- case 'code':
$subtype = 'indented';
break;
+
+ case 'code':
+ $subtype = 'chat';
+ break;
}
}
return $subtype;
}
- static private function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array &$npf, string &$text, array &$formatting)
+ /**
+ * Add formatting for a text block
+ *
+ * @param DOMElement $element
+ * @param integer $uri_id
+ * @param string $type
+ * @param array $callstack
+ * @param array $npf
+ * @param string $text
+ * @param array $formatting
+ * @return array [$npf, $text, $formatting]
+ */
+ private static function addFormatting(DOMElement $element, int $uri_id, string $type, array $callstack, array $npf, string $text, array $formatting): array
{
$start = mb_strlen($text);
- self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
+
+ list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
if (!empty($type)) {
$formatting[] = [
'type' => $type
];
}
+ return [$npf, $text, $formatting];
}
- static private function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array &$npf, string &$text, array &$formatting)
+ /**
+ * Add an inline link for a text block
+ *
+ * @param DOMElement $element
+ * @param integer $uri_id
+ * @param array $callstack
+ * @param array $npf
+ * @param string $text
+ * @param array $formatting
+ * @return array [$npf, $text, $formatting]
+ */
+ private static function addInlineLink(DOMElement $element, int $uri_id, array $callstack, array $npf, string $text, array $formatting): array
{
$start = mb_strlen($text);
- self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
+
+ list($npf, $text, $formatting) = self::routeChildren($element, $uri_id, false, $callstack, $npf, $text, $formatting);
$attributes = [];
foreach ($element->attributes as $key => $attribute) {
'url' => $attributes['href']
];
}
+ return [$npf, $text, $formatting];
}
- static private function addBlock(string &$text, array &$formatting, array &$npf, array $callstack)
+ /**
+ * Add a text block
+ *
+ * @param string $text
+ * @param array $formatting
+ * @param array $npf
+ * @param array $callstack
+ * @return array [$npf, '', []] - the NPF with the new block appended, plus emptied text and formatting
+ */
+ private static function addBlock(string $text, array $formatting, array $npf, array $callstack): array
{
$block = [
- 'callstack' => $callstack,
- 'type' => 'text',
- 'text' => $text,
+ 'type' => 'text',
+ 'subtype' => '',
+ 'text' => $text,
];
if (!empty($formatting)) {
$block['indent_level'] = $level;
}
- $subtype = self::getSubTypeByCallstack($callstack);
+ $subtype = self::getSubTypeByCallstack($callstack, $text);
if ($subtype) {
$block['subtype'] = $subtype;
+ } else {
+ unset($block['subtype']);
}
$npf[] = $block;
- $text = '';
- $formatting = [];
+ return [$npf, '', []];
}
- static private function addPoster(array $media, array $block): array
+ /**
+ * Add the preview picture of a media entry as "poster" to the given block
+ *
+ * @param array $media
+ * @param array $block
+ * @return array
+ */
+ private static function addPoster(array $media, array $block): array
{
$poster = [];
if (!empty($media['preview'])) {
$poster['height'] = $media['preview-height'];
}
if (!empty($poster)) {
- $block['poster'] = $poster;
+ $block['poster'] = [$poster];
}
return $block;
}
- static private function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
+ /**
+ * Add a link block from the HTML attachment of a given post uri-id
+ *
+ * @param integer $uri_id
+ * @param integer $level
+ * @param array $npf
+ * @return array
+ */
+ private static function addLinkBlockForUriId(int $uri_id, int $level, array $npf): array
{
foreach (Post\Media::getByURIId($uri_id, [Post\Media::HTML]) as $link) {
$host = parse_url($link['url'], PHP_URL_HOST);
return $npf;
}
- static private function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
+ /**
+ * Add an image block
+ *
+ * @param DOMElement $element
+ * @param integer $uri_id
+ * @param integer $level
+ * @param array $npf
+ * @return array
+ */
+ private static function addImageBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
{
$attributes = [];
foreach ($element->attributes as $key => $attribute) {
return $npf;
}
- static private function addLinkBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
+ /**
+ * Add an audio or video block
+ *
+ * @param DOMElement $element
+ * @param integer $uri_id
+ * @param integer $level
+ * @param array $npf
+ * @return array
+ */
+ private static function addMediaBlock(DOMElement $element, int $uri_id, int $level, array $npf): array
{
$attributes = [];
foreach ($element->attributes as $key => $attribute) {
$attributes[$key] = trim($attribute->value);
}
- if (empty($attributes['href'])) {
+ if (empty($attributes['src'])) {
return $npf;
}
- $media = Post\Media::getByURL($uri_id, $attributes['href'], [Post\Media::AUDIO, Post\Media::VIDEO]);
+ $media = Post\Media::getByURL($uri_id, $attributes['src'], [Post\Media::AUDIO, Post\Media::VIDEO]);
if (!empty($media)) {
switch ($media['type']) {
case Post\Media::AUDIO:
$block = [
- 'type' => 'audio',
+ 'type' => 'audio',
'media' => [
'type' => $media['mimetype'],
'url' => $media['url'],
case Post\Media::VIDEO:
$block = [
- 'type' => 'video',
+ 'type' => 'video',
'media' => [
'type' => $media['mimetype'],
'url' => $media['url'],
}
} else {
$block = [
- 'type' => 'text',
- 'text' => $element->textContent,
+ 'type' => 'text',
+ 'text' => $element->textContent,
'formatting' => [
- 'start' => 0,
- 'end' => strlen($element->textContent),
- 'type' => 'link',
- 'url' => $attributes['href']
+ [
+ 'start' => 0,
+ 'end' => mb_strlen($element->textContent),
+ 'type' => 'link',
+ 'url' => $attributes['src']
+ ]
]
];
}