+
+
+ function fixListsIn(DOMNode $body) {
+ $toFix = array();
+
+ foreach ($body->childNodes as $node) {
+ if ($node->nodeType == XML_ELEMENT_NODE) {
+ $el = strtolower($node->nodeName);
+ if ($el == 'dl') {
+ $toFix[] = $node;
+ }
+ }
+ }
+
+ foreach ($toFix as $node) {
+ $this->fixList($node);
+ }
+ }
+
+ function fixList(DOMNode $list) {
+ $toFix = array();
+
+ foreach ($list->childNodes as $node) {
+ if ($node->nodeType == XML_ELEMENT_NODE) {
+ $el = strtolower($node->nodeName);
+ if ($el == 'dt' || $el == 'dd') {
+ $toFix[] = $node;
+ }
+ if ($el == 'dl') {
+ // Sublist.
+ // Technically, these can only appear inside a <dd>...
+ $this->fixList($node);
+ }
+ }
+ }
+
+ foreach ($toFix as $node) {
+ $this->fixListItem($node);
+ }
+ }
+
+ function fixListItem(DOMNode $item) {
+ // The HTML parser in libxml2 doesn't seem to properly handle
+ // many cases of implied close tags, apparently because it doesn't
+ // understand the nesting rules specified in the HTML DTD.
+ //
+ // This leads to sequences of adjacent <dt>s or <dd>s being incorrectly
+ // interpreted as parent->child trees instead of siblings:
+ //
+ // When parsing this input: "<dt>aaa <dt>bbb"
+ // should be equivalent to: "<dt>aaa </dt><dt>bbb</dt>"
+ // but we're seeing instead: "<dt>aaa <dt>bbb</dt></dt>"
+ //
+ // It does at least know that going from dt to dd, or dd to dt,
+ // should make a break.
+
+ $toMove = array();
+
+ foreach ($item->childNodes as $node) {
+ if ($node->nodeType == XML_ELEMENT_NODE) {
+ $el = strtolower($node->nodeName);
+ if ($el == 'dt' || $el == 'dd') {
+ // dt & dd cannot contain each other;
+ // This node was incorrectly placed; move it up a level!
+ $toMove[] = $node;
+ }
+ if ($el == 'dl') {
+ // Sublist.
+ // Technically, these can only appear inside a <dd>.
+ $this->fixList($node);
+ }
+ }
+ }
+
+ $parent = $item->parentNode;
+ $next = $item->nextSibling;
+ foreach ($toMove as $node) {
+ $item->removeChild($node);
+ $parent->insertBefore($node, $next);
+ $this->fixListItem($node);
+ }
+ }