Я написал простой класс, который не использует рекурсию и должен работать быстрее и потреблять меньше памяти, но в его основе лежит та же примитивная идея, что и в ответе @Hrant Khachatrian (перебирать все элементы и искать дочерние теги):
/**
 * Non-recursive helper for finding and replacing self-contained (childless)
 * DOM nodes such as <br>.
 */
class DomScParser {
    /**
     * Collects every childless node below $parent_node whose nodeName equals $tag_name.
     *
     * The subtree is walked depth-first with an explicit stack instead of
     * recursion, so the nesting depth of the document does not affect the
     * PHP call stack.
     *
     * @param DOMNode $parent_node Root of the subtree to search. If it has no
     *                             children itself, it is the only candidate.
     * @param string  $tag_name    Node name to look for (exact, case-sensitive
     *                             comparison against DOMNode::$nodeName).
     *
     * @return DOMNode[] Matching nodes in document order; an empty array when
     *                   nothing matches.
     */
    public static function find(DOMNode $parent_node, $tag_name) {
        $tag_name = (string) $tag_name;

        // A childless root can only match itself. It is wrapped in an array so
        // that callers (e.g. replace()) always receive the same return type.
        if ($parent_node->firstChild === null) {
            return $parent_node->nodeName === $tag_name ? array($parent_node) : array();
        }

        $found_nodes = array();
        // One slot per tree level; each slot holds the next node to visit on
        // that level, or null once the level is exhausted.
        $stack = array($parent_node->firstChild);

        while ($stack) {
            $top = count($stack) - 1;
            $node = $stack[$top];

            // Level exhausted - step back to the parent level.
            if ($node === null) {
                array_pop($stack);
                continue;
            }

            // Advance this level to the sibling before descending, so the
            // sibling is visited after the current node's subtree.
            $stack[$top] = $node->nextSibling;

            if ($node->firstChild !== null) {
                // A node with children is never a match itself; descend into it.
                $stack[] = $node->firstChild;
            } elseif ($node->nodeName === $tag_name) {
                $found_nodes[] = $node;
            }
        }

        return $found_nodes;
    }

    /**
     * Replaces every childless $search_tag_name node below $parent_node.
     *
     * @param DOMNode        $parent_node     Root of the subtree to process.
     * @param string         $search_tag_name Node name of the nodes to replace.
     * @param DOMNode|string $replace_tag     Replacement node, or plain text
     *                                        that is turned into a text node.
     *                                        A deep copy of the replacement is
     *                                        inserted for every occurrence; the
     *                                        passed node itself is never moved.
     *
     * @return void
     */
    public static function replace(DOMNode $parent_node, $search_tag_name, $replace_tag) {
        // Plain text has to be wrapped in a text node created by the owning document.
        if (!$replace_tag instanceof DOMNode) {
            $dom = $parent_node instanceof DOMDocument
                ? $parent_node
                : $parent_node->ownerDocument;
            $replace_tag = $dom->createTextNode($replace_tag);
        }

        foreach (self::find($parent_node, $search_tag_name) as $found_tag) {
            $container = $found_tag->parentNode;
            // A detached node has no parent to replace it in.
            if ($container === null) {
                continue;
            }
            // Deep clone so that a replacement element keeps its own children.
            $container->replaceChild($replace_tag->cloneNode(true), $found_tag);
        }
    }
}
// Usage example: parse a small HTML fragment and replace every <br> element
// with a text node containing a newline.
$D = new DOMDocument;
$D->loadHTML('<span>test1<br />test2</span>');
DomScParser::replace($D, 'br', "\n");
P.S. Кроме того, он не должен ломаться на большом количестве вложенных тегов, так как не использует рекурсию. Пример HTML:
// Sample input for deep nesting: a single <br /> wrapped in 100 nested <b> elements.
$html=str_repeat('<b>',100).'<br />'.str_repeat('</b>',100);