Я пытаюсь написать алгоритм для нормализации дочерних элементов тега «p»
<p><i>this text</i> <i>is italic</i></p>
с ожидаемым результатом
<p><i>this text is italic</i></p>
Написанный мной алгоритм:
def add_style_to_spaces_between_tags(card_text)
doc = Nokogiri.HTML(card_text)
add_char = ""
# Iterate on all nodes within p tags
doc.search('p').children.each do |child|
if add_char != ""
first_text_node = find_first_text_node(child)
if first_text_node.present?
first_text_node.content = add_char + first_text_node.content
add_char = ""
end
next
end
#if the node is a space or a nbsp
if child.text == " " || child.text == 160.chr(Encoding::UTF_8)
prev_node = child.previous.name
next_node = child.next.name
# Only if the the surrounding nodes to space/nbsp are matching and the previous is not a br tag
# then set the add_char flag to move the space/nbsp to the next node
if prev_node != 'br' && prev_node == next_node
case(child.text)
when " "
add_char = child.content
when 160.chr(Encoding::UTF_8)
add_char = 160.chr(Encoding::UTF_8)
end
child.remove
end
end
end
doc.search('p').each do |p_node|
doc = merge_text_nodes(p_node)
end
card_text = doc.inner_html
card_text
end
def find_first_text_node(node)
result = nil
node.search('.//text()').each { |t|
if t.content.length > 0
result = t
break
end
}
result
end
def merge_text_nodes(node)
puts 'text node'
puts node
prev_is_text = false
newnodes = []
node.children.each do |element|
if element.text?
if prev_is_text
newnodes[-1].content += element.text
else
newnodes << element
end
element.remove
prev_is_text = true
else
newnodes << merge_text_nodes(element)
element.remove
prev_is_text = false
end
end
node.children.remove
newnodes.each do |item|
node.add_child(item)
end
return node
end
но результат:
<p><i>this text</i><i> is italic</i></p>
Как я могу получить желаемый результат?
Набор модульных тестов Rspe c:
describe "Italicize spaces between italics" do
it "production case" do
node = '<p><span tts:fontStyle="italic">der</span> <span tts:fontStyle="italic">Tri-Circle-D Ranch begrüßt:</span></p>'
expect(helper.add_style_to_spaces_between_tags(node)).
to eq('<p><span tts:fontstyle="italic">der Tri-Circle-D Ranch begrüßt:</span></p>')
end
it "production case with nbsp" do
node = '<p><span tts:fontStyle="italic">der</span> <span tts:fontStyle="italic">Tri-Circle-D Ranch begrüßt:</span></p>'
expect(helper.add_style_to_spaces_between_tags(node)).
to eq("<p><span tts:fontstyle=\"italic\">der\u00A0Tri-Circle-D Ranch begrüßt:</span></p>")
end
it "space between 2 i tags" do
node = '<p><i>this text</i> <i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq('<p><i>this text is italic</i></p>')
end
it "non breaking space between 2 i tags" do
node = '<p><i>this text</i> <i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq("<p><i>this text\u00A0is italic</i></p>")
end
it "No space between 2 i tags" do
node = '<p><i>this text</i><i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq('<p><i>this textis italic</i></p>')
end
it "space between 2 i tags with br included before space" do
node = '<p><i>this text</i><br> <i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq('<p><i>this text</i><br> <i>is italic</i></p>')
end
it "non breaking space between 2 i tags with br included before nbsp" do
node = '<p><i>this text</i><br> <i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq("<p><i>this text</i><br>\u00A0<i>is italic</i></p>")
end
it "space between 2 i tags with br included after space" do
node = '<p><i>this text</i> <br><i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq('<p><i>this text</i> <br><i>is italic</i></p>')
end
it "non breaking space between 2 i tags with br included after nbsp" do
node = '<p><i>this text</i> <br><i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq("<p><i>this text</i>\u00A0<br><i>is italic</i></p>")
end
it "space between 2 i tags with br included before and after space" do
node = '<p><i>this text</i> <br> <i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq('<p><i>this text</i> <br> <i>is italic</i></p>')
end
it "non breaking space between 2 i tags with br included before and after nbsp" do
node = '<p><i>this text</i> <br> <i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq("<p><i>this text</i>\u00A0<br>\u00A0<i>is italic</i></p>")
end
it "space between 2 i tags with a nested u tag in the first i tag" do
node = '<p><i><u>this text</u></i> <i>is italic</i></p>'
expect(helper.add_style_to_spaces_between_tags(node)).to eq('<p><i><u>this text</u> is italic</i></p>')
end
end