Попробуйте этот код (Python 3.x):
# HTML void elements: they never take a closing tag, so they are never
# pushed onto the open-tag stack and never get a synthesized '</...>'.
notags = (
    'img', 'br', 'hr',
    'area', 'base', 'col', 'embed', 'input', 'link', 'meta',
    'source', 'track', 'wbr',
)


def substring2(html, size):
    """Truncate *html* to *size* text characters, closing any open tags.

    Only characters outside ``<...>`` are counted. Markup is copied through
    unchanged, and every element still open at the cut point gets a closing
    tag appended (innermost first), so the result stays balanced.

    Args:
        html: The HTML fragment to shorten.
        size: Maximum number of text (non-markup) characters to keep.

    Returns:
        ``html`` unchanged when its total length does not exceed ``size``;
        an empty string when ``size`` is not positive; otherwise the
        truncated fragment with the missing closing tags appended.
    """
    if len(html) <= size:
        return html
    if size <= 0:
        # Nothing may be kept; without this guard the loop below would
        # always let one text character through before checking the limit.
        return ''

    pieces = []        # output characters, joined once at the end
    open_tags = []     # stack of element names awaiting a closing tag
    tag_chars = []     # characters of the tag currently being read
    in_tag = False     # must start False: input may begin with plain text
    count = 0          # text characters emitted so far

    for c in html:
        pieces.append(c)
        if c == '<':
            in_tag = True
        elif c == '>' and in_tag:
            in_tag = False
            raw = ''.join(tag_chars).strip()
            tag_chars = []
            if not raw or raw[0] in ('!', '?'):
                # Empty tag, comment, doctype or processing instruction:
                # none of these opens an element.
                pass
            elif raw[0] == '/':
                parts = raw[1:].split()
                name = parts[0].lower() if parts else ''
                # Ignore stray closing tags instead of popping an empty stack.
                if name not in notags and open_tags:
                    open_tags.pop()
            else:
                name = raw.split()[0].rstrip('/')
                # Check the whole tag for a trailing '/', so self-closing
                # tags that carry attributes are recognised too.
                self_closing = raw.endswith('/')
                if name and not self_closing and name.lower() not in notags:
                    open_tags.append(name)
        elif in_tag:
            tag_chars.append(c)
        else:
            # Plain text, including a literal '>' that appears outside a tag.
            count += 1
            if count >= size:
                break

    pieces.extend('</{0}>'.format(name) for name in reversed(open_tags))
    return ''.join(pieces)
# Demo: print the sample markup, then the same markup truncated to
# several text lengths.
s = (
    '<div class="main">html <code>substring</code> function written by '
    '<span>imxylz</span>, using <a href="http://www.python.org">python</a>'
    ' language</div>'
)
print(s)
for size in (30, 40, 55):
    shortened = substring2(s, size)
    print(shortened)
Вывод:
<div class="main">html <code>substring</code> function written by <span>imxylz</span>, using <a href="http://www.python.org">python</a> language</div>
<div class="main">html <code>substring</code> function writte</div>
<div class="main">html <code>substring</code> function written by <span>imxyl</span></div>
<div class="main">html <code>substring</code> function written by <span>imxylz</span>, using <a href="http://www.python.org">python</a></div>
1012 * более *
См. код на GitHub.
Другой вопрос.