Я не думаю, что это можно сделать, если могли быть удалены многострочные куски текста. Однако если вы контролируете процесс тегирования, вы можете включить в тег исходный номер строки:
{ foo:12 }
и тогда восстановить его тривиально:
# Take the LAST run of digits in the tag: the line number is appended at the
# end, so digits inside the tag name (e.g. "{h1:7}") must not be picked up.
original = int(re.search(r'(\d+)\D*$', tag).group(1))
Вот изменённая версия вашего кода:
import re
def annotate_tags(content: str) -> str:
    """Append the 1-based line number to every ``{...}`` tag in *content*.

    Each tag ``{value}`` found on line N is rewritten as ``{value:N}``.
    Line endings are preserved, so the result has the same line layout as
    the input.

    Args:
        content: Text that may contain ``{...}`` tags.

    Returns:
        The text with every tag annotated with the number of the line it
        appears on.
    """
    tag_pattern = re.compile(r'(\{(?P<tag_value>[^}]+)\})')
    # splitlines(True) keeps the line terminators, so joining the processed
    # lines reproduces the original text apart from the annotated tags.
    return ''.join(
        tag_pattern.sub(r'{\g<tag_value>:%s}' % idx, line)
        for idx, line in enumerate(content.splitlines(True), 1)
    )
def modify(content: str) -> str:
    """Remove every supported, line-annotated tag from *content*.

    Supported tags are ``{tag:N}`` and ``{Tagish:N}`` (case-sensitive, with
    N a decimal line number). Each one is replaced by the empty string; all
    other text is left untouched.

    Args:
        content: Text whose tags carry a ``:N`` line-number suffix.

    Returns:
        The text with all supported tags stripped out.
    """
    # Maps each supported tag pattern to the text that replaces it.
    supported_tags = {
        re.compile(r'(\{tag:\d+\})'): r'',
        re.compile(r'(\{Tagish:\d+\})'): r'',
    }
    # sub() already leaves the text unchanged when nothing matches, so no
    # separate findall() pre-check is needed.
    for pattern, replace in supported_tags.items():
        content = pattern.sub(replace, content)
    return content
def highlight_nonmodified(content: str) -> str:
    """Prefix every ``{...}`` tag remaining in *content* with ``#``.

    Args:
        content: Text in which some ``{...}`` tags may still be present.

    Returns:
        The text with ``#`` inserted directly before each matched tag.
    """
    # re.S alone enables DOTALL; the pattern contains no letters, so a
    # case-insensitivity flag would have no effect.
    regex = re.compile(r'(\{.*?[^\}]+\})', re.S)
    return regex.sub(r'#\1', content)
def get_line(string_t: str, original: str) -> int:
    """Return the line number stored in the first annotated tag of *string_t*.

    An annotated tag has the form ``{value:N}``; N is returned as an ``int``.

    Args:
        string_t: Text to search for an annotated tag.
        original: Unused; kept so the call signature stays unchanged.

    Returns:
        The line number from the first ``{value:N}`` tag, or ``-1`` when
        *string_t* contains no such tag.
    """
    tag_pattern = re.compile(r'(\{[^}]+:(?P<line_no>\d+)\})')
    match = tag_pattern.search(string_t)
    if match is None:
        return -1
    # The captured group is a string; convert it so both return paths
    # honour the declared int return type.
    return int(match.group('line_no'))
def highlight_merge(original: str, modified: str) -> str:
    """Insert an error line above every line that contains a ``#{...}`` tag.

    For each line of *modified* in which a ``#``-prefixed tag is found, a
    line ``#Tag not supported at line<N>`` is emitted directly before it,
    where N is whatever ``get_line`` reports for that line.

    Args:
        original: Forwarded to ``get_line`` unchanged.
        modified: Text in which unsupported tags are already marked with a
            leading ``#``.

    Returns:
        *modified* with one error line inserted before each flagged line.
    """
    # DOTALL is passed via re.S only: a global inline "(?s)" placed after
    # the leading "#" is rejected with re.error on Python 3.11+.
    tag_regex = re.compile(r'#(\{.*?[^\}]+\})', re.S)
    merged = []
    # Rebuild the text line by line instead of calling str.replace() on the
    # whole text: replace() rewrites every occurrence of the line, so
    # repeated lines would collect several error headers and a line that is
    # a substring of another line would corrupt it.
    for line in modified.splitlines(True):
        if tag_regex.search(line):
            numer = get_line(line, original)
            merged.append("#Tag not supported at line{0}\n".format(numer))
        merged.append(line)
    return ''.join(merged)
if __name__ == '__main__':
    # Script entry point: read textfile.txt, prefix each line with its
    # number, run the tag pipeline and write the result to modified.txt.
    file = 'textfile.txt'
    with open(file, 'rt', encoding='utf-8') as f:
        # Build the numbered text with a single join rather than repeated
        # string concatenation inside the loop.
        raw = ''.join("{}. {}".format(i, s) for i, s in enumerate(f, 1))
    original = modified = raw
    modified = annotate_tags(modified)        # {tag} -> {tag:N}
    modified = modify(modified)               # strip supported tags
    modified = highlight_nonmodified(modified)  # mark the rest with '#'
    modified = highlight_merge(original, modified)  # add error lines
    with open("modified.txt", 'w', encoding='utf-8') as f:
        f.write(modified)
Она генерирует следующий вывод:
1. Here goes some text. A wonderful day. It's soon cristmas.
#Tag not supported at line2
2. Happy 2019, soon. #{Some useful tag!:2} Something else goes here.
3. Happy ending. Yeppe! See you.
4.
#Tag not supported at line5
5. #{begin:5}
6. Happy KKK!
7. Happy B-Day!
#Tag not supported at line8
8. #{end:8}
9.
10. Universe is cool!
11.
12. .
13.
#Tag not supported at line14
14. #{Slugish:14}. Here goes another line. #{Slugish:14} since this is a new sentence.
15.
16. endline.