Я новичок в Python и пытаюсь с помощью регулярного выражения найти и заменить подстроки в файле.
мой шаблон:
(u'^([a-Z\xc3\xa1\xc3\xa9\xc3\xad\xc3\xb3\xc3\xba\xc3\xa0\xc3\xa3\xc3\xa2\xc3\xb5\xc3\xa2\xc3\xaa\xc3\xb4].*?)<', u'<denominacao>$1</denominacao><')
мой код:
# encoding: utf-8
# encoding: iso-8859-1
# encoding: win-1252
#coding: utf-8
import json
import sys
import codecs
import re
from datetime import datetime
#import os
#import fileinput
def myReplace(filename, replacePattern):
    r"""Apply every regex substitution listed in a JSON file to a text file.

    The JSON document must hold a ``"padrao"`` list whose items each provide
    ``"buscar"`` (a regular expression, compiled with ``re.X``) and
    ``"substituirPor"`` (the replacement template).  The substitutions are
    applied in list order to the whole content of ``filename`` and the result
    is written, UTF-8 encoded, to ``"new_" + filename.split('.')[0] + ".xml"``.

    The replacement is passed to ``re.sub`` as a template, so captured groups
    are referenced with ``\1`` or ``\g<1>``; a ``$1`` is written out literally.

    Args:
        filename: Path of the input text file (decoded as latin-1).
        replacePattern: Path of the JSON pattern file (decoded as latin-1).

    Raises:
        re.error: If a ``"buscar"`` value is not a valid regular expression
            (e.g. a reversed character range such as ``a-Z``) or a
            replacement template references a group that does not exist.
        KeyError: If ``"padrao"``, ``"buscar"`` or ``"substituirPor"`` is
            missing from the JSON document.
    """
    begin = datetime.now()

    with codecs.open(replacePattern, 'r', encoding='latin-1') as f:
        replacePattern_json = json.load(f)
    with codecs.open(filename, 'r', encoding='latin-1') as f:
        filedata = f.read()

    for pattern in replacePattern_json['padrao']:
        print(pattern['buscar'], pattern['substituirPor'])
        print('este padrão acima sera compilado agora')
        buscarPor = re.compile(pattern['buscar'], re.X)
        # Only the search expression is a regex.  The replacement is a
        # template: compiling it as a pattern rejects valid templates such
        # as "<tag>\1</tag>" (no group 1 exists inside the template itself).
        filedata = buscarPor.sub(pattern['substituirPor'], filedata)

    # NOTE(review): split('.')[0] keeps only the text before the FIRST dot, so
    # names like "./dir/file.txt" or "a.b.txt" are truncated - confirm intent.
    output_name = "new_" + filename.split('.')[0] + ".xml"
    # The context manager closes the output even if the write fails.
    with codecs.open(output_name, "w+", encoding='utf-8') as output:
        output.write(filedata)

    timeelapsed = datetime.now() - begin
    print(timeelapsed.seconds)
if __name__ == "__main__":
    # Command-line use: <script> <input file> <JSON pattern file>
    input_path = sys.argv[1]
    patterns_path = sys.argv[2]
    myReplace(input_path, patterns_path)
Фрагмент моего исходного JSON-файла:
{
"buscar":"^([a-Záéíóúàãâõâêô].*?)<",
"substituirPor":"<denominacao>$1</denominacao><"
}
Ошибка:
Traceback (most recent call last):
File "pageConverter.py", line 40, in <module>
myReplace(sys.argv[1], sys.argv[2])
File "pageConverter.py", line 28, in myReplace
buscarPor = re.compile(pattern['buscar'], re.X)
File "/usr/lib/python2.7/re.py", line 194, in compile
return _compile(pattern, flags)
File "/usr/lib/python2.7/re.py", line 251, in _compile
raise error, v # invalid expression
sre_constants.error: bad character range
Версия Python: 2.7.17
Пожалуйста, кто-нибудь может мне помочь?