Ух ты! Я попробовал, не ожидая большой разницы между вариантом с регулярными выражениями и вариантом без них, и, чёрт, я был удивлён!
Вот моя версия функции без регулярного выражения:
def firm_parserV2(line):
    """Print a description for every ``P<digit><code>`` request found in *line*.

    The scan is done with plain string methods instead of regular
    expressions.  A request is the letter ``P``, one decimal digit and a
    two-letter code; the recognised codes are ``RV``, ``RJ`` and ``RM``.
    For each recognised request, in order of appearance, the matching
    description followed by the full *line* is printed.

    Args:
        line: The text to scan.

    Returns:
        None.  Requests with an unrecognised code are ignored.
    """
    messages = {
        'RV': "Request cycle counter",
        'RJ': "Request adjustment date and status",
        'RM': "Request pipetting channel temperature",
    }

    matches = []
    pos = line.find('P')
    while pos >= 0:
        # Slicing (rather than indexing) yields '' when 'P' is the last
        # character, so a trailing 'P' cannot raise IndexError.
        digit = line[pos + 1:pos + 2]
        # isdecimal() only accepts characters that int() can convert;
        # isnumeric() would also let through things like '½'.
        if digit.isdecimal():
            matches.append((line[pos + 2:pos + 4], int(digit)))
        pos = line.find('P', pos + 1)

    for letters, _number in matches:
        message = messages.get(letters)
        if message is not None:
            print(message, line)
Я сравнил время выполнения на короткой строке (P4RJasd), и вот результаты:
+------------------------------+------------------------+
| Function | Time |
+------------------------------+------------------------+
| Original | .003547472953796386 ms |
+------------------------------+------------------------+
| Original with rx compilation | .002606389522552490 ms |
| outside the function | |
+------------------------------+------------------------+
| New version | .000612576007843017 ms |
+------------------------------+------------------------+
Вот полный код, который я использовал для сравнения трёх функций:
import re
import re
import time
import random
def firm_parser(line):
    """Baseline parser: search *line* for the three requests with regexes.

    Every call passes each pattern through ``re.compile`` again and then
    searches *line* with it.  The results are stored in a table keyed by
    request name; each entry holds the match object (``"prtn"``), a
    description (``"desc"``) and an empty details field (``"deet"``).

    Returns None; reporting is switched off so the function only performs
    the matching work.
    """
    cycle_match = re.search(re.compile(r"P\d{1}RV"), line)
    adjust_match = re.search(re.compile(r"P\d{1}RJ"), line)
    temp_match = re.search(re.compile(r"P\d{1}RM"), line)

    requests = {
        "CycleCount": {
            "prtn": cycle_match,
            "desc": "Request cycle counter",
            "deet": "",
        },
        "LastAdjustDate": {
            "prtn": adjust_match,
            "desc": "Request adjustment date and status",
            "deet": "",
        },
        "ChanTemp": {
            "prtn": temp_match,
            "desc": "Request pipetting channel temperature",
            "deet": "",
        },
    }

    for entry in requests.values():
        if not entry["prtn"]:
            continue
        # Output intentionally disabled:
        # print(f"{entry['desc']} {line}")
# Patterns compiled once at import time so that the function below does not
# call re.compile on every invocation.
rx_rv, rx_rj, rx_rm = (
    re.compile(r"P\dRV"),
    re.compile(r"P\dRJ"),
    re.compile(r"P\dRM"),
)


def firm_parser_no_rx(line):
    """Search *line* for the three requests using the precompiled patterns.

    Same table layout as the baseline parser (match object under
    ``"prtn"``, description under ``"desc"``, empty ``"deet"``), but the
    module-level ``rx_rv`` / ``rx_rj`` / ``rx_rm`` patterns are reused
    instead of being compiled inside the function.

    Returns None; reporting is switched off.
    """
    cycle_match = re.search(rx_rv, line)
    adjust_match = re.search(rx_rj, line)
    temp_match = re.search(rx_rm, line)

    requests = {
        "CycleCount": {
            "prtn": cycle_match,
            "desc": "Request cycle counter",
            "deet": "",
        },
        "LastAdjustDate": {
            "prtn": adjust_match,
            "desc": "Request adjustment date and status",
            "deet": "",
        },
        "ChanTemp": {
            "prtn": temp_match,
            "desc": "Request pipetting channel temperature",
            "deet": "",
        },
    }

    for entry in requests.values():
        if not entry["prtn"]:
            continue
        # Output intentionally disabled:
        # print(f"{entry['desc']} {line}")
def firm_parserV2(line):
    """Scan *line* for ``P<digit><code>`` requests without regular expressions.

    A request is the letter ``P``, one decimal digit and a two-letter code
    (``RV``, ``RJ`` or ``RM``).  Every candidate is collected and its
    description looked up; printing is left disabled so that only the
    parsing work is performed.

    Args:
        line: The text to scan.

    Returns:
        None.
    """
    messages = {
        'RV': "Request cycle counter",
        'RJ': "Request adjustment date and status",
        'RM': "Request pipetting channel temperature",
    }

    matches = []
    pos = line.find('P')
    while pos >= 0:
        # Slicing (rather than indexing) yields '' when 'P' is the last
        # character, so a trailing 'P' cannot raise IndexError.
        digit = line[pos + 1:pos + 2]
        # isdecimal() only accepts characters that int() can convert;
        # isnumeric() would also let through things like '½'.
        if digit.isdecimal():
            matches.append((line[pos + 2:pos + 4], int(digit)))
        pos = line.find('P', pos + 1)

    for letters, _number in matches:
        message = messages.get(letters)  # None for an unrecognised code
        # Output intentionally disabled:
        # if message is not None: print(message, line)
# Benchmark driver: call each parser `loop_nb` times on `test_string`,
# accumulate the elapsed time per function and print the average per call
# (in seconds).
loop_nb = 100000
test_string = 'P4RJasd'
funcs = [
    firm_parser,
    firm_parser_no_rx,
    firm_parserV2,
]
# Accumulated elapsed seconds, keyed by function name.
times = {func.__name__: 0 for func in funcs}
for _ in range(loop_nb):
    # Randomise the call order on every round so that no function is
    # systematically favoured by its position.
    random.shuffle(funcs)
    for func in funcs:
        # perf_counter() is the high-resolution clock intended for timing
        # short intervals; time.time() is a wall clock and may be too coarse.
        start = time.perf_counter()
        func(test_string)
        end = time.perf_counter()
        times[func.__name__] += (end - start)
# The loop variables are deliberately not called `time`, which would
# overwrite the imported `time` module.
for name, total in times.items():
    print(name + '\t', total / loop_nb)