Это мой ответ (написан без доступа к вашему классу, поэтому интерфейс немного отличается, но я прилагаю его как есть, чтобы вы могли проверить, достаточно ли он быстр):
======================= file graph_array.py ======================= ===
import collections
import numpy
def find_subtree(pids, subtree_id):
N = len(pids)
assert 1 <= subtree_id <= N
subtreeids = numpy.zeros(pids.shape, dtype=bool)
todo = collections.deque([subtree_id])
iter = 0
while todo:
id = todo.popleft()
assert 1 <= id <= N
subtreeids[id - 1] = True
sons = (pids == id).nonzero()[0] + 1
#print 'id={0} sons={1} todo={2}'.format(id, sons, todo)
todo.extend(sons)
iter = iter+1
if iter>N:
raise ValueError()
return subtreeids
======================= файл graph_array_test.py ==================== ======
import numpy
from graph_array import find_subtree
def _random_graph(n, maxsons):
import random
pids = numpy.zeros(n, dtype=int)
sons = numpy.zeros(n, dtype=int)
available = []
for id in xrange(1, n+1):
if available:
pid = random.choice(available)
sons[pid - 1] += 1
if sons[pid - 1] == maxsons:
available.remove(pid)
else:
pid = -1
pids[id - 1] = pid
available.append(id)
assert sons.max() <= maxsons
return pids
def verify_subtree(pids, subtree_id, subtree):
ids = set(subtree.nonzero()[0] + 1)
sons = set(ids) - set([subtree_id])
fathers = set(pids[id - 1] for id in sons)
leafs = set(id for id in ids if not (pids == id).any())
rest = set(xrange(1, pids.size+1)) - fathers - leafs
assert fathers & leafs == set()
assert fathers | leafs == ids
assert ids & rest == set()
def test_linear_graph_gen(n, genfunc, maxsons):
assert maxsons == 1
pids = genfunc(n, maxsons)
last = -1
seen = set()
for _ in xrange(pids.size):
id = int((pids == last).nonzero()[0]) + 1
assert id not in seen
seen.add(id)
last = id
assert seen == set(xrange(1, pids.size + 1))
def test_case1():
"""
1
/ \
2 4
/
3
"""
pids = numpy.array([-1, 1, 2, 1])
subtrees = {1: [True, True, True, True],
2: [False, True, True, False],
3: [False, False, True, False],
4: [False, False, False, True]}
for id in xrange(1, 5):
sub = find_subtree(pids, id)
assert (sub == numpy.array(subtrees[id])).all()
verify_subtree(pids, id, sub)
def test_random(n, genfunc, maxsons):
pids = genfunc(n, maxsons)
for subtree_id in numpy.arange(1, n+1):
subtree = find_subtree(pids, subtree_id)
verify_subtree(pids, subtree_id, subtree)
def test_timing(n, genfunc, maxsons):
import time
pids = genfunc(n, maxsons)
t = time.time()
for subtree_id in numpy.arange(1, n+1):
subtree = find_subtree(pids, subtree_id)
t = time.time() - t
print 't={0}s = {1:.2}ms/subtree = {2:.5}ms/subtree/node '.format(
t, t / n * 1000, t / n**2 * 1000),
def pytest_generate_tests(metafunc):
if 'case' in metafunc.function.__name__:
return
ns = [1, 2, 3, 4, 5, 10, 20, 50, 100, 1000]
if 'timing' in metafunc.function.__name__:
ns += [10000, 100000, 1000000]
pass
for n in ns:
func = _random_graph
for maxsons in sorted(set([1, 2, 3, 4, 5, 10, (n+1)//2, n])):
metafunc.addcall(
funcargs=dict(n=n, genfunc=func, maxsons=maxsons),
id='n={0} {1.__name__}/{2}'.format(n, func, maxsons))
if 'linear' in metafunc.function.__name__:
break
=================== py.test --tb = short -v -s test_graph_array.py ============
...
test_graph_array.py:72: test_timing[n=1000 _random_graph/1] t=13.4850590229s = 13.0ms/subtree = 0.013485ms/subtree/node PASS
test_graph_array.py:72: test_timing[n=1000 _random_graph/2] t=0.318281888962s = 0.32ms/subtree = 0.00031828ms/subtree/node PASS
test_graph_array.py:72: test_timing[n=1000 _random_graph/3] t=0.265519142151s = 0.27ms/subtree = 0.00026552ms/subtree/node PASS
test_graph_array.py:72: test_timing[n=1000 _random_graph/4] t=0.24147105217s = 0.24ms/subtree = 0.00024147ms/subtree/node PASS
test_graph_array.py:72: test_timing[n=1000 _random_graph/5] t=0.211434841156s = 0.21ms/subtree = 0.00021143ms/subtree/node PASS
test_graph_array.py:72: test_timing[n=1000 _random_graph/10] t=0.178458213806s = 0.18ms/subtree = 0.00017846ms/subtree/node PASS
test_graph_array.py:72: test_timing[n=1000 _random_graph/500] t=0.209936141968s = 0.21ms/subtree = 0.00020994ms/subtree/node PASS
test_graph_array.py:72: test_timing[n=1000 _random_graph/1000] t=0.245707988739s = 0.25ms/subtree = 0.00024571ms/subtree/node PASS
...
Здесь берется каждое поддерево каждого дерева, и интересным значением является среднее время для извлечения дерева: ~ 0,2 мс на поддерево, за исключением строго линейных деревьев. Я не уверен, что здесь происходит.