Я пытаюсь задействовать несколько ядер в DBSCAN из sklearn, но время выполнения, похоже, не меняется, когда я задаю n_jobs = -1
(использовать все процессоры для параллельных заданий, как указано в документации). Что я упускаю?
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from time import time
from sklearn.cluster import DBSCAN
# DBSCAN parameters.
# NOTE: the points generated below have integer coordinates, so any two
# distinct points are at distance >= 1; with eps = 0.25 only exact duplicates
# count as neighbours.
min_samples = 5
kmedian_thresh = 0.005  # not referenced in the visible part of this script
eps = 0.25

# Benchmark input: random integer points in [0, 5) and their full pairwise
# Euclidean distance matrix (symmetric, because both arguments are X).
num_features = 10
num_training_examples = 10000
X = np.random.randint(0, 5, size=(num_training_examples, num_features))
D = euclidean_distances(X, X)
# Case 1: baseline run, n_jobs left at its default.
# Both the estimator construction and the fit on the precomputed distance
# matrix D are inside the timed region.
start = time()
db = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed')
db.fit(D)  # fits in place; db stays bound to the fitted estimator
end = time()
total_time = end - start
print(
    'DBSCAN took {} seconds for {} training examples without n_jobs arg'
    .format(total_time, num_training_examples)
)
# Case 2: same fit as the baseline, but with n_jobs passed to DBSCAN.
# NOTE(review): with metric='precomputed' the pairwise distances are already
# computed, so there is presumably little left for n_jobs to parallelise --
# confirm against the scikit-learn DBSCAN documentation.
n_jobs = -1
start = time()
db = DBSCAN(
    eps=eps,
    min_samples=min_samples,
    metric='precomputed',
    n_jobs=n_jobs,
).fit(D)
end = time()
total_time = end - start
# The message needs a third placeholder: n_jobs was passed to format() but
# never printed, so the report did not show which n_jobs value was timed.
print(
    'DBSCAN took {} seconds for {} training examples with n_jobs = {}'
    .format(total_time, num_training_examples, n_jobs)
)
DBSCAN took 0.710000038147 seconds for 10000 training examples without n_jobs arg
DBSCAN took 0.707999944687 seconds for 10000 training examples with n_jobs = -1