Вопрос новичка:
Почему ниже используется clone? Кроме того, поскольку цикл имеет вид for _ in range(n_trees),
как мы переходим к следующему случайному лесу при каждом вызове clone?
clone(grid_search_cv.best_estimator_)
Ссылка: см. ответ на упражнение № 7 в решениях упражнений: https://github.com/ageron/handson-ml/blob/master/06_decision_trees.ipynb
Полный код приведен ниже:
from sklearn.model_selection import GridSearchCV
# Hyperparameter grid: every max_leaf_nodes value from 2 to 99 combined with
# three candidate min_samples_split values.
params = {
    'max_leaf_nodes': list(range(2, 100)),
    'min_samples_split': [2, 3, 4],
}

# 3-fold cross-validated search over the whole grid; the base tree uses a
# fixed random_state so results are reproducible.
grid_search_cv = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=params,
    n_jobs=-1,
    verbose=1,
    cv=3,
)
grid_search_cv.fit(X_train, y_train)
from sklearn.model_selection import ShuffleSplit
n_trees = 1000      # number of random training subsets (one per tree)
n_instances = 100   # number of training samples in each subset

# Collects one (X, y) pair per split.
mini_sets = []

# ShuffleSplit is used only as a random subset sampler: the "test" side of each
# split takes everything except n_instances samples, so the "train" side holds
# the n_instances samples that are kept. The "test" indices are not used.
rs = ShuffleSplit(n_splits=n_trees, test_size=len(X_train) - n_instances, random_state=42)
for mini_train_index, mini_test_index in rs.split(X_train):
    X_mini_train = X_train[mini_train_index]
    y_mini_train = y_train[mini_train_index]
    mini_sets.append((X_mini_train, y_mini_train))
from sklearn.base import clone
# clone() builds a new, unfitted estimator with the same hyperparameters as the
# best tree found by the grid search. A separate copy is created for every
# mini-set because re-fitting one shared estimator would replace the previously
# learned tree on each call to fit().
forest = [clone(grid_search_cv.best_estimator_) for _ in range(n_trees)]

accuracy_scores = []
for tree, (X_mini_train, y_mini_train) in zip(forest, mini_sets):
    # Every clone is trained on its own random subset, so the fitted trees
    # differ even though their hyperparameters are identical.
    tree.fit(X_mini_train, y_mini_train)
    y_pred = tree.predict(X_test)
    accuracy_scores.append(accuracy_score(y_test, y_pred))

# Mean test-set accuracy of the individual trees. This is a bare expression,
# so its value is only shown in an interactive/notebook session.
np.mean(accuracy_scores)