Spot-checking ML models: how do I fit all the models using a loop? - PullRequest
0 votes
/ 27 March 2020

I'm spot-checking a bunch of regression models. How do I fit several ML models? Can I use a for loop and call model.fit on each one?

#Imports (scikit-learn)
from sklearn.linear_model import (LinearRegression, Lasso, Ridge, HuberRegressor,
                                  Lars, LassoLars, PassiveAggressiveRegressor,
                                  RANSACRegressor, SGDRegressor, TheilSenRegressor)
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

#Variables
alpha = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

#function: build a dict mapping a name to an unfitted model
def get_models():
    model = dict()
    model['lr'] = LinearRegression()

    # one key per alpha value; a fixed key would be overwritten on every pass,
    # leaving only the last alpha in the dict
    for value in alpha:
        model[f"Lasso-{value}"] = Lasso(alpha=value)
        model[f"Ridge-{value}"] = Ridge(alpha=value)

    model["Huber"] = HuberRegressor()
    model["Lars"] = Lars()
    model["Lasso_l"] = LassoLars()
    model["PA"] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    model["RANSAC"] = RANSACRegressor()
    model["SGD"] = SGDRegressor(max_iter=1000, tol=1e-3)
    model["theil"] = TheilSenRegressor()
    model["cart"] = DecisionTreeRegressor()
    model["extra"] = ExtraTreeRegressor()
    model["svml"] = SVR(kernel='linear')
    model["svmp"] = SVR(kernel='poly')
    return model

#Loaded data and have X and y
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)
#fitting models
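
To complete the snippet, here is a minimal sketch of the missing fitting step (assuming X and y are already loaded, as the comment above says; mean_squared_error is just one possible metric):

from sklearn.metrics import mean_squared_error

models = get_models()
for name, estimator in models.items():
    estimator.fit(X_train, y_train)
    preds = estimator.predict(X_test)
    print(name, mean_squared_error(y_test, preds))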

Answers [ 2 ]

0 votes
/ 28 March 2020

You can easily loop through several Scikit-Learn models and do all the fitting and tuning as well. Try the sample code below and see the links at the bottom of my post.

import warnings
warnings.filterwarnings('ignore')
import numpy as np
from multiprocessing import cpu_count  # cpu_count() is used by run_all() below
from sklearn import datasets
from sklearn.linear_model import SGDClassifier, LogisticRegression, \
    Perceptron, PassiveAggressiveClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid, RadiusNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, DotProduct, Matern, StationaryKernelMixin, WhiteKernel
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

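# NOTE: utilities and universal_params are helper modules referenced by this
# answer (they are not part of scikit-learn); big_loop and the parameter-grid
# fragments (penalty_12, alpha, tol, warm_start, ...) used below come from them.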
from utilities import *
from universal_params import *


def gen_classification_data(n=None):
    """
    uses the iris data
    :return: x, y
    """

    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    if n:
        half = int(n / 2)
        # the original computed these slices but never assigned them; stack
        # the first and last `half` rows along axis 0
        x = np.concatenate((x[:half], x[-half:]), axis=0)
        y = np.concatenate((y[:half], y[-half:]), axis=0)

    return x, y

linear_models_n_params = [
    (SGDClassifier,
     {'loss': ['hinge', 'log', 'modified_huber', 'squared_hinge'],
      'alpha': [0.0001, 0.001, 0.1],
      **penalty_12none
      }),

    (LogisticRegression,
     {**penalty_12, **max_iter, **tol, **warm_start, **C,
      'solver': ['liblinear']
      }),

    (Perceptron,
     {**penalty_all, **alpha, **n_iter, **eta0, **warm_start
      }),

    (PassiveAggressiveClassifier,
     {**C, **n_iter, **warm_start,
      'loss': ['hinge', 'squared_hinge'],
      })
]

linear_models_n_params_small = linear_models_n_params

svm_models_n_params = [
    (SVC,
     {**C, **kernel, **degree, **gamma, **coef0, **shrinking, **tol, **max_iter_inf2}),

    (NuSVC,
     {**nu, **kernel, **degree, **gamma, **coef0, **shrinking, **tol
      }),

    (LinearSVC,
     { **C, **penalty_12, **tol, **max_iter,
       'loss': ['hinge', 'squared_hinge'],
       })
]

svm_models_n_params_small = [
    (SVC,
     {**kernel, **degree, **shrinking
      }),

    (NuSVC,
     {**nu_small, **kernel, **degree, **shrinking
      }),

    (LinearSVC,
     { **C_small,
       'penalty': ['l2'],
       'loss': ['hinge', 'squared_hinge'],
       })
]

neighbor_models_n_params = [

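    # note: KMeans is an unsupervised clustering estimator, not a classifier,
    # but it exposes the same fit/predict interface used by the search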
    (KMeans,
     {'algorithm': ['auto', 'full', 'elkan'],
      'init': ['k-means++', 'random']}),

    (KNeighborsClassifier,
     {**n_neighbors, **neighbor_algo, **neighbor_leaf_size, **neighbor_metric,
      'weights': ['uniform', 'distance'],
      'p': [1, 2]
      }),

    (NearestCentroid,
     {**neighbor_metric,
      'shrink_threshold': [1e-3, 1e-2, 0.1, 0.5, 0.9, 2]
      }),

    (RadiusNeighborsClassifier,
     {**neighbor_radius, **neighbor_algo, **neighbor_leaf_size, **neighbor_metric,
      'weights': ['uniform', 'distance'],
      'p': [1, 2],
      'outlier_label': [-1]
      })
]

gaussianprocess_models_n_params = [
    (GaussianProcessClassifier,
     {**warm_start,
      'kernel': [RBF(), ConstantKernel(), DotProduct(), WhiteKernel()],
      'max_iter_predict': [500],
      'n_restarts_optimizer': [3],
      })
]

bayes_models_n_params = [
    (GaussianNB, {})
]

nn_models_n_params = [
    (MLPClassifier,
     { 'hidden_layer_sizes': [(16,), (64,), (100,), (32, 32)],
       'activation': ['identity', 'logistic', 'tanh', 'relu'],
       **alpha, **learning_rate, **tol, **warm_start,
       'batch_size': ['auto', 50],
       'max_iter': [1000],
       'early_stopping': [True, False],
       'epsilon': [1e-8, 1e-5]
       })
]

nn_models_n_params_small = [
    (MLPClassifier,
     { 'hidden_layer_sizes': [(64,), (32, 64)],
       'batch_size': ['auto', 50],
       'activation': ['identity', 'tanh', 'relu'],
       'max_iter': [500],
       'early_stopping': [True],
       **learning_rate_small
       })
]

tree_models_n_params = [

    (RandomForestClassifier,
     {'criterion': ['gini', 'entropy'],
      **max_features, **n_estimators, **max_depth,
      **min_samples_split, **min_impurity_split, **warm_start, **min_samples_leaf,
      }),

    (DecisionTreeClassifier,
     {'criterion': ['gini', 'entropy'],
      **max_features, **max_depth, **min_samples_split, **min_impurity_split, **min_samples_leaf
      }),

    (ExtraTreesClassifier,
     {**n_estimators, **max_features, **max_depth,
      **min_samples_split, **min_samples_leaf, **min_impurity_split, **warm_start,
      'criterion': ['gini', 'entropy']})
]


tree_models_n_params_small = [

    (RandomForestClassifier,
     {**max_features_small, **n_estimators_small, **min_samples_split, **max_depth_small, **min_samples_leaf
      }),

    (DecisionTreeClassifier,
     {**max_features_small, **max_depth_small, **min_samples_split, **min_samples_leaf
      }),

    (ExtraTreesClassifier,
     {**n_estimators_small, **max_features_small, **max_depth_small,
      **min_samples_split, **min_samples_leaf})
]



def run_linear_models(x, y, small = True, normalize_x = True):
    return big_loop(linear_models_n_params_small if small else linear_models_n_params,
                    StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)

def run_svm_models(x, y, small = True, normalize_x = True):
    return big_loop(svm_models_n_params_small if small else svm_models_n_params,
                    StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)

def run_neighbor_models(x, y, normalize_x = True):
    return big_loop(neighbor_models_n_params,
                    StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)

def run_gaussian_models(x, y, normalize_x = True):
    return big_loop(gaussianprocess_models_n_params,
                    StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)

def run_nn_models(x, y, small = True, normalize_x = True):
    return big_loop(nn_models_n_params_small if small else nn_models_n_params,
                    StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)

def run_tree_models(x, y, small = True, normalize_x = True):
    return big_loop(tree_models_n_params_small if small else tree_models_n_params,
                    StandardScaler().fit_transform(x) if normalize_x else x, y, isClassification=True)

def run_all(x, y, small = True, normalize_x = True, n_jobs=cpu_count()-1):

    all_params = (linear_models_n_params_small if small else linear_models_n_params) + \
                 (nn_models_n_params_small if small else nn_models_n_params) + \
                 ([] if small else gaussianprocess_models_n_params) + \
                 neighbor_models_n_params + \
                 (svm_models_n_params_small if small else svm_models_n_params) + \
                 (tree_models_n_params_small if small else tree_models_n_params)

    return big_loop(all_params,
                    StandardScaler().fit_transform(x) if normalize_x else x, y,
                    isClassification=True, n_jobs=n_jobs)



if __name__ == '__main__':

    x, y = gen_classification_data()
    run_all(x, y, n_jobs=1)
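
If the utilities and universal_params helpers are not available, plain scikit-learn can run the same kind of sweep with GridSearchCV. A minimal sketch (the (estimator, grid) pairs below are illustrative, not tuned):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

x, y = load_iris(return_X_y=True)

# (estimator, param_grid) pairs, analogous to the *_models_n_params lists above
models_n_params = [
    (LogisticRegression(solver='liblinear'), {'C': [0.1, 1, 10]}),
    (SVC(), {'kernel': ['linear', 'rbf'], 'C': [0.1, 1, 10]}),
]

for estimator, grid in models_n_params:
    search = GridSearchCV(estimator, grid, cv=5)
    search.fit(x, y)
    print(type(estimator).__name__, search.best_score_, search.best_params_)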

Here are a few examples you can follow.

https://github.com/PyDataBlog/Python-for-Data-Science/blob/master/Tutorials/Yellow%20brick.ipynb

https://medium.com/vickdata/a-simple-guide-to-scikit-learn-pipelines-4ac0d974bdcf

https://machinelearningmastery.com/compare-machine-learning-algorithms-python-scikit-learn/

0 votes
/ 27 March 2020

Yes, once your dict has been populated by get_models(), you can fit the models in a for loop:

# iterate over name/model pairs; looping over the dict itself yields only the keys
for name, model in models.items():
    model.fit(X_train, y_train)
...
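
If you also want a comparable score per model rather than just a fitted estimator, cross_val_score fits into the same loop. A sketch, assuming the dict returned by get_models() in the question:

from sklearn.model_selection import cross_val_score

models = get_models()
for name, model in models.items():
    scores = cross_val_score(model, X_train, y_train, cv=5,
                             scoring='neg_mean_squared_error')
    print(name, -scores.mean())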