При запуске run_train_SVM.py значения признаков вычисляются, но я не уверен, добавляются ли они к массиву признаков.
Ниже приведён код train_SVM.py (обучение модели):
# Project-local wildcard imports stay first so that the explicit imports
# below take precedence if one of these modules exports a clashing name.
from load_MITBIH import *
from evaluation_AAMI import *
from aggregation_voting_strategies import *
from oversampling import *
from cross_validation import *
from feature_selection import *

# `time` and `np` are used throughout this file; import them explicitly
# instead of relying on the wildcard imports above to provide them.
import os
import time

import numpy as np
import sklearn
from sklearn import decomposition
from sklearn import svm
from sklearn.preprocessing import StandardScaler

try:
    from sklearn.externals import joblib
except ImportError:
    # sklearn.externals.joblib was removed in scikit-learn 0.23; the
    # standalone joblib package (a scikit-learn dependency) replaces it.
    import joblib
def create_svm_model_name(model_svm_path, winL, winR, do_preprocess,
                          maxRR, use_RR, norm_RR, compute_morph, use_weight_class, feature_selection,
                          oversamp_method, leads_flag, reduced_DS, pca_k, delimiter):
    """Build a model/results path that encodes the experiment configuration.

    Starting from ``model_svm_path``, one tag per enabled option is appended,
    each preceded by ``delimiter`` (callers in this file use ``'_'`` for file
    names and ``'/'`` for nested result directories).

    Args:
        model_svm_path: Base path or prefix the tags are appended to.
        winL, winR: Accepted for signature compatibility; not used in the name.
        do_preprocess: Truthy adds ``'rm_bsln'``.
        maxRR: Truthy adds ``'maxRR'``.
        use_RR: Truthy adds ``'RR'``.
        norm_RR: Truthy adds ``'norm_RR'``.
        compute_morph: Iterable of descriptor names; every entry is appended,
            including empty strings (kept so existing file names still match).
        use_weight_class: Truthy adds ``'weighted'``.
        feature_selection: Non-empty string is appended as is.
        oversamp_method: Non-empty string is appended as is.
        leads_flag: Two-element sequence; ``leads_flag[0] == 1`` adds
            ``'MLII'`` and ``leads_flag[1] == 1`` adds ``'V1'``.
        reduced_DS: Truthy adds ``'exp_2'``.
        pca_k: Number of PCA components; a positive value adds ``'pca_<k>'``.
            Falsy placeholders (``0``, ``''``, ``None``) mean "no PCA".
        delimiter: Separator placed between consecutive tags.

    Returns:
        The composed path string.
    """
    parts = [model_svm_path]

    if reduced_DS:
        parts.append('exp_2')

    if leads_flag[0] == 1:
        parts.append('MLII')
    if leads_flag[1] == 1:
        parts.append('V1')

    if oversamp_method:
        parts.append(oversamp_method)
    if feature_selection:
        parts.append(feature_selection)

    if do_preprocess:
        parts.append('rm_bsln')
    if maxRR:
        parts.append('maxRR')
    if use_RR:
        parts.append('RR')
    if norm_RR:
        parts.append('norm_RR')

    # Sets have no stable iteration order for strings across interpreter runs
    # (hash randomisation), which would change the generated name and defeat
    # the "load model if the file exists" cache. Sort them; keep the caller's
    # order for lists/tuples so previously generated names stay valid.
    if isinstance(compute_morph, (set, frozenset)):
        descriptors = sorted(compute_morph)
    else:
        descriptors = list(compute_morph)
    parts.extend(descriptors)

    if use_weight_class:
        parts.append('weighted')

    # `pca_k and ...` short-circuits on '' / None, which cannot be compared
    # with an int on Python 3 ('' is the default used by main()).
    if pca_k and pca_k > 0:
        parts.append('pca_' + str(pca_k))

    return delimiter.join(parts)
# Eval the SVM model and export the results
def eval_model(svm_model, features, labels, multi_mode, voting_strategy, output_path, C_value, gamma_value, DS):
    """Evaluate a trained SVM on ``features`` and write the results to disk.

    For ``multi_mode == 'ovo'`` the pairwise decision values are turned into
    class predictions by the voting helper selected with ``voting_strategy``.
    For ``multi_mode == 'ovr'`` the classifier's own ``predict`` is used.
    The AAMI performance measures, the raw decision values and the predicted
    labels are written under ``output_path`` (created if missing).

    Output files share the prefix ``<output_path>/<DS>C_<C_value>`` followed by
    ``g_<gamma_value>`` when ``gamma_value != 0.0``. The 'ovr' branch used to
    reference undefined names and never ran; it now uses the same prefix.

    Args:
        svm_model: Fitted classifier exposing ``decision_function`` and
            ``predict``.
        features: Feature matrix to evaluate.
        labels: Ground-truth labels passed to the AAMI measure computation.
        multi_mode: ``'ovo'`` or ``'ovr'``.
        voting_strategy: ``'ovo_voting'``, ``'ovo_voting_both'`` or
            ``'ovo_voting_exp'`` (only validated for 'ovo'; always used in the
            output file names).
        output_path: Directory the result files are written to.
        C_value: SVM C value, used only in file names.
        gamma_value: SVM gamma value, used only in file names (0.0 omits it).
        DS: File-name prefix identifying the evaluated split.

    Raises:
        ValueError: If ``multi_mode`` or (for 'ovo') ``voting_strategy`` is
            not one of the supported values. Raised before any work is done.
    """
    ovo_strategies = ('ovo_voting', 'ovo_voting_both', 'ovo_voting_exp')

    # Validate first so a typo fails fast instead of surfacing later as an
    # unbound-variable error after the (slow) decision function has run.
    if multi_mode not in ('ovo', 'ovr'):
        raise ValueError("Unsupported multi_mode: " + repr(multi_mode) + " (expected 'ovo' or 'ovr')")
    if multi_mode == 'ovo' and voting_strategy not in ovo_strategies:
        raise ValueError("Unsupported voting_strategy: " + repr(voting_strategy) +
                         " (expected one of " + ", ".join(ovo_strategies) + ")")

    decision = svm_model.decision_function(features)

    if multi_mode == 'ovo':
        # The second argument (4) is forwarded unchanged to the voting helper;
        # presumably the number of classes -- TODO confirm against the helper.
        if voting_strategy == 'ovo_voting':
            predictions, _ = ovo_voting(decision, 4)
        elif voting_strategy == 'ovo_voting_both':
            predictions, _ = ovo_voting_both(decision, 4)
        else:
            predictions, _ = ovo_voting_exp(decision, 4)
    else:
        predictions = svm_model.predict(features)

    perf_measures = compute_AAMI_performance_measures(predictions, labels)

    # Write results and also predictions
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    file_prefix = output_path + '/' + DS + 'C_' + str(C_value)
    if gamma_value != 0.0:
        file_prefix += 'g_' + str(gamma_value)

    write_AAMI_results(perf_measures, file_prefix + '_score_Ijk_' +
                       str(format(perf_measures.Ijk, '.2f')) + '_' + voting_strategy + '.txt')

    # Array to .csv
    np.savetxt(file_prefix + '_decision_' + multi_mode + '.csv', decision)
    np.savetxt(file_prefix + '_predict_' + voting_strategy + '.csv', predictions.astype(int), '%.0f')

    print("Results writed at " + output_path + '/' + DS + 'C_' + str(C_value))
def create_oversamp_name(reduced_DS, do_preprocess, compute_morph, winL, winR, maxRR, use_RR, norm_RR, pca_k):
    """Build the file-name suffix identifying a cached oversampled feature set.

    One tag is concatenated per enabled option, followed by the window sizes.

    Args:
        reduced_DS: Truthy adds ``'_reduced_'``.
        do_preprocess: Truthy adds ``'_rm_bsline'``.
        compute_morph: Iterable of descriptor names, each added as
            ``'_<name>'`` (empty strings included, as before).
        winL, winR: Window sizes, always appended as ``'_wL_<winL>_wR_<winR>'``.
        maxRR: Truthy adds ``'_maxRR'``.
        use_RR: Truthy adds ``'_RR'``.
        norm_RR: Truthy adds ``'_norm_RR'``.
        pca_k: Number of PCA components; a positive value adds ``'_pca_<k>'``.
            Falsy placeholders (``0``, ``''``, ``None``) mean "no PCA".

    Returns:
        The suffix string.
    """
    tags = []

    if reduced_DS:
        tags.append('_reduced_')
    if do_preprocess:
        tags.append('_rm_bsline')
    if maxRR:
        tags.append('_maxRR')
    if use_RR:
        tags.append('_RR')
    if norm_RR:
        tags.append('_norm_RR')

    # A set of strings iterates in an order that can differ between runs
    # (hash randomisation); sort it so the cached pickle is found again.
    # Lists/tuples keep the caller's order so existing names stay valid.
    if isinstance(compute_morph, (set, frozenset)):
        descriptors = sorted(compute_morph)
    else:
        descriptors = list(compute_morph)
    tags.extend('_' + descp for descp in descriptors)

    # `pca_k and ...` short-circuits on '' / None, which cannot be compared
    # with an int on Python 3 ('' is the default used by main()).
    if pca_k and pca_k > 0:
        tags.append('_pca_' + str(pca_k))

    tags.append('_wL_' + str(winL) + '_wR_' + str(winR))
    return ''.join(tags)
def _ensure_dir(path):
    """Create directory ``path`` (and parents) when it does not exist yet."""
    if path and not os.path.exists(path):
        os.makedirs(path)


def _check_feature_matrix(features, labels, split_name):
    """Fail early, with an actionable message, on an unusable feature matrix.

    StandardScaler.fit needs a non-empty 2-D (n_samples, n_features) input;
    an empty or 1-D array (e.g. when no descriptor ended up in the matrix)
    otherwise only surfaces as a less specific error inside scaler.fit.
    """
    shape = np.shape(features)
    if len(shape) != 2 or shape[0] == 0 or shape[1] == 0:
        raise ValueError(
            "No usable features for " + split_name + ": expected a non-empty 2-D "
            "(n_samples, n_features) array but got shape " + str(shape) + ". "
            "Check that at least one descriptor is enabled (use_RR / compute_morph) "
            "and that the computed values are actually appended to the feature matrix.")
    if shape[0] != len(labels):
        raise ValueError(
            "Feature/label mismatch for " + split_name + ": " + str(shape[0]) +
            " feature rows but " + str(len(labels)) + " labels.")


def main(multi_mode='ovo', winL=90, winR=90, do_preprocess=True, use_weight_class=True,
         maxRR=True, use_RR=True, norm_RR=True, compute_morph={''}, oversamp_method='', pca_k='',
         feature_selection='', do_cross_val='', C_value=0.001, gamma_value=0.0,
         reduced_DS=False, leads_flag=[1, 0]):
    """Train (or load) an RBF SVM on DS1 and evaluate it on DS1 and DS2.

    Pipeline: load both splits, optionally oversample the training split,
    z-score both splits with statistics fitted on the training split,
    optionally apply feature selection and incremental PCA, then either run
    cross-validation (``do_cross_val`` set) or train/load the model and
    export the evaluation results.

    Args:
        multi_mode: Passed to SVC as ``decision_function_shape`` and to
            ``eval_model``; also part of the output paths.
        winL, winR, do_preprocess, maxRR, use_RR, norm_RR, compute_morph,
        reduced_DS, leads_flag: Forwarded to ``load_mit_db`` and encoded in
            the generated file names.
        use_weight_class: When truthy the SVM is trained with per-class
            weights ``n_samples / n_samples_of_class``; when falsy no class
            weights are used. (Previously the flag only changed the model
            name while weights were always applied.)
        oversamp_method: Non-empty string enables oversampling via
            ``perform_oversampling``.
        pca_k: Number of IncrementalPCA components; falsy (``''``, ``0``,
            ``None``) disables PCA.
        feature_selection: Non-empty string enables ``run_feature_selection``.
        do_cross_val: ``'pat_cv'`` or ``'beat_cv'`` runs cross-validation
            instead of training; falsy trains/evaluates.
        C_value: SVM regularisation parameter C.
        gamma_value: RBF gamma; ``0.0`` selects scikit-learn's ``'auto'``.

    Raises:
        ValueError: If a loaded feature matrix is empty, not 2-D, or does not
            match its labels in length.

    Note:
        The mutable defaults (``compute_morph``, ``leads_flag``) are kept for
        interface compatibility; this function never mutates them.
    """
    print("Runing train_SVM.py!")

    # The '' default (and None) cannot be compared with an int on Python 3;
    # normalise every falsy placeholder to 0 == "PCA disabled".
    if not pca_k:
        pca_k = 0

    db_path = '/users/meghshyam/downloads/ECG/mitdb/m_learning/scikit/'
    # NOTE(review): 'dowmloads' differs from 'downloads' in db_path above and
    # looks like a typo; left untouched because results may already live there.
    results_root = '/users/meghshyam/dowmloads/ECG/code/ecg_classification/python/results/'

    # Load train data
    [tr_features, tr_labels, tr_patient_num_beats] = load_mit_db(
        'DS1', winL, winR, do_preprocess, maxRR, use_RR, norm_RR, compute_morph,
        db_path, reduced_DS, leads_flag)

    # Load Test data
    [eval_features, eval_labels, eval_patient_num_beats] = load_mit_db(
        'DS2', winL, winR, do_preprocess, maxRR, use_RR, norm_RR, compute_morph,
        db_path, reduced_DS, leads_flag)

    _check_feature_matrix(tr_features, tr_labels, 'DS1')
    _check_feature_matrix(eval_features, eval_labels, 'DS2')

    # np.savetxt does not create missing directories.
    _ensure_dir('mit_db')
    if reduced_DS == True:
        np.savetxt('mit_db/' + 'exp_2_' + 'DS2_labels.csv', eval_labels.astype(int), '%.0f')
    else:
        np.savetxt('mit_db/' + 'DS2_labels.csv', eval_labels.astype(int), '%.0f')

    ##############################################################
    # 0) TODO if feature_Selection: before oversamp?
    # TODO perform normalization before the oversampling?
    if oversamp_method:
        # Filename
        oversamp_features_pickle_name = create_oversamp_name(
            reduced_DS, do_preprocess, compute_morph, winL, winR, maxRR, use_RR, norm_RR, pca_k)

        # Do oversampling
        tr_features, tr_labels = perform_oversampling(
            oversamp_method, db_path + 'oversamp/python_mit',
            oversamp_features_pickle_name, tr_features, tr_labels)

    # Normalization of the input data: zero mean, unit variance (z-score).
    # The scaler is fitted on the training split only and reused for DS2.
    scaler = StandardScaler()
    scaler.fit(tr_features)
    tr_features_scaled = scaler.transform(tr_features)
    eval_features_scaled = scaler.transform(eval_features)

    ##############################################################
    # 0) TODO feature selection: also after oversampling?
    if feature_selection:
        print("Runing feature selection")
        best_features = 7
        tr_features_scaled, features_index_sorted = run_feature_selection(
            tr_features_scaled, tr_labels, feature_selection, best_features)
        eval_features_scaled = eval_features_scaled[:, features_index_sorted[0:best_features]]

    # 1) Dimensionality reduction
    if pca_k > 0:
        # NOTE: plain PCA ran out of memory, hence IncrementalPCA.
        # (An alternative tried earlier: decomposition.TruncatedSVD with
        # n_components=pca_k, algorithm='arpack'.)
        start = time.time()
        print("Runing IPCA " + str(pca_k) + "...")

        IPCA = sklearn.decomposition.IncrementalPCA(pca_k, batch_size=pca_k)
        IPCA.fit(tr_features_scaled)

        # Apply the projection fitted on the training data to both splits.
        tr_features_scaled = IPCA.transform(tr_features_scaled)
        eval_features_scaled = IPCA.transform(eval_features_scaled)

        end = time.time()
        print("Time runing IPCA (rbf): " + str(format(end - start, '.2f')) + " sec")

    # Results directory; identical for cross-validation and evaluation.
    perf_measures_path = create_svm_model_name(
        results_root + multi_mode, winL, winR, do_preprocess, maxRR, use_RR, norm_RR,
        compute_morph, use_weight_class, feature_selection, oversamp_method,
        leads_flag, reduced_DS, pca_k, '/')

    ##############################################################
    # 2) Cross-validation (replaces training/evaluation when requested)
    if do_cross_val:
        print("Runing cross val...")
        start = time.time()

        # TODO Save data over the k-folds and ranked by the best average
        # values in separated files
        # TODO implement this method! check to avoid NaN scores....
        if do_cross_val == 'pat_cv':  # Cross validation with one fold per patient
            cv_scores, c_values = run_cross_val(
                tr_features_scaled, tr_labels, tr_patient_num_beats,
                do_cross_val, len(tr_patient_num_beats))

            _ensure_dir(perf_measures_path)
            np.savetxt(perf_measures_path + '/cross_val_k-pat_cv_F_score.csv',
                       (c_values, cv_scores.astype(float)), "%f")

        elif do_cross_val == 'beat_cv':  # cross validation by class id samples
            k_folds = {5}
            for k in k_folds:
                ijk_scores, c_values = run_cross_val(
                    tr_features_scaled, tr_labels, tr_patient_num_beats, do_cross_val, k)

                _ensure_dir(perf_measures_path)
                np.savetxt(perf_measures_path + '/cross_val_k-' + str(k) + '_Ijk_score.csv',
                           (c_values, ijk_scores.astype(float)), "%f")

        end = time.time()
        print("Time runing Cross Validation: " + str(format(end - start, '.2f')) + " sec")
        return

    ################################################################################################
    # 3) Train SVM model
    # TODO load best params from cross validation!
    use_probability = False

    model_svm_path = db_path + 'svm_models/' + multi_mode + '_rbf'
    model_svm_path = create_svm_model_name(
        model_svm_path, winL, winR, do_preprocess, maxRR, use_RR, norm_RR, compute_morph,
        use_weight_class, feature_selection, oversamp_method, leads_flag, reduced_DS, pca_k, '_')

    if gamma_value != 0.0:
        model_svm_path = model_svm_path + '_C_' + str(C_value) + '_g_' + str(gamma_value) + '.joblib.pkl'
    else:
        model_svm_path = model_svm_path + '_C_' + str(C_value) + '.joblib.pkl'

    print("Training model on MIT-BIH DS1: " + model_svm_path + "...")

    if os.path.isfile(model_svm_path):
        # Load the trained model!
        # NOTE: joblib.load unpickles the file; only load model files you trust.
        svm_model = joblib.load(model_svm_path)
    else:
        # Per-class weight = n_samples / n_samples_of_class. Classes without
        # any training sample are skipped to avoid a division by zero.
        class_weights = None
        if use_weight_class:
            class_weights = {}
            n_samples = len(tr_labels)
            for c in range(4):
                n_class = np.count_nonzero(tr_labels == c)
                if n_class:
                    class_weights[c] = n_samples / float(n_class)

        # gamma_value 0.0 means "use scikit-learn's 'auto'" (1 / n_features).
        gamma = gamma_value if gamma_value != 0.0 else 'auto'
        svm_model = svm.SVC(C=C_value, kernel='rbf', degree=3, gamma=gamma,
                            coef0=0.0, shrinking=True, probability=use_probability, tol=0.001,
                            cache_size=200, class_weight=class_weights, verbose=False,
                            max_iter=-1, decision_function_shape=multi_mode, random_state=None)

        # Let's Train!
        start = time.time()
        svm_model.fit(tr_features_scaled, tr_labels)
        end = time.time()
        # TODO assert that the class_ID appears with the desired order,
        # with the goal of ovo make the combinations properly
        print("Trained completed!\n\t" + model_svm_path + "\n \tTime required: " +
              str(format(end - start, '.2f')) + " sec")

        # Export model: save/write trained SVM model
        _ensure_dir(os.path.dirname(model_svm_path))
        joblib.dump(svm_model, model_svm_path)

        # TODO Export StandardScaler()

    #########################################################################
    # 4) Test SVM model
    print("Testing model on MIT-BIH DS2: " + model_svm_path + "...")

    # ovo_voting:     simply add 1 to the win class
    # ovo_voting_exp: consider the post prob adding to both classes
    for voting_strategy in ('ovo_voting', 'ovo_voting_exp'):
        # Evaluate the model on the training data
        print("Evaluation on DS1 ...")
        eval_model(svm_model, tr_features_scaled, tr_labels, multi_mode, voting_strategy,
                   perf_measures_path, C_value, gamma_value, 'Train_')

        # Let's test new data!
        print("Evaluation on DS2 ...")
        eval_model(svm_model, eval_features_scaled, eval_labels, multi_mode, voting_strategy,
                   perf_measures_path, C_value, gamma_value, '')
При запуске run_train_SVM.py значения признаков вычисляются, но я не уверен, что они добавляются.
train_SVM.py in main(multi_mode, winL, winR, do_preprocess, use_weight_class, maxRR, use_RR, norm_RR, compute_morph, oversamp_method, pca_k, feature_selection, do_cross_val, C_value, gamma_value, reduced_DS, leads_flag)
    195     # scaled: zero mean unit variance ( z-score )
    196     scaler = StandardScaler()
--> 197     scaler.fit(tr_features)    <- ошибка возникает здесь
    198     tr_features_scaled = scaler.transform(tr_features)