Декодирование лейбла в Наив Байес - PullRequest
0 голосов
/ 11 октября 2018

У меня вопрос о том, как декодировать лейбл в наивный байес.Я пытаюсь расшифровать этикетку, но не удалось.Это мой код.

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

class bayesNaive:
    def __init__(self, dataTrainInput):
        self.data = pd.read_csv(dataTrainInput, delimiter=";", encoding="latin-1")
def encoderLabel(self):
    self.lb = LabelEncoder()
    df = pd.DataFrame(self.data,
                      columns=["laboratory_registration_id", "Albumin", "Asam Urat", "Basofil", "Basofil Absolut","BE", "Berat Jenis", "Besi (Fe/iron)", "Eosinofil", "Eosinofil Absolut", "Eritrosit","Ferritin", "Free T4", "Glukosa Darah 2 jam PP", "Glukosa Darah Puasa","Glukosa Darah Sewaktu", "Hb-A1c", "Hematokrit", "Hemoglobin", "Kalium (K)","Klorida (Cl)", "Kolesterol HDL", "Kolesterol LDL", "Kolesterol Total", "Kreatinin", "Leukosit", "Limfosit", "Limfosit Absolut", "MCH", "MCHC", "MCV", "Monosit", "Monosit Absolut", "MPV", "Natrium (Na)", "Neutrofil Absolut", "Neutrofil Segmen", "O2 Saturasi", "pCO2", "PDW", "pH", "pO2", "RDW-CV", "RDW-SD", "T CO2", "TIBC","T3 Total", "T4 Total", "Trigliserida", "Trombosit", "Troponin T", "TSH", "Ureum", "Age", "Gender", "Disease"])
    data1 = self.data["Bakteri"]
    data2 = self.data["Bilirubin"]
    data3 = self.data["Blood"]
    data5 = self.data["Epitel"]
    data6 = self.data["Eritrosit Urin"]
    data7 = self.data["Faktor Rheumatoid (RF)"]
    data8 = self.data["Glukosa"]
    data9 = self.data["HBsAg"]
    data10 = self.data["Kejernihan"]
    data11 = self.data["Keton"]
    data12 = self.data["Kristal"]
    data13 = self.data["Leukosit Urin"]
    data14 = self.data["Nitrit"]
    data15 = self.data["Protein"]
    data16 = self.data["Silinder"]
    data17 = self.data["Urobilinogen"]
    data18 = self.data["Warna"]

    x1 = self.lb.fit_transform(data1)
    x2 = self.lb.fit_transform(data2)
    x3 = self.lb.fit_transform(data3)
    x5 = self.lb.fit_transform(data5)
    x6 = self.lb.fit_transform(data6)
    x7 = self.lb.fit_transform(data7)
    x8 = self.lb.fit_transform(data8)
    x9 = self.lb.fit_transform(data9)
    x10 = self.lb.fit_transform(data10)
    x11 = self.lb.fit_transform(data11)
    x12 = self.lb.fit_transform(data12)
    x13 = self.lb.fit_transform(data13)
    x14 = self.lb.fit_transform(data14)
    x15 = self.lb.fit_transform(data15)
    x16 = self.lb.fit_transform(data16)
    x17 = self.lb.fit_transform(data17)
    x18 = self.lb.fit_transform(data18)

    df1 = pd.DataFrame(x1, columns=['Bakteri'])
    df2 = pd.DataFrame(x2, columns=['Bilirubin'])
    df3 = pd.DataFrame(x3, columns=['Blood'])
    df5 = pd.DataFrame(x5, columns=['Epitel'])
    df6 = pd.DataFrame(x6, columns=['Eritrosit Urin'])
    df7 = pd.DataFrame(x7, columns=['Faktor Rheumatoid (RF)'])
    df8 = pd.DataFrame(x8, columns=['Glukosa'])
    df9 = pd.DataFrame(x9, columns=['HBsAg'])
    df10 = pd.DataFrame(x10, columns=['Kejernihan'])
    df11 = pd.DataFrame(x11, columns=['Keton'])
    df12 = pd.DataFrame(x12, columns=['Kristal'])
    df13 = pd.DataFrame(x13, columns=['Leukosit Urin'])
    df14 = pd.DataFrame(x14, columns=['Nitrit'])
    df15 = pd.DataFrame(x15, columns=['Protein'])
    df16 = pd.DataFrame(x16, columns=['Silinder'])
    df17 = pd.DataFrame(x17, columns=['Urobilinogen'])
    df18 = pd.DataFrame(x18, columns=['Warna'])

    return pd.concat([df1, df2, df3, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17, df18, df], axis=1)

def decodeLabel(self, result):
    data1 = result["Bakteri"]
    data2 = result["Bilirubin"]
    data3 = result["Blood"]
    data5 = result["Epitel"]
    data6 = result["Eritrosit Urin"]
    data7 = result["Faktor Rheumatoid (RF)"]
    data8 = result["Glukosa"]

    x1 = self.lb.inverse_transform(data1)
    x2 = self.lb.inverse_transform(data2)
    x3 = self.lb.inverse_transform(data3)
    x5 = self.lb.inverse_transform(data5)
    x6 = self.lb.inverse_transform(data6)
    x7 = self.lb.inverse_transform(data7)
    x8 = self.lb.inverse_transform(data8)

    df1 = pd.DataFrame(x1, columns=['Bakteri'])
    df2 = pd.DataFrame(x2, columns=['Bilirubin'])
    df3 = pd.DataFrame(x3, columns=['Blood'])
    df5 = pd.DataFrame(x5, columns=['Epitel'])
    df6 = pd.DataFrame(x6, columns=['Eritrosit Urin'])
    df7 = pd.DataFrame(x7, columns=['Faktor Rheumatoid (RF)'])
    df8 = pd.DataFrame(x8, columns=['Glukosa'])

    return pd.concat([df1, df2, df3, df5, df6, df7, df8], axis=1)

def split_label(self):
    res = self.encoderLabel()
    X = res.iloc[:, :-1].values
    Y = res['Disease'].values
    return X, Y

def test_split(self):
    X, Y = self.split_label()
    X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, random_state=42)
    return X_train, X_test, Y_train, Y_test

def Classify_lab(self):
    try:
        X_train, X_test, Y_train, Y_test = self.test_split()
        # print(X_test.shape)
        model = GaussianNB()
        model_train = model.fit(X_train, Y_train)

        model_score = model_train.score(X_test, Y_test)
        accuracy = round(model_score * 100, 2)
        predicted = model_train.predict(X_test)
        Y_prob = model_train.predict_proba(X_test)
        report=classification_report(Y_test, predicted)
        conf_m = confusion_matrix(Y_test, predicted)

        test = pd.DataFrame(X_test)

        test.columns = ["Bakteri", "Bilirubin", "Blood", "Epitel","Eritrosit Urin","Faktor Rheumatoid (RF)", "Glukosa", "HBsAg", "Kejernihan", "Keton", "Kristal", "Leukosit Urin", "Nitrit", "Protein", "Silinder", "Urobilinogen", "Warna", "laboratory_registration_id", "Albumin", "Asam Urat", "Basofil", "Basofil Absolut","BE", "Berat Jenis", "Besi (Fe/iron)", "Eosinofil", "Eosinofil Absolut", "Eritrosit","Ferritin", "Free T4", "Glukosa Darah 2 jam PP", "Glukosa Darah Puasa","Glukosa Darah Sewaktu", "Hb-A1c", "Hematokrit", "Hemoglobin", "Kalium (K)","Klorida (Cl)", "Kolesterol HDL", "Kolesterol LDL", "Kolesterol Total", "Kreatinin","Leukosit", "Limfosit", "Limfosit Absolut", "MCH", "MCHC", "MCV", "Monosit","Monosit Absolut", "MPV", "Natrium (Na)", "Neutrofil Absolut", "Neutrofil Segmen","O2 Saturasi", "pCO2", "PDW", "pH", "pO2", "RDW-CV", "RDW-SD", "T CO2", "TIBC","T3 Total", "T4 Total", "Trigliserida", "Trombosit", "Troponin T", "TSH", "Ureum","Age", "Gender"]
        labels = pd.DataFrame(predicted)
        probability = pd.DataFrame(Y_prob)
        labels.columns = ["Disease"]
        probability.columns = ["Diabetes mellitus", "Ginjal","Jantung", "Thalassemia"]
        result = pd.concat([test, labels, probability], axis=1)
        decode = self.decodeLabel(X_test)
        print(decode)
        exit()
        return [result, str(accuracy), report, conf_m]
    except ValueError as v:
        print(v)
    except FileNotFoundError:
        print('File not found')

if __name__ == '__main__':
    datatestingInput = "testing.csv"
    dataTrainInput = "perc4.csv"
    obj = bayesNaive(dataTrainInput)
    print(obj.Classify_lab())
    # print(obj.Testing_lab(datatestingInput))

У меня есть функция для декодирования, то есть decodeLabel ().Я пытаюсь напечатать результат в

decode = self.decodeLabel(X_test)
print(decode)

, но ошибка.Это ошибка.

IndexError: допустимыми являются только целые числа, срезы (:), многоточие (...), numpy.newaxis (None) и целые или логические массивы

Пожалуйста, покажи мне, в чем проблема .. Спасибо

...