Моя повторная реализация QDA имеет слишком низкий уровень точности - PullRequest
0 голосов
/ 09 ноября 2019
from math import *
import pandas as pd
import numpy as np
from operator import itemgetter
import numpy.linalg
import os
import seaborn as sns
import matplotlib.pyplot as plt
from numpy.core._multiarray_umath import ndarray


class QDA_classifier(object):

    def __init__(self):
        self.detminant_1 = 0
        self.determiant_2 = 0
        self.moyenne_1 = []
        self.moyenne_2 = []
        self.covariance_1 = []
        self.covariance_2 = []
        self.moyenne_covariance_det_list = []
        self.result = 0
        self.accuracy_result = 0

    def fit(self, set_tr):
        """mesure mean and ratio for bananas and apple from a dataframe"""
        set_tr = df.to_numpy()
        set_tr[:, -1] = np.where(set_tr[:, -1] > 0, 1, -1)
        k = 0
        set_tr = np.array(sorted(set_tr, key=itemgetter(-1)))
        mean_and_var = [0] * 4
        apple = np.zeros(set_tr.shape)
        banana = np.zeros(set_tr.shape)
        i = 0
        n = 0
        index_condition = np.where(set_tr[:, -1] < 0)
        banana = np.zeros(set_tr.size)
        banana = set_tr[index_condition[0]]
        index_condition = np.where(set_tr[:, -1] > 0)
        apple = np.zeros(set_tr.size)
        apple = set_tr[index_condition[0]]
        banana = np.delete(banana, -1, 1)
        apple = np.delete(apple, -1, 1)
        moyenne_apple = [None] * np.size(apple, 1)
        moyenne_banane = [None] * np.size(banana, 1)
        covariance_apple = np.zeros(apple.shape)
        covariance_banane = np.zeros(banana.shape)
        for k in range(np.size(banana, 1)):
            moyenne_banane = np.mean(banana)
        for k in range(np.size(apple, 1)):
            moyenne_apple[k] = np.mean(apple[:, k])
        covariance_apple = np.cov(apple.T)
        covariance_banane = np.cov(banana.T)
        """mesure le determinant de chacunes des matrices de variance co variance"""
        determinant_banane = np.linalg.det(covariance_banane)
        determinant_apple = np.linalg.det(covariance_apple)
        determinant_1 = 2 * pi * determinant_apple
        determinant_2 = 2 * pi * determinant_banane
        covariance_apple_inversee = np.linalg.inv(covariance_apple)
        covariance_banane_inversee = np.linalg.inv(covariance_banane)
        covariance_det_list = [[covariance_apple], [covariance_banane],
                               [determinant_1], [determinant_2]]
        self.moyenne_banane = moyenne_banane
        self.moyenne_apple = moyenne_apple
        self.covariance_det_list = covariance_det_list
        self.covariance_banane_inversee = covariance_banane_inversee
        self.covariance_apple_inversee = covariance_apple_inversee

        return set_tr

    def compute_likelihood_ratio(self, set_test):
        covariance_apple = self.covariance_det_list[0]
        covariance_banane = self.covariance_det_list[1]
        determinant_1 = self.covariance_det_list[2]
        determinant_2 = self.covariance_det_list[3]
        moyenne_banane = self.moyenne_banane
        moyenne_apple = self.moyenne_banane
        covariance_apple_inversee = self.covariance_apple_inversee
        covariance_banane_inversee = self.covariance_banane_inversee
        determinant_1 = float(determinant_1[0])
        determinant_2 = float(determinant_2[0])
        covariance_apple = np.array(covariance_apple[0])
        covariance_banane = np.array(covariance_banane[0])
        likelihood_1 = 0
        likelihood_1 = np.vectorize(likelihood_1)
        likelihood_2 = 0
        likelihood_2 = np.vectorize(likelihood_2)
        likelihood_1 = pow(determinant_1, 0.5) * exp(
            -0.5 * (set_test - moyenne_apple).T @ covariance_apple_inversee @ (set_test - moyenne_apple))
        likelihood_2 = pow(determinant_2, 0.5) * exp(
            -0.5 * (set_test - moyenne_banane).T @ covariance_banane_inversee @ (set_test - moyenne_banane))
        likelihood_ratio = likelihood_1 / likelihood_2
        if likelihood_ratio > 0:
            result = 1
            return 1
            print("Object from class 1")
        else:
            result = -1
            return -1
            self.result = result
            print("Object from class 2")

    def accuracy_score(self, set_test):
        i = 0
        """input a df and convert it to numpy conserving the target values and return a percentage of accuracy"""
        size_of_set_test = set_test.shape[0]
        for n in range(size_of_set_test):
            if set_test[n, -1] == 0:
                set_test[n, -1] -= 1
        target_values_test = [None] * size_of_set_test
        for f in range(size_of_set_test):
            target_values_test[f] = [set_test[f, -1]]
        set_test = np.delete(set_test, -1, 1)
        results_compute = [None] * size_of_set_test
        for n in range(size_of_set_test):  # you need to train your model before
            results_compute[n] = test_class.compute_likelihood_ratio(set_test[n, :])
            if results_compute[n] == target_values_test[n][0]:
                i += 1
            else:
                i -= 1
        accuracy_result = (i / size_of_set_test) * 100
        print("accuracy score = ", accuracy_result, "%")
        self.accuracy_result = accuracy_result


# illustrate the step for training and testing your values with local dataframes
os.chdir("C:/Users/victo/Downloads/Dataset")
df = pd.read_csv('bill_authentication.csv')
test_class = QDA_classifier()
test_class.fit(df)
df = df.to_numpy()
test_class.accuracy_score(df)

Я не могу объяснить, почему это не работает, я использую отношение правдоподобия, чтобы измерить вероятность получения яблока или банана. Мой проект состоит в предсказании картинки. Apple имеет 1 в качестве целевого значения и -1 для бананов. И я думаю, что проблема связана с моим вычислительным методом. Я все еще новичок в Python, который объясняет его сложную конструкцию. Помощь была бы очень благодарна.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...