Я пытаюсь создать программу, которая будет брать данные из двух CSV, а затем вычислять расстояние и классифицировать объекты. Мой код в настоящее время работает правильно только для первой итерации, и весь код работал нормально, пока я не изменил возвращаемую функцию weight_based_approach(
), раньше weight_based_approach()
возвращал 2 значения с плавающей запятой, и теперь он возвращает только одно значение, здесь изменилосьчасть кода:
return (eucl_weight_prediction_count / len(test_data) * 100), (
manhattan_metric_prediction_count / len(test_data) * 100)
Текущий полный код:
import numpy as np
import matplotlib.pyplot as plt
class Implementation:
def __init__(self):
pass
def Distancess(self, training_sub_data, query_instance):
query_params = query_instance[:10]
eucl = np.sqrt(np.sum((training_sub_data - query_params) ** 2, axis=-1))
return eucl, np.argsort(eucl)
def weight(self, training_data, distances, sorted_indices, k):
i = 1
samples_class = training_data[sorted_indices[:k]][:, -1]
nearest_distances = distances[sorted_indices[:k]]
nearest_weights = np.divide(1, np.square(nearest_distances))
class_0_weights_sum = np.sum(nearest_weights[samples_class == 0])
class_1_weights_sum = np.sum(nearest_weights[samples_class == 1])
class_2_weights_sum = np.sum(nearest_weights[samples_class == 2])
if class_0_weights_sum > class_1_weights_sum and class_0_weights_sum > class_2_weights_sum:
return 0
elif class_1_weights_sum > class_0_weights_sum and class_1_weights_sum > class_2_weights_sum:
return 1
else:
return 2
def weight_based_approach(training_data, test_data, kn_k_value):
training_data_10_columns = training_data[:, :10]
kn = Implementation()
eucl_weight_prediction_count = 0
for query_instance in test_data:
distances, euclidean_indices = kn.Distancess(training_data_10_columns, query_instance)
weight_based_average = kn.weight(training_data, distances, euclidean_indices, kn_k_value)
if query_instance[-1] == weight_based_average:
eucl_weight_prediction_count += 1
return eucl_weight_prediction_count / len(test_data) * 100
def main():
global accuracies
euclidean_accuracies = []
k_samples = []
k_samples.extend(list(range(1, 4, 1)))
print("Range" + str(k_samples))
for k in k_samples:
training_file = "classification/trainingData.csv"
test_file = "classification/testData.csv"
kn_k_value = k
training_data = np.genfromtxt(training_file, delimiter=",")
test_data = np.genfromtxt(test_file, delimiter=",")
accuracies = weight_based_approach(training_data, test_data, kn_k_value)
euclidean_accuracies.append(accuracies[0])
print("distance: " + str(euclidean_accuracies))
plt.plot(k_samples, euclidean_accuracies, 'r')
plt.xlabel('K{Number of Nearest Neighbour(s)}')
plt.ylabel('Accuracy %')
plt.title('K vs Accuracy graph')
plt.grid(True)
plt.show()
if __name__ == '__main__':
main()
Ошибка:
Traceback (most recent call last):
File "A:\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3296, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-e142ccdf3a2c>", line 1, in <module>
runfile('A:/Workspace/PML/R00182527/Part2a.py', wdir='A:/Workspace/PML/R00182527')
File "C:\Program Files\JetBrains\PyCharm 2019.2.2\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm 2019.2.2\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "A:/Workspace/P7/Part.py", line 143, in <module>
main()
File "A:/Workspace/P7/Part.py", line 126, in main
euclidean_accuracies.append(accuracies[0])
TypeError: 'float' object is not subscriptable
Ожидается точность% в качестве выхода