Я работаю над проектом интеллектуального анализа данных (как сторонний программист) и пытаюсь выполнить анализ K-Nearest Neighbor. Тем не менее, я продолжаю получать ошибку «никакие пропущенные значения не допускаются». В моих данных отсутствуют пропущенные значения, поэтому с моим кодом что-то не так. Кто-нибудь может помочь?
AirbnbNYCApril = read.delim(file=file.choose(),
header = T,
sep = ",",
stringsAsFactors = F)
> str(AirbnbNYCApril)
AirbnbNYCApril = na.omit(AirbnbNYCApril)
set.seed(1)
n = nrow(AirbnbNYCApril)
Train_indices = 1:round(0.75*n)
Test_indices = (round(0.75*n)+1):n
AirbnbNYCApril_shuffle = AirbnbNYCApril[sample(n),]
AirbnbNYCApril_Train = AirbnbNYCApril_shuffle[Train_indices,]
AirbnbNYCApril_Test = AirbnbNYCApril_shuffle[Test_indices,]
Train_labels = AirbnbNYCApril_Train$neighborhood
Test_labels = AirbnbNYCApril_Test$neighborhood
AirbnbNYCApril_Train$neighborhood = NULL
AirbnbNYCApril_Test$neighborhood = NULL
min_reviews = min(AirbnbNYCApril_Train$reviews)
max_reviews = max(AirbnbNYCApril_Train$reviews)
AirbnbNYCApril_Train$reviews = (AirbnbNYCApril_Train$reviews - min_reviews)/(max_reviews - min_reviews)
AirbnbNYCApril_Test$reviews = (AirbnbNYCApril_Test$reviews - min_reviews)/(max_reviews - min_reviews)
min_accommodates = min(AirbnbNYCApril_Train$accommodates)
max_accommodates = max(AirbnbNYCApril_Train$accommodates)
AirbnbNYCApril_Train$accommodates = (AirbnbNYCApril_Train$accommodates - min_accommodates)/(max_accommodates - min_accommodates)
min_price = min(AirbnbNYCApril_Train$price)
max_price = max(AirbnbNYCApril_Train$price)
AirbnbNYCApril_Train$price = (AirbnbNYCApril_Train$price - min_price)/(max_price - min_price)
AirbnbNYCApril_Test$price = (AirbnbNYCApril_Test$price - min_price)/(max_price - min_price)
min_lat = min(AirbnbNYCApril_Train$latitude)
max_lat = max(AirbnbNYCApril_Train$latitutde)
AirbnbNYCApril_Train$latitude = (AirbnbNYCApril_Train$latitude - min_lat)/(max_lat - min_lat)
AirbnbNYCApril_Test$latitude = (AirbnbNYCApril_Test$latitude - min_lat)/(max_lat - min_lat)
min_lon = min(AirbnbNYCApril_Train$longitude)
max_lon = max(AirbnbNYCApril_Train$longitude)
AirbnbNYCApril_Train$longitude = (AirbnbNYCApril_Train$longitude - min_lon)/(max_lon - min_lon)
AirbnbNYCApril_Test$longitude = (AirbnbNYCApril_Test$longitude - min_lon)/(max_lon - min_lon)
neighborhood_prediction = knn(train = AirbnbNYCApril_Train[,-1], test = AirbnbNYCApril_Test[,-1], cl = Train_labels, k=5)
confusion_matrix = table(Test_labels, neighborhood_prediction)
print(confusion_matrix)