NA / NaN / Inf в вызове внешней функции (аргумент 6) в алгоритме KNN - PullRequest
0 голосов
/ 05 июля 2019

Я пытаюсь предсказать категорию с помощью алгоритма KNN, но не понимаю, почему получаю указанную выше ошибку: «NA / NaN / Inf в вызове внешней функции (аргумент 6)». Я уже удалил значения NA с помощью функции na.omit(A), но по-прежнему получаю эту ошибку.

data.csv

RegionName,RetailerId,PartyName,Address1,Address2,Area,City,ContactPerson,CSTNumber,Email,LicenseNumber,Telephone,MobileNumber
MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES,"2,GROUND FLOOR,ABDUL REHAMAN CHAWL,MAROL GAON",SHREE HANUMAN MANDIR ROAD,MAROL,ANDHERI EAST,HARSHIT JAIN,20 Z6 59 90B,BHAGWATIMEDICAL7@YAHOO.COM,21 Z6 59 90B,29207788 / 07666464888,"82,864,534,619,867,000,000"
MUMBAI,297,BHAGWATI MEDICAL [MAROL],"SHRI HANUMAN MANDIR RD;MAROL GAON,","ANDHERI[E],MUMBAI-59.",,ANDHERI [E],MR.DINESH KOTHARI,20Z-6-59-908,BHAGWATIMEDICAL7@YAHOO.COM,21Z-6-59-908,29207788,
MUMBAI,297,BHAGWATI MEDICAL [MAROL],"SHRI HANUMAN MANDIR RD;MAROL GAON,","ANDHERI[E],MUMBAI-59.",,ANDHERI [E],MR.DINESH KOTHARI,20Z-6-59-908,BHAGWATIMEDICAL7@YAHOO.COM,21Z-6-59-908,29207788,
MUMBAI,297,BHAGWATI MEDICAL [MAROL],"SHRI HANUMAN MANDIR RD;MAROL GAON,","ANDHERI[E],MUMBAI-59.",,ANDHERI [E],MR.DINESH KOTHARI,20Z-6-59-908,BHAGWATIMEDICAL7@YAHOO.COM,21Z-6-59-908,29207788,
MUMBAI,297,BHAGWATI MEDICAL & GENRAL STORE,"SHRI HANUMAN MANDIR ROAD,",MAROL VILLAGE,MAROL,MUMBAI,DINESH,20/Z-6/59/908,BHAGWATIMEDICAL7@YAHOO.COM,20C/Z-6/59/908,29207788/8286453461,98670976670
MUMBAI,297,$BHAGWATI MEDICAL.,"SHOP NO.2,ABDUL REHMAN CHAWL SHRI HANUMAN MANDIR ROAD",,ANDHERI(E),MUMABAI,,20-21-Z-1,BHAGWATIMEDICAL7@YAHOO.COM,59-908-20C,29207788/8286453461,
MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORE,SHOP NO.2 ABDUL REHMAN CHAWL  SHRI HANUMAN MANDIR MARG,"MAROL VILLAGE,",,ANDHERI (E),,20/Z-6/59/908,BHAGWATIMEDICAL7@YAHOO.COM,21/Z-6/59/908,29207788  / 9867097667,7666464888
MUMBAI,297,BHAGWATI MED. & GEN. STORES.,"SHREE HANUMAN MANDIR ROAD, MAROL VILLEG",,MAROL,MUMBAI,DINESH BHIMRAJ,20Z-6/59/908,BHAGWATIMEDICAL7@YAHOO.COM,20C-Z-6/59/940,29207788,9869260832
MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES.,"SHOP NO.2, GR FLOOR, MEZZAINI FLR,ABDULREHMAN CHAWL,","SHREE HANUMAN MANDIR ROAD,MAROL GAON",ANDHERI(E),MUMBAI,,"20,21/Z-6/59/90B",BHAGWATIMEDICAL7@YAHOO.COM,20C/Z-6/59/940,7977458967,9867097667
MUMBAI,297,BHAGWATI MEDICAL,"SHRI HANUMAN MANDIR RD,","MAROL GAON,MAROL, ANDHERI(E)",VP(E)-A(E)-MA,MUMBAI,,"20,21/Z-6/59/908",,20C/Z-6/59/940,29207788,7738788474
MUMBAI,297,BHAGWATI MEDICAL & GENERAL  STORES.,"SHOP NO.2,ABDUL REHMAN CHWAL,HANUMAN MANDIR,MAROL VILLADGE REZY COELHO CHAWL,",ANDHERI(E),ANDHERI (E),MUMBAI,DINESH BHAI,21Z-6/59/908,BHAGWATIMEDICAL7@YAHOO.COM,20Z-6/59/908,29207788/7666464888,
MUMBAI,297,BHAGWATI MED.& GEN. ST.,2 GR.FL.ABDUL REHMAN CHAWL,HANUMAN MANDIR RD.,MAROL GAON,ANDHERI-E,DINESH KOTHARI,"20,21/Z-6/59/908",BHAGWATIMEDICAL7@YAHOO.COM,20C/Z-6/59/940,9869260832,29207788
MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES.,SHOP NO 2.ABDUL REHMAN CHAWL.,"SHRI HANUMAN MANDIR ROAD, MAROL VILLAGE",MAROL - ANDHERI - EAST,MUMBAI,MAROL,20-Z6/59/908,BHAGWATIMEDICAL7@YAHOO.COM,21-Z6/59/908,29207788/7738788474/9869260832,9867097667
MUMBAI,297,BHAGWATI  MEDICAL,"SHRI   HANUMAN  MANDIR  ROAD,","MAROL  GAON,",ANDHERI (E),MUMBAI,,,,,29207788/8286453461,
MUMBAI,297,BHAGWATI MEDI & GEN.STORES,SHRI HANUMAN MANDIR ROAD MAROL VILLAGE,MAROL,,MAROL,,20/Z/6/59/749,,20 C/Z-6/59/788,29207788,
MUMBAI,297,BHAGWATI MED ST 29207788,2 GR FL MEZZANIN ABDUL REHAMAN,CHAWLHUMAN MANDIR RDMAROL,ANDHERI,,,27390646287V,BHAGWATIMEDICAL7@YAHOO.COM,20-21Z-59-908-20CZ6-59-940,,7666464888
MUMBAI,297,BHAGWATI MEDICAL,"SHRI HANUMAN MANDIR ROAD,MAROL GAON,MAROL,ANDHERI-E",,,,,,,,,8286453461
MUMBAI,297,BHAGWATI MED & GEN STORES,,ANDHERI (E),ANDHERI [W],,,,,/,,
MUMBAI,297,BHAGWATI MEDICAL STORE,SH NO.2BRFLR.MAZALIN FLR.,ABDUL REHMAN CHL.HANUMAN MAND,ANDHERI (WEST),,,27390646287 V,BHAGWATIMEDICAL7@YAHOO.COM,20-21-Z-6-59-90B,9867097667 / 8286453461,
MUMBAI,297,BHAGWATI MEDICAL   MAROL,SHOP NO 2 ABDULREHMAN CHAWL SH,ANDHERI E,,GENERAL,,20/21-Z6-59-908,,20C-Z6-59-940,29207788,
MUMBAI,297,BHAGWATI MEDICAL & GENERAL STORES,"SHRI HANUMAN MANDIR ROAD,, MAROL VILLAGE,, ANDHERI (E),",", MUMBAI.",ANDHERI (E),MUMBAI,,C_00121689190,MUMBAI,20/21-Z-6/59/908,,9867097667
MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST. #,"22,LAXMI CHAYYA BLDG","L.T.ROAD,BABHAI NAKA",BORIVLI,BORIVALI WEST,8959202,20/Z7/92/2221,GOPALKRISHNAMED22@GMAIL.COM,21/Z7/92/2221,9821287221/28959202,
MUMBAI,389,GOPAL KRISHNA MED & GEN STORES,"22,LAXMI CHHAYA,L.T.ROAD","BABAI NAKA ,EKSAR ROAD",BORIVALI (WEST),MUMBAI,MR CHANDRAKANT,20/Z7/92/2221,GOPALKRISHNAMED22@GMAIL.COM,21/Z7/92/2221,28959202/983381929,9821287221
MUMBAI,389,GOPAL KRISHNA MEDICAL & GENERAL STORES,"22, LAXMI CHHAYA, L.T.ROAD",BABHAI NAKA,BORIVALI W,MUMBAI,,20/Z/7/92/2221,GOPALKRISHNAMED22@GMAIL.COM,21/Z/7/92/2221,28959202,
MUMBAI,389,NEW GOPAL KRISHNA MEDICAL & GEN.STORES,"22, LAXMI CHHAYA, BABHAI NAKA",EKSAR ROAD,L.T.ROAD,BORIVALI (W),CHANDHUBHAI,20-MH-MZ7-192791,GOPALKRISHNAMED22@GMAIL.COM,21-MH-MZ7/192792,28959202,9833819296/9821287221
MUMBAI,389,GOPAL KRISHNA MED.&GEN.STORES,"22,LAXMI CHHAYA,L.T.ROAD,BABHAI","NAKA,WEST MUMBAI",,BORIVALI,CHANDRAKANTBHAI,20Z-7/92/2221,GOPALKRISHNAMED22@GMAIL.COM,21Z-7/92/2221,28959202/69931501,9833819296
MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST;[BORIVALI-W],"22,LAXMI CHHAYA ,L.T.RD;BHABAI NAKA,","BORIVALI[W],MUMBAI-92.",,BORIVALI [W],MR.CHANDUBHAI,20-Z-7/92/2221,,21-Z-7/92/2221,28959202,
MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST;[BORIVALI-W],"22,LAXMI CHHAYA ,L.T.RD;BHABAI NAKA,","BORIVALI[W],MUMBAI-92.",,BORIVALI [W],MR.CHANDUBHAI,20-Z-7/92/2221,,21-Z-7/92/2221,28959202,
MUMBAI,389,GOPAL KRISHNA MED.& GEN.ST;[BORIVALI-W],"22,LAXMI CHHAYA ,L.T.RD;BHABAI NAKA,","BORIVALI[W],MUMBAI-92.",,BORIVALI [W],MR.CHANDUBHAI,20-Z-7/92/2221,,21-Z-7/92/2221,28959202,
MUMBAI,389,GOPAL KRISHNA MED &. GENERAL STORES,"22, LAXMI CHHAYA BLDG,","BABHAI NAKA, EKSAR RD,",BORIVALI (W),MUMBAI,,20/Z/7/92/2221,,21/Z/7/92/2221,28959202 / 9821287221,
MUMBAI,389,GOPAL KRISHNA MED. & GEN. STORES,"22,LAXMI CHHAYA,","L.T. ROAD,BABHAI NAKA,",,BORIVALI{WEST},,20&21-Z-7/92/2221,GOPALKRISHNAMED22@GMAIL.COM,20C-Z-7/92/2124,"289,592,029,821,287,000",9833819296
MUMBAI,389,GOPAL KRISHNA MEDICAL,22LAXMI CHHAYYA,BABHAI NAKA EKSAR ROAD,(S) BORIVALI (WEST).,,,,,20-Z-7/92/187121-Z-7/92/1871 20C-Z-7/92/1817. DT.6.10.08,9821287221/9892695575,
MUMBAI,389,GOPALKRISHNA MEDICAL STORE,,,BORIVALI (WEST),MUMBAI,,,,,28959202,
MUMBAI,389,GOPAL KRISHNA MED &. GENERAL STORES,"22, LAXMI CHHAYA BLDG,L.T.RD","BABHAI NAKA, EKSAR RD,",BORIVALI (W),MUMBAI,,20-MH-MZ7-192791,GOPALKRISHNAMED22@GMAIL.COM,21-MH-MZ7-192792,28959202 / 9821287221,
MUMBAI,389,ZZGOPAL KRISHNA MED.ST.,22 LAXMI CHAYA,BABHAI NAKA,L.T.RD,BORIVALI-W,CHANDU BHAI,"20,21/Z-7/92/2221",GOPALKRISHNAMED22@GMAIL.COM,20C/Z-7/92/2124,28959202,
MUMBAI,389,GOPAL KRISHNA MED & GEN STORES,"22,LAXMI CHHAYA, L.T.RD,BABHAI NAKA",,,BORIVALI-W,,"20-Z-7/92/1536,21-Z-7/92/1536",,21-C-Z/92/1481,,
MUMBAI,389,GOPALKRISHNA MEDICAL.,"L.T.ROAD, BABHAI NAKA",BORIVALI (W),,BORIVALI (W),,,,,9821287221,
MUMBAI,389,GOPAL KRISHNA MEDICAL,"SH-22,L.T.RD,BABAI NAKA",,BORIVALI(W),MUMBAI,,,,,9821287221/28959202,
MUMBAI,389,GOPAL KRISHNA MED.&GEN.STORE,22/LAXMI CHHAYA; L.T.ROAD,BORIVALI  (WEST) BABHAI  NAKA,BORIVALI,,CHANDU BHAI - 9833819296,27480593421V,GOPALKRISHNAMED22@GMAIL.COM,20-Z-7/92/2221*21-Z-7/92/2221 20C-Z-7/92/2124,28959202,
MUMBAI,389,GOPAL KRISHNA  MED.(CLOSED-,"22,LAXMI CHHAYA,","L.T.ROAD,BABHAI NAKA, BORAVALI WEST,MUMBAI-400092",,BORIVALI- WEST,,20-Z-7/92/1536,,21-Z-7/92/1536,28959202,
MUMBAI,389,GOPAL KRISHNA MED & GEN STO,22 LAXMI CHHAYA L T RD,BABHAI NAKABORIVLI W MUM-92,BORIVALI,,9821287221 9892695575,27480593421.V,GOPALKRISHNAMED22@GMAIL.COM,20-21Z7922221 20C2124,28959202,
MUMBAI,389,GOPAL KRISHNA MED & GEN STORE,22/LAXMI CHHAYA,L.T.ROAD,BORIVALI (WEST),,,,,20-7-7/92/1536 /21-Z-7/92/1536,,

RCODE

# Reproduction script from the question: splits the data into train/test sets
# and attempts a 1-nearest-neighbour classification of RetailerId.
# NOTE(review): per the str() output shown further down, every column except
# RetailerId is read as a factor, and empty CSV fields become the "" factor
# level rather than NA, so na.omit() does not drop any rows here (str() still
# reports 42 obs.).
A = read.csv("data.csv")
A = data.frame(na.omit(A))
str(A)
#######
# split training and testing set
#######
# Fixed seed makes the random split reproducible. Each row is assigned to
# group 1 (training) with probability 0.9 or group 2 (testing) with 0.1.
set.seed(123)
sf = sample(2,nrow(A),replace = T,prob = c(0.9,0.1))
trd = A[sf == 1,]
tsd = A[sf == 2,]

# lists out the variables that are problematic
# (columns with fewer than two distinct values, i.e. constant columns)
which(sapply(A, function(x) length(unique(x))<2))

# Converts Dependent Variable into Factor
# (column 2 of the data is RetailerId)
Train_RetailerId = as.factor(trd[,2])

#######
# KNN
#######
library(class)
# NOTE(review): trd and tsd are passed whole, so they still contain the factor
# columns and the RetailerId target itself. As the answer below explains,
# knn() begins with as.matrix(train), which produces a character matrix for
# this data; this is presumably where the reported NA / NaN / Inf
# foreign-function-call error (arg 6) comes from.
Predicted.RetailerId = knn(trd,tsd,Train_RetailerId, k=1)

# NOTE(review): A$RetailerId has one value per row of A, whereas the
# predictions have one value per row of tsd, so the two vectors differ in
# length. The comparison was probably meant to use tsd$RetailerId -- confirm.
print(mean(A$RetailerId != Predicted.RetailerId))

# Puts the predictions side by side with the test rows.
Result = cbind(Predicted.RetailerId,tsd)

# NOTE(review): confusionMatrix() is not provided by any library() call
# visible in this script (presumably it comes from caret -- confirm).
confusionMatrix(Predicted.RetailerId,tsd$RetailerId)

Структура набора данных

> str(A)
'data.frame':   42 obs. of  13 variables:
 $ RegionName   : Factor w/ 1 level "MUMBAI": 1 1 1 1 1 1 1 1 1 1 ...
 $ RetailerId   : int  297 297 297 297 297 297 297 297 297 297 ...
 $ PartyName    : Factor w/ 32 levels "$BHAGWATI MEDICAL.",..: 12 15 15 15 14 1 11 5 13 8 ...
 $ Address1     : Factor w/ 36 levels "","2 GR FL MEZZANIN ABDUL REHAMAN",..: 4 32 32 32 34 27 25 29 26 31 ...
 $ Address2     : Factor w/ 31 levels "",", MUMBAI.",..: 29 7 7 7 26 1 27 1 30 25 ...
 $ Area         : Factor w/ 19 levels "","(S) BORIVALI (WEST).",..: 16 1 1 1 16 7 1 16 7 19 ...
 $ City         : Factor w/ 16 levels "","ANDHERI-E",..: 5 4 4 4 16 15 3 16 16 16 ...
 $ ContactPerson: Factor w/ 16 levels "","8959202","9821287221 9892695575",..: 12 16 16 16 8 1 1 10 1 1 ...
 $ CSTNumber    : Factor w/ 26 levels "","20-21-Z-1",..: 8 18 18 18 14 2 14 19 11 10 ...
 $ Email        : Factor w/ 4 levels "","BHAGWATIMEDICAL7@YAHOO.COM",..: 2 2 2 2 2 2 2 2 2 1 ...
 $ LicenseNumber: Factor w/ 30 levels "","/","20-21-Z-6-59-90B",..: 24 28 28 28 14 30 25 11 15 15 ...
 $ Telephone    : Factor w/ 18 levels "","289,592,029,821,287,000",..: 9 7 7 7 12 12 8 7 13 7 ...
 $ MobileNumber : Factor w/ 12 levels "","29207788",..: 5 1 1 1 11 1 3 12 10 4 ...

1 Ответ

2 голосов
/ 05 июля 2019

Первая строка исходного кода knn (его можно увидеть, набрав knn в консоли) — это train <- as.matrix(train), которая преобразует data.frame в матрицу. А поскольку матрица может содержать данные только одного типа, ваши данные преобразуются в символьную матрицу. Очевидно, что knn, как и почти любой другой алгоритм, требует для вычислений числовую матрицу.

# Coerce the training data.frame with as.matrix(), the same call knn() makes
# on its `train` argument, and inspect the storage type of the result.
trd_mat <- as.matrix(trd)
typeof(trd_mat)
#[1] "character"

Все ваши переменные имеют тип factor и содержат довольно много уровней. Единственный способ заставить knn работать — сначала преобразовать их в фиктивные (dummy) переменные, то есть в переменные со значениями 0 и 1, а затем запустить knn для полученного data.frame. Учитывая, что у ваших факторных переменных много уровней, результирующий data.frame будет очень разреженным, что может сделать knn менее эффективным.

Существует множество руководств о том, как преобразовать факторы в фиктивные переменные, если вы хотите пойти по этому пути. Привожу ссылку на одно из них.

В качестве альтернативы случайный лес может дать вам лучшие результаты с учетом ваших факторных переменных.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...