Я обучил классификатор SVM на 104 обучающих файлах MFCC и протестировал его на 78 тестовых файлах MFCC, но получаю очень низкую точность. Я новичок в Python; может ли кто-нибудь сказать, верен ли код и какие ошибки я допускаю?
Я разрабатываю систему автоматического распознавания речи (ASR) для языка пушту.
train_list и test_list содержат MFCC-признаки каждого WAV-файла (по 1420 значений на файл).
train_labels и test_labels — это имена папок, в которых находятся эти WAV-файлы.
Буду благодарен за любую помощь.
# Gather the MFCC features of the 104 training recordings, one entry per file,
# in the same order as the entries of train_labels.
# NOTE(review): assumes every feature* is a 1-D vector of the same length, so
# that train_mfcc is a 2-D (104, n_features) array -- TODO confirm upstream.
train_mfcc = np.array([
    feature, feature1, feature2, feature3, feature4, feature5, feature6, feature7,
    feature8, feature9, feature10, feature11, feature12, feature13, feature14, feature15,
    feature16, feature17, feature18, feature19, feature20, feature21, feature22, feature23,
    feature24, feature25, feature26, feature27, feature28, feature29, feature30, feature31,
    feature32, feature33, feature34, feature35, feature36, feature37, feature38, feature39,
    feature40, feature41, feature42, feature43, feature44, feature45, feature46, feature47,
    feature48, feature49, feature50, feature51, feature52, feature53, feature54, feature55,
    feature56, feature57, feature58, feature59, feature60, feature61, feature62, feature63,
    feature64, feature65, feature66, feature67, feature68, feature69, feature70, feature71,
    feature72, feature73, feature74, feature75, feature76, feature77, feature78, feature79,
    feature80, feature81, feature82, feature83, feature84, feature85, feature86, feature87,
    feature88, feature89, feature90, feature91, feature92, feature93, feature94, feature95,
    feature96, feature97, feature98, feature99, feature100, feature101, feature102, feature103,
])
# Stack the per-file entries row-wise into the training matrix. The former
# `train_list=[]` placeholder was dead code (overwritten here) and is removed.
train_list = np.vstack(train_mfcc)
# Class label (source folder name) for each training file: 26 classes with
# 4 recordings each, grouped by class in the same order as train_mfcc.
# Building the array with np.repeat avoids hand-typed duplicates; the previous
# literal contained ' 79-ovlas', ' 84-dwaisht' and ' 85-draywisht' (leading
# space), which the classifier treated as three extra, distinct classes.
train_labels = np.repeat(
    [
        '62-sifer', '63-yow', '64-dwa', '65-dray', '66-celour', '67-pinza',
        '68-shpeg', '69-ova', '70-ata', '71-naha', '72-las', '73-yawlas',
        '74-dwlas', '75-dyarlas', '76-swarlas', '77-pinzalas', '78-shparlas',
        '79-ovlas', '80-atalas', '81-nolas', '82-shal', '83-yawisht',
        '84-dwaisht', '85-draywisht', '86-saleerisht', '87-pinzeesht',
    ],
    4,  # recordings per class in the training set
)
# Gather the MFCC features of the 78 test recordings, one entry per file,
# in the same order as the entries of test_labels.
# NOTE(review): assumes every testfeature* is a 1-D vector with the same length
# as the training features -- TODO confirm upstream.
test_mfcc = np.array([
    testfeature1, testfeature2, testfeature3, testfeature4, testfeature5, testfeature6,
    testfeature7, testfeature8, testfeature9, testfeature10, testfeature11, testfeature12,
    testfeature13, testfeature14, testfeature15, testfeature16, testfeature17, testfeature18,
    testfeature19, testfeature20, testfeature21, testfeature22, testfeature23, testfeature24,
    testfeature25, testfeature26, testfeature27, testfeature28, testfeature29, testfeature30,
    testfeature31, testfeature32, testfeature33, testfeature34, testfeature35, testfeature36,
    testfeature37, testfeature38, testfeature39, testfeature40, testfeature41, testfeature42,
    testfeature43, testfeature44, testfeature45, testfeature46, testfeature47, testfeature48,
    testfeature49, testfeature50, testfeature51, testfeature52, testfeature53, testfeature54,
    testfeature55, testfeature56, testfeature57, testfeature58, testfeature59, testfeature60,
    testfeature61, testfeature62, testfeature63, testfeature64, testfeature65, testfeature66,
    testfeature67, testfeature68, testfeature69, testfeature70, testfeature71, testfeature72,
    testfeature73, testfeature74, testfeature75, testfeature76, testfeature77, testfeature78,
])
# Stack the per-file entries row-wise into the test matrix. The former
# `test_list=[]` placeholder was dead code (overwritten here) and is removed.
test_list = np.vstack(test_mfcc)
# Class label (source folder name) for each test file: 26 classes with
# 3 recordings each, grouped by class in the same order as test_mfcc.
# Building the array with np.repeat avoids hand-typed duplicates; the previous
# literal contained ' 79-ovlas', ' 84-dwaisht' and ' 85-draywisht' (leading
# space), which can never equal a correctly spelled predicted label.
test_labels = np.repeat(
    [
        '62-sifer', '63-yow', '64-dwa', '65-dray', '66-celour', '67-pinza',
        '68-shpeg', '69-ova', '70-ata', '71-naha', '72-las', '73-yawlas',
        '74-dwlas', '75-dyarlas', '76-swarlas', '77-pinzalas', '78-shparlas',
        '79-ovlas', '80-atalas', '81-nolas', '82-shal', '83-yawisht',
        '84-dwaisht', '85-draywisht', '86-saleerisht', '87-pinzeesht',
    ],
    3,  # recordings per class in the test set
)
# Train an SVM on standardized, PCA-reduced MFCC features and print the
# accuracy obtained on the held-out test files.
X = train_list
X_test = test_list

# The labels are folder names typed by hand; strip stray whitespace so that
# e.g. ' 79-ovlas' and '79-ovlas' are not treated as two different classes.
y_train = np.array([str(label).strip() for label in train_labels])
y_test = np.array([str(label).strip() for label in test_labels])

# Standardize each feature; the scaler is fitted on the training data only and
# then re-used unchanged for the test data.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

# Project onto 65 principal components learned from the training data only.
pca = PCA(n_components=65).fit(X_scaled)
X_pca = pca.transform(X_scaled)
X_test_pca = pca.transform(X_test_scaled)
# Fraction of the training-set variance retained by the kept components.
print(sum(pca.explained_variance_ratio_))

# NOTE(review): kernel/degree/C are kept exactly as in the original. With only
# a handful of recordings per class, a linear or RBF kernel tuned by
# cross-validation may generalize better -- worth trying, not verified here.
clf = SVC(kernel='poly', degree=3, C=10.0)
clf.fit(X_pca, y_train)

# accuracy_score expects (y_true, y_pred); accuracy itself is symmetric, but
# the documented order keeps this consistent with the other sklearn metrics.
print(accuracy_score(y_test, clf.predict(X_test_pca)))
В результате я получаю точность 0.0641, что очень мало.