Я адаптировал алгоритм перцептрона из этой отличной статьи: https://machinelearningmastery.com/implement-perceptron-algorithm-scratch-python/
Первоначально я планировал использовать .dBFS из AudioSegment, но, похоже, это не сработало: функция обучения просто увеличивала смещение (bias) и веса. После этого я попробовал использовать max() и min() из SoundFile.
import argparse
import os
import wave
import soundfile as np
from pydub import AudioSegment
def dir_path(string):
    """Validate that *string* names an existing directory.

    Intended for use as an ``argparse`` ``type=`` callable.

    Args:
        string: Filesystem path to check.

    Returns:
        The same ``string``, unchanged, when it refers to a directory.

    Raises:
        NotADirectoryError: If ``string`` is not an existing directory.
    """
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return string
# Make a prediction with weights
def predict(row, weights):
    """Classify one sample with a perceptron step activation.

    Args:
        row: Feature values followed by the class label in the last
            position. The last element is never read here, so the same
            row layout works for training and for prediction.
        weights: ``weights[0]`` is the bias; ``weights[i + 1]`` is the
            weight applied to ``row[i]``.

    Returns:
        ``1.0`` when the weighted sum (bias included) is non-negative,
        otherwise ``0.0``.
    """
    activation = weights[0]
    # Skip the trailing label; weight indices are offset by one because
    # slot 0 holds the bias.
    for index, feature in enumerate(row[:-1], start=1):
        activation += weights[index] * feature
    return 1.0 if activation >= 0.0 else 0.0
# Estimate Perceptron weights using stochastic gradient descent
def train_weights(train, l_rate, n_epoch):
    """Fit perceptron weights by updating after every training row.

    Args:
        train: Non-empty sequence of rows. Each row holds the feature
            values followed by the expected class label in the last
            position.
        l_rate: Learning rate that scales every weight update.
        n_epoch: Number of full passes over ``train``.

    Returns:
        A list with one entry per row element: the bias at index 0,
        followed by one weight per feature.

    Side effects:
        Prints the summed squared error and the current weights once
        per epoch.
    """
    weights = [0.0] * len(train[0])
    for epoch in range(n_epoch):
        sum_error = 0.0
        for row in train:
            error = row[-1] - predict(row, weights)
            sum_error += error ** 2
            # The bias is updated as if its input were a constant 1.
            weights[0] += l_rate * error
            for index, feature in enumerate(row[:-1], start=1):
                weights[index] += l_rate * error * feature
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))
        print(weights)
    return weights
parser = argparse.ArgumentParser(description="Separating silent waves from louder waves")
parser.add_argument('-p', '--path', type=dir_path, metavar='', required=True, help="give path to the file with waves.")

if __name__ == '__main__':
    # Parse only when executed as a script, so importing this module does
    # not consume sys.argv or exit because --path is missing.
    args = parser.parse_args()

    # Resolve the dataset root BEFORE changing directory. A relative --path
    # would otherwise be re-resolved against the new working directory, and
    # os.walk would silently yield nothing.
    data_root = os.path.abspath(args.path)
    os.chdir(data_root)
    print(os.getcwd() + "\n\n")

    training_list = []
    testing_list = []
    # (directory-path suffix, destination list, class label)
    datasets = (
        ('silence_training', training_list, 0),
        ('voiced_training', training_list, 1),
        ('silence', testing_list, 0),
        ('voiced', testing_list, 1),
    )

    for root, _, files in os.walk(data_root):
        abs_path = os.path.abspath(root)
        for suffix, destination, label in datasets:
            if abs_path.endswith(suffix):
                break
        else:
            # This directory is not one of the four dataset folders.
            continue

        for f in files:
            # Match on the extension; a plain substring test would also
            # pick up unrelated files that merely contain "wav".
            if not f.lower().endswith('.wav'):
                continue
            # NOTE: `np` is this file's alias for the soundfile module,
            # not NumPy.
            x, _ = np.read(os.path.join(abs_path, f))
            # Each row is [max sample, min sample, label].
            destination.append([x.max(), x.min(), label])

    if not training_list:
        parser.error(
            "no .wav files found under 'silence_training' or "
            "'voiced_training' in " + data_root
        )

    # NOTE(review): testing_list is populated above but not consumed in the
    # visible part of the script.
    l_rate = 0.1
    n_epoch = 1000
    weights = train_weights(training_list, l_rate, n_epoch)
Кто-нибудь знает хороший способ справиться с этим?