Этот код предназначен для извлечения данных из PRAAT: он должен считывать данные из wav-файла и из файла TextGrid (текстовой сетки), который использовался для аннотирования в PRAAT, а затем записывать извлечённую информацию в CSV-файл.
При запуске код возвратил ошибку; однако, судя по результатам отладки, экстрактор не извлекает никаких признаков (features), и, следовательно, массивы оказываются пустыми.
from praatio import tgio
from praatio import tgio
from os.path import join
from scipy.io import wavfile
import subprocess
import os
import numpy as np
import csv
def extract_features(wavfile, praat_path=r'C:\Intel\Praat.exe'):
    """
    Extract features for an audio file by running the ``features.praat`` script.

    The script is looked up in the current working directory and executed
    with ``--run``; everything it prints is split on whitespace and parsed
    as floating-point numbers.

    :param wavfile: Absolute path to a WAV file. (Inside this function the
        name shadows the module-level ``scipy.io.wavfile`` import, which is
        harmless because that module is not used here.)
    :param praat_path: Path to the Praat executable. Defaults to the
        location that used to be hard-coded in this function.
    :return: 1-D numpy float array, one element per token printed by the
        script. Praat's ``--undefined--`` marker is returned as NaN.
    :raises subprocess.CalledProcessError: if Praat exits with a non-zero
        status.
    :raises ValueError: if a printed token is neither a number nor
        ``--undefined--``.
    """
    features_script_path = join(os.getcwd(), 'features.praat')
    # The executable path is a raw string: '\I' and '\P' are not valid escape
    # sequences and newer Python versions warn about them in normal literals.
    output = subprocess.check_output(
        [praat_path, '--run', features_script_path, wavfile])
    # NOTE(review): if this yields an empty array or unparsable tokens on
    # Windows, check the encoding Praat uses for its output (the Praat manual
    # describes a --utf8 switch) -- TODO confirm for the installed version.
    tokens = output.decode("utf-8").split()
    # Praat prints '--undefined--' for values it cannot compute (for example
    # pitch on an unvoiced snippet); keep the slot and mark it as missing
    # instead of aborting the whole extraction.
    values = [np.nan if token == '--undefined--' else float(token)
              for token in tokens]
    return np.asarray(values, dtype=float)
def get_snippet_features(wav_file_path, start_time, end_time):
    """
    Extract Praat features for one time interval of a WAV file.

    The samples between ``start_time`` and ``end_time`` are written to
    ``data/temp/temp.wav`` under the current working directory, that file is
    passed to ``extract_features``, and it is deleted again afterwards.

    :param wav_file_path: Path to the source WAV file.
    :param start_time: Start of the interval, in seconds.
    :param end_time: End of the interval, in seconds.
    :return: Whatever ``extract_features`` returns for the snippet.
    """
    fs, data = wavfile.read(wav_file_path)
    # Convert the interval boundaries from seconds to sample indices.
    start_time_sample = int(start_time * fs)
    end_time_sample = int(end_time * fs)

    temp_dir = join(os.getcwd(), 'data', 'temp')
    # The scratch directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(temp_dir, exist_ok=True)
    temp_file_path = join(temp_dir, 'temp.wav')
    try:
        wavfile.write(temp_file_path, rate=fs,
                      data=data[start_time_sample:end_time_sample])
        return extract_features(wavfile=temp_file_path)
    finally:
        # Always clean up, even when writing or feature extraction fails, so
        # a stale snippet is never left behind.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
def get_textgrid_features(filename, normalize=True):
    """
    Collect labels and Praat features for every PointOfReference interval.

    The TextGrid is read from ``data/<filename>`` under the current working
    directory; the audio is expected next to it with a ``.wav`` extension.
    The tier name list and the entries of the ``Orthographic``,
    ``CodeSwitch`` and ``PointOfReference`` tiers are printed.

    :param filename: Name of the TextGrid file inside the ``data`` directory.
    :param normalize: When true, standardise each feature column to zero
        mean and unit standard deviation across all intervals.
    :return: ``(labels, features)``. ``labels`` is the list of interval
        labels. ``features`` is a list with one feature vector per interval,
        or a 2-D numpy array when normalisation was applied.
    """
    file_path = join(os.getcwd(), 'data', filename)
    tg = tgio.openTextgrid(fnFullPath=file_path)
    # Swap only the file extension; str.replace would also rewrite a
    # '.TextGrid' occurring elsewhere in the path and silently do nothing for
    # a differently spelled extension.
    wav_file_path = os.path.splitext(file_path)[0] + '.wav'

    print(tg.tierNameList)
    # Each variable now holds the tier it is named after (the two names used
    # to be swapped); the printed output is unchanged.
    orthographic_tier = tg.tierDict['Orthographic']
    print(orthographic_tier.entryList)
    code_switch_tier = tg.tierDict['CodeSwitch']
    print(code_switch_tier.entryList)
    por_tier = tg.tierDict['PointOfReference']
    print(por_tier.entryList)

    features = []
    labels = []
    for item in por_tier.entryList:
        file_features = get_snippet_features(wav_file_path=wav_file_path,
                                             start_time=item.start,
                                             end_time=item.end)
        labels.append(item.label)
        features.append(file_features)

    # Normalisation is skipped when there are no intervals: the mean of an
    # empty set is NaN.
    if normalize and features:
        features = np.asarray(features, dtype=float)
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)
        # A constant column has zero spread; divide by 1 so it becomes 0
        # instead of NaN/inf.
        std = np.where(std == 0, 1.0, std)
        features = (features - mean) / std
    return labels, features
def generate_csv(labels, features, output_path):
    """
    Write labels and their nine feature values to a CSV file.

    The file starts with a fixed header row, followed by one row per label
    holding the label and the first nine values of the matching feature
    vector. An existing file at ``output_path`` is overwritten.

    :param labels: Sequence of row labels (written to the CS_TYPE column).
    :param features: Indexable collection where ``features[i]`` is the
        feature vector belonging to ``labels[i]``.
    :param output_path: Destination path of the CSV file.
    :raises IndexError: if a feature vector is missing or has fewer than
        nine values.
    """
    header = ['CS_TYPE', 'PITCH_AVG', 'PITCH_DIR', 'PITCH_DYN', 'PITCH_JIT',
              'INT_AVG', 'INT_DYN', 'INT_JIT', 'SPC_SLO', 'SPC_JIT']
    feature_count = len(header) - 1

    # Create the destination directory if needed; a bare file name has no
    # directory part to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Mode 'w' truncates an existing file, so no explicit removal is needed.
    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open(output_path, 'w', newline='') as csvfile:
        filewriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(header)
        for index, label in enumerate(labels):
            row = features[index]
            # Index explicitly (rather than slicing) so that a short feature
            # vector still fails loudly instead of producing a ragged row.
            filewriter.writerow(
                [label] + [row[column] for column in range(feature_count)])
# Script entry: extract the (un-normalised) features for the annotated
# recording 'Ian.TextGrid', echo them, and dump them to data/csv/ian.csv
# under the current working directory.
csv_output_path = join(os.getcwd(), 'data', 'csv', 'ian.csv')
labels, features = get_textgrid_features('Ian.TextGrid', normalize=False)
print(labels, features)
generate_csv(labels, features, csv_output_path)