Попытка построить проект распознавания говорящего с использованием Python 2.x
.Как зависимость, проект опирается на scipy
и numpy
.Однако, когда код выполняется, генерируется трассировка стека:
Traceback (most recent call last):
File "../python/raw2ivec.py", line 227, in <module>
USEHAMMING = True)
File "/Users/shaheenakader/Downloads/vbs_demo/python/features.py", line 108, in mfcc_htk
x = framing(x.astype("float"), window.size, window.size-noverlap).copy()
File "/Users/shaheenakader/Downloads/vbs_demo/python/features.py", line 14, in framing
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
File "/Users/shaheenakader/anaconda2/envs/voicebio/lib/python2.7/site-packages/numpy/lib/stride_tricks.py", line 102, in as_strided
array = np.asarray(DummyArray(interface, base=x))
File "/Users/shaheenakader/anaconda2/envs/voicebio/lib/python2.7/site-packages/numpy/core/numeric.py", line 501, in asarray
return array(a, dtype, copy=False, order=order)
TypeError: 'float' object cannot be interpreted as an index
Я пробовал существующие решения для подобных вопросов:
Однако, это не помогло решить проблему.
Соответствующие фрагменты кода, связанные с соответствующими файлами, были добавлены ниже:
raw2ivec.py
print ' Extracting features',
fea = features.mfcc_htk(sig,
window = WINDOWSIZE/SOURCERATE,
noverlap = (WINDOWSIZE-TARGETRATE)/SOURCERATE,
fbank_mx = fbank_mx,
_0 = 'first',
NUMCEPS = NUMCEPS,
RAWENERGY = RAWENERGY,
PREEMCOEF = PREEMCOEF,
CEPLIFTER = CEPLIFTER,
ZMEANSOURCE = ZMEANSOURCE,
ENORMALISE = ENORMALISE,
ESCALE = 0.1,
SILFLOOR = 50.0,
USEHAMMING = True)
print '[n=' + repr(len(fea)) + ' frames]'
print ' Adding derivatives'
# [add_deriv] step
fea = features.add_deriv(fea,(deltawindow,accwindow))
features.py
def mfcc_htk(x, window, noverlap, fbank_mx, nfft=None,
_0="last", _E=None, NUMCEPS=12,
USEPOWER=False, RAWENERGY=True, PREEMCOEF=0.97, CEPLIFTER=22.0, ZMEANSOURCE=False,
ENORMALISE=True, ESCALE=0.1, SILFLOOR=50.0, USEHAMMING=True):
"""MFCC Mel Frequency Cepstral Coefficients
Returns NUMCEPS-by-M matrix of MFCC coeficients extracted form signal x,
where M is the number of extracted frames, which can be computed as
floor((length(x)-noverlap)/(window-noverlap)). Remaining parameters
have the following meaning:
x - input signal
window - frame window lentgth (in samples, i.e. WINDOWSIZE/SOURCERATE)
or vector of widow weights override default windowing function
(see option USEHAMMING)
noverlap - overlapping between frames (in samples, i.e window-TARGETRATE/SOURCERATE)
fbank_mx - array with (Mel) filter bank (as returned by function mel_fbank_mx()).
Note that this must be compatible with the parameter 'nfft'.
nfft - number of samples for FFT computation. By default, it is set in the
HTK-compatible way to the window length rounded up to the next higher
pover of two.
_0, _E - include C0 or/and energy as the "first" or the "last" coefficient(s)
of each feature vector. The possible values are: "first", "last", None.
If both C0 and energy are used, energy will be the very first or the
very last coefficient.
Remaining options have exactly the same meaning as in HTK.
See also:
mel_fbank_mx:
to obtain the matrix for the parameter fbank_mx
add_deriv:
for adding delta, double delta, ... coefficients
add_dither:
for adding dithering in HTK-like fashion
"""
dct_mx = dct_basis(NUMCEPS+1,fbank_mx.shape[1]).T
dct_mx[:,0] = np.sqrt(2.0/fbank_mx.shape[1])
if type(USEPOWER) == bool:
USEPOWER += 1
if np.isscalar(window):
window = np.hamming(window) if USEHAMMING else np.ones(window)
if nfft is None:
nfft = 2**int(np.ceil(np.log2(window.size)))
x = framing(x.astype("float"), window.size, window.size-noverlap).copy()
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - - SUSPECTED LINE WHERE ERROR IS CAUSED
if ZMEANSOURCE:
x -= x.mean(axis=1)[:,np.newaxis]
if _E is not None and RAWENERGY:
energy = np.log((x**2).sum(axis=1))
if PREEMCOEF is not None:
x = preemphasis(x, PREEMCOEF)
x *= window
if _E is not None and not RAWENERGY:
energy = np.log((x**2).sum(axis=1))
#x = np.abs(scipy.fftpack.fft(x, nfft))
#x = x[:,:x.shape[1]/2+1]
x = np.abs(np.fft.rfft(x, nfft))
x = np.log(np.maximum(1.0, (x**USEPOWER).dot(fbank_mx))).dot(dct_mx)
if CEPLIFTER is not None and CEPLIFTER > 0:
x *= 1.0 + 0.5 * CEPLIFTER * np.sin(np.pi * np.arange(NUMCEPS+1) / CEPLIFTER)
if _E is not None and ENORMALISE:
energy = (energy - energy.max()) * ESCALE + 1.0
min_val = -np.log(10**(SILFLOOR/10.)) * ESCALE + 1.0
energy[energy < min_val] = min_val
return np.hstack(([energy[:,np.newaxis]] if _E == "first" else []) +
([x[:,:1]] if _0 == "first" else []) +
[x[:,1:]] +
([x[:,:1]] if (_0 in ["last", True]) else []) +
([energy[:,np.newaxis]] if (_E in ["last", True]) else []))
def framing(a, window, shift=1):
shape = ((a.shape[0] - window) / shift + 1, window) + a.shape[1:]
strides = (a.strides[0]*shift,a.strides[0]) + a.strides[1:]
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
Что, возможно, является причиной проблемы, и как лучше всего это исправить?Любая помощь будет высоко ценится.