TypeError: объект 'float' не может быть интерпретирован как индекс, возможный способ решить эту проблему? - PullRequest
0 голосов
/ 06 декабря 2018

Попытка построить проект распознавания говорящего с использованием Python 2.x.Как зависимость, проект опирается на scipy и numpy.Однако, когда код выполняется, генерируется трассировка стека:

  Traceback (most recent call last):
  File "../python/raw2ivec.py", line 227, in <module>
    USEHAMMING  = True)
  File "/Users/shaheenakader/Downloads/vbs_demo/python/features.py", line 108, in mfcc_htk
    x = framing(x.astype("float"), window.size, window.size-noverlap).copy()
  File "/Users/shaheenakader/Downloads/vbs_demo/python/features.py", line 14, in framing
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
  File "/Users/shaheenakader/anaconda2/envs/voicebio/lib/python2.7/site-packages/numpy/lib/stride_tricks.py", line 102, in as_strided
    array = np.asarray(DummyArray(interface, base=x))
  File "/Users/shaheenakader/anaconda2/envs/voicebio/lib/python2.7/site-packages/numpy/core/numeric.py", line 501, in asarray
    return array(a, dtype, copy=False, order=order)
    TypeError: 'float' object cannot be interpreted as an index

Я пробовал существующие решения для подобных вопросов:

Однако, это не помогло решить проблему.

Соответствующие фрагменты кода, связанные с соответствующими файлами, были добавлены ниже:

raw2ivec.py

    print '  Extracting features',
    fea = features.mfcc_htk(sig, 
                            window      = WINDOWSIZE/SOURCERATE,
                            noverlap    = (WINDOWSIZE-TARGETRATE)/SOURCERATE,
                            fbank_mx    = fbank_mx,
                            _0          = 'first',
                            NUMCEPS     = NUMCEPS,
                            RAWENERGY   = RAWENERGY,
                            PREEMCOEF   = PREEMCOEF,
                            CEPLIFTER   = CEPLIFTER,
                            ZMEANSOURCE = ZMEANSOURCE,
                            ENORMALISE  = ENORMALISE,
                            ESCALE      = 0.1,
                            SILFLOOR    = 50.0,
                            USEHAMMING  = True)

    print '[n=' + repr(len(fea)) + ' frames]'

    print '  Adding derivatives'
    # [add_deriv] step 
    fea = features.add_deriv(fea,(deltawindow,accwindow))

features.py

def mfcc_htk(x, window, noverlap, fbank_mx, nfft=None,
         _0="last", _E=None, NUMCEPS=12,
         USEPOWER=False, RAWENERGY=True, PREEMCOEF=0.97, CEPLIFTER=22.0, ZMEANSOURCE=False,
         ENORMALISE=True, ESCALE=0.1, SILFLOOR=50.0, USEHAMMING=True):
"""MFCC Mel Frequency Cepstral Coefficients
Returns NUMCEPS-by-M matrix of MFCC coeficients extracted form signal x,
where M is the number of extracted frames, which can be computed as
floor((length(x)-noverlap)/(window-noverlap)). Remaining parameters
have the following meaning:
x         - input signal
window    - frame window lentgth (in samples, i.e. WINDOWSIZE/SOURCERATE) 
            or vector of widow weights override default windowing function
            (see option USEHAMMING)
noverlap  - overlapping between frames (in samples, i.e window-TARGETRATE/SOURCERATE)
fbank_mx  - array with (Mel) filter bank (as returned by function mel_fbank_mx()).
            Note that this must be compatible with the parameter 'nfft'.
nfft      - number of samples for FFT computation. By default, it is  set in the
            HTK-compatible way to the window length rounded up to the next higher
            pover of two.
_0, _E    - include C0 or/and energy as the "first" or the "last" coefficient(s)
            of each feature vector. The possible values are: "first", "last", None.
            If both C0 and energy are used, energy will be the very first or the
            very last coefficient.

Remaining options have exactly the same meaning as in HTK.

See also:
  mel_fbank_mx:
      to obtain the matrix for the parameter fbank_mx
  add_deriv: 
      for adding delta, double delta, ... coefficients
  add_dither:
      for adding dithering in HTK-like fashion
"""

dct_mx = dct_basis(NUMCEPS+1,fbank_mx.shape[1]).T
dct_mx[:,0] = np.sqrt(2.0/fbank_mx.shape[1])
if type(USEPOWER) == bool:
    USEPOWER += 1
if np.isscalar(window):
    window = np.hamming(window) if USEHAMMING else np.ones(window)
if nfft is None:
    nfft = 2**int(np.ceil(np.log2(window.size)))
x = framing(x.astype("float"), window.size, window.size-noverlap).copy()
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - - SUSPECTED LINE WHERE ERROR IS CAUSED
if ZMEANSOURCE:
    x -= x.mean(axis=1)[:,np.newaxis]
if _E is not None and RAWENERGY:
    energy = np.log((x**2).sum(axis=1))
if PREEMCOEF is not None:
    x = preemphasis(x, PREEMCOEF)
x *= window
if _E is not None and not RAWENERGY:
    energy = np.log((x**2).sum(axis=1))
#x = np.abs(scipy.fftpack.fft(x, nfft))
#x = x[:,:x.shape[1]/2+1]
x = np.abs(np.fft.rfft(x, nfft))
x = np.log(np.maximum(1.0, (x**USEPOWER).dot(fbank_mx))).dot(dct_mx)
if CEPLIFTER is not None and CEPLIFTER > 0:
    x *= 1.0 + 0.5 * CEPLIFTER * np.sin(np.pi * np.arange(NUMCEPS+1) / CEPLIFTER)
if _E is not None and ENORMALISE:
    energy = (energy - energy.max())       * ESCALE + 1.0
    min_val  = -np.log(10**(SILFLOOR/10.)) * ESCALE + 1.0
    energy[energy < min_val] = min_val

return np.hstack(([energy[:,np.newaxis]] if _E == "first" else []) +
                 ([x[:,:1]]              if _0 == "first" else []) +
                  [x[:,1:]] +
                 ([x[:,:1]]              if (_0 in ["last", True])  else []) +
                 ([energy[:,np.newaxis]] if (_E in ["last", True])  else []))

def framing(a, window, shift=1):
shape = ((a.shape[0] - window) / shift + 1, window) + a.shape[1:]
strides = (a.strides[0]*shift,a.strides[0]) + a.strides[1:]
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

Что, возможно, является причиной проблемы, и как лучше всего это исправить?Любая помощь будет высоко ценится.

...