Я развернул кластер Dask из 100 ядер на 5 узлах через PBS. Затем я прочитал около 1000 плиток MODIS (hdf5), используя open_mfdataset из Xarray. После объединения массивов, в которых собраны все временные шаги (92 на плитку), я пытаюсь вычислить евклидово расстояние от одной точки данных q до всех остальных точек и использую argtopk, чтобы получить 500 наименьших из них. Когда я вызываю compute для этого массива из 500 результатов, я получаю: RuntimeError: NetCDF: HDF error
Я пробовал разные размеры кластера, а также чтение файлов и с NFS, и с Lustre.
# Build a random-projection "sketch" of every pixel's NDVI time series, then
# find the pixels whose sketches are closest (Euclidean distance) to one
# query pixel.
#
# Names used but not defined in this snippet: np, xr, da, client,
# conus_tiles, tiles.

# Create random sketch vectors whose elements are either +1 or -1.
sketch_len = 10
rv = np.random.randint(2, size=(92, sketch_len))  # 92 = time steps per tile
rv = rv + (rv - 1)  # maps {0, 1} -> {-1, +1}
rv_da = xr.DataArray(rv, dims=['time', 'rv'])

conus_tile_sketches = []
for ct in conus_tiles:
    # NOTE(review): the loop variable `ct` is never used and the same `tiles`
    # object is passed on every iteration -- presumably this should be the
    # file list belonging to tile `ct`; confirm against the full script.
    tile_ts = xr.open_mfdataset(
        tiles,
        concat_dim='time',
        mask_and_scale=False,
        combine='nested',
        parallel=True,
    )['500m 16 days NDVI']
    tile_ts = tile_ts.transpose('y', 'x', 'time')
    # Keep the whole time axis (92 steps) inside each chunk.
    tile_ts = tile_ts.chunk((100, 100, 92))
    # 'time' is the only dimension shared with rv_da, so the dot product
    # contracts over it and leaves a trailing 'rv' dimension.
    tile_sketch = tile_ts.dot(rv_da)
    tile_sketch = client.persist(tile_sketch)
    conus_tile_sketches.append(tile_sketch)

# NOTE(review): the code below indexes flat_sketches as a 2-D array
# (`[:, 30123456]`, then reshape to (10, 1)), which implies a layout of
# (sketch_len, n_pixels). The step that flattens each tile sketch into that
# layout is not shown in this snippet -- confirm before relying on axis=1.
flat_sketches = da.concatenate(conus_tile_sketches, axis=1)
flat_sketches = client.persist(flat_sketches)

# Query point: one column of the sketch matrix, reshaped so that it
# broadcasts against every other column.
q = flat_sketches[:, 30123456]
q = q.reshape(10, 1)

# Euclidean distance from q to every sketch column.
dist = da.linalg.norm(flat_sketches - q, axis=0)
dist = client.persist(dist)

# Negative k asks argtopk for the k smallest values. 501 rather than 500
# presumably leaves room for the query point itself (distance 0) -- confirm.
closest_idx = dist.argtopk(-501)
# This is the call that raises in the traceback that follows this snippet.
closest_idx = closest_idx.compute()
This should return the values of the closest_idx array. Instead, I get the following stack trace.
Note, my dist dask array is large:
>>> dist
dask.array<pow, shape=(63360000,), dtype=float64, chunksize=(19200,), chunktype=numpy.ndarray>
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/dask/base.py", line 175, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/dask/base.py", line 446, in compute
results = schedule(dsk, keys, **kwargs)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/distributed/client.py", line 2520, in get
results = self.gather(packed, asynchronous=asynchronous, direct=direct)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/distributed/client.py", line 1820, in gather
asynchronous=asynchronous,
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/distributed/client.py", line 754, in sync
self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/distributed/utils.py", line 337, in sync
raise exc.with_traceback(tb)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/distributed/utils.py", line 321, in f
result[0] = yield future
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/tornado/gen.py", line 735, in run
value = future.result()
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/distributed/client.py", line 1676, in _gather
raise exception.with_traceback(traceback)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/dask/array/core.py", line 108, in getter
c = np.asarray(c)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/numpy/core/_asarray.py", line 85, in asarray
return array(a, dtype, copy=False, order=order)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/core/indexing.py", line 452, in __array__
return np.asarray(self.array, dtype=dtype)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/numpy/core/_asarray.py", line 85, in asarray
return array(a, dtype, copy=False, order=order)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/core/indexing.py", line 610, in __array__
return np.asarray(self.array, dtype=dtype)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/numpy/core/_asarray.py", line 85, in asarray
return array(a, dtype, copy=False, order=order)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/core/indexing.py", line 516, in __array__
return np.asarray(array[self.key], dtype=None)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/conventions.py", line 42, in __getitem__
return np.asarray(self.array[key], dtype=self.dtype)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/numpy/core/_asarray.py", line 85, in asarray
return array(a, dtype, copy=False, order=order)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/core/indexing.py", line 516, in __array__
return np.asarray(array[self.key], dtype=None)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/backends/netCDF4_.py", line 70, in __getitem__
self._getitem)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/core/indexing.py", line 784, in explicit_indexing_adapter
result = raw_indexing_method(raw_key.tuple)
File "/home7/jcbecker/.conda/envs/geo/lib/python3.7/site-packages/xarray/backends/netCDF4_.py", line 81, in _getitem
array = getitem(original_array, key)
File "netCDF4/_netCDF4.pyx", line 4351, in netCDF4._netCDF4.Variable.__getitem__
File "netCDF4/_netCDF4.pyx", line 5296, in netCDF4._netCDF4.Variable._get
File "netCDF4/_netCDF4.pyx", line 1857, in netCDF4._netCDF4._ensure_nc_success
RuntimeError: NetCDF: HDF error