Ошибка zlib в Google ML Engine при отправке задания - PullRequest
0 голосов
/ 03 октября 2018

При попытке запустить мой код Keras в Google ML Engine с использованием иерархии папок, как описано в документации (setup.py, модуль с исходным кодом и yaml-файл, описывающий конфигурацию), возникает ошибка zlib. Я не могу найти документацию по этой ошибке. Я использую Python 3.6.6 (но в конфигурации указана версия 3.5, которая и будет использоваться). Кроме того:

# Settings for the ML Engine training job; all of them are exported so that
# the gcloud submit command (and any child process) can read them.
BUCKET_NAME='data200x200'
# Job name carries a timestamp suffix (YYYYmmdd_HHMMSS) taken at assignment time.
JOB_NAME="Benchmarking_$(date +%Y%m%d_%H%M%S)"
CLOUD_CONFIG='trainer/cloudml-gpu.yaml'
# Job output location inside the bucket, derived from the two values above.
JOB_DIR="gs://${BUCKET_NAME}/jobs/${JOB_NAME}"
MODULE='trainer.gcloud_script'
PACKAGE_PATH='trainer'
REGION='europe-west1'
export BUCKET_NAME JOB_NAME CLOUD_CONFIG JOB_DIR MODULE PACKAGE_PATH REGION

# Submit the training job and stream its logs to the terminal.
# Reads JOB_NAME, JOB_DIR, PACKAGE_PATH, MODULE, REGION and CLOUD_CONFIG from
# the environment. Every expansion is double-quoted so that a value containing
# whitespace or glob characters is passed to gcloud as a single argument.
gcloud ml-engine jobs submit training "$JOB_NAME" \
  --stream-logs \
  --job-dir "$JOB_DIR" \
  --package-path "$PACKAGE_PATH" \
  --module-name "$MODULE" \
  --region "$REGION" \
  --config="$CLOUD_CONFIG" \
  --runtime-version 1.10

Setup.py:

from setuptools import setup
from setuptools import find_packages

# Package definition for the training code: every package found by
# find_packages() is included, and keras and h5py are declared as
# install-time requirements.
setup(
    name='Benchmark',
    version='0.1',
    description='Benchmarking',
    author='Jony Van Puymbroeck',
    author_email='***',
    packages=find_packages(),
    include_package_data=True,
    install_requires=['keras', 'h5py'],
)

cloudml-gpu.yaml:

# Training input settings for the job (the cloudml-gpu.yaml config file).
trainingInput:
  # Python version requested for the training run.
  pythonVersion: "3.5"
  # NOTE(review): presumably CUSTOM is what allows masterType to be chosen
  # explicitly — confirm against the ML Engine documentation.
  scaleTier: CUSTOM
  # Machine type requested for the master replica.
  masterType: standard_gpu

Выдается ошибка:

Job [Benchmarking_20181003_160926] submitted successfully.
INFO    2018-10-03 16:09:28 +0200   service     Validating job requirements...
INFO    2018-10-03 16:09:29 +0200   service     Job creation request has been successfully validated.
INFO    2018-10-03 16:09:29 +0200   service     Waiting for job to be provisioned.
INFO    2018-10-03 16:09:29 +0200   service     Job Benchmarking_20181003_160926 is queued.
INFO    2018-10-03 16:09:34 +0200   service     Waiting for training program to start.
INFO    2018-10-03 16:11:06 +0200   master-replica-0        Running task with arguments: --cluster={"master": ["127.0.0.1:2222"]} --task={"type": "master", "index": 0} --job={  "scale_tier": "CUSTOM",  "master_type": "standard_gpu",  "package_uris": ["gs://data200x200/jobs/Benchmarking_20181003_160926/packages/5616f5b1404c853afddfaf76abc9cd98b66ff3be7b39e88ac0121fb17f1c16ac/Benchmark-0.2.tar.gz"],  "python_module": "trainer.gcloud_script",  "region": "europe-west1",  "runtime_version": "1.10",  "job_dir": "gs://data200x200/jobs/Benchmarking_20181003_160926",  "run_on_raw_vm": true,  "python_version": "3.5"}
INFO    2018-10-03 16:11:17 +0200   master-replica-0        Running module trainer.gcloud_script.
INFO    2018-10-03 16:11:17 +0200   master-replica-0        Downloading the package: gs://data200x200/jobs/Benchmarking_20181003_160926/packages/5616f5b1404c853afddfaf76abc9cd98b66ff3be7b39e88ac0121fb17f1c16ac/Benchmark-0.2.tar.gz
INFO    2018-10-03 16:11:17 +0200   master-replica-0        Running command: gsutil -q cp gs://data200x200/jobs/Benchmarking_20181003_160926/packages/5616f5b1404c853afddfaf76abc9cd98b66ff3be7b39e88ac0121fb17f1c16ac/Benchmark-0.2.tar.gz Benchmark-0.2.tar.gz
INFO    2018-10-03 16:11:26 +0200   master-replica-0        Installing the package: gs://data200x200/jobs/Benchmarking_20181003_160926/packages/5616f5b1404c853afddfaf76abc9cd98b66ff3be7b39e88ac0121fb17f1c16ac/Benchmark-0.2.tar.gz
INFO    2018-10-03 16:11:26 +0200   master-replica-0        Running command: pip3 install --user --upgrade --force-reinstall --no-deps Benchmark-0.2.tar.gz
INFO    2018-10-03 16:11:28 +0200   master-replica-0        Processing ./Benchmark-0.2.tar.gz
ERROR   2018-10-03 16:11:28 +0200   master-replica-0        Exception:
ERROR   2018-10-03 16:11:28 +0200   master-replica-0        Traceback (most recent call last):
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/basecommand.py", line 141, in main
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            status = self.run(options, args)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/commands/install.py", line 299, in run
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            resolver.resolve(requirement_set)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/resolve.py", line 102, in resolve
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            self._resolve_one(requirement_set, req)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/resolve.py", line 256, in _resolve_one
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            abstract_dist = self._get_abstract_dist_for(req_to_install)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/resolve.py", line 209, in _get_abstract_dist_for
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            self.require_hashes
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/operations/prepare.py", line 283, in prepare_linked_requirement
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            progress_bar=self.progress_bar
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/download.py", line 823, in unpack_url
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            unpack_file_url(link, location, download_dir, hashes=hashes)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/download.py", line 728, in unpack_file_url
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            unpack_file(from_path, location, content_type, link)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/utils/misc.py", line 581, in unpack_file
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            tarfile.is_tarfile(filename) or
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 2448, in is_tarfile
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            t = open(name)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            return func(name, "r", fileobj, **kwargs)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1629, in gzopen
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            t = cls.taropen(name, mode, fileobj, **kwargs)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1605, in taropen
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            return cls(name, mode, fileobj, **kwargs)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1470, in __init__
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            self.firstmember = self.next()
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 2279, in next
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            tarinfo = self.tarinfo.fromtarfile(self)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1082, in fromtarfile
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/gzip.py", line 274, in read
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            return self._buffer.read(size)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/_compression.py", line 68, in readinto
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            data = self.read(len(byte_view))
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/gzip.py", line 469, in read
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            uncompress = self._decompressor.decompress(buf, size)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0        zlib.error: Error -3 while decompressing data: invalid distance too far back
WARNING 2018-10-03 16:11:28 +0200   master-replica-0        Installation of package failed on try 1/2: Command '['pip3', 'install', '--user', '--upgrade', '--force-reinstall', '--no-deps', 'Benchmark-0.2.tar.gz']' returned non-zero exit status 2
WARNING 2018-10-03 16:11:28 +0200   master-replica-0        Retrying ...
INFO    2018-10-03 16:11:28 +0200   master-replica-0        Running command: pip3 install --user --upgrade --force-reinstall --no-deps Benchmark-0.2.tar.gz
INFO    2018-10-03 16:11:28 +0200   master-replica-0        Processing ./Benchmark-0.2.tar.gz
ERROR   2018-10-03 16:11:28 +0200   master-replica-0        Exception:
ERROR   2018-10-03 16:11:28 +0200   master-replica-0        Traceback (most recent call last):
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/basecommand.py", line 141, in main
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            status = self.run(options, args)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/commands/install.py", line 299, in run
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            resolver.resolve(requirement_set)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/resolve.py", line 102, in resolve
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            self._resolve_one(requirement_set, req)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/resolve.py", line 256, in _resolve_one
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/resolve.py", line 209, in _get_abstract_dist_for
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            self.require_hashes
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/operations/prepare.py", line 283, in prepare_linked_requirement
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            progress_bar=self.progress_bar
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            unpack_file_url(link, location, download_dir, hashes=hashes)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            unpack_file(from_path, location, content_type, link)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/local/lib/python3.5/dist-packages/pip/_internal/utils/misc.py", line 581, in unpack_file
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            tarfile.is_tarfile(filename) or
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 2448, in is_tarfile
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            t = open(name)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1557, in open
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            return func(name, "r", fileobj, **kwargs)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            t = cls.taropen(name, mode, fileobj, **kwargs)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1605, in taropen
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            return cls(name, mode, fileobj, **kwargs)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1470, in __init__
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            self.firstmember = self.next()
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 2279, in next
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            tarinfo = self.tarinfo.fromtarfile(self)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/tarfile.py", line 1082, in fromtarfile
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            buf = tarfile.fileobj.read(BLOCKSIZE)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/gzip.py", line 274, in read
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            return self._buffer.read(size)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/_compression.py", line 68, in readinto
ERROR   2018-10-03 16:11:28 +0200   master-replica-0          File "/usr/lib/python3.5/gzip.py", line 469, in read
ERROR   2018-10-03 16:11:28 +0200   master-replica-0            uncompress = self._decompressor.decompress(buf, size)
ERROR   2018-10-03 16:11:28 +0200   master-replica-0        zlib.error: Error -3 while decompressing data: invalid distance too far back
ERROR   2018-10-03 16:11:29 +0200   master-replica-0        Command '['pip3', 'install', '--user', '--upgrade', '--force-reinstall', '--no-deps', 'Benchmark-0.2.tar.gz']' returned non-zero exit status 2
INFO    2018-10-03 16:11:29 +0200   master-replica-0        Module completed; cleaning up.
INFO    2018-10-03 16:11:29 +0200   master-replica-0        Clean up finished.
ERROR   2018-10-03 16:11:41 +0200   service     The replica master 0 exited with a non-zero status of 2. 
...