Вот следующие шаги:
mkdir spark_lib; cd spark_lib
pip install jsonpath_rw_ext==1.1.3 -t .
zip -r9 ../spark_lib.zip *
- Инициализировать контекстную переменную искры
sc
sc.addPyFile('spark_lib.zip')
def f(x):
import jsonpath_rw_ext
return jsonpath_rw_ext.match('$.a.b', x)
sc.parallelize([{"a":{"b":10}}]).map(f).collect()
Это дает мне ошибку версии pbr
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 268, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "<ipython-input-11-2619fc011e24>", line 6, in f
File "./spark_lib.zip/jsonpath_rw_ext/__init__.py", line 18, in <module>
File "./spark_lib.zip/pbr/version.py", line 467, in version_string
return self.semantic_version().brief_string()
File "./spark_lib.zip/pbr/version.py", line 462, in semantic_version
self._semantic = self._get_version_from_pkg_resources()
File "./spark_lib.zip/pbr/version.py", line 449, in _get_version_from_pkg_resources
result_string = packaging.get_version(self.package)
File "./spark_lib.zip/pbr/packaging.py", line 824, in get_version
name=package_name))
Exception: Versioning for this project requires either an sdist tarball,
or access to an upstream git repository. It's also possible that
there is a mismatch between the package name in setup.cfg
and the argument given to pbr.version.VersionInfo.
Project name jsonpath_rw_ext was given, but was not able to be found.