У меня очень простой рабочий процесс (workflow) Oozie, который выполняет всего одно действие PySpark — запрос SHOW DATABASES — на только что установленной виртуальной машине Cloudera Quickstart.
Задание завершается со следующим сообщением об ошибке:
Traceback (most recent call last):
File "test_pyspark.py", line 13, in <module>
main()
File "test_pyspark.py", line 9, in main
results=sqlContext.sql("SHOW DATABASES").collect()
File "/yarn/nm/usercache/cloudera/appcache/application_1542753977327_0023/container_1542753977327_0023_01_000001/pyspark.zip/pyspark/sql/context.py", line 580, in sql
File "/yarn/nm/usercache/cloudera/appcache/application_1542753977327_0023/container_1542753977327_0023_01_000001/py4j-0.9-src.zip/py4j/java_gateway.py", line 813, in __call__
File "/yarn/nm/usercache/cloudera/appcache/application_1542753977327_0023/container_1542753977327_0023_01_000001/pyspark.zip/pyspark/sql/utils.py", line 45, in deco
File "/yarn/nm/usercache/cloudera/appcache/application_1542753977327_0023/container_1542753977327_0023_01_000001/py4j-0.9-src.zip/py4j/protocol.py", line 308, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o42.sql.
: java.lang.NoSuchMethodError: org.apache.curator.utils.ZKPaths.fixForNamespace(Ljava/lang/String;Ljava/lang/String;Z)Ljava/lang/String;
at org.apache.curator.framework.imps.NamespaceImpl.fixForNamespace(NamespaceImpl.java:82)
at org.apache.curator.framework.imps.CuratorFrameworkImpl.fixForNamespace(CuratorFrameworkImpl.java:579)
at org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:434)
at org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:44)
at org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager.setContext(ZooKeeperHiveLockManager.java:85)
at org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager.getLockManager(DummyTxnManager.java:72)
at org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager.acquireLocks(DummyTxnManager.java:101)
at org.apache.hadoop.hive.ql.Driver.acquireLocksAndOpenTxn(Driver.java:994)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1185)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1049)
at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:495)
at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:484)
at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:290)
at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:237)
at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:236)
at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:279)
at org.apache.spark.sql.hive.client.ClientWrapper.runHive(ClientWrapper.scala:484)
at org.apache.spark.sql.hive.client.ClientWrapper.runSqlHive(ClientWrapper.scala:474)
at org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:624)
at org.apache.spark.sql.hive.execution.HiveNativeCommand.run(HiveNativeCommand.scala:33)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:209)
at java.lang.Thread.run(Thread.java:745)
Я проверил JAR-файлы Curator (версия 2.7.1) в Oozie ShareLib, в каталоге spark:
hadoop fs -ls /user/oozie/share/lib/lib_20171023091808/spark/
-rw-r--r-- 1 hdfs supergroup 69500 2018-11-20 16:42 /user/oozie/share/lib/lib_20171023091808/spark/curator-client-2.7.1.jar
-rw-r--r-- 1 hdfs supergroup 186273 2018-11-20 16:42 /user/oozie/share/lib/lib_20171023091808/spark/curator-framework-2.7.1.jar
-rw-r--r-- 1 hdfs supergroup 270342 2018-11-20 16:42 /user/oozie/share/lib/lib_20171023091808/spark/curator-recipes-2.7.1.jar
Может быть, я упускаю что-то ещё?