Я пытаюсь настроить рабочий процесс EMR (с DynamoDB и Hive), используя Python API boto. Мне удалось запустить скрипт вручную через консоль Amazon EMR. Однако при запуске через boto создать таблицы не удается.
Вот скрипт на boto, который настраивает и запускает рабочий процесс EMR:
# NOTE(review): `boto` and `JarStep` are used below but are not imported in
# this snippet -- presumably they are imported earlier; confirm.

# Arguments for the first step: hive-script invoked with --install-hive.
args1 = [
    u's3://us-east-1.elasticmapreduce/libs/hive/hive-script',
    u'--base-path',
    u's3://us-east-1.elasticmapreduce/libs/hive/',
    u'--install-hive',
    u'--hive-versions',
    u'0.7.1.3',
]

# Arguments for the second step: hive-script invoked with --run-hive-script,
# handing it the S3 location of the .sql file through -f.
args2 = [
    u's3://us-east-1.elasticmapreduce/libs/hive/hive-script',
    u'--base-path',
    u's3://us-east-1.elasticmapreduce/libs/hive/',
    u'--hive-versions',
    u'0.7.1.3',
    u'--run-hive-script',
    u'--args',
    u'-f',
    u's3://foo/foobar/hiveexample.sql',
]

# Every step is launched through the same script-runner jar.
script_runner_jar = (
    's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
)

# One JarStep per (step name, argument list) pair, in submission order.
steps = [
    JarStep(step_name, script_runner_jar, step_args=step_argv)
    for step_name, step_argv in (
        ('Setup Hive', args1),
        ('Run Hive Script', args2),
    )
]

conn = boto.connect_emr()

# Submit the job flow; the value returned by run_jobflow is kept in job_id.
# The second positional argument is an S3 path ending in logs/ --
# presumably the log destination; confirm against the boto API.
job_id = conn.run_jobflow(
    'EpisodePlay',
    u's3://foo/foobar/logs/',
    steps=steps,
    master_instance_type='m1.small',
    slave_instance_type='m1.small',
    num_instances=5,
    hadoop_version="0.20.205",
    ami_version="2.0",
)
Однако выполнение сценария завершается неудачей со следующим исключением.
Hive history file=/mnt/var/lib/hive_07_1/tmp/history/hive_job_log_hadoop_201203161922_1801322338.txt
java.lang.NoSuchMethodError: com.amazonaws.http.JsonErrorResponseHandler.<init>(Ljava/util/List;)V
at com.amazonaws.services.dynamodb.AmazonDynamoDBClient.invoke(AmazonDynamoDBClient.java:663)
at com.amazonaws.services.dynamodb.AmazonDynamoDBClient.describeTable(AmazonDynamoDBClient.java:525)
at org.apache.hadoop.hive.dynamodb.DynamoDBClient$1.call(DynamoDBClient.java:73)
at org.apache.hadoop.hive.dynamodb.DynamoDBClient$1.call(DynamoDBClient.java:70)
at org.apache.hadoop.hive.dynamodb.DynamoDBFibonacciRetryer.runWithRetry(DynamoDBFibonacciRetryer.java:65)
at org.apache.hadoop.hive.dynamodb.DynamoDBClient.describeTable(DynamoDBClient.java:70)
at org.apache.hadoop.hive.dynamodb.DynamoDBSerDe.verifyDynamoDBWriteThroughput(DynamoDBSerDe.java:139)
at org.apache.hadoop.hive.dynamodb.DynamoDBSerDe.initialize(DynamoDBSerDe.java:52)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:199)
at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:253)
at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:484)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:455)
at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:3159)
at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:215)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:130)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1063)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:900)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:748)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:171)
at org.apache.hadoop.hive.cli.CliDriver.processLineInternal(CliDriver.java:253)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:234)
at org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:284)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:461)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
FAILED: Execution Error, return code -101 from org.apache.hadoop.hive.ql.exec.DDLTask
Command exiting with ret '255'