Druid throws java.lang.Exception: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found

I am trying to use S3 as my Druid deep storage directory and I am getting the following exception:

2020-03-02T11:47:29,971 INFO [Thread-18] org.apache.hadoop.mapred.LocalJobRunner - reduce task executor complete.
2020-03-02T11:47:29,982 WARN [Thread-18] org.apache.hadoop.mapred.LocalJobRunner - job_local404421415_0001
java.lang.Exception: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
 at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) ~[hadoop-mapreduce-client-common-2.8.5.jar:?]
 at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:556) [hadoop-mapreduce-client-common-2.8.5.jar:?]
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
 at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2369) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2793) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2810) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:100) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2849) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2831) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:389) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.Path.getFileSystem(Path.java:356) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.druid.indexer.IndexGeneratorJob$IndexGeneratorReducer.reduce(IndexGeneratorJob.java:788) ~[druid-indexing-hadoop-0.17.0.jar:0.17.0]
 at org.apache.druid.indexer.IndexGeneratorJob$IndexGeneratorReducer.reduce(IndexGeneratorJob.java:572) ~[druid-indexing-hadoop-0.17.0.jar:0.17.0]
 at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171) ~[hadoop-mapreduce-client-core-2.8.5.jar:?]
 at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627) ~[hadoop-mapreduce-client-core-2.8.5.jar:?]
 at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) ~[hadoop-mapreduce-client-core-2.8.5.jar:?]
 at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346) ~[hadoop-mapreduce-client-common-2.8.5.jar:?]
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_131]
 at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_131]
 at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_131]
 at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_131]
 at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_131]
Caused by: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
 at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2273) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2367) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2793) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2810) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:100) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2849) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2831) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:389) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.hadoop.fs.Path.getFileSystem(Path.java:356) ~[hadoop-common-2.8.5.jar:?]
 at org.apache.druid.indexer.IndexGeneratorJob$IndexGeneratorReducer.reduce(IndexGeneratorJob.java:788) ~[druid-indexing-hadoop-0.17.0.jar:0.17.0]
 at org.apache.druid.indexer.IndexGeneratorJob$IndexGeneratorReducer.reduce(IndexGeneratorJob.java:572) ~[druid-indexing-hadoop-0.17.0.jar:0.17.0]
 at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171) ~[hadoop-mapreduce-client-core-2.8.5.jar:?]
 at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627) ~[hadoop-mapreduce-client-core-2.8.5.jar:?]
 at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) ~[hadoop-mapreduce-client-core-2.8.5.jar:?]
 at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:346) ~[hadoop-mapreduce-client-common-2.8.5.jar:?]
 at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_131]
 at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_131]
 at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_131]
 at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_131]
 at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_131]
2020-03-02T11:47:30,599 INFO [main] org.apache.hadoop.mapreduce.Job -  map 100% reduce 38%
2020-03-02T11:47:30,599 INFO [main] org.apache.hadoop.mapreduce.Job - Job job_local404421415_0001 failed with state FAILED due to: NA
2020-03-02T11:47:30,619 INFO [main] org.apache.hadoop.mapreduce.Job - Counters: 39

I am using the Hadoop command-line indexer, and here is my JSON spec:

{
   "type":"index_hadoop",
   "spec":{
      "dataSchema":{
         "dataSource":"some_datasource_v1",
         "parser":{
            "type":"parquet",
            "parseSpec":{
               "format":"timeAndDims",
               "timestampSpec":{
                  "column":"date_hour",
                  "format":"yyyy-MM-dd HH:mm:ss"
               },
               "dimensionsSpec":{
                  "dimensions":[
                     "date_hour",
                     "A",
                     "B",
                     "C",
                     "D"
                  ]
               }
            }
         },
         "metricsSpec":[
            {
               "type":"longSum",
               "name":"count_rec",
               "fieldName":"count_rec"
            }
         ],
         "granularitySpec":{
            "type":"uniform",
            "segmentGranularity":"hour",
            "queryGranularity":"hour",
            "intervals":[
               "2020-02-10T00:00:00/2020-02-10T01:00:00"
            ]
         }
      },
      "ioConfig":{
         "inputSpec":{
            "type":"static",
            "paths":"hdfs://SOME_IP/PATH_TO_FILE/part-00000-0a91d78b-109a-4852-9b9e-f2dbe9f147b3-c000.snappy.parquet,SOME OTHE PATHS",
            "filter":"part-",
            "inputFormat":"org.apache.druid.data.input.parquet.DruidParquetInputFormat"

         },
         "type":"hadoop",
         "metadataUpdateSpec":{
            "type":"mysql",
            "connectURI":"jdbc:mysql://127.0.0.1:3306/druid?characterEncoding=UTF-8",
            "user":"druid",
            "password":"diurd",
            "segmentTable":"druid_segments"

         },
         "segmentOutputPath":"s3a://test-bucket/druid_indexed_data/"

      },
      "hadoopDependencyCoordinates":[
         "org.apache.hadoop:hadoop-client:2.7.3",
         "org.apache.hadoop:hadoop-aws:2.7.3"

      ],
      "tuningConfig":{
         "type":"hadoop",
         "workingPath":"/var/druid/hadoop-tmp",
         "jobProperties":{
            "fs.s3a.connection.ssl.enabled":"true",
            "fs.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem",
            "mapreduce.map.memory.mb":"12288",
            "mapreduce.reduce.memory.mb":"16384",
            "mapreduce.map.java.opts":"-Xmx6144m -XX:+UseG1GC -Dfile.encoding=UTF-8",
            "mapreduce.reduce.java.opts":"-Xmx12288m -XX:+UseG1GC -Dfile.encoding=UTF-8",
            "mapreduce.job.classloader":"true"

         },
         "partitionsSpec":{
            "type":"hashed",
            "numShards":8

         },
         "indexSpec":{
            "bitmap":{
               "type":"roaring"

            }

         },
         "maxRowsInMemory":500000,
         "leaveIntermediate":False,
         "cleanupOnFailure":True,
         "overwriteFiles":True,
         "ignoreInvalidRows":False,
         "combineText":False,
         "useCombiner":True
      }
   }
}
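
As far as I understand, the class named in fs.s3a.impl ships in the hadoop-aws artifact, and the coordinates listed under hadoopDependencyCoordinates are only picked up if the matching jars actually exist under Druid's hadoop-dependencies directory. A minimal sketch of what I mean, using Druid's pull-deps tool (the install path and the 2.7.3 versions are just taken from the spec above):

# Sketch, assuming the default hadoop-dependencies/ layout of a 0.17.0 install;
# the versions must match the hadoopDependencyCoordinates in the spec (2.7.3 here).
cd /opt/apache-druid-0.17.0
java -classpath "lib/*" org.apache.druid.cli.Main tools pull-deps \
  --no-default-hadoop \
  -h "org.apache.hadoop:hadoop-client:2.7.3" \
  -h "org.apache.hadoop:hadoop-aws:2.7.3"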

The bash command I use to run it:

cd /opt/apache-druid-0.17.0; java -Xmx512m \
  -Daws.region=us-east-1 \
  -Ddruid.storage.bucket=test-bucket \
  -Ddruid.storage.baseKey=druid_ \
  -Ddruid.storage.useS3aSchema=true \
  -Ddruid.s3.accessKey=ACCESS_KEY \
  -Ddruid.s3.secretKey=SECRET_KEY \
  -Ddruid.storage.storageDirectory=s3a://test-bucket/druid_indexed_data/ \
  -Ddruid.storage.type=s3 \
  -Dfile.encoding=UTF-8 \
  -classpath extensions/druid-parquet-extensions/*:extensions/druid-avro-extensions:extensions/druid-hdfs-storage:lib/*:/opt/apache-druid-0.17.0/conf/druid/single-server/micro-quickstart/_common:/usr/local/Cellar/hadoop/3.1.2/bin/hadoop \
  org.apache.druid.cli.Main index hadoop /var/log/myusername/druid_index_spec_test_datasource_v1_2020-02-10T000000_1583167637.414025.log
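
As far as I can tell, org.apache.hadoop.fs.s3a.S3AFileSystem lives in the hadoop-aws jar, and no hadoop-aws jar appears anywhere on the classpath of the command above. A sketch of how I would check for it (install path assumed from the command):

# Look for a hadoop-aws jar anywhere under the Druid install (path assumed).
find /opt/apache-druid-0.17.0 -name 'hadoop-aws-*.jar'
# If nothing comes back, the reducer has no jar to load S3AFileSystem from,
# regardless of which hadoop-client version it ends up using.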

I am using Druid 0.17. I have read about this here and there, and I think the problem is that Druid is not loading the Hadoop dependencies correctly when the job runs: it pulls in hadoop-common and hadoop-client 2.8.5, while I set hadoopDependencyCoordinates to use 2.7.3, which is the version that should fix this problem. Any ideas what I am missing here?
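
If it helps, here is a sketch of how one could confirm which hadoop-client versions Druid can actually resolve (default hadoop-dependencies layout assumed):

# List the hadoop-client versions available to Druid
# (default hadoop-dependencies layout of a 0.17.0 install assumed).
ls /opt/apache-druid-0.17.0/hadoop-dependencies/hadoop-client/
# 2.7.3 would have to appear here for the hadoopDependencyCoordinates above
# to resolve to that version.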

...