Я новичок в Spark. Я пытаюсь подключиться к моей таблице HBase с именем mimic3; в ней есть семейство столбцов с именем sepsiscategories, которое содержит много столбцов. Я пытаюсь получить столбец "subject_id". Это мой код:
import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.SparkSession
import org.apache.hadoop.fs.{FileSystem,FSDataInputStream,Path}
import java.net.URI
import java.io.File
import java.util.Properties
import java.sql.DriverManager
import org.apache.spark.sql.{Row,SaveMode}
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.conf.Configuration._
import spark.implicits._
import spark.sql
// Legacy SQLContext handle; `sc` is expected to already be in scope (e.g. provided by spark-shell).
val sqlContext = new org.apache.spark.sql.SQLContext(sc)

// HBase client configuration, with "mimic3" set as the input table for TableInputFormat.
val mimic_table_conf = HBaseConfiguration.create()
mimic_table_conf.set(TableInputFormat.INPUT_TABLE, "mimic3")

// RDD of (ImmutableBytesWritable, Result) pairs, one per row scanned from the table.
val mimic_PatternsFromHbase = spark.sparkContext.newAPIHadoopRDD(
  mimic_table_conf,
  classOf[TableInputFormat],
  classOf[ImmutableBytesWritable],
  classOf[Result])

// Project every HBase row to (row key, sepsiscategories:admissiontype), both decoded as strings,
// and expose the pairs as a two-column DataFrame.
val sepsiscategories = mimic_PatternsFromHbase.mapPartitions { rows =>
  rows.map { case (_, result) =>
    val rowKey = Bytes.toString(result.getRow)
    val admissionType = Bytes.toString(
      result.getValue(Bytes.toBytes("sepsiscategories"), Bytes.toBytes("admissiontype")))
    (rowKey, admissionType)
  }
}.toDF("id", "admissiontype")

// Register the DataFrame under a SQL-visible name and print the query result.
sepsiscategories.createOrReplaceTempView("sepsiscategories")
spark.sql("select * from sepsiscategories").show()
Последняя строка, spark.sql("select * from sepsiscategories").show, выдаёт следующую ошибку:
org.apache.hadoop.hbase.DoNotRetryIOException: java.lang.NoClassDefFoundError: com / yammer / metrics / core / Gauge at org.apache.hadoop.hbase.client.RpcRetringCaller.translateExjalgrgr_rg.apache.hadoop.hbase.client. .client.ScannerCallableWithReplicas $ RetriedRPC.call (ScannerCallableWithReplicas.java:362) в org.apache.hadoop.hbase.client.RpcRetringCaller.callWithRetries (RpcRetringCalound.hb.run (ResultBoundedCompletionService.java:80) в java.util.concurrent.ThreadPoolExecutor.runWorker (ThreadPoolExecutor.java:1149) в java.util.concurrent.ThreadPoolExecutor $ Worker.run (ThreadPoolExecutor.java:624) в java.lang.Thread.run (Thread.java:748) Причина: java.lang.NoClassDefFoundError: com / yammer / metrics/ core / Gauge at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callMethod (AbstractRpcClient.java:401) в org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod (AbstractRp.jap org.32) orgp.cap.hadoop.hbase.ipc.AbstractRpcClient.access $ 200 (AbstractRpcClient.java:94) в org.apache.hadoop.hbase.ipc.AbstractRpcClient $ BlockingRpcChannelImplementation.callBlockingMethod (AbstractRpoop.j.hb.j.hb.jp.jp.jp. protobuf.generated.ClientProtos $ ClientService $ BlockingStub.scan (ClientProtos.java:37059) в org.apache.hadoop.hbase.client.ScannerCallable.openScanner (ScannerCallable.java:405) в org.apache.hadooplib. ScannerCallable.call (ScannerCallable.java:274) в org.apache.hadoop.hbase.client.ScannerCallable.call (ScannerCallable.java:62) в org.apache.hadoop.hbase.client.RpcRetringCaller.callWithoutRetries (RpcRetringCaller.java:219) ... еще 7 причин: java.lang.ClassNotFoundException: com.yammer.metrics.core.Gauge в java.net.URLClassLoader.findClass (URLClass:oader) 2.0java.lang.ClassLoader.loadClass (ClassLoader.java:424) в sun.misc.Launcher $ AppClassLoader.loadClass (Launcher.java:349) в java.lang.ClassLoader.loadClass (ClassLoader.java:357)
Это результат команды scan для моей таблицы mimic3:
scan "mimic3"
ROW COLUMN+CELL
100 column=sepsiscategories:admissiontype, timestamp=1573115453362, value=ScheduledSurgical
100 column=sepsiscategories:age, timestamp=1573115453362, value=71.94
100 column=sepsiscategories:bicarbonate_max, timestamp=1573115453362, value=22.0
100 column=sepsiscategories:bicarbonate_min, timestamp=1573115453362, value=22.0
100 column=sepsiscategories:bun_max, timestamp=1573115453362, value=21.0
100 column=sepsiscategories:bun_min, timestamp=1573115453362, value=16.0
100 column=sepsiscategories:explicit_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:hadm_id, timestamp=1573115453362, value=153952
100 column=sepsiscategories:has_choc_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:has_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:has_severe_sepsis, timestamp=1573115453362, value=0
100 column=sepsiscategories:has_sirs, timestamp=1573115453362, value=0
100 column=sepsiscategories:heartrate_max, timestamp=1573115453362, value=104.0
100 column=sepsiscategories:heartrate_min, timestamp=1573115453362, value=62.0
100 column=sepsiscategories:icustay_id, timestamp=1573115453362, value=221100
100 column=sepsiscategories:infection, timestamp=1573115453362, value=0
100 column=sepsiscategories:intime, timestamp=1573115453362, value=2157-08-10 12:16:00.0
100 column=sepsiscategories:mingcs, timestamp=1573115453362, value=15.0
100 column=sepsiscategories:organ_dysfunction, timestamp=1573115453362, value=0
100 column=sepsiscategories:outtime, timestamp=1573115453362, value=2157-08-12 15:04:00.0
100 column=sepsiscategories:pco2, timestamp=1573115453362, value=37.0
100 column=sepsiscategories:potassium_max, timestamp=1573115453362, value=6.9
100 column=sepsiscategories:potassium_min, timestamp=1573115453362, value=3.0
100 column=sepsiscategories:resprate_max, timestamp=1573115453362, value=42.0
100 column=sepsiscategories:sodium_max, timestamp=1573115453362, value=143.0
100 column=sepsiscategories:sodium_min, timestamp=1573115453362, value=138.0
100 column=sepsiscategories:sysbp_max, timestamp=1573115453362, value=131.0
100 column=sepsiscategories:sysbp_min, timestamp=1573115453362, value=84.0
100 column=sepsiscategories:tempc_max, timestamp=1573115453362, value=37.70000076293945
100 column=sepsiscategories:tempc_min, timestamp=1573115453362, value=35.20000076293945
100 column=sepsiscategories:urineoutput, timestamp=1573115453362, value=2650.0
100 column=sepsiscategories:wbc_max, timestamp=1573115453362, value=7.9
100 column=sepsiscategories:wbc_min, timestamp=1573115453362, value=7.9
Что может быть причиной этой ошибки? P.S. Я использую jar-файл metrics-core-3.1.2.jar.