When submitting a Spark job to AWS EMR (v 5.23.0), I get the following error:
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/sql/types/DataType
at etl.SparkDataProcessor$.processTransactionData(SparkDataProcessor.scala:51)
at etl.SparkDataProcessor$.delayedEndpoint$etl$SparkDataProcessor$1(SparkDataProcessor.scala:17)
at etl.SparkDataProcessor$delayedInit$body.apply(SparkDataProcessor.scala:11)
at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:383)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at etl.SparkDataProcessor$.main(SparkDataProcessor.scala:11)
at etl.SparkDataProcessor.main(SparkDataProcessor.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:239)
at org.apache.hadoop.util.RunJar.main(RunJar.java:153)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.types.DataType
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:419)
at java.lang.ClassLoader.loadClass(ClassLoader.java:352)
... 18 more
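For context, the failing call in processTransactionData references the Spark SQL types API; below is a simplified sketch of the kind of code involved (field names and the S3 path are placeholders, not my actual source):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{StringType, StructField, StructType}

object SparkDataProcessor extends App {
  val spark = SparkSession.builder().appName("blah").getOrCreate()

  def processTransactionData(spark: SparkSession): Unit = {
    // StructType (and StringType) extend org.apache.spark.sql.types.DataType,
    // the class the classloader cannot find on EMR
    val schema = StructType(Seq(
      StructField("transaction_id", StringType, nullable = false),
      StructField("amount", StringType, nullable = true)
    ))
    spark.read.schema(schema).json("s3://...").show()
  }

  processTransactionData(spark)
}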
I tried the suggestions from Stack Overflow posts for the same issue, but still no luck. Running the application locally in IntelliJ works fine, and I build the jar with sbt assembly. Below is my build.sbt file.
Note: I even added assemblyExcludedJars to see whether it would help; it was not there before.
name := "blah"
version := "0.1"
scalaVersion := "2.11.0"
sparkVersion := "2.4.0"
artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) =>
artifact.name + "_" + sv.binary + "-" + sparkVersion.value + "_" + module.revision + "." + artifact.extension
}
lazy val doobieVersion = "0.8.6"
// Dependencies
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % "2.4.0" % "provided",
"org.apache.spark" %% "spark-sql" % "2.4.0" % "provided",
"org.scalatest" %% "scalatest" % "3.0.8",
"org.apache.hadoop" % "hadoop-common" % "2.9.2" % "provided",
"org.apache.hadoop" % "hadoop-aws" % "2.9.2" % "provided",
"com.amazonaws" % "aws-java-sdk-s3" % "1.11.46",
"com.google.guava" % "guava" % "19.0",
"com.typesafe.slick" %% "slick" % "3.3.1",
"com.typesafe.slick" %% "slick-hikaricp" % "3.3.1",
"mysql" % "mysql-connector-java" % "6.0.6",
"com.microsoft.sqlserver" % "mssql-jdbc" % "8.2.0.jre8",
// "com.github.geirolz" %% "advxml" % "2.0.0-RC1",
"org.scalaj" %% "scalaj-http" % "2.4.2",
"org.json4s" %% "json4s-native" % "3.6.7",
"io.jvm.uuid" %% "scala-uuid" % "0.3.1"
)
// JVM Options
javaOptions ++= Seq("-Xms512m", "-Xmx2048M", "-XX:+CMSClassUnloadingEnabled")
// SBT Test Options
fork in Test := true
testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest, "-oD")
assemblyExcludedJars in assembly := {
  // Exclude conflicting jars
  val cp = (fullClasspath in assembly).value
  cp.filter { f =>
    f.data.getName.contains("spark-core") ||
    f.data.getName.contains("spark-sql")
  }
}
// SBT Assembly Options
assemblyJarName in assembly := "blah.jar"
assemblyMergeStrategy in assembly := {
case PathList("META-INF", xs @ _*) => MergeStrategy.discard
case "reference.conf" => MergeStrategy.concat
case x => MergeStrategy.first
}
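Since spark-core and spark-sql are marked provided and excluded from the assembly, my understanding is that the fat jar then has to be launched through spark-submit (or an EMR Spark step), along the lines of spark-submit --class etl.SparkDataProcessor --deploy-mode cluster s3://<bucket>/blah.jar (the bucket path is a placeholder), so that the Spark classes come from the cluster rather than from the jar itself.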