Я пытаюсь собрать fat-jar (с помощью sbt-assembly), затем отправить его в Spark через spark-submit и заставить моё приложение использовать мои собственные аппендеры и энкодеры logback для логирования.
Я пробовал то, что указано в: «Настройка Apache Spark Logging с Scala и logback», а также в «Разделение журналов приложений в Logback от Spark Logs в log4j»,
но безуспешно. Либо сборка jar завершается с ошибкой из-за ошибок дедупликации (deduplicate), либо приложение не запускается из-за какой-то ошибки при создании SparkContext.
Мой build.sbt:
// Libraries bundled into the assembly.
//
// `++=` (not `+=`) is required here: `+=` appends a single ModuleID, while
// `++=` appends a whole Seq[ModuleID].
//
// Every Spark module excludes the `org.slf4j` organization from its
// transitive dependencies; `log4j-over-slf4j` is then added explicitly
// as the last entry.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.apache.spark" %% "spark-sql" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.apache.spark" %% "spark-mllib" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  // Uses sparkVersion like the other Spark modules (was hard-coded to "2.4.0"),
  // so that all Spark artifacts always resolve to the same release.
  "org.apache.spark" %% "spark-hive" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.scala-lang.modules" %% "scala-xml" % "1.0.6",
  "io.minio" % "minio" % "6.0.11" excludeAll(ExclusionRule(organization = "com.google.code.findbugs", name = "annotations")),
  "com.typesafe.slick" %% "slick" % "3.3.2",
  "com.typesafe.slick" %% "slick-hikaricp" % "3.3.2",
  "com.microsoft.sqlserver" % "mssql-jdbc" % "7.4.1.jre8",
  "org.slf4j" % "log4j-over-slf4j" % "1.7.25"
)
// Conflict-resolution rules used by sbt-assembly when the same path appears
// in more than one dependency jar. Cases are tried top to bottom and the
// first match wins, so specific rules must come before broader ones.
//
// This setting is defined exactly once: a second `:=` with the same body
// would only override the first one without changing the result.
assemblyMergeStrategy in assembly := {
  // Needed only to sbt assembly non provided spark-streaming-kafka-0-10.
  // Kept ahead of the generic org/apache rule below; placed after it, this
  // case could never match.
  case PathList("org", "apache", "spark", "unused", "UnusedStubClass.class") => MergeStrategy.first

  // Package prefixes where the last occurrence on the classpath is kept.
  case PathList("com", "esotericsoftware", xs @ _*) => MergeStrategy.last
  case PathList("com", "squareup", xs @ _*) => MergeStrategy.last
  case PathList("com", "sun", xs @ _*) => MergeStrategy.last
  case PathList("com", "thoughtworks", xs @ _*) => MergeStrategy.last
  case PathList("com", "twitter", xs @ _*) => MergeStrategy.last
  case PathList("commons-beanutils", xs @ _*) => MergeStrategy.last
  case PathList("commons-cli", xs @ _*) => MergeStrategy.last
  case PathList("commons-collections", xs @ _*) => MergeStrategy.last
  case PathList("commons-io", xs @ _*) => MergeStrategy.last
  case PathList("io", "netty", xs @ _*) => MergeStrategy.last
  case PathList("javax", "activation", xs @ _*) => MergeStrategy.last
  case PathList("javax", "inject", xs @ _*) => MergeStrategy.last
  case PathList("javax", "xml", xs @ _*) => MergeStrategy.last
  case PathList("org", "apache", xs @ _*) => MergeStrategy.last
  case PathList("org", "codehaus", xs @ _*) => MergeStrategy.last
  case PathList("org", "glassfish", xs @ _*) => MergeStrategy.last
  case PathList("org", "fusesource", xs @ _*) => MergeStrategy.last
  case PathList("org", "mortbay", xs @ _*) => MergeStrategy.last
  case PathList("org", "tukaani", xs @ _*) => MergeStrategy.last
  case PathList("org", "objenesis", xs @ _*) => MergeStrategy.last
  case PathList("org", "aopalliance", xs @ _*) => MergeStrategy.last
  case PathList("xerces", xs @ _*) => MergeStrategy.last
  case PathList("xmlenc", xs @ _*) => MergeStrategy.last

  // Individual resource files.
  case "about.html" => MergeStrategy.rename
  case "META-INF/ECLIPSEF.RSA" => MergeStrategy.last
  case "META-INF/mailcap" => MergeStrategy.last
  case "META-INF/mimetypes.default" => MergeStrategy.last
  case "plugin.properties" => MergeStrategy.last
  case "git.properties" => MergeStrategy.last
  case "plugin.xml" => MergeStrategy.last
  case "log4j.properties" => MergeStrategy.last
  case "parquet.thrift" => MergeStrategy.last
  case "codegen/config.fmpp" => MergeStrategy.last

  // Needed only to sbt assembly etcd coming with libraries
  case PathList("META-INF", "io.netty.versions.properties", xs @ _*) => MergeStrategy.last

  // Needed only to sbt assembly mastria-etcd4s coming with libraries
  case PathList("scala", "collection", "mutable", xs @ _*) => MergeStrategy.first
  case PathList("scala", "util", xs @ _*) => MergeStrategy.first
  case PathList("library.properties", xs @ _*) => MergeStrategy.first

  case PathList("logback.xml", xs @ _*) => MergeStrategy.last

  //case PathList("MANIFEST.MF",xs @ _ *) => MergeStrategy.first
  // case _ => MergeStrategy.first

  // Anything not matched above is delegated to the previously configured
  // strategy.
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}
Когда я использую класс LazyLogging от Typesafe, процесс завершается неудачей независимо от того, какую комбинацию аргументов я пытаюсь использовать.
Я хотел бы увидеть полный пример build.sbt, чтобы, возможно, найти мою ошибку.