Я пытался подавить журнал с помощью spark.sparkContext.setLogLevel("ERROR")
в:
package com.databricks.example
import org.apache.log4j.Logger
import org.apache.spark.sql.SparkSession
object DFUtils extends Serializable {
@transient lazy val logger = Logger.getLogger(getClass.getName)
def pointlessUDF(raw: String) = {
raw
}
}
object DataFrameExample extends Serializable {
def main(args: Array[String]): Unit = {
val pathToDataFolder = args(0)
// println(pathToDataFolder + "data.json")
// start up the SparkSession
// along with explicitly setting a given config
val spark = SparkSession.builder().appName("Spark Example")
.config("spark.sql.warehouse.dir", "/user/hive/warehouse")
.getOrCreate()
// for suppresse logs by raising log level
spark.sparkContext.setLogLevel("ERROR")
// println(spark.range(1, 2000).count());
// udf registration
spark.udf.register("myUDF", DFUtils.pointlessUDF(_:String):String)
val df = spark.read.json(pathToDataFolder + "data.json")
df.printSchema()
// df.collect.foreach(println)
// val x = df.select("value").foreach(x => println(x));
val manipulated = df.groupBy("grouping").sum().collect().foreach(x => println(x))
// val manipulated = df.groupBy(expr("myUDF(group)")).sum().collect().foreach(x => println(x))
}
}
Почему я по-прежнему получаю журналы уровней INFO и WARN? Я успешно поднял уровень журнала до ошибки? Спасибо.
$ ~/programs/spark/spark-2.4.5-bin-hadoop2.7/bin/spark-submit --class com.databricks.example.DataFrameExample --master local target/scala-2.11/example_2.11-0.1-SNAPSHOT.jar /tmp/test/
20/03/19 10:09:10 WARN Utils: Your hostname, ocean resolves to a loopback address: 127.0.1.1; using 192.168.122.1 instead (on interface virbr0)
20/03/19 10:09:10 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
20/03/19 10:09:11 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
20/03/19 10:09:12 INFO SparkContext: Running Spark version 2.4.5
20/03/19 10:09:12 INFO SparkContext: Submitted application: Spark Example
20/03/19 10:09:12 INFO SecurityManager: Changing view acls to: t
20/03/19 10:09:12 INFO SecurityManager: Changing modify acls to: t
20/03/19 10:09:12 INFO SecurityManager: Changing view acls groups to:
20/03/19 10:09:12 INFO SecurityManager: Changing modify acls groups to:
20/03/19 10:09:12 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(t); groups with view permissions: Set(); users with modify permissions: Set(t); groups with modify permissions: Set()
20/03/19 10:09:13 INFO Utils: Successfully started service 'sparkDriver' on port 35821.
20/03/19 10:09:13 INFO SparkEnv: Registering MapOutputTracker
20/03/19 10:09:13 INFO SparkEnv: Registering BlockManagerMaster
20/03/19 10:09:13 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
20/03/19 10:09:13 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
20/03/19 10:09:13 INFO DiskBlockManager: Created local directory at /tmp/blockmgr-ce47f30a-ee1c-44a8-9f5b-204905ee3b2d
20/03/19 10:09:13 INFO MemoryStore: MemoryStore started with capacity 366.3 MB
20/03/19 10:09:13 INFO SparkEnv: Registering OutputCommitCoordinator
20/03/19 10:09:14 INFO Utils: Successfully started service 'SparkUI' on port 4040.
20/03/19 10:09:14 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://192.168.122.1:4040
20/03/19 10:09:14 INFO SparkContext: Added JAR file:/tmp/test/bookexample/target/scala-2.11/example_2.11-0.1-SNAPSHOT.jar at spark://192.168.122.1:35821/jars/example_2.11-0.1-SNAPSHOT.jar with timestamp 1584626954295
20/03/19 10:09:14 INFO Executor: Starting executor ID driver on host localhost
20/03/19 10:09:14 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 39215.
20/03/19 10:09:14 INFO NettyBlockTransferService: Server created on 192.168.122.1:39215
20/03/19 10:09:14 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
20/03/19 10:09:14 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.122.1, 39215, None)
20/03/19 10:09:14 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.122.1:39215 with 366.3 MB RAM, BlockManagerId(driver, 192.168.122.1, 39215, None)
20/03/19 10:09:14 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.122.1, 39215, None)
20/03/19 10:09:14 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 192.168.122.1, 39215, None)
root
|-- grouping: string (nullable = true)
|-- value: long (nullable = true)
[group_3,10]
[group_1,12]
[group_2,5]
[group_4,2]