I was trying to write streaming data from an Event Hub to blob storage in Azure Databricks. It failed after a few runs; the error message is below. Can you tell me what the cause is?
import org.apache.spark.sql.streaming.ProcessingTime

val query =
  streamingDataFrame
    .writeStream
    .format("json")
    .option("path", "/mnt/blobimages/DatabricksSentimentPowerBI")
    .option("checkpointLocation", "/mnt/blobimages/sample/check2")
    .trigger(ProcessingTime("20 seconds"))
    .start()
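For context, streamingDataFrame comes from Event Hubs. Below is a minimal sketch of the read side, assuming the azure-eventhubs-spark connector; the connection string and event hub name are placeholders, and my real notebook also applies a sentiment UDF to this stream before the write:

import org.apache.spark.eventhubs.{ConnectionStringBuilder, EventHubsConf, EventPosition}

// Placeholder connection settings, not the real ones.
val connectionString = ConnectionStringBuilder("<EVENT_HUBS_CONNECTION_STRING>")
  .setEventHubName("<EVENT_HUB_NAME>")
  .build

val ehConf = EventHubsConf(connectionString)
  .setStartingPosition(EventPosition.fromEndOfStream)

// Raw stream from Event Hubs; the columns written above are derived from this.
val streamingDataFrame = spark.readStream
  .format("eventhubs")
  .options(ehConf.toMap)
  .load()

The write then fails with the stack trace below.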
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:224)
at org.apache.spark.sql.execution.streaming.FileStreamSink.addBatch(FileStreamSink.scala:131)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$5$$anonfun$apply$17.apply(MicroBatchExecution.scala:549)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:89)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:175)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:84)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:126)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch$5.apply(MicroBatchExecution.scala:547)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:379)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:60)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.org$apache$spark$sql$execution$streaming$MicroBatchExecution$$runBatch(MicroBatchExecution.scala:546)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply$mcV$sp(MicroBatchExecution.scala:204)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:172)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1$$anonfun$apply$mcZ$sp$1.apply(MicroBatchExecution.scala:172)
at org.apache.spark.sql.execution.streaming.ProgressReporter$class.reportTimeTaken(ProgressReporter.scala:379)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:60)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution$$anonfun$runActivatedStream$1.apply$mcZ$sp(MicroBatchExecution.scala:172)
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:56)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:166)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:293)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:203)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 239.0 failed 4 times, most recent failure: Lost task 0.3 in stage 239.0 (TID 458, 10.139.64.5, executor 0): org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:285)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:196)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:384)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$7: (string) => double)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:620)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:380)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:269)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:267)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1392)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
... 8 more
Caused by: java.lang.IndexOutOfBoundsException: Index: 0, Size: 0
at java.util.ArrayList.rangeCheck(ArrayList.java:657)
at java.util.ArrayList.get(ArrayList.java:433)
at linece8d5970ee964032865eb55725903ecf40.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$7.apply(command-624712114183209:117)
at linece8d5970ee964032865eb55725903ecf40.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$7.apply(command-624712114183209:108)
... 17 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1747)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1735)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1734)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1734)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:962)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:962)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:962)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1970)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1918)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1906)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2141)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:194)
... 20 more
Caused by: org.apache.spark.SparkException: Task failed while writing rows.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:285)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:196)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:384)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$7: (string) => double)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$10$$anon$1.hasNext(WholeStageCodegenExec.scala:620)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:380)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:269)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:267)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1392)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
... 8 more
Caused by: java.lang.IndexOutOfBoundsException: Index: 0, Size: 0
at java.util.ArrayList.rangeCheck(ArrayList.java:657)
at java.util.ArrayList.get(ArrayList.java:433)
at linece8d5970ee964032865eb55725903ecf40.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$7.apply(command-624712114183209:117)
at linece8d5970ee964032865eb55725903ecf40.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$7.apply(command-624712114183209:108)
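Looking at the innermost Caused by, I suspect the UDF compiled as $anonfun$7 (the string => double function defined around line 117 of notebook command command-624712114183209): it seems to call get(0) on an empty java.util.ArrayList for some input rows. A minimal sketch of that failure pattern and a guarded variant (scoreTokens and toSentiment are hypothetical names for illustration, not my actual code):

import java.util.ArrayList
import org.apache.spark.sql.functions.udf

// Hypothetical scorer: for some inputs nothing gets added,
// so the returned list is empty.
def scoreTokens(text: String): ArrayList[Double] = {
  val scores = new ArrayList[Double]()
  // ...tokenize and score `text`...
  scores
}

// Failing shape, matching the trace: get(0) on an empty list throws
// java.lang.IndexOutOfBoundsException: Index: 0, Size: 0.
val toSentiment = udf { text: String => scoreTokens(text).get(0) }

// Defensive shape: guard the empty case with a default score.
val toSentimentSafe = udf { text: String =>
  val scores = scoreTokens(text)
  if (scores.isEmpty) 0.0 else scores.get(0)
}

Is that the right reading of the trace, or is something else going on with the blob sink?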