FetchFailedException: Too large frame when writing a Spark DataFrame in parquet mode

I get the error below when saving a DataFrame as a table in parquet format. The data is repartitioned into 400 partitions before the save. These are the spark-submit arguments used:

--num-executors 12 --executor-cores 8 --executor-memory 12g --driver-memory 32g --driver-cores 2 --conf spark.sql.shuffle.partitions=400
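
For context, a minimal sketch of the write path that fails, assuming a plain saveAsTable after a repartition. The DataFrame contents, the save mode, and the table name are placeholders; the stack trace only confirms the saveAsTable call at testSite.scala:192:

import org.apache.spark.sql.{SaveMode, SparkSession}

// Hypothetical reconstruction of the failing write; names are placeholders.
val spark = SparkSession.builder().appName("testSite").getOrCreate()
import spark.implicits._

val df = Seq((1, "a"), (2, "b")).toDF("id", "value") // stand-in data

df.repartition(400)          // matches --conf spark.sql.shuffle.partitions=400
  .write
  .mode(SaveMode.Overwrite)  // assumption; the question does not state the mode
  .format("parquet")
  .saveAsTable("output_table") // placeholder table name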

18/11/05 22:02:04 ERROR FileFormatWriter: Aborting job null.
org.apache.spark.SparkException: Job aborted due to stage failure: 
ShuffleMapStage 16 (saveAsTable at testSite.scala:192) has failed the maximum allowable number of times: 4. Most recent failure reason: org.apache.spark.shuffle.FetchFailedException: 
Too large frame: 3405090943
at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:513)
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:444)     
at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:61)  
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)   
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)   
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)   
at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)    
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)   
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)   
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.sort_addToSorter$(Unknown Source)     
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)
at org.apache.spark.sql.execution.RowIteratorFromScala.advanceNext(RowIterator.scala:83)
at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.advancedBufferedToRowWithNullFreeJoinKey(SortMergeJoinExec.scala:793)
at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.<init>(SortMergeJoinExec.scala:668)
at org.apache.spark.sql.execution.joins.SortMergeJoinExec$$anonfun$doExecute$1.apply(SortMergeJoinExec.scala:204)
at org.apache.spark.sql.execution.joins.SortMergeJoinExec$$anonfun$doExecute$1.apply(SortMergeJoinExec.scala:141)
at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)  
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.IllegalArgumentException: Too large frame: 3405090943
at org.spark_project.guava.base.Preconditions.checkArgument(Preconditions.java:119)     
at org.apache.spark.network.util.TransportFrameDecoder.decodeNext(TransportFrameDecoder.java:133)   
at org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:81)
...