Запустить summary() на большом фрейме данных - PullRequest
0 голосов
/ 15 мая 2019

У меня есть несколько файлов Parquet, каждый из которых содержит приблизительно 3 миллиона строк и 6 тысяч столбцов. Я пытаюсь вызвать summary() для каждого фрейма данных, как показано ниже:

data = spark.read.parquet('/HDFS/path/to/parquet/file/test.parquet')
stats = data.summary()
stats_pd = stats.toPandas()

Однако Spark выбрасывает исключение:

Caused by: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "apply(Ljava/lang/Object;)Ljava/lang/Object;" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificMutableProjection" grows beyond 64 KB
    at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:361)
    at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:234)
    at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:446)
    at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
    at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
    at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:204)
    at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
    at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1417)
    ... 34 more
Caused by: org.codehaus.janino.InternalCompilerException: Code of method "apply(Ljava/lang/Object;)Ljava/lang/Object;" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificMutableProjection" grows beyond 64 KB
    at org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:990)
    at org.codehaus.janino.CodeContext.write(CodeContext.java:867)
    at org.codehaus.janino.UnitCompiler.writeOpcode(UnitCompiler.java:11901)
    at org.codehaus.janino.UnitCompiler.load(UnitCompiler.java:11542)
    at org.codehaus.janino.UnitCompiler.load(UnitCompiler.java:11536)
    at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4139)
    at org.codehaus.janino.UnitCompiler.access$7200(UnitCompiler.java:212)
    at org.codehaus.janino.UnitCompiler$12$1.visitLocalVariableAccess(UnitCompiler.java:4082)
    at org.codehaus.janino.UnitCompiler$12$1.visitLocalVariableAccess(UnitCompiler.java:4074)
    at org.codehaus.janino.Java$LocalVariableAccess.accept(Java.java:4103)
    at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4074)
    at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4070)
    at org.codehaus.janino.Java$Lvalue.accept(Java.java:3977)
    at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4070)
    at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4135)
    at org.codehaus.janino.UnitCompiler.access$6700(UnitCompiler.java:212)
    at org.codehaus.janino.UnitCompiler$12$1.visitAmbiguousName(UnitCompiler.java:4077)
    at org.codehaus.janino.UnitCompiler$12$1.visitAmbiguousName(UnitCompiler.java:4074)
    at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:4053)
    at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4074)
    at org.codehaus.janino.UnitCompiler$12.visitLvalue(UnitCompiler.java:4070)
    at org.codehaus.janino.Java$Lvalue.accept(Java.java:3977)
    at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4070)
    at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:5253)
    at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4842)
    at org.codehaus.janino.UnitCompiler.access$8300(UnitCompiler.java:212)
    at org.codehaus.janino.UnitCompiler$12.visitMethodInvocation(UnitCompiler.java:4097)
    at org.codehaus.janino.UnitCompiler$12.visitMethodInvocation(UnitCompiler.java:4070)
    at org.codehaus.janino.Java$MethodInvocation.accept(Java.java:4902)
    at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4070)
    at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:5253)
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:3468)
    at org.codehaus.janino.UnitCompiler.access$5100(UnitCompiler.java:212)
    at org.codehaus.janino.UnitCompiler$9.visitMethodInvocation(UnitCompiler.java:3447)
    at org.codehaus.janino.UnitCompiler$9.visitMethodInvocation(UnitCompiler.java:3419)
    at org.codehaus.janino.Java$MethodInvocation.accept(Java.java:4902)
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3419)
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2339)
    at org.codehaus.janino.UnitCompiler.access$1800(UnitCompiler.java:212)
    at org.codehaus.janino.UnitCompiler$6.visitExpressionStatement(UnitCompiler.java:1473)
    at org.codehaus.janino.UnitCompiler$6.visitExpressionStatement(UnitCompiler.java:1466)
    at org.codehaus.janino.Java$ExpressionStatement.accept(Java.java:2851)
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1466)
    at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1546)
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3075)
    at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1336)
    at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1309)
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:799)
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:958)
    at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:212)
    at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:393)
    at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:385)
    at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1286)
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
    at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1285)
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:825)
    at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:411)
    at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:212)
    at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:390)
    at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:385)
    at org.codehaus.janino.Java$PackageMemberClassDeclaration.accept(Java.java:1405)
    at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
    at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:357)
    ... 41 more

Может ли кто-нибудь пролить свет на это?

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...