Scala Проблема с Spark Bigquery Connector - InternalException: grp c .StatusRuntimeException - PullRequest
1 голос
/ 08 апреля 2020

Я получаю следующую ошибку при локальном запуске кода Spark Scala:

Exception in thread "main" com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.InternalException: com.google.cloud.spark.bigquery.repackaged.io.grpc.StatusRuntimeException: INTERNAL: request failed: internal error
    at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.ApiExceptionFactory.createException(ApiExceptionFactory.java:67)
    at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.grpc.GrpcApiExceptionFactory.create(GrpcApiExceptionFactory.java:72)
    at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.grpc.GrpcApiExceptionFactory.create(GrpcApiExceptionFactory.java:60)
    at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.grpc.GrpcExceptionCallable$ExceptionTransformingFuture.onFailure(GrpcExceptionCallable.java:97)
    at com.google.cloud.spark.bigquery.repackaged.com.google.api.core.ApiFutures$1.onFailure(ApiFutures.java:68)
    at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.Futures$CallbackListener.run(Futures.java:1072)
    at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.DirectExecutor.execute(DirectExecutor.java:30)
    at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.AbstractFuture.executeListener(AbstractFuture.java:1164)
    at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.AbstractFuture.complete(AbstractFuture.java:958)
    at com.google.cloud.spark.bigquery.repackaged.com.google.common.util.concurrent.AbstractFuture.setException(AbstractFuture.java:749)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.stub.ClientCalls$GrpcFuture.setException(ClientCalls.java:522)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.stub.ClientCalls$UnaryStreamToFuture.onClose(ClientCalls.java:497)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl.closeObserver(ClientCallImpl.java:426)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl.access$500(ClientCallImpl.java:66)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.close(ClientCallImpl.java:689)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.access$900(ClientCallImpl.java:577)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInternal(ClientCallImpl.java:751)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInContext(ClientCallImpl.java:740)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
    Suppressed: com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.AsyncTaskException: Asynchronous task failed
        at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.ApiExceptions.callAndTranslateApiException(ApiExceptions.java:57)
        at com.google.cloud.spark.bigquery.repackaged.com.google.api.gax.rpc.UnaryCallable.call(UnaryCallable.java:112)
        at com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageClient.createReadSession(BigQueryStorageClient.java:237)
        at com.google.cloud.spark.bigquery.direct.DirectBigQueryRelation.buildScan(DirectBigQueryRelation.scala:139)
        at org.apache.spark.sql.execution.datasources.DataSourceStrategy.$anonfun$apply$2(DataSourceStrategy.scala:293)
        at org.apache.spark.sql.execution.datasources.DataSourceStrategy.$anonfun$pruneFilterProject$1(DataSourceStrategy.scala:326)
        at org.apache.spark.sql.execution.datasources.DataSourceStrategy.pruneFilterProjectRaw(DataSourceStrategy.scala:381)
        at org.apache.spark.sql.execution.datasources.DataSourceStrategy.pruneFilterProject(DataSourceStrategy.scala:325)
        at org.apache.spark.sql.execution.datasources.DataSourceStrategy.apply(DataSourceStrategy.scala:293)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:480)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:486)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:485)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
        at scala.collection.TraversableOnce.$anonfun$foldLeft$1(TraversableOnce.scala:156)
        at scala.collection.TraversableOnce.$anonfun$foldLeft$1$adapted(TraversableOnce.scala:156)
        at scala.collection.Iterator.foreach(Iterator.scala:937)
        at scala.collection.Iterator.foreach$(Iterator.scala:937)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
        at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:156)
        at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:154)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1425)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
        at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:480)
        at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:486)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
        at org.apache.spark.sql.execution.CacheManager.$anonfun$cacheQuery$1(CacheManager.scala:100)
        at org.apache.spark.sql.execution.CacheManager.writeLock(CacheManager.scala:67)
        at org.apache.spark.sql.execution.CacheManager.cacheQuery(CacheManager.scala:91)
        at org.apache.spark.sql.Dataset.persist(Dataset.scala:2963)
        at org.apache.spark.sql.Dataset.cache(Dataset.scala:2973)
        at Transform$.main(Transform.scala:22)
        at Transform.main(Transform.scala)
Caused by: com.google.cloud.spark.bigquery.repackaged.io.grpc.StatusRuntimeException: INTERNAL: request failed: internal error
    at com.google.cloud.spark.bigquery.repackaged.io.grpc.Status.asRuntimeException(Status.java:533)
    ... 16 more

ниже моя зависимость Maven

<dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>2.4.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.12</artifactId>
            <version>2.4.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.google.cloud.spark/spark-bigquery -->

        <dependency>
            <groupId>com.google.cloud.spark</groupId>
            <artifactId>spark-bigquery-with-dependencies_2.12</artifactId>
            <version>0.14.0-beta</version>
        </dependency>

Я запускаю пример кода Scala от [здесь] (https://cloud.google.com/dataproc/docs/tutorials/bigquery-connector-spark-example).

1 Ответ

1 голос
/ 08 апреля 2020

Пожалуйста, обновите ссылку на таблицу на bigquery-public-data.samples.shakespeare. Похоже, старая ссылка publicdata на наборы данных BigQuery publi c больше не действительна.

...