from pyspark.sql import SQLContext
from pyspark import SparkContext
from impala.dbapi import connect
import sys
# Read the first 100 rows of a SQL Server table over JDBC and display them.
# Usage: spark-submit ... test.py <path-to-config.yml>

# Local-mode Spark context plus the SQLContext that exposes the DataFrame reader.
sc = SparkContext("local", "first app")
sqlContext = SQLContext(sc)

# The first command-line argument is the path to the YAML configuration file.
with open(sys.argv[1]) as config_file:
    config = yaml.safe_load(config_file)
# NOTE(review): this echoes the whole config, including db_config.password;
# consider removing it or masking the password before sharing logs.
print(config)

table_name = 'loadout'
db_config = config['db_config']

# Spark places the "dbtable" value directly in the FROM clause of the queries
# it generates (for example a schema probe ending in "WHERE 1=0"). SQL Server
# requires every derived table to have an alias; without "AS src" the generated
# statement fails with "Incorrect syntax near the keyword 'WHERE'".
source_query = "(SELECT TOP 100 * FROM dbo.{0}) AS src".format(table_name)

df = (
    sqlContext.read
    .format("jdbc")
    .option("url", db_config['url'])
    .option("dbtable", source_query)
    .option("user", db_config['username'])
    .option("password", db_config['password'])
    .option("driver", db_config['driver'])
    .load()
)
df.show()
Выдает ошибку:
> Traceback (most recent call last): File
> "/home/rkumbar/ddl_generation/test.py", line 34, in <module>
> .option("driver", config['db_config']['driver']) \ File "/opt/cloudera/parcels/CDH-5.12.1-1.cdh5.12.1.p0.3/lib/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 139, in load File
> "/opt/cloudera/parcels/CDH-5.12.1-1.cdh5.12.1.p0.3/lib/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py", line 813, in __call__ File
> "/opt/cloudera/parcels/CDH-5.12.1-1.cdh5.12.1.p0.3/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py",
> line 45, in deco File
> "/opt/cloudera/parcels/CDH-5.12.1-1.cdh5.12.1.p0.3/lib/spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py",
> line 308, in get_return_value py4j.protocol.Py4JJavaError: An error
> occurred while calling o56.load. :
> com.microsoft.sqlserver.jdbc.SQLServerException: Incorrect syntax near
> the keyword 'WHERE'.
Моя команда spark-submit:
spark-submit --driver-class-path /home/xyz/ddl_generation/mssql-jdbc-6.1.0.jre7.jar test.py config.yml