Я хотел бы узнать, как прочитать таблицу в Spark с учётом разделов (партиций). У меня есть следующий код, который определяет, в каком разделе находится мой «company_idx»:
def get_partitionValue(company_idx):
    """Return the name of the partition that holds ``company_idx``.

    Partitions are 500 ids wide and are named after their exclusive upper
    bound: ``'p500'`` covers everything below 500, ``'p1000'`` covers
    500..999, and so on up to ``'p20000'`` (19500..19999). Any id of 20000
    or more maps to ``'pMAX'``.

    NOTE(review): boundary values (500, 1000, ...) are assigned to the
    partition *above* them, which assumes the table uses RANGE partitions
    declared with ``VALUES LESS THAN (<upper bound>)`` -- confirm against
    the table DDL.

    :param company_idx: numeric company identifier.
    :return: partition name as a string, e.g. ``'p1500'`` or ``'pMAX'``.
    """
    bucket_size = 500
    last_upper_bound = 20000

    # Everything at or beyond the last named bound lives in the catch-all.
    if company_idx >= last_upper_bound:
        return 'pMAX'

    # The first partition also absorbs zero and negative ids.
    if company_idx < bucket_size:
        return 'p500'

    # Round up to the next multiple of bucket_size (exclusive upper bound).
    upper_bound = (int(company_idx // bucket_size) + 1) * bucket_size
    return 'p%d' % upper_bound
Мой код следующий:
# Load the MySQL table "Scores" through Spark's JDBC data source.
dataframe_mysql = (
    sqlContext.read.format("jdbc")
    .option("url", "jdbc:mysql://IP_ADDRESS/" + newDatabaseName)
    .option("driver", "com.mysql.jdbc.Driver")
    .option("dbtable", "Scores")
    .option("user", "UUSSSERRRRR")
    .option("password", "XXXXXXX")
    .load()
)

# Make the DataFrame addressable from Spark SQL under the name "Scores".
dataframe_mysql.registerTempTable("Scores")

start_date = '2010-01-01'  # event["start_date"]
end_date = '2019-06-18'  # event["end_date"]
company_idx = 1
partitionValue = get_partitionValue(company_idx)

# NOTE(review): this statement is parsed by Spark SQL, not by MySQL. Spark's
# SELECT grammar does not appear to have a MySQL-style "PARTITION (...)"
# clause, so "PARTITION (p500)" is presumably read as a table alias plus a
# one-entry column-alias list -- which would explain an alias-count
# AnalysisException. Partition selection would likely have to be pushed down
# to MySQL instead (e.g. a parenthesised subquery in "dbtable") -- confirm.
sql_query = "SELECT * FROM Scores PARTITION ({}) where company_idx={}".format(
    partitionValue, str(company_idx)
)
sqlContext.sql(sql_query).show()
Но приведённый выше код, похоже, не работает: я получаю следующую ошибку:
**AnalysisException: 'Количество псевдонимов столбцов не соответствует числу столбцов. Количество псевдонимов столбцов: 1; количество столбцов: 57.; строка 1, позиция 14'**