I have a list of columns ['col1','col2','col3'] in a Spark DataFrame that I want to cast. I tried the code below, but it doesn't seem to work. I tried casting from the DECIMAL(11,3) data type to DECIMAL(3,2) and to INT. Here is the test table:
create table database_nm.test_kou
(
YEAR_MNTH_NBR INT,
PARTN_ID TINYINT,
EMP_NAME STRING,
FST_AMT DECIMAL(11,3),
SND_AMT DECIMAL(11,3),
FST_PCT DECIMAL(11,3),
SND_PCT DECIMAL(11,3),
TRD_AMT DECIMAL(11,3),
TRD_PCT DECIMAL(11,3),
FTH_AMT DECIMAL(11,3)
);
INSERT INTO database_nm.test_kou VALUES
(201812,1,'Kou',11.11,12.12,13.13,14.14,15.15,16.16,17.17);
INSERT INTO database_nm.test_kou VALUES
(201812,1,'Cha',21.21,22.22,23.23,24.24,25.25,26.26,27.27);
INSERT INTO database_nm.test_kou VALUES
(201812,1,'Cha',21.21,22.22,23.23,24.24,25.25,100.00,27.27);
My PySpark code:
from pyspark.sql.functions import lit

col_unfmt = spark.sql("select * from database_nm.test_kou")

# partition the column names into amount columns, percent columns, and the rest
colWithAmt = [column for column in col_unfmt.columns if '_amt' in column]
colWithPct = [column for column in col_unfmt.columns if '_pct' in column]
colWithRemainings = [
    column for column in col_unfmt.columns if column not in colWithPct + colWithAmt
]

# expressions meant to cast each *_amt column to int
selectAmtColsExpr = [lit(y).cast("int").alias(y) for y in colWithAmt]
# expressions meant to turn each *_pct column into a decimal(3,2) fraction
selectPctColsExpr = [(lit(z) / lit(100)).cast("decimal(3,2)").alias(z) for z in colWithPct]
selectRemainingColsExpr = colWithRemainings

col_unfmt.select(selectRemainingColsExpr + selectPctColsExpr + selectAmtColsExpr).show()
+-------------+--------+--------+-------+-------+-------+-------+-------+-------+-------+
|year_mnth_nbr|partn_id|emp_name|fst_pct|snd_pct|trd_pct|fst_amt|snd_amt|trd_amt|fth_amt|
+-------------+--------+--------+-------+-------+-------+-------+-------+-------+-------+
| 201812| 1| Kou| null| null| null| null| null| null| null|
| 201812| 1| Cha| null| null| null| null| null| null| null|
+-------------+--------+--------+-------+-------+-------+-------+-------+-------+-------+
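A likely culprit is lit(y): it builds a literal string column holding the column *name* (e.g. 'fst_amt'), and casting that string to int or decimal yields null for every row. A minimal sketch of what appears to be intended, using col() to reference the actual column values (same variables as above):

from pyspark.sql.functions import col, lit

# col(y) refers to the column's values; lit(y) was only the literal string y
selectAmtColsExpr = [col(y).cast("int").alias(y) for y in colWithAmt]
selectPctColsExpr = [(col(z) / lit(100)).cast("decimal(3,2)").alias(z) for z in colWithPct]

col_unfmt.select(selectRemainingColsExpr + selectPctColsExpr + selectAmtColsExpr).show()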