Lilly, Вы можете прочитать данные из csv с помощью pandas или создать pandas фрейм данных, как показано ниже, а затем преобразовать его в спарк-фрейм данных.
# Build a small demo dataset in pandas, convert it to a Spark DataFrame,
# and use a window function to select the top positive rate increases
# per district.
import pandas as pd
from pyspark.sql import SparkSession

# Fix: the original relied on a notebook-injected `spark` global, which
# raises NameError when run as a plain script. Create (or reuse) the
# session explicitly so the snippet is self-contained.
spark = SparkSession.builder.getOrCreate()

data_1 = {
    'district': ["Arba", "Arba", "Arba", "Cebu", "Cebu"],
    'item': ['coil', 'pen', 'hat', 'oil', 'pen'],
    'rate_increase(%)': [500, -85, 50, -40, 1100],
}
pandas_df = pd.DataFrame(data_1)

# Convert to Spark and register a temp view so the data is queryable
# with spark.sql(); the column name contains "(%)" so it must be quoted
# with backticks in SQL.
ddf_1 = spark.createDataFrame(pandas_df)
ddf_1.createOrReplaceTempView("ddf_1")

# row_number() ranks rows within each district by descending rate
# increase; the inner WHERE drops non-positive increases and the outer
# filter keeps at most the top 5 per district.
output = spark.sql("""
select district, item , `rate_increase(%)` from (
select row_number() over (partition by district order by `rate_increase(%)` desc) as RowNum, district,item, `rate_increase(%)` from ddf_1 where `rate_increase(%)` > 0 )
where RowNum <= 5 order by district, RowNum
""")
output.show()
+--------+----+----------------+
|district|item|rate_increase(%)|
+--------+----+----------------+
| Arba|coil| 500|
| Arba| hat| 50|
| Cebu| pen| 1100|
+--------+----+----------------+