Я выполняю pivot (разворот) по столбцу, и в результате появляется несколько новых столбцов.
Я хотел бы собрать эти столбцы и упаковать их в одно поле-структуру.
Код ниже даёт мне желаемый результат.
Но сейчас я вручную перечисляю col("search"), col("main"), col("theme").
Есть ли способ выбрать все эти столбцы (то есть столбцы, созданные операцией pivot) динамически?
# Demo: pivot on the 2nd column ("origin") and pack every pivot-generated
# column into one struct field, without hard-coding the pivot column names.
mylist = [
    [1, 'search', 3, 1],
    [1, 'search', 3, 2],
    [1, 'main', 5, 3],
    [1, 'main', 6, 4],
    [2, 'search', 4, 10],
    [2, 'search', 4, 11],
    [2, 'main', 6, 12],
    [2, 'main', 6, 13],
    [2, 'theme', 6, 14],
    [3, 'search', 4, 5],
    [3, 'main', 6, 6],
    [3, 'main', 6, 7],
    [3, 'theme', 6, 8],
]
df = pd.DataFrame(mylist, columns=['id', 'origin', 'time', 'screen_index'])

# Distinct pivot values, in order of first appearance in the data
# (search, main, theme here). Passing them to pivot() explicitly fixes the
# order of the generated columns and spares Spark from discovering the
# distinct values itself.
# NOTE(review): if the data only exists in Spark, drop the `values` argument
# and let pivot() find them; per the PySpark docs the generated columns then
# follow Spark's own ordering of the values -- confirm before relying on it.
origin_values = df['origin'].unique().tolist()

mylist = df.to_dict('records')
spark_session = get_spark_session()
df = spark_session.createDataFrame(Row(**x) for x in mylist)

# Grouping keys are the only columns of the pivoted frame that are NOT
# produced by the pivot, so they are what we exclude below.
group_cols = ["id"]

pivoted = df.groupBy(*group_cols).pivot('origin', origin_values).agg(
    struct(count(lit(1)).alias('count'), avg("time").alias('avg_time'))
)

# Grab the pivot-generated columns dynamically instead of listing
# col("search"), col("main"), col("theme") by hand.
pivot_cols = [c for c in pivoted.columns if c not in group_cols]

df_wanted = pivoted.select(
    *group_cols,
    struct(*[col(c) for c in pivot_cols]).alias("origin_info"),
)
df_wanted.printSchema()
root
|-- id: long (nullable = true)
|-- origin_info: struct (nullable = false)
| |-- search: struct (nullable = false)
| | |-- count: long (nullable = false)
| | |-- avg_time: double (nullable = true)
| |-- main: struct (nullable = false)
| | |-- count: long (nullable = false)
| | |-- avg_time: double (nullable = true)
| |-- theme: struct (nullable = false)
| | |-- count: long (nullable = false)
| | |-- avg_time: double (nullable = true)