Чтобы ответить на ваш вопрос: диапазон (32) просто указывает количество столбцов, к которым должен быть применён класс StructField при построении требуемой схемы. В вашем случае это 30 столбцов. На основе ваших данных мне удалось создать фрейм данных, используя приведённую ниже логику:
from pyspark.sql.functions import *
from pyspark.sql.types import *
# Build a one-row Spark DataFrame from a sample record, deriving the schema
# from the Python type of each value in that record.
# NOTE(review): the schema is inferred from this single sample row only; if
# other rows carry larger integers or different types for the same column,
# the inferred types will need to be widened by hand.
data_json = {
    "code_event": "1092406",
    "code_event_system": "LOTTO",
    "company_id": "2",
    "date_event": "2020-05-27 12:00:00.000",
    "date_event_real": "0001-01-01 00:00:00.000",
    "ecode_class": "",
    "ecode_event": "183",
    "eperiod_event": "",
    "etl_date": "2020-05-27",
    "event_no": 1,
    "group_no": 0,
    "name_event": "Ungaria Putto - 8/20",
    "name_event_short": "Ungaria Putto - 8/20",
    "odd_coefficient": 1,
    "odd_coefficient_entry": 1,
    "odd_coefficient_user": 1,
    "odd_ekey": "11",
    "odd_name": "11",
    "odd_status": "",
    "odd_type": "11",
    "odd_voidfactor": 0,
    "odd_win_types": "",
    "special_bet_value": "",
    "ticket_id": "899M-E2X93P",
    "id_update": 8000001036823656,
    "topic_group": "cwg5",
    "kafka_key": "899M-E2X93P",
    "kafka_epoch": 1590580609424,
    "kafka_partition": 0,
    "kafka_topic": "tickets-calculated_2",
}

# Bounds of a signed 32-bit integer, i.e. what IntegerType can hold.
_INT32_MIN = -(2 ** 31)
_INT32_MAX = 2 ** 31 - 1


def _spark_type_for(value):
    """Return the Spark SQL data type used for a sample Python *value*.

    Strings map to StringType, booleans to BooleanType, floats to
    DoubleType, and integers to IntegerType when they fit in 32 bits
    (LongType otherwise). Any other value (including None) keeps the
    original snippet's fallback of LongType.
    """
    if isinstance(value, str):
        return StringType()
    # bool is a subclass of int, so it must be tested before int.
    if isinstance(value, bool):
        return BooleanType()
    if isinstance(value, int):
        if _INT32_MIN <= value <= _INT32_MAX:
            return IntegerType()
        return LongType()
    if isinstance(value, float):
        return DoubleType()
    return LongType()


column_names = list(data_json)
row_data = [list(data_json.values())]

# One nullable field per key, in the dict's insertion order so that the
# schema lines up positionally with the values in row_data.
schema = StructType(
    [
        StructField(name, _spark_type_for(value), True)
        for name, value in data_json.items()
    ]
)

# `spark` is not defined in this snippet; presumably it is an existing
# SparkSession (as in the pyspark shell or a notebook) -- TODO confirm.
data = spark.createDataFrame(row_data, schema)
data.show()
Вывод
# +----------+-----------------+----------+--------------------+--------------------+-----------+-----------+-------------+----------+--------+--------+--------------------+--------------------+---------------+---------------------+--------------------+--------+--------+----------+--------+--------------+-------------+-----------------+-----------+----------------+-----------+-----------+-------------+---------------+--------------------+
# |code_event|code_event_system|company_id| date_event| date_event_real|ecode_class|ecode_event|eperiod_event| etl_date|event_no|group_no| name_event| name_event_short|odd_coefficient|odd_coefficient_entry|odd_coefficient_user|odd_ekey|odd_name|odd_status|odd_type|odd_voidfactor|odd_win_types|special_bet_value| ticket_id| id_update|topic_group| kafka_key| kafka_epoch|kafka_partition| kafka_topic|
# +----------+-----------------+----------+--------------------+--------------------+-----------+-----------+-------------+----------+--------+--------+--------------------+--------------------+---------------+---------------------+--------------------+--------+--------+----------+--------+--------------+-------------+-----------------+-----------+----------------+-----------+-----------+-------------+---------------+--------------------+
# | 1092406| LOTTO| 2|2020-05-27 12:00:...|0001-01-01 00:00:...| | 183| |2020-05-27| 1| 0|Ungaria Putto - 8/20|Ungaria Putto - 8/20| 1| 1| 1| 11| 11| | 11| 0| | |899M-E2X93P|8000001036823656| cwg5|899M-E2X93P|1590580609424| 0|tickets-calculated_2|
# +----------+-----------------+----------+--------------------+--------------------+-----------+-----------+-------------+----------+--------+--------+--------------------+--------------------+---------------+---------------------+--------------------+--------+--------+----------+--------+--------------+-------------+-----------------+-----------+----------------+-----------+-----------+-------------+---------------+--------------------+