Сам разобрался, приведенное ниже решение может кому-то помочь.
import numpy as np
from pyspark.sql.functions import regexp_replace, udf
from pyspark.sql.types import ArrayType, IntegerType
def to_np_array(x, height=200, width=200, n_channels=3):
    """Reshape flat pixel data into a nested ``height x width x n_channels`` list.

    Args:
        x: Flat pixel data. ``bytes`` and ``bytearray`` objects are read as
            unsigned 8-bit values (one value per byte); anything else is
            converted with ``np.asarray``.
        height: Number of rows in the output. Defaults to 200.
        width: Number of columns in the output. Defaults to 200.
        n_channels: Number of values per pixel. Defaults to 3.

    Returns:
        A list of ``height`` rows, each a list of ``width`` pixels, each a
        list of ``n_channels`` plain Python numbers.

    Raises:
        ValueError: If ``x`` does not contain exactly
            ``height * width * n_channels`` values.
    """
    if isinstance(x, (bytes, bytearray)):
        # np.reshape would interpret an immutable ``bytes`` object as one
        # string scalar; reading the buffer as uint8 yields one value per byte.
        flat = np.frombuffer(x, dtype=np.uint8)
    else:
        flat = np.asarray(x)
    # tolist() converts NumPy scalars to built-in Python numbers.
    return flat.reshape((height, width, n_channels)).tolist()
# Wrap the reshaping helper as a Spark UDF whose declared return type is a
# three-level nested array of integers.
spark_to_np_array = udf(
    to_np_array,
    ArrayType(ArrayType(ArrayType(IntegerType()))),
)

# Load everything under /mnt/images through the "image" data source.
# NOTE(review): `inferschema` does not appear to be a documented option of the
# image source; presumably it is ignored -- confirm before removing it.
imagesdf = spark.read.format("image").load("/mnt/images/*", inferschema=True)

# Derive a file name by stripping the mount prefix from the origin path.
imagesdf = imagesdf.withColumn(
    "FileName",
    regexp_replace("image.origin", "dbfs:/mnt/images/", ""),
)

# Convert the raw image bytes to the nested array and keep only the two
# resulting columns. The UDF is called with the data column alone, so
# to_np_array reshapes with its fixed 200x200x3 dimensions.
imagesdf = imagesdf.withColumn(
    "ImageArray",
    spark_to_np_array(imagesdf["image.data"]),
).select("FileName", "ImageArray")
![The Resulting Schema looks like this](s3://crabby-images/57084/5708410005ae5dd8dbc313831b414948e8026977)