Функция array_contains() не допускает null во втором параметре.
Чтобы проверить, содержит ли массив значение null, можно отсортировать его с помощью sort_array(), указав ascending = true: при сортировке по возрастанию null-элементы помещаются в начало массива. Поэтому достаточно проверить первый элемент отсортированного массива: isnull(sort_array(col("a"), true)(0))
Пример:
scala> val df = spark.read.format("json").option("multiLine","true").load("/tmp/stack/tanvi.json").toDF("id")
df: org.apache.spark.sql.DataFrame = [id: struct<adList: array<struct<a:string,b:string,c:bigint,optionalField:string>>>]
scala> df.printSchema
root
|-- id: struct (nullable = true)
| |-- adList: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- a: string (nullable = true)
| | | |-- b: string (nullable = true)
| | | |-- c: long (nullable = true)
| | | |-- optionalField: string (nullable = true)
scala> df.select(sort_array(df("id.adList.optionalField"),true)(0),size(df("id.adList.optionalField"))).show(false)
+---------------------------------------------------------------+------------------------------------------------+
|sort_array(id.adList.optionalField AS `optionalField`, true)[0]|size(id.adList.optionalField AS `optionalField`)|
+---------------------------------------------------------------+------------------------------------------------+
|null |2 |
+---------------------------------------------------------------+------------------------------------------------+
scala> df.select(sort_array(df("id.adList.optionalField"),true)(1),size(df("id.adList.optionalField"))).show(false)
+---------------------------------------------------------------+------------------------------------------------+
|sort_array(id.adList.optionalField AS `optionalField`, true)[1]|size(id.adList.optionalField AS `optionalField`)|
+---------------------------------------------------------------+------------------------------------------------+
|null |2 |
+---------------------------------------------------------------+------------------------------------------------+
scala> df.select(isnull(sort_array(df("id.adList.optionalField"),true)(0)),size(df("id.adList.optionalField"))).show(false)
+-------------------------------------------------------------------------+------------------------------------------------+
|(sort_array(id.adList.optionalField AS `optionalField`, true)[0] IS NULL)|size(id.adList.optionalField AS `optionalField`)|
+-------------------------------------------------------------------------+------------------------------------------------+
|true |2 |
+-------------------------------------------------------------------------+------------------------------------------------+
scala>