Во время выполнения кода, приведенного ниже, появляется сообщение об ошибке ... azure Блок данных обрабатывает EDA.
df_typed = spark.sql("SELECT cast(Price as int),
cast(Age as int), cast(KM as int), FuelType,
cast(HP as int), cast(MetColor as int),
cast(Automatic as int), cast(CC as int),
cast(Doors as int), cast(Weight as int) FROM usedcars_CSV")
df_typed
Ошибка:
Py4JJavaError Traceback (most recent call last)
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
Py4JJavaError: An error occurred while calling o204.sql.
: org.apache.spark.sql.AnalysisException: cannot resolve '`Price`' given input columns: [default.usedcars_csv._c3, default.usedcars_csv._c6, default.usedcars_csv._c5, default.usedcars_csv._c1, default.usedcars_csv._c7, default.usedcars_csv._c0, default.usedcars_csv._c9, default.usedcars_csv._c2, default.usedcars_csv._c4, default.usedcars_csv._c8]; line 1 pos 12;
'Project [unresolvedalias(cast('Price as int), None), unresolvedalias(cast('Age as int), None), unresolvedalias(cast('KM as int), None), 'FuelType, unresolvedalias(cast('HP as int), None), unresolvedalias(cast('MetColor as int), None), unresolvedalias(cast('Automatic as int), None), unresolvedalias(cast('CC as int), None), unresolvedalias(cast('Doors as int), None), unresolvedalias(cast('Weight as int), None)]
+- SubqueryAlias `default`.`usedcars_csv`
+- Relation[_c0#347,_c1#348,_c2#349,_c3#350,_c4#351,_c5#352,_c6#353,_c7#354,_c8#355,_c9#356] csv
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:120)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$3.applyOrElse(CheckAnalysis.scala:111)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$6.apply(TreeNode.scala:303)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$6.apply(TreeNode.scala:303)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:76)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:302)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$8.apply(TreeNode.scala:353)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:207)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:351)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:300)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$8.apply(TreeNode.scala:353)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:207)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:351)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:300)