В PySpark я хочу вычислить корреляцию между двумя векторами данных, используя следующий код (у меня нет проблем с импортом pyspark или createDataFrame):
from pyspark.ml.linalg import Vectors
from pyspark.ml.stat import Correlation
import pyspark
# Obtain a SparkSession configured with the "local[*]" master; getOrCreate()
# returns an already existing session if there is one instead of building a new one.
spark = pyspark.sql.SparkSession.builder.master("local[*]").getOrCreate()
# Two rows, each a 1-tuple holding a 4-element vector: a sparse one
# (1.0 at index 0, -2.0 at index 3) and a dense one.
data = [(Vectors.sparse(4, [(0, 1.0), (3, -2.0)]),),
(Vectors.dense([4.0, 5.0, 0.0, 3.0]),)]
# Single-column DataFrame whose only column is named "features".
df = spark.createDataFrame(data, ["features"])
# Compute the correlation over the "features" vector column and fetch the first
# row of the resulting DataFrame; head() goes through take() -> collect().
# NOTE(review): per the traceback below, this is the failing statement: inside
# collect() the DataFrame's SparkContext has `_jsc` set to None, hence
# "'NoneType' object has no attribute 'setCallSite'". Presumably the session
# returned by getOrCreate() is tied to a stopped/stale SparkContext (e.g. after
# an earlier spark.stop() in the same notebook) -- TODO confirm.
r1 = Correlation.corr(df, "features").head()
# The first field of the returned row is printed as the correlation matrix.
print("Pearson correlation matrix:\n" + str(r1[0]))
Но при выполнении я получил ошибку AttributeError ('NoneType' object has no attribute 'setCallSite'). Полная трассировка:
AttributeError Traceback (most recent call last)
<ipython-input-136-d553c1ade793> in <module>()
6 df = spark.createDataFrame(data, ["features"])
7
----> 8 r1 = Correlation.corr(df, "features").head()
9 print("Pearson correlation matrix:\n" + str(r1[0]))
/usr/local/lib/python3.6/dist-packages/pyspark/sql/dataframe.py in head(self, n)
1130 """
1131 if n is None:
-> 1132 rs = self.head(1)
1133 return rs[0] if rs else None
1134 return self.take(n)
/usr/local/lib/python3.6/dist-packages/pyspark/sql/dataframe.py in head(self, n)
1132 rs = self.head(1)
1133 return rs[0] if rs else None
-> 1134 return self.take(n)
1135
1136 @ignore_unicode_prefix
/usr/local/lib/python3.6/dist-packages/pyspark/sql/dataframe.py in take(self, num)
502 [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
503 """
--> 504 return self.limit(num).collect()
505
506 @since(1.3)
/usr/local/lib/python3.6/dist-packages/pyspark/sql/dataframe.py in collect(self)
463 [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
464 """
--> 465 with SCCallSiteSync(self._sc) as css:
466 port = self._jdf.collectToPython()
467 return list(_load_from_socket(port, BatchedSerializer(PickleSerializer())))
/usr/local/lib/python3.6/dist-packages/pyspark/traceback_utils.py in __enter__(self)
70 def __enter__(self):
71 if SCCallSiteSync._spark_stack_depth == 0:
---> 72 self._context._jsc.setCallSite(self._call_site)
73 SCCallSiteSync._spark_stack_depth += 1
74
AttributeError: 'NoneType' object has no attribute 'setCallSite'
Как это можно исправить?