>>> data = [("Alice", 25), ("Bob", 30)]  # illustrative rows; the original `data` was not shown
>>> df = spark.createDataFrame(data, ["Name", "Age"])
>>> df.show()
[Stage 0:>                                                          (0 + 1) / 1]
25/02/05 10:15:06 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:612)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:594)
    at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)
    at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:789)
    at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:766)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:525)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
25/02/05 10:15:06 ERROR TaskSetManager: Task 0 in stage 0.0 failed 1 times; aborting job
Traceback (most recent call last):
  File "<python-input-4>", line 1, in <module>
    df.show()
    ~~~~~~~^^
  File "D:\Tools2\spark-3.5.4-bin-hadoop3\python\pyspark\sql\dataframe.py", line 947, in show
    print(self._show_string(n, truncate, vertical))
          ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\Tools2\spark-3.5.4-bin-hadoop3\python\pyspark\sql\dataframe.py", line 965, in _show_string
    return self._jdf.showString(n, 20, vertical)
           ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "D:\Tools2\spark-3.5.4-bin-hadoop3\python\lib\py4j-0.10.9.7-src.zip\py4j\java_gateway.py", line 1322, in __call__
    return_value = get_return_value(
        answer, self.gateway_client, self.target_id, self.name)
  File "D:\Tools2\spark-3.5.4-bin-hadoop3\python\pyspark\errors\exceptions\captured.py", line 179, in deco
    return f(*a, **kw)
  File "D:\Tools2\spark-3.5.4-bin-hadoop3\python\lib\py4j-0.10.9.7-src.zip\py4j\protocol.py", line 326, in get_return_value
    raise Py4JJavaError(
        "An error occurred while calling {0}{1}{2}.\n".
        format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o56.showString.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0) (MDXN01072079.mshome.net executor driver): org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:612)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:594)
    at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)
    at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:789)
    at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:766)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:525)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
<_io.BufferedRWPair object at 0x000001C42C971140>
I ran the following script (from Git Bash) to check my environment:

#!/usr/bin/bash
# Sanity-check the Python / Spark / Hadoop setup
which python
python --version
echo $PYSPARK_PYTHON
echo $SPARK_HOME
which spark-shell
echo $HADOOP_HOME
which winutils
....
The script's output is:
/c/Python313/python
Python 3.13.1
C:\Python313\python.exe
D:\Tools2\spark-3.5.4-bin-hadoop3
/c/Python313/Scripts/spark-shell
D:\Tools2\hadoop
/d/Tools2/hadoop/bin/winutils
$SPARK_HOME/bin is on the PATH :-) The code itself is definitely fine. I suspect the problem is an incompatibility between the Spark version you are using and your Python version. Please downgrade to Python 3.12 and see if that helps.
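If you want to test that without uninstalling 3.13, one approach (a sketch, not part of the original answer) is to install Python 3.12 alongside it and point both the Spark driver and its workers at that interpreter before launching. The C:\Python312 install path below is an assumption; adjust it to your machine:

#!/usr/bin/env bash
# Launch PySpark against a specific interpreter (Git Bash syntax).
# C:\Python312\python.exe is an assumed install path -- change it to yours.
# Driver and workers must run the same major.minor Python version,
# so both variables point at the same executable.
export PYSPARK_PYTHON='C:\Python312\python.exe'
export PYSPARK_DRIVER_PYTHON='C:\Python312\python.exe'
"$SPARK_HOME"/bin/pyspark

If df.show() then prints the expected two-column table instead of crashing the worker, the interpreter version was indeed the culprit.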