Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
// :: ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable C:\notos\software\hadoop\hadoop-2.7.\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:)
at org.apache.hadoop.security.SecurityUtil.getAuthenticationMethod(SecurityUtil.java:)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:)
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:)
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:)
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:)
at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$.apply(Utils.scala:)
at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$.apply(Utils.scala:)
at scala.Option.getOrElse(Option.scala:)
at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$.apply(SparkSession.scala:)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$.apply(SparkSession.scala:)
at scala.Option.getOrElse(Option.scala:)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:)
at com.jason.test.spark.PipelineExample$.main(PipelineExample.scala:)
at com.jason.test.spark.PipelineExample.main(PipelineExample.scala)
// :: WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
// :: WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS
// :: WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS
// :: ERROR Executor: Exception in task 0.0 in stage 12.0 (TID )
java.io.IOException: (null) entry in command string: null chmod C:\notos\tmp\spark-logistic-regression-model\metadata\_temporary\\_temporary\attempt_20181022220551_0012_m_000000_48\part-
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:)
at org.apache.spark.internal.io.SparkHadoopWriter.open(SparkHadoopWriter.scala:)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$$$anonfun$.apply(PairRDDFunctions.scala:)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$$$anonfun$.apply(PairRDDFunctions.scala:)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:)
at org.apache.spark.scheduler.Task.run(Task.scala:)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:)
at java.lang.Thread.run(Thread.java:)
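Win10 重装系统后,在 IDEA 中运行 Spark 程序向本地写文件时报上述错误。原因是本地 Hadoop 的 bin 目录下缺少 winutils.exe(见日志中的 "Could not locate executable ...\bin\winutils.exe"),解决方法如下: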
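从官网下载 Hadoop 并在本地解压,然后下载 hadooponwindows-master.zip 并解压,用其中的 bin 和 etc 目录替换 Hadoop 解压目录中的 bin 和 etc 目录(替换后的 bin 目录应包含 winutils.exe);接着为 Hadoop 配置环境变量(通常是将 HADOOP_HOME 设置为 Hadoop 解压目录,并把 %HADOOP_HOME%\bin 加入 PATH)。
重启 IDEA 使环境变量生效后,程序即可正常运行。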