1. Set the block size:

sc.hadoopConfiguration.setLong("fs.local.block.size", 128 * 1024 * 1024)
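This setting matters because the local filesystem reports this value as its block size, and Spark's Hadoop-based readers cap their input splits at it. A minimal sketch to observe the effect (the path "data/big.txt" is a hypothetical placeholder for a local file of a few hundred MB):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("blockSizeDemo")
  .getOrCreate()
val sc = spark.sparkContext

// Raise the local block size from the 32 MB default to 128 MB
sc.hadoopConfiguration.setLong("fs.local.block.size", 128L * 1024 * 1024)

// Splits are capped at the block size, so a ~300 MB file now yields
// roughly 3 partitions instead of ~10 under the 32 MB default
val rdd = sc.textFile("data/big.txt")
println(rdd.getNumPartitions)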
2. Read a CSV file:
val schema = "name string, age int, job string"

// Read a semicolon-delimited CSV with a header row, using an explicit schema
val df3 = spark.read
  .options(Map("delimiter" -> ";", "header" -> "true"))
  .schema(schema)
  .csv("data/people2.csv")
df3.printSchema()

// Same file, but let Spark infer the column types from the data
val df4 = spark.read
  .option("delimiter", ";")
  .option("header", "true")
  .option("inferSchema", "true")
  .csv("data/people2.csv")
df4.printSchema()
df4.show()
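For reference, a file consistent with the options above could look like this (the contents of "people2.csv" are assumed here, not given in the source):

name;age;job
Jorge;30;Developer
Bob;32;Developer

With the explicit schema, age is parsed as int without scanning the data; with inferSchema set to true, Spark makes one extra pass over the file to guess each column's type, which is convenient but slower on large inputs.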