# sparksql_分析航线数据_DataFrame场景 (Spark SQL: analysing flight-route data, DataFrame scenario)
# Input file locations for the two CSV-style datasets loaded below.
flightPerfFilePath = "/databricks-datasets/flights/departuredelays.csv"
airportsFilePath = "/databricks-datasets/flights/airport-codes-na.txt"

# Airports dataset: tab-separated file with a header row; column types are
# inferred by the reader. Exposed to SQL as the temp view "airports".
# NOTE(review): assumes `spark` is an existing SparkSession supplied by the
# runtime (e.g. a notebook) -- it is not created in this script.
airports = (
    spark.read
    .options(header='true', inferSchema='true', sep='\t')
    .csv(airportsFilePath)
)
airports.createOrReplaceTempView("airports")

# Departure-delays dataset: file with a header row; no schema inference is
# requested for this read. Exposed to SQL as the temp view "FlightPerformance".
flightPerf = (
    spark.read
    .options(header='true')
    .csv(flightPerfFilePath)
)
flightPerf.createOrReplaceTempView("FlightPerformance")

# Mark the departure-delays DataFrame for caching so that subsequent queries
# against it can run faster.
flightPerf.cache()
# 参考此博客 (reference blog post):
# https://blog.csdn.net/wangxw1803/article/details/87182997