hive
启动以下服务:
start-dfs.sh
start-yarn.sh
mapred --daemon start historyserver
/opt/installs/spark/sbin/start-history-server.sh
hive-server-manager.sh start metastore
import os
from pyspark.sql import SparkSession
"""
------------------------------------------
Description : TODO:
SourceFile : 02、spark
Author : null
Date : 2024/11/6
-------------------------------------------
"""
if __name__ == '__main__':
# 配置环境
os.environ['JAVA_HOME'] = 'E:/java-configuration/jdk-8'
# 配置Hadoop的路径,就是前面解压的那个路径
os.environ['HADOOP_HOME'] = 'E:/applications/bigdata_config/hadoop-3.3.1/hadoop-3.3.1'
# 配置base环境Python解析器的路径
os.environ['PYSPARK_PYTHON'] = 'C:/Users/35741/miniconda3/python.exe'
# 配置base环境Python解析器的路径
os.environ['PYSPARK_DRIVER_PYTHON'] = 'C:/Users/35741/miniconda3/python.exe'
os.environ['HADOOP_USER_NAME'] = 'root'
spark = SparkSession.builder \
.master("local[2]") \
.appName("第一个sparksql案例") \
.config("spark.sql.warehouse.dir", 'hdfs://shucang:9820/user/hive/warehouse') \
.config('hive.metastore.uris', 'thrift://shucang:9083') \
.config("spark.sql.shuffle.partitions", 2) \
.enableHiveSupport() \
.getOrCreate()
# 此时spark已经知道hive有什么数据库 ,该数据库中有什么表了,但是没有use
spark.sql("select * from yhdb01.sql2_1").createOrReplaceTempView("sql2_1")
hiveDf = spark.sql("select * from sql2_1")
# 写入hive的数据库中
# 需要有库吗 ——需要 需要有这个表吗 —— 不需要 但是记得判空 hive中没有空类型
hiveDf.write.saveAsTable("yhdb01.sql22_1",mode="overwrite")
spark.stop()
mysql
# 不需要事先将表创建好
import os
from pyspark.sql import SparkSession
"""
------------------------------------------
Description : TODO:
SourceFile : 02、spark
Author : null
Date : 2024/11/6
-------------------------------------------
"""
if __name__ == '__main__':
# 配置环境
os.environ['JAVA_HOME'] = 'E:/java-configuration/jdk-8'
# 配置Hadoop的路径,就是前面解压的那个路径
os.environ['HADOOP_HOME'] = 'E:/applications/bigdata_config/hadoop-3.3.1/hadoop-3.3.1'
# 配置base环境Python解析器的路径
os.environ['PYSPARK_PYTHON'] = 'C:/Users/35741/miniconda3/python.exe'
# 配置base环境Python解析器的路径
os.environ['PYSPARK_DRIVER_PYTHON'] = 'C:/Users/35741/miniconda3/python.exe'
spark = SparkSession.builder \
.master("local[2]") \
.appName("第一个sparksql案例") \
.config("spark.sql.shuffle.partitions", 2) \
.getOrCreate()
df = spark.read.json("../../data/sql/person.json")
df.createOrReplaceTempView("person")
# 获取一个dataFrame
dfMysql = spark.sql("""
select * from person
""")
# 第一种方式:format
dfMysql.write.format("jdbc") \
.option("driver", "com.mysql.cj.jdbc.Driver") \
.option("url", "jdbc:mysql://localhost:3306/mydb01?characterEncoding=UTF-8") \
.option("user", "root") \
.option("password", "root") \
.option("dbtable", "person") \
.save(mode="overwrite")
# append
# 第二种方式:jdbc
dictUsername = {"user": "root", "password": "root"}
dfMysql.write.jdbc(url="jdbc:mysql://localhost:3306/mydb01?characterEncoding=UTF-8",table="person",properties=dictUsername,mode="append")
spark.stop()