1: Start Jupyter in the background
nohup jupyter lab --port 9000 --ip 0.0.0.0 &
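nohup detaches the server from the terminal so it survives logout, and appends its output, including the access URL with the login token, to nohup.out in the current directory; check that file to get the link for your browser.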
2: Configure the Spark path
import os
import sys

reload(sys)                     # Python 2 only: re-expose setdefaultencoding
sys.setdefaultencoding("utf8")

# spark_name = os.environ.get('SPARK_HOME', None)
spark_name = '/home/work/local/spark/'  # note: adjust to your Spark install path
sys.path.insert(0, os.path.join(spark_name, 'python'))
sys.path.insert(0, os.path.join(spark_name, 'python/lib/py4j-0.10.4-src.zip'))

from pyspark.sql import session
import pyspark.sql.functions as F
from pyspark.sql.types import *

user_name = "xxx"
app_name = "spark_debug"

spark = (session.SparkSession.builder
         .appName("{user_name}-jupyter-{app_name}".format(user_name=user_name, app_name=app_name))
         .config("spark.driver.maxResultSize", "6g")
         .config("spark.driver.memory", "8g")
         .config("spark.executor.memory", "12g")
         .config("spark.executor.instances", "32")
         .config("spark.dynamicAllocation.maxExecutors", "32")
         .config("spark.yarn.dist.archives", "hdfs://ns-fed/user/strategy/yudonghai/python.tgz#python")
         .config("spark.pyspark.python", "python/bin/python2.7")  # python interpreter inside the shipped archive
         .config("spark.sql.shuffle.partitions", "500")
         .enableHiveSupport()
         .getOrCreate())

application_id = spark.sparkContext.applicationId  # YARN application id of this session
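A quick way to confirm the session actually reached the cluster is to print the application id and run a trivial job. A minimal sanity check, assuming the session above came up cleanly:

# Confirm the driver is connected and the executors respond.
print(application_id)               # YARN application id for this session
spark.sql("SELECT 1 AS ok").show()  # trivial query; exercises the SQL path
print(spark.range(1000).count())    # small job that actually runs on the executors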
Note: remember to kill the job manually to release cluster resources.
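A notebook kernel holds its YARN executors until the session ends, so stop the session explicitly when you are done. A minimal sketch; the yarn CLI fallback assumes the application id printed above:

# Release the YARN containers; the notebook kernel itself keeps running.
spark.stop()

# If the kernel died without stopping the session, kill the application from a shell:
# yarn application -kill <application_id>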