day31
今日内容
1.进程
2.数据共享
3.锁
4.进程池
5.模块(爬虫)
requests
bs4(beautifulsoup)
6.协程
内容回顾
-
GIL锁
-
对数据进行控制,决定到底释放一个还是多个。
-
-
进程和线程的区别?
-
第一
-
进程是cpu资源分配的最小单元
-
线程是cpu计算的最小单元
-
-
第二
-
一个进程中可以有多个线程。
-
-
第三
-
对于python来说他的进程和线程和其他语言有差异,是有GIL锁
-
GIL锁保证一个进程中同一时刻只有一个线程被cpu调度
-
-
注意:IO密集型操作可以使用多线程;计算密集型可以使用多进程;
-
-
Lock和RLock
-
线程池
今日详细
进程
# No __main__ guard: the notes record that this runs on macOS but raises an
# error on Windows. The failure comes from the "spawn" start method, which
# re-imports this module in every child; since Python 3.8 macOS also defaults
# to spawn, so this form is only safe where children are forked.
import multiprocessing


def task(arg):
    """Print the value handed to the child process."""
    print(arg)


for i in range(10):
    p = multiprocessing.Process(target=task, args=(i,))
    p.start()
# With the __main__ guard the process-creating code is skipped when a child
# re-imports this module, so it no longer fails on Windows.
import multiprocessing


def task(arg):
    """Print the value handed to the child process."""
    print(arg)


if __name__ == "__main__":
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
不共享
import multiprocessing
import threading
import queue

# Module-level list: every process works on its own copy of it.
data_list = []


def task(arg):
    """Append ``arg`` to this process's ``data_list`` and print the list."""
    data_list.append(arg)
    print(data_list)


def run():
    """Start ten child processes, each calling ``task`` with its index."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()


if __name__ == "__main__":
    run()
进程常用功能
import multiprocessing
import threading
import queue
import time


def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes, waiting for each one with ``join()``."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
        # Wait for the child process to finish.
        # NOTE(review): the notes lost their indentation; join() is assumed
        # to belong inside the loop -- confirm.
        p.join()


if __name__ == "__main__":
    run()
###########################
import multiprocessing
import time


def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes with an explicit ``daemon`` setting."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        # True: the main process does not wait for the child.
        # False: the main process waits for the child to finish.
        p.daemon = False
        p.start()


if __name__ == "__main__":
    run()
###################
import multiprocessing
import time


def task(arg):
    """Print the current process's name, sleep two seconds, then print ``arg``."""
    # Get the process object this code is running in.
    p = multiprocessing.current_process()
    print(p.name)
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes, all named ``pp1``."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.name = "pp1"  # give the process a name
        p.start()


if __name__ == "__main__":
    run()
#########################
import multiprocessing
import time


def task(arg):
    """Print the current process's ident, sleep two seconds, then print ``arg``."""
    # Get the process object this code is running in.
    p = multiprocessing.current_process()
    print(p.ident)  # identifier (pid) of the current process
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes, all named ``pp1``."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.name = "pp1"  # give the process a name
        p.start()


if __name__ == "__main__":
    run()
面向对象创建进程
import multiprocessing
import threading
import queue


class MyProcess(multiprocessing.Process):
    """Process subclass: ``start()`` runs ``run()`` in the child process."""

    def run(self):
        """Print the process object this method is executing in."""
        print("当前进程", multiprocessing.current_process())


def run():
    """Create one ``MyProcess`` and start it."""
    p = MyProcess()
    p.start()


if __name__ == "__main__":
    run()
进程间的数据共享
先进先出:队列;先进后出:栈
进程间的共享
# Linux style: the queue is created at module level.
import multiprocessing

q = multiprocessing.Queue()


def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


def run():
    """Start ten producers, then print one queued value per producer."""
    worker_count = 10
    for i in range(worker_count):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
    # Read exactly as many items as were produced; an endless loop would
    # block forever on q.get() once the queue is drained.
    for _ in range(worker_count):
        v = q.get()
        print(v)


if __name__ == "__main__":
    run()
############################
# Windows style: the queue is created inside the __main__ guard and handed
# to every child through args.
import multiprocessing


def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


if __name__ == "__main__":
    q = multiprocessing.Queue()
    worker_count = 10
    for i in range(worker_count):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
    # Read exactly as many items as were produced; an endless loop would
    # block forever on q.get() once the queue is drained.
    for _ in range(worker_count):
        v = q.get()
        print(v)
Manager
# Linux style: the manager and its shared dict are created at module level.
import multiprocessing

m = multiprocessing.Manager()
dic = m.dict()


def task(arg, dic):
    """Store 100 under key ``arg`` in the shared dict ``dic``."""
    dic[arg] = 100


def run():
    """Start ten writers, wait for Enter, then print the stored values."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
    # Crude wait: press Enter once the children have had time to finish.
    input(">>>")
    print(dic.values())


if __name__ == "__main__":
    run()
############################
# Windows style: the manager and the shared dict are created inside the
# __main__ guard and the dict is handed to every child through args.
import multiprocessing


def task(arg, dic):
    """Store 100 under key ``arg`` in the shared dict ``dic``."""
    dic[arg] = 100


if __name__ == "__main__":
    m = multiprocessing.Manager()
    dic = m.dict()
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
        # Wait for this child before starting the next one.
        p.join()
    print(dic)
######################2
import multiprocessing


def task(arg, dic):
    """Store 100 under key ``arg`` in the shared dict ``dic``."""
    dic[arg] = 100


if __name__ == "__main__":
    m = multiprocessing.Manager()
    dic = m.dict()
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
        process_list.append(p)
    # Poll until every child has finished.
    while True:
        count = 0
        for p in process_list:
            # is_alive(): True while still running, False once finished.
            if not p.is_alive():
                count += 1
        if count == len(process_list):
            break
    print(dic)
进程锁
import time
import multiprocessing

# NOTE: a module-level lock is shared with children only when they are
# forked; under the spawn start method each child re-imports this module and
# gets its own lock, so pass the lock through args there.
lock = multiprocessing.RLock()


def task(arg):
    """Print a banner, then sleep and print ``arg`` while holding the lock."""
    print("鬼子来了")
    # ``with`` releases the lock even if the body raises.
    with lock:
        time.sleep(2)
        print(arg)


if __name__ == "__main__":
    # args must be a tuple: a bare ``1`` is not iterable and raises TypeError.
    p1 = multiprocessing.Process(target=task, args=(1,))
    p1.start()
    p2 = multiprocessing.Process(target=task, args=(1,))
    p2.start()
进程池
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


# ############################# thread pool
def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


if __name__ == "__main__":
    # At most five tasks run at a time; the rest wait in the pool's queue.
    pool = ThreadPoolExecutor(5)
    for i in range(10):
        pool.submit(task, i)


# ############################# process pool
def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


if __name__ == "__main__":
    # Same usage as the thread pool, but the workers are processes.
    pool = ProcessPoolExecutor(5)
    for i in range(10):
        pool.submit(task, i)
爬虫
需要安装这两个模块
pip install requests
pip install beautifulsoup4
import requests
from bs4 import BeautifulSoup

# Send the request the way a browser would; the url and User-Agent values
# are placeholders from the notes and must be replaced with real ones.
r1 = requests.get(
    url="xxx",
    headers={"User-Agent": "sssssssssssssssssssssssssssssssssssssssssss"},
)
# Response body as raw bytes
print(r1.content)
# Response body decoded to text
print(r1.text)
soup = BeautifulSoup(r1.text, "html.parser")
# Locate the <div id="content-list"> element and keep it for later traversal.
content_list = soup.find("div", attrs={"id": "content-list"})
for child in content_list.children:
contet
day31
今日内容
1.进程
2.数据共享
3.锁
4.进程池
5.模块(爬虫)
requests
bs4(beautifulsoup)
6.协程
内容回顾
-
GIL锁
-
对数据进行控制,决定到底释放一个还是多个。
-
-
进程和线程的区别?
-
第一
-
进程是cpu资源分配的最小单元
-
线程是cpu计算的最小单元
-
-
第二
-
一个进程中可以有多个线程。
-
-
第三
-
对于python来说他的进程和线程和其他语言有差异,是有GIL锁
-
GIL锁保证一个进程中同一时刻只有一个线程被cpu调度
-
-
注意:IO密集型操作可以使用多线程;计算密集型可以使用多进程;
-
-
Lock和RLock
-
线程池
今日详细
进程
# No __main__ guard: the notes record that this runs on macOS but raises an
# error on Windows. The failure comes from the "spawn" start method, which
# re-imports this module in every child; since Python 3.8 macOS also defaults
# to spawn, so this form is only safe where children are forked.
import multiprocessing


def task(arg):
    """Print the value handed to the child process."""
    print(arg)


for i in range(10):
    p = multiprocessing.Process(target=task, args=(i,))
    p.start()
# With the __main__ guard the process-creating code is skipped when a child
# re-imports this module, so it no longer fails on Windows.
import multiprocessing


def task(arg):
    """Print the value handed to the child process."""
    print(arg)


if __name__ == "__main__":
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
不共享
import multiprocessing
import threading
import queue

# Module-level list: every process works on its own copy of it.
data_list = []


def task(arg):
    """Append ``arg`` to this process's ``data_list`` and print the list."""
    data_list.append(arg)
    print(data_list)


def run():
    """Start ten child processes, each calling ``task`` with its index."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()


if __name__ == "__main__":
    run()
进程常用功能
import multiprocessing
import threading
import queue
import time


def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes, waiting for each one with ``join()``."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
        # Wait for the child process to finish.
        # NOTE(review): the notes lost their indentation; join() is assumed
        # to belong inside the loop -- confirm.
        p.join()


if __name__ == "__main__":
    run()
###########################
import multiprocessing
import time


def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes with an explicit ``daemon`` setting."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        # True: the main process does not wait for the child.
        # False: the main process waits for the child to finish.
        p.daemon = False
        p.start()


if __name__ == "__main__":
    run()
###################
import multiprocessing
import time


def task(arg):
    """Print the current process's name, sleep two seconds, then print ``arg``."""
    # Get the process object this code is running in.
    p = multiprocessing.current_process()
    print(p.name)
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes, all named ``pp1``."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.name = "pp1"  # give the process a name
        p.start()


if __name__ == "__main__":
    run()
#########################
import multiprocessing
import time


def task(arg):
    """Print the current process's ident, sleep two seconds, then print ``arg``."""
    # Get the process object this code is running in.
    p = multiprocessing.current_process()
    print(p.ident)  # identifier (pid) of the current process
    time.sleep(2)
    print(arg)


def run():
    """Start ten child processes, all named ``pp1``."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.name = "pp1"  # give the process a name
        p.start()


if __name__ == "__main__":
    run()
面向对象创建进程
import multiprocessing
import threading
import queue


class MyProcess(multiprocessing.Process):
    """Process subclass: ``start()`` runs ``run()`` in the child process."""

    def run(self):
        """Print the process object this method is executing in."""
        print("当前进程", multiprocessing.current_process())


def run():
    """Create one ``MyProcess`` and start it."""
    p = MyProcess()
    p.start()


if __name__ == "__main__":
    run()
进程间的数据共享
先进先出:队列;先进后出:栈
进程间的共享
# Linux style: the queue is created at module level.
import multiprocessing

q = multiprocessing.Queue()


def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


def run():
    """Start ten producers, then print one queued value per producer."""
    worker_count = 10
    for i in range(worker_count):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
    # Read exactly as many items as were produced; an endless loop would
    # block forever on q.get() once the queue is drained.
    for _ in range(worker_count):
        v = q.get()
        print(v)


if __name__ == "__main__":
    run()
############################
# Windows style: the queue is created inside the __main__ guard and handed
# to every child through args.
import multiprocessing


def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


if __name__ == "__main__":
    q = multiprocessing.Queue()
    worker_count = 10
    for i in range(worker_count):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
    # Read exactly as many items as were produced; an endless loop would
    # block forever on q.get() once the queue is drained.
    for _ in range(worker_count):
        v = q.get()
        print(v)
Manager
# Linux style: the manager and its shared dict are created at module level.
import multiprocessing

m = multiprocessing.Manager()
dic = m.dict()


def task(arg, dic):
    """Store 100 under key ``arg`` in the shared dict ``dic``."""
    dic[arg] = 100


def run():
    """Start ten writers, wait for Enter, then print the stored values."""
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
    # Crude wait: press Enter once the children have had time to finish.
    input(">>>")
    print(dic.values())


if __name__ == "__main__":
    run()
############################
# Windows style: the manager and the shared dict are created inside the
# __main__ guard and the dict is handed to every child through args.
import multiprocessing


def task(arg, dic):
    """Store 100 under key ``arg`` in the shared dict ``dic``."""
    dic[arg] = 100


if __name__ == "__main__":
    m = multiprocessing.Manager()
    dic = m.dict()
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
        # Wait for this child before starting the next one.
        p.join()
    print(dic)
######################2
import multiprocessing


def task(arg, dic):
    """Store 100 under key ``arg`` in the shared dict ``dic``."""
    dic[arg] = 100


if __name__ == "__main__":
    m = multiprocessing.Manager()
    dic = m.dict()
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
        process_list.append(p)
    # Poll until every child has finished.
    while True:
        count = 0
        for p in process_list:
            # is_alive(): True while still running, False once finished.
            if not p.is_alive():
                count += 1
        if count == len(process_list):
            break
    print(dic)
进程锁
import time
import multiprocessing

# NOTE: a module-level lock is shared with children only when they are
# forked; under the spawn start method each child re-imports this module and
# gets its own lock, so pass the lock through args there.
lock = multiprocessing.RLock()


def task(arg):
    """Print a banner, then sleep and print ``arg`` while holding the lock."""
    print("鬼子来了")
    # ``with`` releases the lock even if the body raises.
    with lock:
        time.sleep(2)
        print(arg)


if __name__ == "__main__":
    # args must be a tuple: a bare ``1`` is not iterable and raises TypeError.
    p1 = multiprocessing.Process(target=task, args=(1,))
    p1.start()
    p2 = multiprocessing.Process(target=task, args=(1,))
    p2.start()
进程池
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


# ############################# thread pool
def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


if __name__ == "__main__":
    # At most five tasks run at a time; the rest wait in the pool's queue.
    pool = ThreadPoolExecutor(5)
    for i in range(10):
        pool.submit(task, i)


# ############################# process pool
def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    time.sleep(2)
    print(arg)


if __name__ == "__main__":
    # Same usage as the thread pool, but the workers are processes.
    pool = ProcessPoolExecutor(5)
    for i in range(10):
        pool.submit(task, i)
爬虫
需要安装这两个模块
pip install requests
pip install beautifulsoup4
import requests
from bs4 import BeautifulSoup

# Send the request the way a browser would; the url and User-Agent values
# are placeholders from the notes and must be replaced with real ones.
r1 = requests.get(
    url="xxx",
    headers={"User-Agent": "sssssssssssssssssssssssssssssssssssssssssss"},
)
# Response body as raw bytes
print(r1.content)
# Response body decoded to text
print(r1.text)
soup = BeautifulSoup(r1.text, "html.parser")
# Locate the <div id="content-list"> element and keep it for later traversal.
content_list = soup.find("div", attrs={"id": "content-list"})
for child in content_list.children:
contet