Python 的简单多线程用 multiprocessing.dummy 模块一句话就可以搞定;但如果需要对线程、队列做进一步的操作,最好自己写一个线程池类来实现。
Code:
# coding:utf-8
# version: 0.1
import re
import time
from Queue import Queue, Empty
from threading import Lock, Thread

from requests import get

# Global variables
COUNT = 0  # Number of loop iterations completed by all Spider threads so far.
_COUNT_LOCK = Lock()  # Serializes the read-modify-write of COUNT across threads.


# Crawler worker class
class Spider(Thread):
    """Worker thread that drains a shared queue, one item per iteration.

    The thread starts itself as soon as it is constructed, and it exits the
    first time it finds the queue empty, so the queue should already hold its
    work items when a Spider is created.
    """

    def __init__(self, queue):
        """Store the shared queue and start the thread immediately.

        Args:
            queue: Queue instance this worker takes items from.
        """
        Thread.__init__(self)
        self.queue = queue
        self.start()  # executes run() in the new thread

    def run(self):
        """Take one queue item per iteration until the queue is empty."""
        global COUNT
        while True:
            try:
                # Non-blocking get: raises Empty instead of waiting forever
                # once every item has been taken. (The original passed the
                # undefined name `false`, which raised NameError each time.)
                item = self.queue.get(block=False)
                # Placeholder: per-item work on `item` belongs here.
            except Empty:
                break
            except Exception as e:
                # Best effort: report the failure and move on to the next item.
                print('[- Excpt :] ' + str(e))

            # `COUNT += 1` is not atomic; without the lock, concurrent workers
            # can lose increments or print the same value twice.
            with _COUNT_LOCK:
                print(COUNT)
                COUNT += 1


# Thread pool class
class ThreadPool(object):
    """Minimal thread pool: a task queue plus the workers that drain it.

    Intended order of use: create the pool, queue every task with
    add_task(), start the workers with init_threads(), then call wait().
    Spider workers exit as soon as they find the queue empty, so tasks queued
    after init_threads() may never be picked up.
    """

    def __init__(self, thread_num=4):
        """Create an empty pool.

        Args:
            thread_num: Number of worker threads init_threads() will start.
                Optional, so the original no-argument construction still
                works.
        """
        self.thread_num = thread_num
        self.queue = Queue()  # tasks waiting to be executed
        self.threads = []  # worker threads started so far

    def add_task(self, task=None):
        """Queue one task for the workers.

        Args:
            task: Item to hand to a worker. Calling without a task (or with
                None) does nothing, matching the original placeholder method.
        """
        if task is not None:
            self.queue.put(task)

    def init_threads(self):
        """Start thread_num Spider workers on the pool's queue."""
        for _ in range(self.thread_num):
            # Spider starts itself in its constructor.
            self.threads.append(Spider(self.queue))

    def wait(self):
        """Block until every started worker has finished."""
        for t in self.threads:
            # is_alive() replaces isAlive(), which Python 3.9 removed.
            if t.is_alive():
                t.join()


if __name__ == '__main__':
    thread_num = 4  # the original referenced this name without defining it
    start = time.time()
    tp = ThreadPool(thread_num)
    # Queue work with tp.add_task(...) here, before the workers start.
    tp.init_threads()
    tp.wait()
    end = time.time()
    print('[ - info ] cost time :{}'.format(end - start))