import urllib
import os


def reporthook(blocks_read, block_size, total_size):
    """Print download progress; intended as the ``reporthook`` of urlretrieve.

    Parameters:
        blocks_read: number of blocks transferred so far; a falsy value (0)
            is treated as the "connection just opened" notification.
        block_size: size of one block, in bytes.
        total_size: total size of the resource in bytes; a non-positive
            value is treated as "size unknown".

    Returns:
        None. The function only prints.
    """
    # NOTE: every print below uses a single parenthesised argument, which
    # prints the same text under the Python 2 print statement.
    if not blocks_read:
        print("Connection opened")
        return

    # The hook receives a block count, not a byte count, so this is an
    # estimate: a short final block is still counted as a full block and the
    # value can exceed total_size (i.e. the percentage can go above 100%).
    amount_read = blocks_read * block_size

    if total_size <= 0:
        # Unknown size. Also covers total_size == 0, which would otherwise
        # raise ZeroDivisionError in the percentage computation below.
        print("Read %d blocks (%d bytes)" % (blocks_read, amount_read))
    else:
        print("Read %d blocks, %d/%d, %.0f%%" % (
            blocks_read, amount_read, total_size,
            amount_read * 100.0 / total_size))


# Bind the name before the try block: if urlretrieve() raises, the finally
# clause must not fail with NameError and hide the real download error.
filename = None
try:
    filename, msg = urllib.urlretrieve("http://www.xiaomi.com",
                                       reporthook=reporthook)
    print("")
    print("File: %s" % filename)
    print("Headers:")
    print(msg)
    print("File exists before cleanup: %s" % os.path.exists(filename))
finally:
    # Always clean up whatever urlretrieve() left behind, even on failure.
    urllib.urlcleanup()
    if filename is not None:
        print("File still exists: %s" % os.path.exists(filename))
输出如下:
Connection opened Read 1 blocks, 8192/55662, 15% Read 2 blocks, 16384/55662, 29% Read 3 blocks, 24576/55662, 44% Read 4 blocks, 32768/55662, 59% Read 5 blocks, 40960/55662, 74% Read 6 blocks, 49152/55662, 88% Read 7 blocks, 57344/55662, 103% File: /tmp/tmp4lhkGA Headers: Date: Wed, 19 Feb 2014 08:55:20 GMT Content-Type: text/html; charset=utf-8 Cache-Control: max-age=1800 X-Cacheable: MI-WWW-Cacheable Server: MIFE/3.0 Expires: Wed, 19 Feb 2014 09:25:20 GMT Powered-By-ChinaCache: MISS from 010519h3SP.4 Age: 990 Content-Length: 55662 Powered-By-ChinaCache: HIT from 01005143SG Connection: close File exists before cleanup: True File still exists: False
可以看到最后竟然读取了 103%。这是因为 reporthook 中的已读字节数是用 blocks_read * block_size 估算出来的:数据按每块 8192 字节读取,即使最后一块不足 8192 字节,也会按一整块的大小计入,所以累计值(57344)会超过文件的实际大小(55662)。