结构设计
基础架构为flask+gunicorn+负载均衡,负载均衡分为阿里云硬件负载均衡服务和软负载nginx。gunicorn使用supervisor进行管理。
使用nginx软件负载结构图
使用阿里云硬件负载均衡服务结构图
因为flask app需要在内存中保存ip树以及国家、省份、城市相关的字典,因此占用内存较高。gunicorn的每个worker需要占用约300M内存(4个worker合计约1.2G),nginx的4个worker内存占用较小(不到100M),因此总共占用约1.3G的内存(即需要一台2G内存的服务器)。当gunicorn任意一个节点挂掉或者升级时,另外一个节点仍然在提供服务,不会影响整体服务。
ip数据库
IP库(也叫IP地址数据库),是由专业技术人员经过长时间通过多种技术手段收集而来的,并且长期有专业人员进行更新、维护、补充。
ip数据库解析查询代码
基于二叉查找树实现
import struct
from socket import inet_aton, inet_ntoa
import os
import sys
# Raise the interpreter's recursion ceiling to one million frames.
# NOTE(review): presumably required by recursive tree code elsewhere in this
# module -- confirm before lowering it.
sys.setrecursionlimit(1000000)


def _unpack_V(b):
    """Decode 4 bytes as a little-endian unsigned 32-bit int (1-tuple)."""
    return struct.unpack("<L", b)


def _unpack_N(b):
    """Decode 4 bytes as a big-endian unsigned 32-bit int (1-tuple)."""
    return struct.unpack(">L", b)


def _unpack_C(b):
    """Decode a single byte as an unsigned 8-bit int (1-tuple)."""
    return struct.unpack("B", b)
class IpTree:
def __init__(self):
    """Create an empty IpTree; the load* methods populate the tables."""
    # Code tables, each keyed by a place name with its code as the value
    # (see the lookup_*_code methods, which index them by name).
    self.country_codes = dict()
    self.china_province_codes = dict()
    self.china_city_codes = dict()
    # First octet of an address -> whatever generate_tree() returns for the
    # records of that octet (filled by loadfile).
    self.ip_dict = dict()
def load_country_codes(self, file_name):
    """Fill ``self.country_codes`` from a TAB-separated file.

    Every line is split on TAB; column 0 becomes the key and column 1 the
    value. The file is opened in binary mode, so keys and values are byte
    strings, and column 1 is stored exactly as it appears (if it is the last
    column on the line it still carries the line terminator).

    :param file_name: path of the code table; resolved with os.path.abspath.
    :raises SystemExit: on any failure (unreadable file, line with fewer
        than two columns, ...) a message is printed and the process exits
        with status 0, matching the historical behaviour.
    """
    try:
        path = os.path.abspath(file_name)
        with open(path, "rb") as f:
            # Iterating the handle streams the file instead of building the
            # full readlines() list first.
            for line in f:
                # Bytes separator: the file is opened "rb".
                data = line.split(b'\t')
                self.country_codes[data[0]] = data[1]
    except Exception as ex:
        # Report the path we were asked to load; the old code formatted the
        # unrelated built-in name `file` here.
        print("cannot open file %s: %s" % (file_name, ex))
        # NOTE(review): status 0 on failure is kept for compatibility;
        # a non-zero status would be more conventional.
        sys.exit(0)
def load_china_province_codes(self, file_name):
    """Fill ``self.china_province_codes`` from a TAB-separated file.

    Every line is split on TAB. The key is column 2 with everything from the
    first carriage return onwards removed; the value is column 0. The file
    is opened in binary mode, so keys and values are byte strings.

    :param file_name: path of the code table; resolved with os.path.abspath.
    :raises SystemExit: on any failure (unreadable file, line with fewer
        than three columns, ...) a message is printed and the process exits
        with status 0, matching the historical behaviour.
    """
    try:
        path = os.path.abspath(file_name)
        with open(path, "rb") as f:
            for line in f:
                # Bytes separators: the file is opened "rb".
                data = line.split(b'\t')
                # Drop the "\r\n" tail that follows the name column.
                province = data[2].split(b'\r')[0]
                self.china_province_codes[province] = data[0]
    except Exception as ex:
        # Report the path we were asked to load; the old code formatted the
        # unrelated built-in name `file` here.
        print("cannot open file %s: %s" % (file_name, ex))
        # NOTE(review): status 0 on failure is kept for compatibility.
        sys.exit(0)
def load_china_city_codes(self, file_name):
    """Fill ``self.china_city_codes`` from a TAB-separated file.

    Every line is split on TAB. The key is column 3 with everything from the
    first carriage return onwards removed; the value is column 0. The file
    is opened in binary mode, so keys and values are byte strings.

    :param file_name: path of the code table; resolved with os.path.abspath.
    :raises SystemExit: on any failure (unreadable file, line with fewer
        than four columns, ...) a message is printed and the process exits
        with status 0, matching the historical behaviour.
    """
    try:
        path = os.path.abspath(file_name)
        with open(path, "rb") as f:
            for line in f:
                # Bytes separators: the file is opened "rb".
                data = line.split(b'\t')
                # Drop the "\r\n" tail that follows the name column.
                city = data[3].split(b'\r')[0]
                self.china_city_codes[city] = data[0]
    except Exception as ex:
        # Report the path we were asked to load; the old code formatted the
        # unrelated built-in name `file` here.
        print("cannot open file %s: %s" % (file_name, ex))
        # NOTE(review): status 0 on failure is kept for compatibility.
        sys.exit(0)
def loadfile(self, file_name):
    """Parse the binary IP database and build one tree per first octet.

    Layout, as read by this method:

    * bytes 0-3 of the file: big-endian uint32 ``local_offset``;
    * ``file[4:local_offset]`` is the index region. Its first 1024 bytes are
      a table of little-endian uint32 slots, one per first octet: slot ``k``
      (times 8, plus 1024) is where octet ``k``'s records start inside the
      index region and slot ``k + 1`` is where they end;
    * each record is 8 bytes: a 4-byte packed IPv4 address, a 3-byte
      little-endian text offset and a 1-byte text length;
    * the text lives at ``local_offset + text_offset - 1024`` in the file and
      is TAB-separated with at least five fields.

    Consecutive records that carry identical text are collapsed into one
    entry that keeps the address of the last record of the run. Each entry
    is ``(dotted_ip, (f0, country_code, f1, province_code, f2, city_code,
    f3, f4))``; the entries of one octet are handed to ``generate_tree`` and
    the result is stored in ``self.ip_dict[octet]``.

    :param file_name: path of the database; resolved with os.path.abspath.
    :raises SystemExit: on any failure a message is printed and the process
        exits with status 0, matching the historical behaviour.
    """
    try:
        path = os.path.abspath(file_name)
        with open(path, "rb") as f:
            blob = f.read()
        local_offset, = struct.unpack_from(">L", blob, 0)
        index = blob[4:local_offset]

        def make_entry(ip, raw):
            # Split the record text and attach the three code lookups.
            fields = raw.split(b'\t')
            return (ip, (fields[0], self.lookup_country_code(fields[0]),
                         fields[1], self.lookup_china_province_code(fields[1]),
                         fields[2], self.lookup_china_city_code(fields[2]),
                         fields[3], fields[4]))

        # Octets are processed from 254 down to 0. Octet 255 is left out:
        # its end marker would be slot 256, which is past the 256 slots that
        # fit in the 1024-byte table.
        for ipdot0 in range(254, -1, -1):
            lis = []
            run_ip = None
            run_content = None
            first, = struct.unpack_from("<L", index, ipdot0 * 4)
            last, = struct.unpack_from("<L", index, (ipdot0 + 1) * 4)
            pos = first * 8 + 1024
            end = last * 8 + 1024
            while pos < end:
                # The offset is stored in 3 bytes; pad to 4 for "<L".
                text_offset, = struct.unpack("<L", index[pos + 4:pos + 7] + b'\x00')
                # Slice (not index) so a 1-byte bytes object is unpacked on
                # every Python version.
                text_length, = struct.unpack("B", index[pos + 7:pos + 8])
                text_start = local_offset + text_offset - 1024
                content = blob[text_start:text_start + text_length]
                # A change of text closes the previous run.
                if run_content is not None and run_content != content:
                    lis.append(make_entry(run_ip, run_content))
                run_content = content
                run_ip = inet_ntoa(index[pos:pos + 4])
                pos += 8
            # Flush the final run. Previously it was dropped, so the last
            # distinct record of every octet (and the only record of an
            # octet with a single run) never reached the tree.
            if run_content is not None:
                lis.append(make_entry(run_ip, run_content))
            self.ip_dict[ipdot0] = self.generate_tree(lis)
    except Exception as ex:
        # Report the path we were asked to load; the old code formatted the
        # unrelated built-in name `file` here.
        print("cannot open file %s: %s" % (file_name, ex))
        # NOTE(review): status 0 on failure is kept for compatibility.
        sys.exit(0)
def lookup_country(self, country_code):
    """Reverse lookup: find the country whose code equals *country_code*.

    Scans ``self.country_codes`` and returns ``(country, code)`` for the
    first match, or the pair of strings ``('None', 'None')`` when no entry
    has that code.
    """
    # No try/except: iterating items() and comparing values cannot raise
    # KeyError, so the old handler was unreachable.
    for country, code in self.country_codes.items():
        if code == country_code:
            return country, code
    return 'None', 'None'
def lookup_country_code(self, country):
    """Return the code stored for *country*, or the string 'None' if absent."""
    # dict.get expresses "missing key -> default" without a try/except.
    return self.country_codes.get(country, 'None')
def lookup_china_province(self, province_code):
    """Reverse lookup: find the province whose code equals *province_code*.

    Scans ``self.china_province_codes`` and returns ``(province, code)`` for
    the first match, or the pair of strings ``('None', 'None')`` when no
    entry has that code.
    """
    # No try/except: iterating items() and comparing values cannot raise
    # KeyError, so the old handler was unreachable.
    for province, code in self.china_province_codes.items():
        if code == province_code:
            return province, code
    return 'None', 'None'
def lookup_china_province_code(self, province):
    """Return the code stored for *province*, or the string 'None' if absent.

    The table is keyed by UTF-8 byte strings (it is loaded from a file
    opened in binary mode). Text input is therefore encoded to UTF-8 first,
    while a byte string is used as the key unchanged.
    """
    # Encoding a byte string is wrong on both major versions: Python 2
    # implicitly ASCII-decodes it first (failing on non-ASCII names) and
    # Python 3 bytes has no .encode at all. Only encode real text.
    if isinstance(province, bytes):
        key = province
    else:
        key = province.encode('utf-8')
    return self.china_province_codes.get(key, 'None')
def lookup_china_city(self, city_code):
    """Reverse lookup: find the city whose code equals *city_code*.

    Scans ``self.china_city_codes`` and returns ``(city, code)`` for the
    first match, or the pair of strings ``('None', 'None')`` when no entry
    has that code.
    """
    # No try/except: iterating items() and comparing values cannot raise
    # KeyError, so the old handler was unreachable.
    for city, code in self.china_city_codes.items():
        if code == city_code:
            return city, code
    return 'None', 'None'
def lookup_china_city_code(self, city):
    """Return the code stored for *city*, or the string 'None' if absent."""
    # dict.get expresses "missing key -> default" without a try/except.
    return self.china_city_codes.get(city, 'None')
def lookup(self, ip):
ipdot = ip.split(‘.‘)
ipdot0 = int(ipdot[0])
if ipdot0 < 0 or ipdot0 > 255 or len(ipdot) != 4:
return None
try:
d = self.ip_dict[int(ipdot[0])]
except KeyError:
return None
if d