1.Pycparser介绍
项目链接:https://github.com/eliben/pycparser
Pycparser是C语言的解析器,支持完整的C99标准,用纯Python编写。
非常方便对C语言源码的解析和处理,如生成AST、提取源码中函数调用关系等。
Pycparser非常容易上手,需重点阅读`examples`目录和`c_ast.py`文件。
2.源码解读
- `c_ast.py`文件解读
`_c_ast.cfg`和`c_ast.py`提供了C99语法对应的AST节点描述及其实现,如`_c_ast.cfg`对IF语句的描述:
If: [cond*, iftrue*, iffalse*]
表示IF节点由三个子节点构成,分别是condition、iftrue、iffalse,等同于BNF范式的描述
在`c_ast.py`中对IF节点的定义:
class If(Node):
    """AST node for a C ``if`` statement.

    Excerpt quoted from pycparser's ``c_ast.py``; only the indentation has
    been restored and explanatory comments added.
    """

    # Three child slots plus ``coord``, the position of the node in the source.
    __slots__ = ('cond', 'iftrue', 'iffalse', 'coord', '__weakref__')

    def __init__(self, cond, iftrue, iffalse, coord=None):
        self.cond = cond
        self.iftrue = iftrue
        self.iffalse = iffalse
        self.coord = coord

    def children(self):
        """Return the non-None children as a tuple of ``(tag, node)`` pairs."""
        nodelist = []
        if self.cond is not None: nodelist.append(("cond", self.cond))
        if self.iftrue is not None: nodelist.append(("iftrue", self.iftrue))
        if self.iffalse is not None: nodelist.append(("iffalse", self.iffalse))
        return tuple(nodelist)

    def __iter__(self):
        """Yield the non-None child nodes themselves, without their tags."""
        if self.cond is not None:
            yield self.cond
        if self.iftrue is not None:
            yield self.iftrue
        if self.iffalse is not None:
            yield self.iffalse

    # This node type carries no plain (non-node) attributes.
    attr_names = ()
发现`__init__`方法除了三个子节点,还多了一个`coord`参数,该属性用来表示节点在源码中的位置信息,比如代码行号等。
观察`children`方法,用`nodelist`管理子节点,每一个子节点用tuple表示,如条件语句用`("cond", self.cond)`表示,`self.cond`才是真实的条件节点,`"cond"`是该节点的TAG。
参考examples文件,可实现在C语言源码中提取所有If节点的条件节点:
def find_If(node, if_list):
    """Collect the condition node of every ``if`` statement under *node*.

    Args:
        node: A pycparser AST node to search (the node itself is included).
            ``None`` is accepted and ignored.
        if_list: List that receives the ``cond`` child of each ``c_ast.If``
            encountered, in depth-first pre-order. It is mutated in place.
    """
    if node is None:
        return
    if isinstance(node, c_ast.If):
        if_list.append(node.cond)
    # children() yields (tag, child) pairs; only the child node is needed.
    # Recursing uniformly into every child covers nested and "else if"
    # statements without a separate special case for If children.
    for _tag, child in node.children():
        find_If(child, if_list)
# C source file to analyse.
filename = "notes.c"
# Receives the condition node of every `if` statement found by find_If.
ifcondList = []
# use_cpp=True: per the pycparser documentation, the file is run through the
# C preprocessor before being parsed.
ast = parse_file(filename, use_cpp=True)
# Walk the whole AST; find_If fills ifcondList in place.
find_If(ast,ifcondList)
提取If节点后,就可以做很多事情了,如输出所有条件语句的代码,实现如下:
from pycparser import c_parser, c_ast, parse_file, c_generator
generator = c_generator.CGenerator()
for cond_node in ifcondList:
    # find_If stores the condition nodes themselves (node.cond), not the
    # (tag, node) pairs returned by children(), so the entry is used directly;
    # indexing it with [1] would raise TypeError.
    cond_code = generator.visit(cond_node)  # render the condition back to C source
    print(cond_code)
对条件节点的处理,还可继续分析,如提取条件中的常量、操作符等
3.实现cflow工具中的函数调用关系功能
Pycparser应用了访问者(Visitor)设计模式来遍历并解析目标节点,具体使用可参考项目`examples`目录下的`func_calls.py`和`func_defs.py`文件。
from __future__ import print_function
import sys
import re
import json
sys.path.extend(['.', '..'])
from pycparser import c_parser, c_ast, parse_file, c_generator
def extract_funcDef(node, defList):
    """Append every function definition found below *node* to *defList*.

    Args:
        node: A pycparser AST node whose descendants are searched. ``None``
            is accepted and ignored. The node itself is not tested, only
            its children and their descendants.
        defList: List that receives the ``c_ast.FuncDef`` nodes, mutated in
            place. The search does not descend into a FuncDef once found.
    """
    if node is None:
        return
    # children() yields (tag, child) pairs; the tag is not needed here.
    for _tag, child in node.children():
        if isinstance(child, c_ast.FuncDef):
            defList.append(child)
        else:
            extract_funcDef(child, defList)
def extract_funcCall(node, funcList):
    """Append every function call found below *node* to *funcList*.

    Args:
        node: A pycparser AST node, or (for backward compatibility) a
            sequence whose first element is such a node. ``None`` is
            accepted and ignored.
        funcList: List that receives the ``c_ast.FuncCall`` nodes in
            depth-first pre-order, mutated in place.
    """
    if node is None:
        return
    if not isinstance(node, c_ast.Node):
        # Legacy calling convention: a (node, ...) sequence.
        node = node[0]
        if node is None:
            return
    for _tag, child in node.children():
        if isinstance(child, c_ast.FuncCall):
            funcList.append(child)
        # Keep descending even below a FuncCall so that calls nested in the
        # arguments of another call, e.g. f(g(x)), are recorded as well.
        extract_funcCall(child, funcList)
class FuncDefVisitor(c_ast.NodeVisitor):
    """Visitor that gathers the calls made inside one named function.

    Args:
        funcname: Name of the function definition to inspect.
        funcList: List that receives the ``c_ast.FuncCall`` nodes found in
            that function's definition; mutated in place.
    """

    def __init__(self, funcname, funcList):
        self.funcname = funcname
        self.funcList = funcList

    def visit_FuncDef(self, node):
        """Collect the calls of *node* when it defines ``self.funcname``."""
        if node.decl.name == self.funcname:
            extract_funcCall(node, self.funcList)
def show_deflist(defList, verbose=False):
    """Optionally print the name and location of each function definition.

    Args:
        defList: Iterable of function-definition nodes; each must expose
            ``decl.name`` and ``decl.coord``.
        verbose: When true, print one ``name coord`` line per definition.
            Defaults to False, which keeps the function silent as before.
    """
    if not verbose:
        return
    for defFunc in defList:
        print(defFunc.decl.name, defFunc.decl.coord)
def show_func_defs(ast, funcname, the_dict, invoke_dict):
    """Record the call relationships of the function named *funcname*.

    Despite its name this function prints nothing; it only fills the two
    dictionaries passed in.

    Args:
        ast: Parsed pycparser AST of the translation unit.
        funcname: Name of the function whose body is inspected.
        the_dict: Reverse map, callee name -> list of caller names. The
            caller is appended once per call site. Mutated in place.
        invoke_dict: Forward map, caller name -> list of callee names (one
            entry per call site). Mutated in place.
    """
    funcList = []
    FuncDefVisitor(funcname, funcList).visit(ast)

    # Only calls through a plain identifier have a usable name. Calls made
    # through a function pointer or struct member have a non-ID callee
    # expression and are skipped rather than crashing on ``.name``.
    callees = [
        call.name.name for call in funcList if isinstance(call.name, c_ast.ID)
    ]
    invoke_dict[funcname] = callees
    for callee in callees:
        the_dict.setdefault(callee, []).append(funcname)
if __name__ == '__main__':
    # An optional first command-line argument overrides the sample input.
    filename = sys.argv[1] if len(sys.argv) > 1 else "./codes/notes.c"
    defList = []
    the_dict = {}     # callee name -> names of the functions calling it
    invoke_dict = {}  # caller name -> names of the functions it calls
    ast = parse_file(filename, use_cpp=True)
    extract_funcDef(ast, defList)
    show_deflist(defList)
    nameList = [item.decl.name for item in defList]
    for name in nameList:
        show_func_defs(ast, name, the_dict, invoke_dict)
    print('====Ref_dict====')
    for k, v in the_dict.items():
        print('{}:{}'.format(k, v))
    print('====Invoke_dict====')
    for k, v in invoke_dict.items():
        print('{}:{}'.format(k, v))
得到的输出结果:
下一篇有空再介绍如何用Invoke_dict生成调用图,需要用到graphviz,此部分已经实现,可看效果图