写在最前,原理参考: https://www.cnblogs.com/meowww/p/6475952.html#3706558
首先列出了GCC 实现Lengauer - Tarjan 算法使用到的数据结构dom_info:
/* This class holds various arrays reflecting the (sub)structure of the
flowgraph. Most of them are of type TBB and are also indexed by TBB. */
class dom_info
{
public:
dom_info (function *, cdi_direction);
dom_info (vec <basic_block>, cdi_direction);
~dom_info ();
void calc_dfs_tree ();
void calc_idoms ();
inline basic_block get_idom (basic_block);
private:
void calc_dfs_tree_nonrec (basic_block);
void compress (TBB);
void dom_init (void);
TBB eval (TBB);
void link_roots (TBB, TBB);
// m_dfs_parent[v]是结点v在深度为主生成树中的父节点
TBB *m_dfs_parent;
// m_key[v]是结点v的半必经结点的深度为主编号
TBB *m_key;
// m_path_min[v]是v的祖先链中其半必经结点的深度为主编号最小的一个结点
TBB *m_path_min;
// m_bucket[v]是其半必经结点是m_dfs_order[v]的集合中的第一个结点
TBB *m_bucket;
// m_next_bucket是m_bucket的辅助结构,用数组代替链表保存集合,加快查询速度
TBB *m_next_bucket;
// m_dom[v]是结点v的半必经结点的深度为主编号
TBB *m_dom;
/* The following few fields implement the structures needed for disjoint
sets. */
// 如果v在树林中,m_set_chain[v] 是v的祖先的深度为主编号
// 如果v是树林中一棵树的根,m_set_chain[v] 为 0
TBB *m_set_chain;
// m_set_child 和 m_set_size 是用来使树林中的树保持平衡,并使算法时间达到时间下界的两个数据结构
unsigned int *m_set_size;
TBB *m_set_child;
// m_dfs_order[v]是结点v的深度为主编号
TBB *m_dfs_order;
TBB *m_dfs_last;
// m_dfs_to_bb[i]是深度为主编号为i的结点
basic_block *m_dfs_to_bb;
/* This is the next free DFS number when creating the DFS tree. */
unsigned int m_dfsnum;
/* The number of nodes in the DFS tree (==m_dfsnum-1). */
unsigned int m_nodes;
/* Blocks with bits set here have a fake edge to EXIT. These are used
to turn a DFS forest into a proper tree. */
bitmap m_fake_exit_edge;
/* Number of basic blocks in the function being compiled. */
size_t m_n_basic_blocks;
/* True, if we are computing postdominators (rather than dominators). */
bool m_reverse;
/* Start block (the entry block for forward problem, exit block for backward
problem). */
basic_block m_start_block;
/* Ending block. */
basic_block m_end_block;
};
Lengauer - Tarjan 算法流程:
① 计算CFG的深度为主生成树,获得每个结点的深度为主编号,详见 https://www.cnblogs.com/compiler-share/p/15100454.html
② 按深度为主编号由大到小的顺序,访问每一个节点v,对v的所有直接前驱k1做一次eval,求得sdom的最小值k。然后把节点v放到sdom的bucket里,连接v与par的边。
③ 现在par到v的这棵子树中已经处理完了,所以可以对par的bucket里的每个w求一次idom并且清空bucket。如果sdom(eval(w))=sdom(w),则idom(w)=sdom(w)=parent(v);否则可以记下idom(w)=idom(eval(w)),实现时我们可以写成idom(w)=eval(w),留到第4步处理。
④ 按深度为主编号由小到大的顺序,访问每一个v,如果idom(v)=sdom(v),已经是第3步求出的正确的idom了;否则就证明这是第3步留下的待处理点,令idom(v)=idom(idom(v))即可
/* This calculates the immediate dominators (or post-dominators). THIS is our
working structure and should hold the DFS forest.
On return the immediate dominator to node V is in m_dom[V]. */
void
dom_info::calc_idoms ()
{
/* Go backwards in DFS order, to first look at the leafs. */
// Step2
// 按深度为主编号由大到小的顺序,访问每一个节点v,对v的所有直接前驱k1做一次eval,求得sdom的最小值k。
// 然后把节点v放到k的bucket里,连接v与par的边。
for (TBB v = m_nodes; v > 1; v--)
{
basic_block bb = m_dfs_to_bb[v];
edge e;
TBB par = m_dfs_parent[v]; // 深度为主生成树中的父节点
TBB k = v;
edge_iterator ei = m_reverse ? ei_start (bb->succs)
: ei_start (bb->preds);
edge_iterator einext;
if (m_fake_exit_edge)
{
/* If this block has a fake edge to exit, process that first. */
if (bitmap_bit_p (m_fake_exit_edge, bb->index))
{
einext = ei;
einext.index = 0;
goto do_fake_exit_edge;
}
}
/* Search all direct predecessors for the smallest node with a path
to them. That way we have the smallest node with also a path to
us only over nodes behind us. In effect we search for our
semidominator. */
while (!ei_end_p (ei)) // 遍历bb在控制流图中的所有前驱
{
basic_block b;
TBB k1;
e = ei_edge (ei);
b = m_reverse ? e->dest : e->src;
einext = ei;
ei_next (&einext);
if (b == m_start_block)
{
do_fake_exit_edge:
k1 = *m_dfs_last;
}
else
k1 = m_dfs_order[b->index];
/* Call eval() only if really needed. If k1 is above V in DFS tree,
then we know, that eval(k1) == k1 and key[k1] == k1. */
if (k1 > v) // 如果e是cross edge或者back edge
k1 = m_key[eval (k1)]; // 找出已访问节点中,k1的祖先链中半必经结点的深度为主编号最小的那个节点
if (k1 < k) // sdom 取深度为主编号最小的那个
k = k1;
ei = einext;
}
m_key[v] = k;
link_roots (par, v); // 链接v和par结点
m_next_bucket[v] = m_bucket[k]; // 结点v仅被处理一次,m_next_bucket[v]具有唯一性,故无需clean
m_bucket[k] = v;
/* Transform semidominators into dominators. */
// Step3
// 现在par到v的这棵子树中已经处理完了,所以可以对par的bucket里的每个w求一次idom并且清空bucket。
// 如果sdom(eval(w))=sdom(w),则idom(w)=sdom(w)=parent(v)
// 否则可以记下idom(w)=idom(eval(w)),实现时我们可以写成idom(w)=eval(w),留到第4步处理。
for (TBB w = m_bucket[par]; w; w = m_next_bucket[w])
{
k = eval (w);
if (m_key[k] < m_key[w])
m_dom[w] = k;
else
m_dom[w] = par;
}
/* We don‘t need to cleanup next_bucket[]. */
m_bucket[par] = 0;
}
/* Explicitly define the dominators. */
// Step4
// 按深度为主编号由小到大的顺序,访问每一个v
// 如果idom(v)=sdom(v)的话,就已经是第3步求出的正确的idom了
// 否则就证明这是第3步留下的待处理点,令idom(v)=idom(idom(v))
m_dom[1] = 0;
for (TBB v = 2; v <= m_nodes; v++)
if (m_dom[v] != m_key[v])
m_dom[v] = m_dom[m_dom[v]];
}
link_roots 和 eval 管理一个辅助的数据结构,即由深度为主生成树构成的树林,用于追踪已处理过的结点。eval 使用compress对从数组m_set_chain导出的结点到深度为主生成树的树根的路径执行路径压缩。这里有4个数据结构:
① 如果v在树林中,m_set_chain[v] 是v的祖先的深度为主编号;如果v是树林中一棵树的根,m_set_chain[v] 为 0
② m_path_min[v] 是v的祖先链中其半必经结点的深度为主编号最小的一个结点
③ m_set_child 和 m_set_size 是用来使树林中的树保持平衡,并使算法时间达到时间下界的两个数据结构
link_roots 实现:
核心逻辑只有一行代码 m_set_chain[s] = v,设置v为s的祖先节点,把s和父节点v连接起来。其他算法是为了降低算法复杂度,这里不做讨论。
/* This essentially merges the two sets of V and W, giving a single set with
the new root V. The internal representation of these disjoint sets is a
balanced tree. Currently link(V,W) is only used with V being the parent
of W. */
void
dom_info::link_roots (TBB v, TBB w)
{
TBB s = w;
/* Rebalance the tree. */
while (m_key[m_path_min[w]] < m_key[m_path_min[m_set_child[s]]])
{
if (m_set_size[s] + m_set_size[m_set_child[m_set_child[s]]]
>= 2 * m_set_size[m_set_child[s]])
{
m_set_chain[m_set_child[s]] = s;
m_set_child[s] = m_set_child[m_set_child[s]];
}
else
{
m_set_size[m_set_child[s]] = m_set_size[s];
s = m_set_chain[s] = m_set_child[s];
}
}
m_path_min[s] = m_path_min[w];
m_set_size[v] += m_set_size[w];
if (m_set_size[v] < 2 * m_set_size[w])
std::swap (m_set_child[v], s);
/* Merge all subtrees. */
while (s)
{
// 核心代码
m_set_chain[s] = v;
s = m_set_child[s];
}
}
eval 和 compress 的实现:
递归查询已处理过的结点中v的祖先结点,把m_set_chain[v]设置为root结点,把m_path_min[v]设置为祖先链中半必经结点的深度为主编号最小的那个结点。
/* Compress the path from V to the root of its set and update path_min at the
same time. After compress(di, V) set_chain[V] is the root of the set V is
in and path_min[V] is the node with the smallest key[] value on the path
from V to that root. */
void
dom_info::compress (TBB v)
{
/* Btw. It‘s not worth to unrecurse compress() as the depth is usually not
greater than 5 even for huge graphs (I‘ve not seen call depth > 4).
Also performance wise compress() ranges _far_ behind eval(). */
TBB parent = m_set_chain[v];
if (m_set_chain[parent])
{
compress (parent);
if (m_key[m_path_min[parent]] < m_key[m_path_min[v]])
m_path_min[v] = m_path_min[parent];
m_set_chain[v] = m_set_chain[parent];
}
}
/* Compress the path from V to the set root of V if needed (when the root has
changed since the last call). Returns the node with the smallest key[]
value on the path from V to the root. */
inline TBB
dom_info::eval (TBB v)
{
/* The representative of the set V is in, also called root (as the set
representation is a tree). */
TBB rep = m_set_chain[v];
/* V itself is the root. */
if (!rep)
return m_path_min[v];
/* Compress only if necessary. */
if (m_set_chain[rep])
{
compress (v);
rep = m_set_chain[v];
}
if (m_key[m_path_min[rep]] >= m_key[m_path_min[v]])
return m_path_min[v];
else
return m_path_min[rep];
}