C# 霍夫曼二叉树压缩算法实现

知道有的人比较懒,直接贴全部代码.
一开始一次性Code完了压缩部分代码.只调试了2,3次就成功了.
一次性写150行代码,没遇到什么bug的感觉还是蛮爽的.
写解压代码,才发现压缩代码有些细节问题.
对最后一个字符处理问题.
遇到的比较折腾的一点:构建二叉树时,把原本应该是叶结点(有值)的节点放在了左节点,而正确的做法是放在右节点,结果生成的编码序列不满足前缀码的要求(任意编码都不是其他编码的前缀),导致解码失败.
使用方法:

var srcData = Encoding.UTF8.GetBytes(textBox1.Text);
            var cpsData = Compress(srcData);
            treeView1.ExpandAll();
           var depData = DeCompress(cpsData);
            var depStr = Encoding.UTF8.GetString(depData );

其中的treeView1是用来显示二叉树的TreeView控件,使用时需要先在窗体上添加该控件,或者删除相关代码.

快速理解:
1.此压缩直接对字节流进行压缩.
2.压缩原理:字节流中各个字节值的出现频率并不均匀,所以用变长编码对256种字节值重新编码,以较短的编码表示出现频率高的字节,以较长的编码表示出现频率低的字节.
所以总体来看,用新的编码表示的字节流要比原来的短.(除非字节流特别小,压缩效果就不好)
3.由于二叉树的性质,将使用率低的先加入树,使用率高的后加入作为使用率低的节点的父节点的兄弟节点(因为有值的节点必须是叶结点).从最底下向上构建
二叉树.
 using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;

namespace 霍夫曼二叉树压缩
{
    /// <summary>
    /// Demo form that Huffman-compresses the UTF-8 bytes of <c>textBox1</c>, shows the
    /// resulting code tree in <c>treeView1</c>, and decompresses the result again.
    /// </summary>
    /// <remarks>
    /// The code table and the padding length of the last byte are kept in instance
    /// state and are NOT written into the compressed stream. <see cref="DeCompress"/>
    /// therefore only works on the output of the most recent <see cref="Compress"/>
    /// call made on the same form instance.
    /// </remarks>
    public partial class Form1 : Form
    {
        private const int BitsPerByte = 8;

        /// <summary>Key used by tree nodes that carry no byte value (internal nodes).</summary>
        private const int InternalNodeKey = -1;

        /// <summary>Byte value -> bit string ("0"/"1" characters) of the current code table.</summary>
        private Dictionary<int, string> dicCode = new Dictionary<int, string>();

        /// <summary>Number of '0' bits appended to fill the last compressed byte (0..7).</summary>
        private int lastBytePadding;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Compresses the text box content, displays the tree and runs a
        /// decompression round trip.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            var srcData = Encoding.UTF8.GetBytes(textBox1.Text);
            var cpsData = Compress(srcData);
            treeView1.ExpandAll();
            var depData = DeCompress(cpsData);
            // The round-trip result is not displayed anywhere; inspect it in the debugger.
            var depStr = Encoding.UTF8.GetString(depData);
        }

        /// <summary>
        /// Huffman-encodes <paramref name="data"/> and rebuilds the code table,
        /// the static leaf list and the tree view as a side effect.
        /// </summary>
        /// <param name="data">Bytes to compress; null or empty yields an empty array.</param>
        /// <returns>The packed bit stream, zero-padded to a whole number of bytes.</returns>
        private byte[] Compress(byte[] data)
        {
            // Start from a clean slate so repeated calls do not collide with the
            // code table, leaf list or tree view left over from a previous run.
            dicCode.Clear();
            BNode.nodes.Clear();
            treeView1.Nodes.Clear();
            lastBytePadding = 0;

            if (data == null || data.Length == 0)
            {
                return new byte[0];
            }

            // Count how often every byte value occurs.
            var everyCount = new Dictionary<byte, int>();
            foreach (var d in data)
            {
                int seen;
                everyCount.TryGetValue(d, out seen);
                everyCount[d] = seen + 1;
            }

            // Rarest values first: BuildTree always merges the two front entries.
            var queCouts = new Queue<Count>();
            foreach (var pair in everyCount.OrderBy(a => a.Value))
            {
                queCouts.Enqueue(new Count { key = pair.Key, count = pair.Value });
            }

            BNode root = BuildTree(ref queCouts);

            foreach (var leaf in BNode.nodes)
            {
                // GetCode(BNode) walks leaf -> root, so reverse to get root -> leaf order.
                var code = new string(GetCode(leaf).Reverse().ToArray());
                dicCode.Add(leaf.key, code);
            }

            CreateTreeView(root, treeView1.Nodes);

            var outData = new List<byte>();
            var pending = new StringBuilder();
            foreach (var d in data)
            {
                pending.Append(dicCode[d]);

                // A single code may be longer than 8 bits, so drain every complete byte.
                while (pending.Length >= BitsPerByte)
                {
                    outData.Add(GetByteByCode(pending.ToString(0, BitsPerByte)));
                    pending.Remove(0, BitsPerByte);
                }
            }

            if (pending.Length > 0)
            {
                // Remember the padding length: trailing zeros cannot be told apart
                // from real zero bits once they are in the stream.
                lastBytePadding = BitsPerByte - pending.Length;
                pending.Append('0', lastBytePadding);
                outData.Add(GetByteByCode(pending.ToString()));
            }

            return outData.ToArray();
        }

        /// <summary>
        /// Decodes a stream produced by the latest <see cref="Compress"/> call on this instance.
        /// </summary>
        /// <param name="data">Compressed bytes; null or empty yields an empty array.</param>
        /// <returns>The original bytes.</returns>
        /// <exception cref="InvalidDataException">The bit stream contains an unknown code.</exception>
        private byte[] DeCompress(byte[] data)
        {
            if (data == null || data.Length == 0)
            {
                return new byte[0];
            }

            var codes = new StringBuilder(data.Length * BitsPerByte);
            foreach (var b in data)
            {
                codes.Append(GetCode(b));
            }

            // Drop exactly the padding bits added by Compress.
            if (lastBytePadding > 0 && lastBytePadding < BitsPerByte)
            {
                codes.Length -= lastBytePadding;
            }

            return GetCode(codes.ToString());
        }

        /// <summary>Parses a string of up to eight '0'/'1' characters as a byte.</summary>
        private byte GetByteByCode(string curCode)
        {
            return Convert.ToByte(curCode, 2);
        }

        /// <summary>
        /// Decodes a bit string into bytes using the current code table.
        /// </summary>
        /// <param name="code">Concatenated codes as '0'/'1' characters, without padding.</param>
        /// <returns>The decoded bytes; empty for a null or empty bit string.</returns>
        /// <exception cref="InvalidDataException">No code of the table matches at some position.</exception>
        private byte[] GetCode(string code)
        {
            var datas = new List<byte>();
            if (string.IsNullOrEmpty(code))
            {
                return datas.ToArray();
            }

            // Reverse lookup: bit string -> byte value.
            var symbolByCode = new Dictionary<string, int>();
            int longest = 0;
            foreach (var entry in dicCode)
            {
                symbolByCode[entry.Value] = entry.Key;
                if (entry.Value.Length > longest)
                {
                    longest = entry.Value.Length;
                }
            }

            int start = 0;
            while (start < code.Length)
            {
                bool matched = false;
                int limit = Math.Min(longest, code.Length - start);

                // Codes are prefix-free, so the first candidate that matches is the only one.
                for (int len = 1; len <= limit; len++)
                {
                    int symbol;
                    if (symbolByCode.TryGetValue(code.Substring(start, len), out symbol))
                    {
                        datas.Add((byte)symbol);
                        start += len;
                        matched = true;
                        break;
                    }
                }

                if (!matched)
                {
                    throw new InvalidDataException("解压出错:发现未能识别的编码,编码表或数据已被破坏!");
                }
            }

            return datas.ToArray();
        }

        /// <summary>Formats a byte as exactly eight '0'/'1' characters.</summary>
        private string GetCode(byte b)
        {
            return Convert.ToString(b, 2).PadLeft(BitsPerByte, '0');
        }

        /// <summary>
        /// Collects the branch bits from <paramref name="a"/> up to the root
        /// ('0' for a left child, '1' for a right child), i.e. in leaf-to-root order.
        /// </summary>
        private string GetCode(BNode a)
        {
            var bits = new StringBuilder();
            for (var node = a; node != null && node.parent != null; node = node.parent)
            {
                bits.Append(node.isLeft ? '0' : '1');
            }
            return bits.ToString();
        }

        /// <summary>
        /// Builds the Huffman tree from a queue sorted by ascending count and
        /// registers every leaf in <see cref="BNode.nodes"/>.
        /// </summary>
        /// <param name="queCouts">Pending entries, rarest first; consumed by this call.</param>
        /// <returns>The root node, or null when the queue is null or empty.</returns>
        private BNode BuildTree(ref Queue<Count> queCouts)
        {
            if (queCouts == null || queCouts.Count == 0)
            {
                return null;
            }

            if (queCouts.Count == 1)
            {
                var only = queCouts.Dequeue();
                if (only.node != null)
                {
                    return only.node;
                }

                // A single distinct byte value still needs a one-bit code, so hang
                // the leaf under an artificial root as its left child (code "0").
                var leaf = new BNode { key = only.key, count = only.count, isLeft = true };
                var soleRoot = new BNode { key = InternalNodeKey, count = only.count, left = leaf };
                leaf.parent = soleRoot;
                BNode.nodes.Add(leaf);
                return soleRoot;
            }

            while (true)
            {
                var first = queCouts.Dequeue();
                var second = queCouts.Dequeue();

                var lft = first.node ?? new BNode { key = first.key, count = first.count };
                var rgt = second.node ?? new BNode { key = second.key, count = second.count };

                // Keep an already merged subtree on the left and the leaf on the right.
                if (rgt.key == InternalNodeKey)
                {
                    var temp = lft;
                    lft = rgt;
                    rgt = temp;
                }

                var pnode = new BNode
                {
                    key = InternalNodeKey,
                    count = first.count + second.count,
                    left = lft,
                    right = rgt
                };
                lft.isLeft = true;
                rgt.isLeft = false;
                lft.parent = pnode;
                rgt.parent = pnode;

                if (lft.key != InternalNodeKey)
                {
                    BNode.nodes.Add(lft);
                }
                if (rgt.key != InternalNodeKey)
                {
                    BNode.nodes.Add(rgt);
                }

                if (queCouts.Count == 0)
                {
                    return pnode;
                }

                // Put the merged node back and restore ascending order. OrderBy is
                // stable, so on equal counts the new node goes behind older entries.
                queCouts.Enqueue(new Count { count = pnode.count, key = pnode.key, node = pnode });
                var orderQue = queCouts.OrderBy(q => q.count).ToList();
                queCouts.Clear();
                foreach (var a in orderQue)
                {
                    queCouts.Enqueue(a);
                }
            }
        }

        /// <summary>
        /// Mirrors the subtree rooted at <paramref name="node"/> into <paramref name="tnc"/>.
        /// Each label is the branch bit, followed by "-key:count" for leaves.
        /// </summary>
        private void CreateTreeView(BNode node, TreeNodeCollection tnc)
        {
            if (node == null)
            {
                return;
            }

            var label = node.isLeft ? "0" : "1";
            if (node.key != InternalNodeKey)
            {
                label += "-" + node.key + ":" + node.count;
            }

            var newNode = tnc.Add(label);
            CreateTreeView(node.left, newNode.Nodes);
            CreateTreeView(node.right, newNode.Nodes);
        }

        /// <summary>Queue entry: either a raw byte value with its count or an already built subtree.</summary>
        class Count
        {
            public int key;
            public int count;
            public BNode node;
        }

        /// <summary>Node of the Huffman tree; key is -1 for internal nodes.</summary>
        class BNode
        {
            public int key;
            public int count;
            public BNode left;
            public BNode right;
            public BNode parent;
            public bool isLeft = false;

            /// <summary>Leaves of the tree built last; cleared at the start of every Compress call.</summary>
            public static List<BNode> nodes = new List<BNode>();
        }
    }
}
上一篇:数据结构-二叉树(6)哈夫曼树(Huffman树)/最优二叉树


下一篇:Web部分