Huffman编码是一种压缩编码,我们可以借助Huffman树来完成这个任务。
算法的内容可以参考网上的介绍,这里只是使用C++的方式实现这个算法,并且给出了一个简单的例子进行测试。
本文涉及到了STL库的最新知识,比如auto关键字,智能指针等等,总之,最新的STL11库让我更爱C++这么语言(C语言虽然简单,但是C语言还是停留在最初的设计的方法上,并没有提供其他的更方便的方式,比如相关的容器,不过还好我们有了C++,不过C++太大了,不太容易学)
代码如下:
#include <iostream>
#include <vector>
#include <algorithm>
#include <map>
#include <memory>
using namespace std;
struct s_symbol_info
{
char ch;
int w;
s_symbol_info(char c, int n) : ch(c), w(n) {}
};
struct huffman_node
{
typedef shared_ptr<huffman_node> sptr_huffman_node_in;
// pos
sptr_huffman_node_in lchild;
sptr_huffman_node_in rchild;
// value
char ch;
huffman_node() : ch('\0') { InitPos(); }
huffman_node(char c) : ch(c) { InitPos(); }
~huffman_node() {}
void InitPos() {
lchild.reset();
rchild.reset();
}
};
typedef shared_ptr<huffman_node> sptr_huffman_node;
typedef shared_ptr<huffman_node> huffman_tree;
struct heap_node
{
// weight
int weight;
// value
sptr_huffman_node pNode;
heap_node(int w, sptr_huffman_node p) : weight(w), pNode(p){}
};
typedef shared_ptr<heap_node> sptr_heap_node;
struct s_comp_sptr_heap_node
{
bool operator() (sptr_heap_node e1, sptr_heap_node e2) {
return e1->weight > e2->weight;
}
}CompSPtrHeapNode;
huffman_tree create_huffman_tree(const vector<s_symbol_info> & vecSymbolInfo)
{
huffman_tree tree;
vector<sptr_heap_node> vecHeapNode;
for (auto itr=vecSymbolInfo.begin(); itr!=vecSymbolInfo.end(); ++itr) {
vecHeapNode.push_back(make_shared<heap_node>(itr->w, make_shared<huffman_node>(itr->ch)));
}
make_heap(vecHeapNode.begin(), vecHeapNode.end(), CompSPtrHeapNode);
while (vecHeapNode.size() > 1) {
pop_heap(vecHeapNode.begin(), vecHeapNode.end(), CompSPtrHeapNode);
auto & eTmp1 = vecHeapNode.back();
auto e1 = eTmp1;
vecHeapNode.pop_back();
pop_heap(vecHeapNode.begin(), vecHeapNode.end(), CompSPtrHeapNode);
auto & eTmp2 = vecHeapNode.back();
auto e2 = eTmp2;
vecHeapNode.pop_back();
auto e = make_shared<heap_node>(e1->weight+e2->weight, make_shared<huffman_node>());
vecHeapNode.push_back(e);
push_heap(vecHeapNode.begin(), vecHeapNode.end(), CompSPtrHeapNode);
e->pNode->lchild = e1->pNode;
e->pNode->rchild = e2->pNode;
}
if (!vecHeapNode.empty()) {
tree = vecHeapNode[0]->pNode;
}
return tree;
}
void build_huffman_code(sptr_huffman_node pNode, string & strHuffCode, map<char, string> & mapHuffCode)
{
sptr_huffman_node lchild = pNode->lchild;
sptr_huffman_node rchild = pNode->rchild;
if (lchild == nullptr && rchild == nullptr) {
mapHuffCode[pNode->ch] = strHuffCode;
return ;
}
strHuffCode += "0";
build_huffman_code(lchild, strHuffCode, mapHuffCode);
strHuffCode.pop_back();
strHuffCode += "1";
build_huffman_code(rchild, strHuffCode, mapHuffCode);
strHuffCode.pop_back();
}
map<char, string> build_huffman_code(huffman_tree root)
{
map<char, string> mapHuffCode;
string strHuffCode;
build_huffman_code(root, strHuffCode, mapHuffCode);
return mapHuffCode;
}
string huffman_code(string str, const map<char, string> & mapHuffCode)
{
string rt;
for(int i=0; i<str.size(); ++i) {
rt += mapHuffCode.at(str[i]);
}
return rt;
}
string huffman_decode(string str, huffman_tree root)
{
string rt;
auto pNode = root;
for (int i=0; i<str.size(); ++i) {
if (str[i] == '0') {
pNode = pNode->lchild;
} else {
pNode = pNode->rchild;
}
if (pNode->ch != '\0') {
rt.push_back(pNode->ch);
pNode = root;
}
}
return rt;
}
int main()
{
vector<s_symbol_info> vecSymbolInfo = {
s_symbol_info( 'a' , 45 ),
s_symbol_info( 'b' , 13 ),
s_symbol_info( 'c' , 12 ),
s_symbol_info( 'd' , 16 ),
s_symbol_info( 'e' , 9 ),
s_symbol_info( 'f' , 5 ),
};
cout << "Input:";
for (auto itr=vecSymbolInfo.begin(); itr!=vecSymbolInfo.end(); ++itr) {
cout << itr->ch << ":" << itr->w << " ";
}
cout << endl;
auto huffTree = create_huffman_tree(vecSymbolInfo);
auto mapHuffCode = build_huffman_code(huffTree);
cout << "Output Huffman Code:" << endl;
for (auto itr=mapHuffCode.begin(); itr!=mapHuffCode.end(); ++itr) {
cout << "\t" << itr->first << ":" << itr->second << endl;
}
string strTest = "abaadcef";
cout << "Input Test String:" << strTest << endl;
string strHuffCode = huffman_code(strTest, mapHuffCode);
cout << "OutHuffmanCode:";
cout << strHuffCode << endl;
cout << "OutHuffmanDecode:" << huffman_decode(strHuffCode, huffTree) << endl;
return 0;
}
输出如下:
Input:a:45 b:13 c:12 d:16 e:9 f:5
Output Huffman Code:
a:0
b:101
c:100
d:111
e:1101
f:1100
Input Test String:abaadcef
OutHuffmanCode:01010011110011011100
OutHuffmanDecode:abaadcef