代码:
#include<iostream>
#include<fstream>
#define ASCLL_NUMBER 128
// 一个混杂了链表、树的结点结构定义
typedef char ElementType;
typedef int Frequency;
typedef char Code;
struct Element{
ElementType e;
Frequency f;
Element* next;
Element* left;
Element* right;
};
// 初始化结构数组
void initElements(Element** A, int n){
for(int i = 0; i < n; i++){
A[i] = new Element;
A[i]->e = 0;
A[i]->f = 0;
A[i]->next = NULL;
A[i]->left = NULL;
A[i]->right = NULL;
}
}
// 扫描并获取每种字符的频率, 以ASCLL码作为索引
void scanAndGetFrequency(char* s, Element** A, Frequency* fmax, long* count){
int i = 0;
for(char c = s[i]; c != '\0'; c = s[++i]){
(*count)++;
A[c]->e = c;
A[c]->f++;
if(*fmax < A[c]->f)
*fmax = A[c]->f;
}
}
// 找到一个大于传入数字1/2的质数
int Prime(int p){
p = p/2 + 1;
while(1){
int i = 0;
for(i = p/2; i >= 2; i--)
if(p%i == 0) break;
if(i == 1) return p;
p++;
}
}
// 向哈希表里插入元素, 相同索引依频率从小到大排序
void insert(Element** hash, int p, Element* ele){
if(ele->f == 0) return;
int n = ele->f % p;
while(hash[n]->next && hash[n]->next->f < ele->f)
hash[n] = hash[n]->next;
ele->next = hash[n]->next;
hash[n]->next = ele;
}
// 依频率排序, 为哈弗曼编码做准备
void sortWithFrequency(Element** A, int n, Element** hash, int p){
for(int i = 0; i < n; i++){
insert(hash, p, A[i]);
}
}
// 找到频率最小的元素
Element* minEle(Element** hash, int p, int* now, long count){
while(1){
Element* ele = hash[*now%p]->next;
if(!ele || ele->f > *now){
(*now)++;
if(*now > count) return NULL;
continue;
}else{
hash[*now%p]->next = ele->next;
return ele;
}
}
}
// 合并建树
void buildTree(Element** ele2, Element* tree){
tree->f = ele2[0]->f + ele2[1]->f;
tree->left = ele2[0];
tree->right = ele2[1];
}
// 哈弗曼编码
Element* HuffmanCode(Element** hash, int p, long count){
int now = 1;
while(1){
Element* ele2[2];
initElements(ele2, 2);
Element* tree = new Element;
tree->e = -1;
tree->f = 0;
tree->next = NULL;
tree->left = NULL;
tree->right = NULL;
for(int i = 0; i < 2; i++){
Element* ele = minEle(hash, p, &now, count);
if(ele)
ele2[i] = ele;
else
return ele2[0];
}
buildTree(ele2, tree);
insert(hash, p, tree);
}
}
// 显示编码结构
void showCode(Element* tree, char* code, int i){
if(!tree) return;
if(tree->left){
code[i] = '0';
showCode(tree->left, code, i+1);
code[i] = '1';
showCode(tree->right, code, i+1);
}else{
code[i] = '\0';
std::cout<<tree->e<<'\t'<<tree->f<<'\t'<<code<<'\n';
}
}
// 解码并显示
void decode(char* s, Element* tree){
if(!tree) return;
Element* t = tree;
int i = 0;
while(1){
char c = s[i];
if(t->left){
if(c == '0')
t = t->left;
if(c == '1')
t = t->right;
i++;
if((c = s[i]) == '\0'){
std::cout<<std::endl;
return;
}
}else{
std::cout<<t->e;
t = tree;
}
}
}
int main(){
// 读待编码文件, 将其存入到字符串s1中
std::ifstream q1;
int length;
q1.open("q1.txt");
q1.seekg(0, std::ios::end);
length = q1.tellg();
q1.seekg(0, std::ios::beg);
char s1[length];
q1.read(s1, length);
q1.close();
// 扫描s1, 获取每种字符的频率, 储存到数组A中
Element* A[ASCLL_NUMBER];
initElements(A, ASCLL_NUMBER);
int fmax = 0;
long count = 0;
scanAndGetFrequency(s1, A, &fmax, &count);
// 按频率排序
int p = Prime(fmax);
Element* hash[p];
initElements(hash, p);
sortWithFrequency(A, ASCLL_NUMBER, hash, p);
// 进行编码
Element* tree = HuffmanCode(hash, p, count);
// 输出编码结果
char code[ASCLL_NUMBER/2];
int i = 0;
showCode(tree, code, i);
// 读待解码文件, 将其存入到字符串s2中
std::ifstream q2;
q2.open("q2.txt");
q2.seekg(0, std::ios::end);
length = q2.tellg();
q2.seekg(0, std::ios::beg);
char s2[length];
q2.read(s2, length);
q2.close();
// 解码, 并输出结果
decode(s2, tree);
return 0;
}
测试文件:
原始文件,q1.txt
:
We could represent a priority queue as a sorted contiguous list, in which case removal of an entry is immediate, but insertion would take time proportional to n, the number of entries in the queue. Or we could represent it as an unsorted list, in which case insertion is rpid but removal is slow.
根据q1
编码结果,写出对and
的编码,q2.txt
:
111000011110
运行结果:
有任何问题请回复提出。然后欢迎关注微信公众号格物致愚: