哈夫曼编码还能用哪种算法实现(哈夫曼编码压缩算法学习)
哈夫曼编码还能用哪种算法实现(哈夫曼编码压缩算法学习)6、当根节点唯一之后,开始进行编码,左分支记0,右分支记1,得到两个字典表格新节点的权重为两个字母的词频和,则新的是:{ node3:5 }4、继续取最小权重的两个来构造二叉树,小的放左边,大的放右边,那么我们这轮就应该取 C 和 node1新节点的权重为两个字母的词频和,则新的是:{ A:5 node2:5 }5、继续该过程:
1、比如说有一个字符串 ABCCAAACAD
2、先统计词频: { A:5 B:1 C:3 D:1}
3、取出最小词频 B D 作为二叉树的两个叶子节点,构造一个二叉树,然后塞回去
新节点的权重为两个字母的词频和,则新的是:{ A:5 C:3 node1:2 }
4、继续取最小权重的两个来构造二叉树,小的放左边,大的放右边,那么我们这轮就应该取 C 和 node1
新节点的权重为两个字母的词频和,则新的是:{ A:5 node2:5 }
5、继续该过程:
新节点的权重为两个字母的词频和,则新的是:{ node3:5 }
6、当根节点唯一之后,开始进行编码,左分支记0,右分支记1,得到两个字典表格
1:压缩表格 {B: "000" D: "001" C: "01" A: "1"}
2:解压表格 {1: "A" 000: "B" 001: "D" 01: "C"}
因为D和B词频一样,所以谁左谁右,并不一定,所以可能有的人得到的是 B=000 D=001 有的人得到的是 D=000 B=001
7、根据上边的表格1进行压缩:
ABCCAAACAD = 10000101111011001(b) 17bit位,也就是三个字节(3*8=24)
压缩前字节长度为:10,压缩后字节长度为 3 个字节:[1 11 217]
8、解密过程就简单了,直接对照表格2进行解密即可
1(A) 000(B) 01(C) 01(C) 1(A) 1(A) 1(A) 01(C) 1(A) 001(D)
下面 送上代码,保存成 html ,右键 chrome 打开,按下F12 可以看到效果。
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width initial-scale=1.0">
<title>哈夫曼编码学习</title>
</head>
<body>
请输入一个字符串:<br>
<textarea
style="width: 400px;height: 300px;">我塔夺苯基本三梅花三弄基本三仴仴人孺阿附顶替要琴个全仴dwefwefsgvzsdgrsgerhrehj23413241fsfeg</textarea><br>
<button onclick="doZip()">编码</button><br>
<script>
Array.prototype.first = function () {
if (this.length > 0) return this[0];
}
Array.prototype.take = function (len) {
var ret = [];
for (var c = 0; c < len; c ) ret.push(this[c]);
return ret;
}
Array.prototype.skip = function (len) {
var ret = [];
for (var c = len; c < this.length; c ) ret.push(this[c]);
return ret;
}
function doZip() {
let text = document.getElementsByTagName("textarea")[0].value;
console.log(0 text);
//字频统计
function GetWordRate() {
let wordRate = {};
for (let ch of text) {
if (!wordRate[ch]) wordRate[ch] = 1;
else wordRate[ch] ;
}
return wordRate;
}
//构造哈夫曼树
function MakeTree(wordRate) {
//var node = { ch:? left:? right:? weight:? }; 节点数据格式:字符,左节点,右节点,权重
let tree = [];
for (var k in wordRate) tree.push({ ch: k weight: wordRate[k] left: null right: null });
return _makeTree(tree);
function _makeTree(tree) {
if (!tree) return tree;
if (tree.length <= 1) return tree.first();
let _tree = [];
let _sortArr = tree.sort((a b) => a.weight - b.weight); //从小到大排序
let min1 = _sortArr.first(); //最小
let min2 = _sortArr.skip(1).first(); //第二小
let other = _sortArr.skip(2).take(_sortArr.length - 2);
let root = { ch: '' left: min1 right: min2 weight: (min1.weight min2.weight) };
_tree.push(root);
for (let c = 0; c < other.length; c ) _tree.push(other[c]);
if (_tree.length == 1) return _tree.first();
else return _makeTree(_tree);
}
}
/**
* code 就是上一级的编码
* */
function MakeTable(tree code) {
let retCompress = {};
let retUnCompress = {};
if (tree.ch != '') {
retCompress[tree.ch] = code;
retUnCompress[code] = tree.ch;
return { tbCompress:retCompress tbUnCompress:retUnCompress };
}
if (tree.left) {
let tb = MakeTable(tree.left code '0');
for(let k in tb.tbCompress) retCompress[k] = tb.tbCompress[k];
for(let k in tb.tbUnCompress) retUnCompress[k] = tb.tbUnCompress[k];
}
if (tree.right) {
let tb = MakeTable(tree.right code '1');
for(let k in tb.tbCompress) retCompress[k] = tb.tbCompress[k];
for(let k in tb.tbUnCompress) retUnCompress[k] = tb.tbUnCompress[k];
}
return { tbCompress:retCompress tbUnCompress:retUnCompress };
}
function main() {
let wordRate = GetWordRate(); //取字频
console.log(1 wordRate);
let tree = MakeTree(wordRate); //生成哈夫曼树
console.log(2 tree);
let { tbCompress tbUnCompress } = MakeTable(tree ""); //生成哈夫曼编码表
console.log(3 tbCompress tbUnCompress);
var oldStr = [];
for(var c=0;c<text.length;c ) oldStr.push(text.charCodeAt(c).toString(2));
var newStr = [];
var compressData = [];
for(var c=0;c<text.length;c ) {
newStr.push(tbCompress[text[c]]);
compressData.push(text[c]);
}
newStr = [...newStr.join("")];
console.log(4 newStr.join(""));
//把二进制字符串转换成真正的二进制
//8位对齐,整个字符串,前面补0
newStr = newStr.join("");
var n = 8 - newStr.length % 8;
if (n==1) newStr = "1" newStr;
else newStr = "".padStart(n-1 "0") "1" newStr;
console.log(4 newStr.length n newStr);
var data = [];
for(var c=0;c<newStr.length;c =8) {
var binStr = newStr.substring(c c 8);
data.push(parseInt(binStr 2));
}
console.log(5 data);
//把二进制恢复成二进制字符串
newStr = []
for(var d of data) {
var binStr = d.toString(2).padStart(8 '0');
newStr.push(binStr);
}
newStr = newStr.join("");
newStr = newStr.replace(/^0*1/g ''); //去掉最前边的0000 1
console.log(6 newStr);
var text2 = [];
var word = [];
for(var c=0;c<newStr.length;c ) {
word.push(newStr[c]);
if (word.join("") in tbUnCompress) {
text2.push(tbUnCompress[word.join("")]);
word = [];
}
}
console.log(5 text2.join(""));
}
main();
}
</script>
</body>
</html>
效果如下: