node-red-nodes/parsers/smaz/smaz.js

125 lines
6.3 KiB
JavaScript

// Copyright (c) 2006-2009, Salvatore Sanfilippo
// All rights reserved.
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
// Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
// Neither the name of Smaz nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Our compression codebook
var rc = [
" ", "the", "e", "t", "a", "of", "o", "and", "i", "n", "s", "e ", "r", " th", " t", "in", "he", "th", "h", "he ", "to", "\r\n", "l", "s ", "d", " a", "an","er", "c", " o", "d ", "on", " of", "re", "of ", "t ", ", ", "is", "u", "at", " ", "n ", "or", "which", "f", "m", "as", "it", "that", "\n", "was", "en", " ", " w", "es", " an", " i", "\r", "f ", "g", "p", "nd", " s", "nd ", "ed ", "w", "ed", "http://", "for", "te", "ing", "y ", "The", " c", "ti", "r ", "his", "st", " in", "ar", "nt", ",", " to", "y", "ng", " h", "with", "le", "al", "to ", "b", "ou", "be", "were", " b", "se", "o ", "ent", "ha", "ng ", "their", "\"", "hi", "from", " f", "in ", "de", "ion", "me", "v", ".", "ve", "all", "re ", "ri", "ro", "is ", "co", "f t", "are", "ea", ". ", "her", " m", "er ", " p", "es ", "by", "they", "di", "ra", "ic", "not", "s, ", "d t", "at ", "ce", "la", "h ", "ne", "as ", "tio", "on ", "n t", "io", "we", " a ", "om", ", a", "s o", "ur", "li", "ll", "ch", "had", "this", "e t", "g ", "e\r\n", " wh", "ere", " co", "e o", "a ", "us", " d", "ss", "\n\r\n", "\r\n\r", "=\"", " be", " e", "s a", "ma", "one", "t t", "or ", "but", "el", "so", "l ", "e s", "s,", "no", "ter", " wa", "iv", "ho", "e a", " r", "hat", "s t", "ns", "ch ", "wh", "tr", "ut", "/", "have", "ly ", "ta", " ha", " on", "tha", "-", " l", "ati", "en ", "pe", " re", "there", "ass", "si", " fo", "wa", "ec", "our", "who", "its", "z", "fo", "rs", ">", "ot", "un", "<", "im", "th ", "nc", "ate", "><", "ver", "ad", " we", "ly", "ee", " n", "id", " cl", "ac", "il", "</", "rt", " wi", "div", "e, ", " it", "whi", " ma", "ge", "x", "&#x0", "\'>", "=\'"];
var cb = rc.reduce(function(result, item, index, array) {
result[item] = index;
return result;
}, {})
var smaz = module.exports = {
codebook: cb,
reverse_codebook: rc,
flush_verbatim: function(verbatim) {
var output = [];
if (verbatim.length > 1) {
output.push(255);
output.push(verbatim.length-1);
} else {
output.push(254);
}
var k = 0;
for (; k < verbatim.length; k++) {
output.push(verbatim.charCodeAt(k));
}
return output;
},
compress: function(input) {
var verbatim = "";
var output = [];
var input_index = 0;
while (input_index < input.length) {
// Try to lookup substrings into the hash table, starting from the
// longer to the shorter substrings
var encoded = false;
var j = 7;
if (input.length-input_index < 7) {
j = input.length-input_index;
}
for (; j > 0; j--) {
var code = smaz.codebook[input.substr(input_index,j)];
if (code != undefined) {
// Match found in the hash table,
// Flush verbatim bytes if needed
if (verbatim) {
output = output.concat(smaz.flush_verbatim(verbatim));
verbatim = "";
}
// Emit the byte
output.push(code);
input_index += j;
encoded = true;
break;
}
}
if (!encoded) {
// Match not found - add the byte to the verbatim buffer
verbatim += input[input_index];
input_index++;
// Flush if we reached the verbatim bytes length limit
if (verbatim.length == 256) {
output = output.concat(smaz.flush_verbatim(verbatim));
verbatim = "";
}
}
}
// Flush verbatim bytes if needed
if (verbatim) {
output = output.concat(smaz.flush_verbatim(verbatim));
verbatim = "";
}
return new Uint8Array(output);
},
decompress: function(input) {
var output = "";
var i = 0;
while (i < input.length) {
if (input[i] === 254) {
// Verbatim byte
if (i+1 >= input.length) {
throw "Malformed smaz.";
}
output += String.fromCharCode(input[i+1]);
i += 2;
} else if (input[i] === 255) {
// Verbatim string
var j;
if (i+input[i+1]+2 >= input.length) {
throw "Malformed smaz.";
}
for (j = 0; j < input[i+1]+1; j++) {
output += String.fromCharCode(input[i+2+j]);
}
i += 3+input[i+1];
} else {
// Codebook entry
output += smaz.reverse_codebook[input[i]];
i++;
}
}
return output;
}
};