// Copyright (c) 2006-2009, Salvatore Sanfilippo // All rights reserved. // Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: // Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. // Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. // Neither the name of Smaz nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Our compression codebook.
// The position of a string in this array is the single byte compress() emits
// for it. Bytes 254 and 255 are reserved as verbatim markers (see
// flush_verbatim), so only indices below 254 can be codebook entries.
// NOTE(review): entries 0, 40 and 52 are the same one-space string. Because the
// lookup table below keeps the last index seen, compress() only ever emits 52
// for it. Presumably two of these were longer runs of spaces originally --
// confirm before editing, since any change here alters the compressed format.
var rc = [
  " ", "the", "e", "t", "a", "of", "o", "and", "i", "n", "s", "e ", "r", " th",
  " t", "in", "he", "th", "h", "he ", "to", "\r\n", "l", "s ", "d", " a", "an",
  "er", "c", " o", "d ", "on", " of", "re", "of ", "t ", ", ", "is", "u", "at",
  " ", "n ", "or", "which", "f", "m", "as", "it", "that", "\n", "was", "en",
  " ", " w", "es", " an", " i", "\r", "f ", "g", "p", "nd", " s", "nd ", "ed ",
  "w", "ed", "http://", "for", "te", "ing", "y ", "The", " c", "ti", "r ", "his",
  "st", " in", "ar", "nt", ",", " to", "y", "ng", " h", "with", "le", "al", "to ",
  "b", "ou", "be", "were", " b", "se", "o ", "ent", "ha", "ng ", "their", "\"",
  "hi", "from", " f", "in ", "de", "ion", "me", "v", ".", "ve", "all", "re ",
  "ri", "ro", "is ", "co", "f t", "are", "ea", ". ", "her", " m", "er ", " p",
  "es ", "by", "they", "di", "ra", "ic", "not", "s, ", "d t", "at ", "ce", "la",
  "h ", "ne", "as ", "tio", "on ", "n t", "io", "we", " a ", "om", ", a", "s o",
  "ur", "li", "ll", "ch", "had", "this", "e t", "g ", "e\r\n", " wh", "ere",
  " co", "e o", "a ", "us", " d", "ss", "\n\r\n", "\r\n\r", "=\"", " be", " e",
  "s a", "ma", "one", "t t", "or ", "but", "el", "so", "l ", "e s", "s,", "no",
  "ter", " wa", "iv", "ho", "e a", " r", "hat", "s t", "ns", "ch ", "wh", "tr",
  "ut", "/", "have", "ly ", "ta", " ha", " on", "tha", "-", " l", "ati", "en ",
  "pe", " re", "there", "ass", "si", " fo", "wa", "ec", "our", "who", "its", "z",
  "fo", "rs", ">", "ot", "un", "<", "im", "th ", "nc", "ate", "><", "ver", "ad",
  " we", "ly", "ee", " n", "id", " cl", "ac", "il", "", "=\'"
];

// Forward lookup table: codebook string -> byte value (its index in rc).
// Kept as a plain object so existing callers that index or inspect
// smaz.codebook keep working; compress() guards against inherited keys itself.
var cb = {};
rc.forEach(function (item, index) {
  cb[item] = index;
});

var smaz = {
  codebook: cb,
  reverse_codebook: rc,

  /**
   * Encode a run of characters that had no codebook match.
   *
   * A single character is emitted as [254, charCode]; a longer run as
   * [255, length - 1, charCode...]. The length byte limits a run to 256
   * characters; compress() flushes before exceeding that.
   *
   * Note: char codes are pushed as-is. compress() later stores them in a
   * Uint8Array, which keeps only the low 8 bits, so characters above U+00FF
   * do not survive a round trip.
   *
   * @param {string} verbatim - Characters to emit literally (1..256 chars).
   * @returns {number[]} Marker byte(s) followed by the character codes.
   */
  flush_verbatim: function (verbatim) {
    var output = [];
    if (verbatim.length > 1) {
      output.push(255);
      output.push(verbatim.length - 1);
    } else {
      output.push(254);
    }
    for (var k = 0; k < verbatim.length; k++) {
      output.push(verbatim.charCodeAt(k));
    }
    return output;
  },

  /**
   * Compress a string using the codebook.
   *
   * At each position the longest codebook entry (up to 7 characters) that
   * matches is replaced by its one-byte code; characters with no match are
   * collected and emitted through flush_verbatim().
   *
   * @param {string} input - Text to compress.
   * @returns {Uint8Array} Compressed bytes (empty for an empty string).
   */
  compress: function (input) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var codebook = smaz.codebook;
    var output = [];
    var verbatim = "";

    // Append any pending verbatim characters to the output in place
    // (avoids re-copying the whole output array on every flush).
    function flushPending() {
      if (!verbatim) {
        return;
      }
      var bytes = smaz.flush_verbatim(verbatim);
      for (var b = 0; b < bytes.length; b++) {
        output.push(bytes[b]);
      }
      verbatim = "";
    }

    var pos = 0;
    while (pos < input.length) {
      var matched = false;

      // Try the longest candidate first, shrinking down to one character.
      var len = Math.min(7, input.length - pos);
      for (; len > 0; len--) {
        var chunk = input.slice(pos, pos + len);
        // Own-property check: without it, substrings such as "valueOf"
        // resolve to inherited Object.prototype members and are mistaken
        // for codebook hits.
        var code = hasOwn.call(codebook, chunk) ? codebook[chunk] : undefined;
        if (code !== undefined && code !== null) {
          flushPending();
          output.push(code);
          pos += len;
          matched = true;
          break;
        }
      }

      if (!matched) {
        // No entry starts here: keep the character for a verbatim run.
        verbatim += input[pos];
        pos++;
        // 256 is the most a single verbatim run can describe.
        if (verbatim.length === 256) {
          flushPending();
        }
      }
    }

    flushPending();
    return new Uint8Array(output);
  },

  /**
   * Expand bytes produced by compress() back into a string.
   *
   * @param {Uint8Array|number[]} input - Compressed bytes.
   * @returns {string} The decompressed text.
   * @throws {string} "Malformed smaz." when a verbatim marker is truncated or
   *   a byte has no codebook entry. A string (not an Error) is thrown to stay
   *   compatible with callers that compare against this value.
   */
  decompress: function (input) {
    var table = smaz.reverse_codebook;
    var output = "";
    var i = 0;

    while (i < input.length) {
      var marker = input[i];

      if (marker === 254) {
        // Single verbatim byte follows.
        if (i + 1 >= input.length) {
          throw "Malformed smaz.";
        }
        output += String.fromCharCode(input[i + 1]);
        i += 2;
      } else if (marker === 255) {
        // Verbatim run: one length byte (count - 1), then `count` bytes.
        if (i + 1 >= input.length) {
          // Missing length byte; previously this silently ended decoding.
          throw "Malformed smaz.";
        }
        var count = input[i + 1] + 1;
        if (i + 1 + count >= input.length) {
          throw "Malformed smaz.";
        }
        for (var j = 0; j < count; j++) {
          output += String.fromCharCode(input[i + 2 + j]);
        }
        i += 2 + count;
      } else {
        // Codebook entry.
        var entry = table[marker];
        if (entry === undefined) {
          // No such code; previously the text "undefined" was appended.
          throw "Malformed smaz.";
        }
        output += entry;
        i++;
      }
    }

    return output;
  }
};

// Export for CommonJS; the guard lets the file also load where `module`
// does not exist (e.g. a plain browser script).
if (typeof module !== "undefined" && module.exports) {
  module.exports = smaz;
}