From 64cba4939268a3fb518459ae4bec720430b37032 Mon Sep 17 00:00:00 2001 From: Holly Date: Fri, 12 Jul 2024 19:49:56 +0100 Subject: [PATCH] holy fuck it works --- mc/LibDeflate.lua | 3605 +++++++++++++++++++++++++++++++++++++++++ mc/bufferstuff.lua | 6 + mc/chunk.lua | 24 +- mc/server.lua | 139 +- theforwarder/index.ts | 3 +- 5 files changed, 3749 insertions(+), 28 deletions(-) create mode 100644 mc/LibDeflate.lua diff --git a/mc/LibDeflate.lua b/mc/LibDeflate.lua new file mode 100644 index 0000000..02b36ab --- /dev/null +++ b/mc/LibDeflate.lua @@ -0,0 +1,3605 @@ +--[[-- +LibDeflate 1.0.2-release
+Pure Lua compressor and decompressor with high compression ratio using +DEFLATE/zlib format. + +@file LibDeflate.lua +@author Haoqian He (Github: SafeteeWoW; World of Warcraft: Safetyy-Illidan(US)) +@copyright LibDeflate <2018-2021> Haoqian He +@license zlib License + +This library is implemented according to the following specifications.
+Report a bug if LibDeflate is not fully compliant with those specs.
+Both compressors and decompressors have been implemented in the library.
+1. RFC1951: DEFLATE Compressed Data Format Specification version 1.3
+https://tools.ietf.org/html/rfc1951
+2. RFC1950: ZLIB Compressed Data Format Specification version 3.3
+https://tools.ietf.org/html/rfc1950
+ +This library requires Lua 5.1/5.2/5.3/5.4 interpreter or LuaJIT v2.0+.
+This library does not have any dependencies.
+
+This file "LibDeflate.lua" is the only source file of +the library.
+Submit suggestions or report bugs to +https://github.com/safeteeWow/LibDeflate/issues +]] --[[ +zlib License + +(C) 2018-2021 Haoqian He + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +License History: +1. GNU General Public License Version 3 in v1.0.0 and earlier versions. +2. GNU Lesser General Public License Version 3 in v1.0.1 +3. the zlib License since v1.0.2 + +Credits and Disclaimer: +This library rewrites the code from the algorithm +and the ideas of the following projects, +and uses their code to help to test the correctness of this library, +but their code is not included directly in the library itself. +Their original licenses shall be comply when used. + +1. zlib, by Jean-loup Gailly (compression) and Mark Adler (decompression). + http://www.zlib.net/ + Licensed under zlib License. http://www.zlib.net/zlib_license.html + For the compression algorithm. +2. puff, by Mark Adler. https://github.com/madler/zlib/tree/master/contrib/puff + Licensed under zlib License. http://www.zlib.net/zlib_license.html + For the decompression algorithm. +3. LibCompress, by jjsheets and Galmok of European Stormrage (Horde) + https://www.wowace.com/projects/libcompress + Licensed under GPLv2. 
+ https://www.gnu.org/licenses/old-licenses/gpl-2.0.html + For the code to create customized codec. +4. WeakAuras2, + https://github.com/WeakAuras/WeakAuras2 + Licensed under GPLv2. + For the 6bit encoding and decoding. +]] --[[ + Curseforge auto-packaging replacements: + + Project Date: @project-date-iso@ + Project Hash: @project-hash@ + Project Version: @project-version@ +--]] local LibDeflate + +do + -- Semantic version. all lowercase. + -- Suffix can be alpha1, alpha2, beta1, beta2, rc1, rc2, etc. + -- NOTE: Two version numbers needs to modify. + -- 1. On the top of LibDeflate.lua + -- 2. _VERSION + -- 3. _MINOR + + -- version to store the official version of LibDeflate + local _VERSION = "1.0.2-release" + + -- When MAJOR is changed, I should name it as LibDeflate2 + local _MAJOR = "LibDeflate" + + -- Update this whenever a new version, for LibStub version registration. + -- 0 : v0.x + -- 1 : v1.0.0 + -- 2 : v1.0.1 + -- 3 : v1.0.2 + local _MINOR = 3 + + local _COPYRIGHT = "LibDeflate " .. _VERSION .. + " Copyright (C) 2018-2021 Haoqian He." .. + " Licensed under the zlib License" + + -- Register in the World of Warcraft library "LibStub" if detected. + if LibStub then + local lib, minor = LibStub:GetLibrary(_MAJOR, true) + if lib and minor and minor >= _MINOR then -- No need to update. + return lib + else -- Update or first time register + LibDeflate = LibStub:NewLibrary(_MAJOR, _MINOR) + -- NOTE: It is important that new version has implemented + -- all exported APIs and tables in the old version, + -- so the old library is fully garbage collected, + -- and we 100% ensure the backward compatibility. + end + else -- "LibStub" is not detected. + LibDeflate = {} + end + + LibDeflate._VERSION = _VERSION + LibDeflate._MAJOR = _MAJOR + LibDeflate._MINOR = _MINOR + LibDeflate._COPYRIGHT = _COPYRIGHT +end + +-- localize Lua api for faster access. 
+local assert = assert +local error = error +local pairs = pairs +local string_byte = string.byte +local string_char = string.char +local string_find = string.find +local string_gsub = string.gsub +local string_sub = string.sub +local table_concat = table.concat +local table_sort = table.sort +local tostring = tostring +local type = type + +-- Converts i to 2^i, (0<=i<=32) +-- This is used to implement bit left shift and bit right shift. +-- "x >> y" in C: "(x-x%_pow2[y])/_pow2[y]" in Lua +-- "x << y" in C: "x*_pow2[y]" in Lua +local _pow2 = {} + +-- Converts any byte to a character, (0<=byte<=255) +local _byte_to_char = {} + +-- _reverseBitsTbl[len][val] stores the bit reverse of +-- the number with bit length "len" and value "val" +-- For example, decimal number 6 with bits length 5 is binary 00110 +-- It's reverse is binary 01100, +-- which is decimal 12 and 12 == _reverseBitsTbl[5][6] +-- 1<=len<=9, 0<=val<=2^len-1 +-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code +-- of a huffman alphabet is 9? +local _reverse_bits_tbl = {} + +-- Convert a LZ77 length (3<=len<=258) to +-- a deflate literal/LZ77_length code (257<=code<=285) +local _length_to_deflate_code = {} + +-- convert a LZ77 length (3<=len<=258) to +-- a deflate literal/LZ77_length code extra bits. +local _length_to_deflate_extra_bits = {} + +-- Convert a LZ77 length (3<=len<=258) to +-- a deflate literal/LZ77_length code extra bit length. +local _length_to_deflate_extra_bitlen = {} + +-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code. +local _dist256_to_deflate_code = {} + +-- Convert a small LZ77 distance (1<=dist<=256) to +-- a deflate distance code extra bits. +local _dist256_to_deflate_extra_bits = {} + +-- Convert a small LZ77 distance (1<=dist<=256) to +-- a deflate distance code extra bit length. 
+local _dist256_to_deflate_extra_bitlen = {} + +-- Convert a literal/LZ77_length deflate code to LZ77 base length +-- The key of the table is (code - 256), 257<=code<=285 +local _literal_deflate_code_to_base_len = + { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, + 83, 99, 115, 131, 163, 195, 227, 258 + } + +-- Convert a literal/LZ77_length deflate code to base LZ77 length extra bits +-- The key of the table is (code - 256), 257<=code<=285 +local _literal_deflate_code_to_extra_bitlen = + { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, + 5, 5, 5, 0 + } + +-- Convert a distance deflate code to base LZ77 distance. (0<=code<=29) +local _dist_deflate_code_to_base_dist = { + [0] = 1, + 2, + 3, + 4, + 5, + 7, + 9, + 13, + 17, + 25, + 33, + 49, + 65, + 97, + 129, + 193, + 257, + 385, + 513, + 769, + 1025, + 1537, + 2049, + 3073, + 4097, + 6145, + 8193, + 12289, + 16385, + 24577 +} + +-- Convert a distance deflate code to LZ77 bits length. (0<=code<=29) +local _dist_deflate_code_to_extra_bitlen = + { + [0] = 0, + 0, + 0, + 0, + 1, + 1, + 2, + 2, + 3, + 3, + 4, + 4, + 5, + 5, + 6, + 6, + 7, + 7, + 8, + 8, + 9, + 9, + 10, + 10, + 11, + 11, + 12, + 12, + 13, + 13 + } + +-- The code order of the first huffman header in the dynamic deflate block. +-- See the page 12 of RFC1951 +local _rle_codes_huffman_bitlen_order = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +} + +-- The following tables are used by fixed deflate block. +-- The value of these tables are assigned at the bottom of the source. + +-- The huffman code of the literal/LZ77_length deflate codes, +-- in fixed deflate block. +local _fix_block_literal_huffman_code + +-- Convert huffman code of the literal/LZ77_length to deflate codes, +-- in fixed deflate block. +local _fix_block_literal_huffman_to_deflate_code + +-- The bit length of the huffman code of literal/LZ77_length deflate codes, +-- in fixed deflate block. 
+local _fix_block_literal_huffman_bitlen + +-- The count of each bit length of the literal/LZ77_length deflate codes, +-- in fixed deflate block. +local _fix_block_literal_huffman_bitlen_count + +-- The huffman code of the distance deflate codes, +-- in fixed deflate block. +local _fix_block_dist_huffman_code + +-- Convert huffman code of the distance to deflate codes, +-- in fixed deflate block. +local _fix_block_dist_huffman_to_deflate_code + +-- The bit length of the huffman code of the distance deflate codes, +-- in fixed deflate block. +local _fix_block_dist_huffman_bitlen + +-- The count of each bit length of the huffman code of +-- the distance deflate codes, +-- in fixed deflate block. +local _fix_block_dist_huffman_bitlen_count + +for i = 0, 255 do _byte_to_char[i] = string_char(i) end + +do + local pow = 1 + for i = 0, 32 do + _pow2[i] = pow + pow = pow * 2 + end +end + +for i = 1, 9 do + _reverse_bits_tbl[i] = {} + for j = 0, _pow2[i + 1] - 1 do + local reverse = 0 + local value = j + for _ = 1, i do + -- The following line is equivalent to "res | (code %2)" in C. + reverse = reverse - reverse % 2 + + (((reverse % 2 == 1) or (value % 2) == 1) and 1 or 0) + value = (value - value % 2) / 2 + reverse = reverse * 2 + end + _reverse_bits_tbl[i][j] = (reverse - reverse % 2) / 2 + end +end + +-- The source code is written according to the pattern in the numbers +-- in RFC1951 Page10. 
+do + local a = 18 + local b = 16 + local c = 265 + local bitlen = 1 + for len = 3, 258 do + if len <= 10 then + _length_to_deflate_code[len] = len + 254 + _length_to_deflate_extra_bitlen[len] = 0 + elseif len == 258 then + _length_to_deflate_code[len] = 285 + _length_to_deflate_extra_bitlen[len] = 0 + else + if len > a then + a = a + b + b = b * 2 + c = c + 4 + bitlen = bitlen + 1 + end + local t = len - a - 1 + b / 2 + _length_to_deflate_code[len] = (t - (t % (b / 8))) / (b / 8) + c + _length_to_deflate_extra_bitlen[len] = bitlen + _length_to_deflate_extra_bits[len] = t % (b / 8) + end + end +end + +-- The source code is written according to the pattern in the numbers +-- in RFC1951 Page11. +do + _dist256_to_deflate_code[1] = 0 + _dist256_to_deflate_code[2] = 1 + _dist256_to_deflate_extra_bitlen[1] = 0 + _dist256_to_deflate_extra_bitlen[2] = 0 + + local a = 3 + local b = 4 + local code = 2 + local bitlen = 0 + for dist = 3, 256 do + if dist > b then + a = a * 2 + b = b * 2 + code = code + 2 + bitlen = bitlen + 1 + end + _dist256_to_deflate_code[dist] = (dist <= a) and code or (code + 1) + _dist256_to_deflate_extra_bitlen[dist] = (bitlen < 0) and 0 or bitlen + if b >= 8 then + _dist256_to_deflate_extra_bits[dist] = (dist - b / 2 - 1) % (b / 4) + end + end +end + +--- Calculate the Adler-32 checksum of the string.
+-- See RFC1950 Page 9 https://tools.ietf.org/html/rfc1950 for the +-- definition of Adler-32 checksum. +-- @param str [string] the input string to calcuate its Adler-32 checksum. +-- @return [integer] The Adler-32 checksum, which is greater or equal to 0, +-- and less than 2^32 (4294967296). +function LibDeflate:Adler32(str) + -- This function is loop unrolled by better performance. + -- + -- Here is the minimum code: + -- + -- local a = 1 + -- local b = 0 + -- for i=1, #str do + -- local s = string.byte(str, i, i) + -- a = (a+s)%65521 + -- b = (b+a)%65521 + -- end + -- return b*65536+a + if type(str) ~= "string" then + error(("Usage: LibDeflate:Adler32(str):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + local strlen = #str + + local i = 1 + local a = 1 + local b = 0 + while i <= strlen - 15 do + local x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16 = + string_byte(str, i, i + 15) + b = + (b + 16 * a + 16 * x1 + 15 * x2 + 14 * x3 + 13 * x4 + 12 * x5 + 11 * x6 + + 10 * x7 + 9 * x8 + 8 * x9 + 7 * x10 + 6 * x11 + 5 * x12 + 4 * x13 + 3 * + x14 + 2 * x15 + x16) % 65521 + a = + (a + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + + x14 + x15 + x16) % 65521 + i = i + 16 + end + while (i <= strlen) do + local x = string_byte(str, i, i) + a = (a + x) % 65521 + b = (b + a) % 65521 + i = i + 1 + end + return (b * 65536 + a) % 4294967296 +end + +-- Compare adler32 checksum. +-- adler32 should be compared with a mod to avoid sign problem +-- 4072834167 (unsigned) is the same adler32 as -222133129 +local function IsEqualAdler32(actual, expected) + return (actual % 4294967296) == (expected % 4294967296) +end + +--- Create a preset dictionary. +-- +-- This function is not fast, and the memory consumption of the produced +-- dictionary is about 50 times of the input string. Therefore, it is suggestted +-- to run this function only once in your program. 
+-- +-- It is very important to know that if you do use a preset dictionary, +-- compressors and decompressors MUST USE THE SAME dictionary. That is, +-- the dictionaries must be created using the same string. If you update your program +-- with a new dictionary, people with the old version won't be able to transmit +-- data with people with the new version. Therefore, the dictionary +-- must be changed very carefully. +-- +-- The parameters "strlen" and "adler32" add a layer of verification to ensure +-- the parameter "str" is not modified unintentionally during the program +-- development. +-- +-- @usage local dict_str = "1234567890" +-- +-- -- print(dict_str:len(), LibDeflate:Adler32(dict_str)) +-- -- Hardcode the print result below to verify it to avoid accidental +-- -- modification of 'str' during the program development. +-- -- string length: 10, Adler-32: 187433486, +-- -- Don't calculate string length and its Adler-32 at run-time. +-- +-- local dict = LibDeflate:CreateDictionary(dict_str, 10, 187433486) +-- +-- @param str [string] The string used as the preset dictionary.
+-- You should put content that is likely to appear frequently in the data to be +-- compressed into the dictionary string, preferably with the most frequent +-- content toward the end of the string.
+-- Empty string and string longer than 32768 bytes are not allowed. +-- @param strlen [integer] The length of 'str'. Please pass in this parameter +-- as a hardcoded constant, in order to verify the content of 'str'. The value +-- of this parameter should be known before your program runs. +-- @param adler32 [integer] The Adler-32 checksum of 'str'. Please pass in this +-- parameter as a hardcoded constant, in order to verify the content of 'str'. +-- The value of this parameter should be known before your program runs. +-- @return [table] The dictionary used for preset dictionary compression and +-- decompression. +-- @raise error if 'strlen' does not match the length of 'str', +-- or if 'adler32' does not match the Adler-32 checksum of 'str'. +function LibDeflate:CreateDictionary(str, strlen, adler32) + if type(str) ~= "string" then + error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + if type(strlen) ~= "number" then + error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" .. + " 'strlen' - number expected got '%s'."):format(type(strlen)), 2) + end + if type(adler32) ~= "number" then + error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" .. + " 'adler32' - number expected got '%s'."):format(type(adler32)), 2) + end + if strlen ~= #str then + error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" .. + " 'strlen' does not match the actual length of 'str'." .. + " 'strlen': %u, '#str': %u ." .. + " Please check if 'str' is modified unintentionally."):format( + strlen, #str)) + end + if strlen == 0 then + error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" .. + " 'str' - Empty string is not allowed."), 2) + end + if strlen > 32768 then + error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" .. + " 'str' - string longer than 32768 bytes is not allowed." .. 
+ " Got %d bytes."):format(strlen), 2) + end + local actual_adler32 = self:Adler32(str) + if not IsEqualAdler32(adler32, actual_adler32) then + error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" .. + " 'adler32' does not match the actual adler32 of 'str'." .. + " 'adler32': %u, 'Adler32(str)': %u ." .. + " Please check if 'str' is modified unintentionally."):format( + adler32, actual_adler32)) + end + + local dictionary = {} + dictionary.adler32 = adler32 + dictionary.hash_tables = {} + dictionary.string_table = {} + dictionary.strlen = strlen + local string_table = dictionary.string_table + local hash_tables = dictionary.hash_tables + string_table[1] = string_byte(str, 1, 1) + string_table[2] = string_byte(str, 2, 2) + if strlen >= 3 then + local i = 1 + local hash = string_table[1] * 256 + string_table[2] + while i <= strlen - 2 - 3 do + local x1, x2, x3, x4 = string_byte(str, i + 2, i + 5) + string_table[i + 2] = x1 + string_table[i + 3] = x2 + string_table[i + 4] = x3 + string_table[i + 5] = x4 + hash = (hash * 256 + x1) % 16777216 + local t = hash_tables[hash] + if not t then + t = {}; + hash_tables[hash] = t + end + t[#t + 1] = i - strlen + i = i + 1 + hash = (hash * 256 + x2) % 16777216 + t = hash_tables[hash] + if not t then + t = {}; + hash_tables[hash] = t + end + t[#t + 1] = i - strlen + i = i + 1 + hash = (hash * 256 + x3) % 16777216 + t = hash_tables[hash] + if not t then + t = {}; + hash_tables[hash] = t + end + t[#t + 1] = i - strlen + i = i + 1 + hash = (hash * 256 + x4) % 16777216 + t = hash_tables[hash] + if not t then + t = {}; + hash_tables[hash] = t + end + t[#t + 1] = i - strlen + i = i + 1 + end + while i <= strlen - 2 do + local x = string_byte(str, i + 2) + string_table[i + 2] = x + hash = (hash * 256 + x) % 16777216 + local t = hash_tables[hash] + if not t then + t = {}; + hash_tables[hash] = t + end + t[#t + 1] = i - strlen + i = i + 1 + end + end + return dictionary +end + +-- Check if the dictionary is valid. 
+-- @param dictionary The preset dictionary for compression and decompression. +-- @return true if valid, false if not valid. +-- @return if not valid, the error message. +local function IsValidDictionary(dictionary) + if type(dictionary) ~= "table" then + return false, + ("'dictionary' - table expected got '%s'."):format(type(dictionary)) + end + if type(dictionary.adler32) ~= "number" or type(dictionary.string_table) ~= + "table" or type(dictionary.strlen) ~= "number" or dictionary.strlen <= 0 or + dictionary.strlen > 32768 or dictionary.strlen ~= #dictionary.string_table or + type(dictionary.hash_tables) ~= "table" then + return false, + ("'dictionary' - corrupted dictionary."):format(type(dictionary)) + end + return true, "" +end + +--[[ + key of the configuration table is the compression level, + and its value stores the compression setting. + These numbers come from zlib source code. + + Higher compression level usually means better compression. + (Because LibDeflate uses a simplified version of zlib algorithm, + there is no guarantee that higher compression level does not create + bigger file than lower level, but I can say it's 99% likely) + + Be careful with the high compression level. This is a pure lua + implementation compressor/decompressor, which is significant slower than + a C/C++ equivalant compressor/decompressor. Very high compression level + costs significant more CPU time, and usually compression size won't be + significant smaller when you increase compression level by 1, when the + level is already very high. Benchmark yourself if you can afford it. + + See also https://github.com/madler/zlib/blob/master/doc/algorithm.txt, + https://github.com/madler/zlib/blob/master/deflate.c for more information. + + The meaning of each field: + @field 1 use_lazy_evaluation: + true/false. Whether the program uses lazy evaluation. + See what is "lazy evaluation" in the link above. + lazy_evaluation improves ratio, but relatively slow. 
+ @field 2 good_prev_length: + Only effective if lazy is set, Only use 1/4 of max_chain, + if prev length of lazy match is above this. + @field 3 max_insert_length/max_lazy_match: + If not using lazy evaluation, + insert new strings in the hash table only if the match length is not + greater than this length. + If using lazy evaluation, only continue lazy evaluation, + if previous match length is strictly smaller than this value. + @field 4 nice_length: + Number. Don't continue to go down the hash chain, + if match length is above this. + @field 5 max_chain: + Number. The maximum number of hash chains we look. +--]] +local _compression_level_configs = { + [0] = {false, nil, 0, 0, 0}, -- level 0, no compression + [1] = {false, nil, 4, 8, 4}, -- level 1, similar to zlib level 1 + [2] = {false, nil, 5, 18, 8}, -- level 2, similar to zlib level 2 + [3] = {false, nil, 6, 32, 32}, -- level 3, similar to zlib level 3 + [4] = {true, 4, 4, 16, 16}, -- level 4, similar to zlib level 4 + [5] = {true, 8, 16, 32, 32}, -- level 5, similar to zlib level 5 + [6] = {true, 8, 16, 128, 128}, -- level 6, similar to zlib level 6 + [7] = {true, 8, 32, 128, 256}, -- (SLOW) level 7, similar to zlib level 7 + [8] = {true, 32, 128, 258, 1024}, -- (SLOW) level 8,similar to zlib level 8 + [9] = {true, 32, 258, 258, 4096} + -- (VERY SLOW) level 9, similar to zlib level 9 +} + +-- Check if the compression/decompression arguments is valid +-- @param str The input string. +-- @param check_dictionary if true, check if dictionary is valid. +-- @param dictionary The preset dictionary for compression and decompression. +-- @param check_configs if true, check if config is valid. +-- @param configs The compression configuration table +-- @return true if valid, false if not valid. +-- @return if not valid, the error message. 
+local function IsValidArguments(str, check_dictionary, dictionary, + check_configs, configs) + + if type(str) ~= "string" then + return false, ("'str' - string expected got '%s'."):format(type(str)) + end + if check_dictionary then + local dict_valid, dict_err = IsValidDictionary(dictionary) + if not dict_valid then return false, dict_err end + end + if check_configs then + local type_configs = type(configs) + if type_configs ~= "nil" and type_configs ~= "table" then + return false, ("'configs' - nil or table expected got '%s'."):format( + type(configs)) + end + if type_configs == "table" then + for k, v in pairs(configs) do + if k ~= "level" and k ~= "strategy" then + return false, + ("'configs' - unsupported table key in the configs: '%s'."):format( + k) + elseif k == "level" and not _compression_level_configs[v] then + return false, + ("'configs' - unsupported 'level': %s."):format(tostring(v)) + elseif k == "strategy" and v ~= "fixed" and v ~= "huffman_only" and v ~= + "dynamic" then + -- random_block_type is for testing purpose + return false, ("'configs' - unsupported 'strategy': '%s'."):format( + tostring(v)) + end + end + end + end + return true, "" +end + +--[[ -------------------------------------------------------------------------- + Compress code +--]] -------------------------------------------------------------------------- + +-- partial flush to save memory +local _FLUSH_MODE_MEMORY_CLEANUP = 0 +-- full flush with partial bytes +local _FLUSH_MODE_OUTPUT = 1 +-- write bytes to get to byte boundary +local _FLUSH_MODE_BYTE_BOUNDARY = 2 +-- no flush, just get num of bits written so far +local _FLUSH_MODE_NO_FLUSH = 3 + +--[[ + Create an empty writer to easily write stuffs as the unit of bits. + Return values: + 1. WriteBits(code, bitlen): + 2. WriteString(str): + 3. 
Flush(mode): +--]] +local function CreateWriter() + local buffer_size = 0 + local cache = 0 + local cache_bitlen = 0 + local total_bitlen = 0 + local buffer = {} + -- When buffer is big enough, flush into result_buffer to save memory. + local result_buffer = {} + + -- Write bits with value "value" and bit length of "bitlen" into writer. + -- @param value: The value being written + -- @param bitlen: The bit length of "value" + -- @return nil + local function WriteBits(value, bitlen) + cache = cache + value * _pow2[cache_bitlen] + cache_bitlen = cache_bitlen + bitlen + total_bitlen = total_bitlen + bitlen + -- Only bulk to buffer every 4 bytes. This is quicker. + if cache_bitlen >= 32 then + buffer_size = buffer_size + 1 + buffer[buffer_size] = _byte_to_char[cache % 256] .. + _byte_to_char[((cache - cache % 256) / 256 % 256)] .. + _byte_to_char[((cache - cache % 65536) / 65536 % + 256)] .. + _byte_to_char[((cache - cache % 16777216) / + 16777216 % 256)] + local rshift_mask = _pow2[32 - cache_bitlen + bitlen] + cache = (value - value % rshift_mask) / rshift_mask + cache_bitlen = cache_bitlen - 32 + end + end + + -- Write the entire string into the writer. + -- @param str The string being written + -- @return nil + local function WriteString(str) + for _ = 1, cache_bitlen, 8 do + buffer_size = buffer_size + 1 + buffer[buffer_size] = string_char(cache % 256) + cache = (cache - cache % 256) / 256 + end + cache_bitlen = 0 + buffer_size = buffer_size + 1 + buffer[buffer_size] = str + total_bitlen = total_bitlen + #str * 8 + end + + -- Flush current stuffs in the writer and return it. + -- This operation will free most of the memory. + -- @param mode See the descrtion of the constant and the source code. + -- @return The total number of bits stored in the writer right now. + -- for byte boundary mode, it includes the padding bits. + -- for output mode, it does not include padding bits. + -- @return Return the outputs if mode is output. 
+ local function FlushWriter(mode) + if mode == _FLUSH_MODE_NO_FLUSH then return total_bitlen end + + if mode == _FLUSH_MODE_OUTPUT or mode == _FLUSH_MODE_BYTE_BOUNDARY then + -- Full flush, also output cache. + -- Need to pad some bits if cache_bitlen is not multiple of 8. + local padding_bitlen = (8 - cache_bitlen % 8) % 8 + + if cache_bitlen > 0 then + -- padding with all 1 bits, mainly because "\000" is not + -- good to be transmitted. I do this so "\000" is a little bit + -- less frequent. + cache = cache - _pow2[cache_bitlen] + + _pow2[cache_bitlen + padding_bitlen] + for _ = 1, cache_bitlen, 8 do + buffer_size = buffer_size + 1 + buffer[buffer_size] = _byte_to_char[cache % 256] + cache = (cache - cache % 256) / 256 + end + + cache = 0 + cache_bitlen = 0 + end + if mode == _FLUSH_MODE_BYTE_BOUNDARY then + total_bitlen = total_bitlen + padding_bitlen + return total_bitlen + end + end + + local flushed = table_concat(buffer) + buffer = {} + buffer_size = 0 + result_buffer[#result_buffer + 1] = flushed + + if mode == _FLUSH_MODE_MEMORY_CLEANUP then + return total_bitlen + else + return total_bitlen, table_concat(result_buffer) + end + end + + return WriteBits, WriteString, FlushWriter +end + +-- Push an element into a min heap +-- @param heap A min heap whose min element is at index 1. +-- @param e The element to be pushed. Assume element "e" is a table +-- and comparison is done via its first entry e[1] +-- @param heap_size current number of elements in the heap. +-- NOTE: There may be some garbage stored in +-- heap[heap_size+1], heap[heap_size+2], etc..
+-- @return nil +local function MinHeapPush(heap, e, heap_size) + heap_size = heap_size + 1 + heap[heap_size] = e + local value = e[1] + local pos = heap_size + local parent_pos = (pos - pos % 2) / 2 + + while (parent_pos >= 1 and heap[parent_pos][1] > value) do + local t = heap[parent_pos] + heap[parent_pos] = e + heap[pos] = t + pos = parent_pos + parent_pos = (parent_pos - parent_pos % 2) / 2 + end +end + +-- Pop an element from a min heap +-- @param heap A min heap whose min element is at index 1. +-- @param heap_size current number of elements in the heap. +-- @return the popped element +-- Note: This function does not change table size of "heap" to save CPU time. +local function MinHeapPop(heap, heap_size) + local top = heap[1] + local e = heap[heap_size] + local value = e[1] + heap[1] = e + heap[heap_size] = top + heap_size = heap_size - 1 + + local pos = 1 + local left_child_pos = pos * 2 + local right_child_pos = left_child_pos + 1 + + while (left_child_pos <= heap_size) do + local left_child = heap[left_child_pos] + if (right_child_pos <= heap_size and heap[right_child_pos][1] < + left_child[1]) then + local right_child = heap[right_child_pos] + if right_child[1] < value then + heap[right_child_pos] = e + heap[pos] = right_child + pos = right_child_pos + left_child_pos = pos * 2 + right_child_pos = left_child_pos + 1 + else + break + end + else + if left_child[1] < value then + heap[left_child_pos] = e + heap[pos] = left_child + pos = left_child_pos + left_child_pos = pos * 2 + right_child_pos = left_child_pos + 1 + else + break + end + end + end + + return top +end + +-- Deflate defines a special huffman tree, which is unique once the bit length +-- of huffman code of all symbols are known.
+-- @param bitlen_counts Number of symbols with a specific bitlen +-- @param symbol_bitlens The bit length of a symbol +-- @param max_symbol The max symbol among all symbols, +-- which is (number of symbols - 1) +-- @param max_bitlen The max huffman bit length among all symbols. +-- @return The huffman code of all symbols. +local function GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens, + max_symbol, max_bitlen) + local huffman_code = 0 + local next_codes = {} + local symbol_huffman_codes = {} + for bitlen = 1, max_bitlen do + huffman_code = (huffman_code + (bitlen_counts[bitlen - 1] or 0)) * 2 + next_codes[bitlen] = huffman_code + end + for symbol = 0, max_symbol do + local bitlen = symbol_bitlens[symbol] + if bitlen then + huffman_code = next_codes[bitlen] + next_codes[bitlen] = huffman_code + 1 + + -- Reverse the bits of huffman code, + -- because most significant bits of huffman code + -- is stored first into the compressed data. + -- @see RFC1951 Page5 Section 3.1.1 + if bitlen <= 9 then -- Have cached reverse for small bitlen. + symbol_huffman_codes[symbol] = _reverse_bits_tbl[bitlen][huffman_code] + else + local reverse = 0 + for _ = 1, bitlen do + reverse = reverse - reverse % 2 + + (((reverse % 2 == 1) or (huffman_code % 2) == 1) and 1 or + 0) + huffman_code = (huffman_code - huffman_code % 2) / 2 + reverse = reverse * 2 + end + symbol_huffman_codes[symbol] = (reverse - reverse % 2) / 2 + end + end + end + return symbol_huffman_codes +end + +-- A helper function to sort heap elements +-- a[1], b[1] is the huffman frequency +-- a[2], b[2] is the symbol value. +local function SortByFirstThenSecond(a, b) + return a[1] < b[1] or (a[1] == b[1] and a[2] < b[2]) +end + +-- Calculate the huffman bit length and huffman code. +-- @param symbol_counts: A table whose table key is the symbol, and table value +-- is the symbol frequency (nil means 0 frequency). +-- @param max_bitlen: See description of return value.
-- @param max_symbol: The maximum symbol
-- @return a table whose key is the symbol, and the value is the huffman
--        bit length. We guarantee that all bit length <= max_bitlen.
--        For 0<=symbol<=max_symbol, table value could be nil if the frequency
--        of the symbol is 0 or nil.
-- @return a table whose key is the symbol, and the value is the huffman code.
-- @return a number indicating the maximum symbol whose bitlen is not 0.
local function GetHuffmanBitlenAndCode(symbol_counts, max_bitlen, max_symbol)
  -- Every tree node is an array:
  --   node[1]: weight while the tree is built, later reused as the node depth
  --   node[2]: symbol (-1 for internal nodes)
  --   node[3]: left child (nil for leafs)
  --   node[4]: right child (nil for leafs)
  local leafs = {}
  local leaf_count = 0
  for symbol, count in pairs(symbol_counts) do
    leaf_count = leaf_count + 1
    leafs[leaf_count] = {count, symbol}
  end

  -- No symbol at all: no code.
  if leaf_count == 0 then
    return {}, {}, -1
  end

  local symbol_bitlens = {}

  -- A single symbol gets code 0 with bit length 1. This is the only case in
  -- which the result describes an incomplete huffman tree.
  if leaf_count == 1 then
    local only_symbol = leafs[1][2]
    symbol_bitlens[only_symbol] = 1
    return symbol_bitlens, {[only_symbol] = 0}, only_symbol
  end

  -- Build the tree: repeatedly merge the two lightest nodes.
  table_sort(leafs, SortByFirstThenSecond)
  local heap = {}
  for i = 1, leaf_count do heap[i] = leafs[i] end
  local heap_size = leaf_count
  while heap_size > 1 do
    -- Note: pop does not change the table size of heap, so the live size
    -- is tracked by hand.
    local left = MinHeapPop(heap, heap_size)
    heap_size = heap_size - 1
    local right = MinHeapPop(heap, heap_size)
    heap_size = heap_size - 1
    MinHeapPush(heap, {left[1] + right[1], -1, left, right}, heap_size)
    heap_size = heap_size + 1
  end

  -- Breadth first walk from the root; node[1] now carries the depth.
  local root = heap[1]
  root[1] = 0
  local queue = {root, 0, 0, 0} -- preallocate some space.
  local queue_size = 1
  local head = 1
  -- Number of visited nodes that sit deeper than max_bitlen.
  local overflow_count = 0
  local bitlen_counts = {}
  local max_non_zero_bitlen_symbol = -1
  while head <= queue_size do
    local node = queue[head]
    head = head + 1

    local depth = node[1]
    local left, right = node[3], node[4]
    if left then
      queue_size = queue_size + 1
      queue[queue_size] = left
      left[1] = depth + 1
    end
    if right then
      queue_size = queue_size + 1
      queue[queue_size] = right
      right[1] = depth + 1
    end

    -- Children received the unclamped depth above; only the value recorded
    -- for this node is clamped.
    if depth > max_bitlen then
      overflow_count = overflow_count + 1
      depth = max_bitlen
    end

    local symbol = node[2]
    if symbol >= 0 then -- leaf
      symbol_bitlens[symbol] = depth
      if symbol > max_non_zero_bitlen_symbol then
        max_non_zero_bitlen_symbol = symbol
      end
      bitlen_counts[depth] = (bitlen_counts[depth] or 0) + 1
    end
  end

  -- Resolve bit length overflow
  -- @see ZLib/trees.c:gen_bitlen(s, desc), for reference
  if overflow_count > 0 then
    repeat
      local len = max_bitlen - 1
      while (bitlen_counts[len] or 0) == 0 do len = len - 1 end
      -- move one leaf down the tree
      bitlen_counts[len] = bitlen_counts[len] - 1
      -- move one overflow item as its brother
      bitlen_counts[len + 1] = (bitlen_counts[len + 1] or 0) + 2
      bitlen_counts[max_bitlen] = bitlen_counts[max_bitlen] - 1
      overflow_count = overflow_count - 2
    until overflow_count <= 0

    -- Hand the adjusted lengths out again. "leafs" is still ordered by
    -- ascending frequency, so the rarest symbols get the longest codes.
    local leaf_index = 1
    for len = max_bitlen, 1, -1 do
      for _ = 1, bitlen_counts[len] or 0 do
        symbol_bitlens[leafs[leaf_index][2]] = len
        leaf_index = leaf_index + 1
      end
    end
  end

  local symbol_codes = GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens,
                                                max_symbol, max_bitlen)
  return symbol_bitlens, symbol_codes, max_non_zero_bitlen_symbol
end

-- Run length encode the huffman bit lengths of the literal/length alphabet
-- followed by those of the distance alphabet, for the header of a dynamic
-- huffman block.
-- @see RFC1951 Page 12
-- @param lcode_bitlens: The huffman bit length of literal/LZ77_length.
-- @param max_non_zero_bitlen_lcode: The maximum literal/LZ77_length symbol
--         whose huffman bit length is not zero.
-- @param dcode_bitlens: The huffman bit length of LZ77 distance.
-- @param max_non_zero_bitlen_dcode: The maximum LZ77 distance symbol
--         whose huffman bit length is not zero.
-- @return The run length encoded codes.
-- @return The extra bits. One entry for each rle code that needs extra bits.
--          (code == 16 or 17 or 18).
-- @return The count of appearance of each rle codes.
local function RunLengthEncodeHuffmanBitlen(lcode_bitlens,
                                            max_non_zero_bitlen_lcode,
                                            dcode_bitlens,
                                            max_non_zero_bitlen_dcode)
  local rle_codes, rle_extra_bits, rle_code_counts = {}, {}, {}
  local code_total, extra_total = 0, 0

  -- Appends one rle code and counts its appearance.
  local function emit_code(rle_code)
    code_total = code_total + 1
    rle_codes[code_total] = rle_code
    rle_code_counts[rle_code] = (rle_code_counts[rle_code] or 0) + 1
  end

  -- Appends the extra bits value that belongs to a code 16/17/18.
  local function emit_extra(bits)
    extra_total = extra_total + 1
    rle_extra_bits[extra_total] = bits
  end

  -- If there is no distance code, assume one distance code of bit length 0.
  -- RFC1951: One distance code of zero bits means that
  -- there are no distance codes used at all (the data is all literals).
  if max_non_zero_bitlen_dcode < 0 then max_non_zero_bitlen_dcode = 0 end
  local max_code = max_non_zero_bitlen_lcode + max_non_zero_bitlen_dcode + 1

  -- "prev" is the bit length of the current run. "count" is the number of
  -- repeats that are still pending: a non-zero length is emitted as soon as
  -- it is first seen (count restarts at 0), a zero length is not (count
  -- restarts at 1).
  local prev = nil
  local count = 0

  -- The extra iteration (code == max_code + 1) yields len == nil, which
  -- differs from every real length and so flushes the last run.
  for code = 0, max_code + 1 do
    local len
    if code <= max_non_zero_bitlen_lcode then
      len = lcode_bitlens[code] or 0
    elseif code <= max_code then
      len = dcode_bitlens[code - max_non_zero_bitlen_lcode - 1] or 0
    end

    if len == prev then
      count = count + 1
      if len ~= 0 and count == 6 then
        -- Longest run code 16 can express: extra bits 3 means 6 repeats.
        emit_code(16)
        emit_extra(3)
        count = 0
      elseif len == 0 and count == 138 then
        -- Longest run code 18 can express: extra bits 127 means 138 zeros.
        emit_code(18)
        emit_extra(127)
        count = 0
      end
    else
      -- The run of "prev" ended; flush what is pending.
      if count == 1 then
        emit_code(prev)
      elseif count == 2 then
        emit_code(prev)
        emit_code(prev)
      elseif count >= 3 then
        emit_code((prev ~= 0) and 16 or (count <= 10 and 17 or 18))
        emit_extra((count <= 10) and (count - 3) or (count - 11))
      end

      prev = len
      if len and len ~= 0 then
        emit_code(len)
        count = 0
      else
        count = 1
      end
    end
  end

  return rle_codes, rle_extra_bits, rle_code_counts
end

-- Load the string into a table, in order to speed up LZ77.
-- Loop unrolled 16 times to speed this function up.
-- @param str The string to be loaded.
-- @param t The load destination
-- @param start str[start] will be the first character to be loaded.
-- @param stop str[stop] will be the last character to be loaded
-- @param offset str[index] will be loaded into t[index-offset]
-- @return t
local function LoadStringToTable(str, t, start, stop, offset)
  local pos = start - offset
  local last = stop - offset
  local last_full_group = stop - 15 - offset

  -- Sixteen bytes per string_byte call while a full group still fits.
  while pos <= last_full_group do
    local src = pos + offset
    t[pos], t[pos + 1], t[pos + 2], t[pos + 3], t[pos + 4], t[pos + 5],
      t[pos + 6], t[pos + 7], t[pos + 8], t[pos + 9], t[pos + 10],
      t[pos + 11], t[pos + 12], t[pos + 13], t[pos + 14], t[pos + 15] =
      string_byte(str, src, src + 15)
    pos = pos + 16
  end

  -- Remaining bytes one at a time.
  while pos <= last do
    t[pos] = string_byte(str, pos + offset, pos + offset)
    pos = pos + 1
  end
  return t
end

-- Do LZ77 process. This function uses the majority of the CPU time.
-- @see zlib/deflate.c:deflate_fast(), zlib/deflate.c:deflate_slow()
-- @see https://github.com/madler/zlib/blob/master/doc/algorithm.txt
-- This function uses the algorithms used above. You should read the
-- algorithm.txt above to understand what is the hash function and the
-- lazy evaluation.
--
-- The special optimization used here is the hash function.
-- The hash is just the three consecutive characters packed into one number
-- (base 256). So if the hash matches, it guarantees 3 characters are matched.
-- This optimization can be implemented because Lua table is a hash table.
--
-- @param level integer that describes compression level.
-- @param string_table table that stores the value of string to be compressed.
--          The index of this table starts from 1.
--          The caller needs to make sure all values needed by this function
--          are loaded.
--          Assume "str" is the origin input string into the compressor
--          str[block_start]..str[block_end+3] needs to be loaded into
--          string_table[block_start-offset]..string_table[block_end+3-offset]
--          If dictionary is presented, the last 258 bytes of the dictionary
--          needs to be loaded into string_table[-257..0]
--          (See more in the description of offset.)
-- @param hash_tables The table key is the hash value (0<=hash<16777216=256^3)
--          The table value is an array that stores the indexes of the
--          input data string to be compressed, such that
--          hash == str[index]*65536 + str[index+1]*256 + str[index+2]
--          Indexes are ordered in this array.
-- @param block_start The index of the input data string to be compressed
--          that starts the LZ77 block.
-- @param block_end The index of the input data string to be compressed
--          that ends the LZ77 block.
-- @param offset str[index] is stored in string_table[index-offset],
--          This offset is mainly an optimization to limit the index
--          of string_table, so lua can access this table quicker.
-- @param dictionary See LibDeflate:CreateDictionary
-- @return literal/LZ77_length deflate codes.
-- @return the extra bits of literal/LZ77_length deflate codes.
-- @return the count of each literal/LZ77 deflate code.
-- @return LZ77 distance deflate codes.
-- @return the extra bits of LZ77 distance deflate codes.
-- @return the count of each LZ77 distance deflate code.
local function GetBlockLZ77Result(level, string_table, hash_tables, block_start,
                                  block_end, offset, dictionary)
  -- Per-level tuning values, read positionally from the level's config entry.
  local config = _compression_level_configs[level]
  local config_use_lazy, config_good_prev_length, config_max_lazy_match,
        config_nice_length, config_max_hash_chain = config[1], config[2],
                                                    config[3], config[4],
                                                    config[5]

  -- Without lazy evaluation config[3] doubles as the longest match whose
  -- inner positions are still inserted into the hash chains.
  local config_max_insert_length = (not config_use_lazy) and
                                     config_max_lazy_match or 2147483646

  -- Chain budget used once the previous match is already "good": a quarter
  -- of the normal budget (integer division), as zlib does with
  -- "chain_length >>= 2".
  -- The parentheses matter: "x - x % 4 / 4" parses as "x - ((x % 4) / 4)",
  -- which leaves the budget unreduced for every x divisible by 4.
  local config_good_hash_chain =
    (config_max_hash_chain - config_max_hash_chain % 4) / 4
  if config_good_hash_chain < 1 then
    -- Keep examining at least one candidate when the full budget allows one.
    config_good_hash_chain = (config_max_hash_chain < 1) and
                               config_max_hash_chain or 1
  end

  local hash

  local dict_hash_tables
  local dict_string_table
  local dict_string_len = 0

  if dictionary then
    dict_hash_tables = dictionary.hash_tables
    dict_string_table = dictionary.string_table
    dict_string_len = dictionary.strlen
    assert(block_start == 1)
    -- Register the two 3-byte sequences that straddle the boundary between
    -- the dictionary and the data, at positions -1 and 0.
    if block_end >= block_start and dict_string_len >= 2 then
      hash = dict_string_table[dict_string_len - 1] * 65536 +
               dict_string_table[dict_string_len] * 256 + string_table[1]
      local t = hash_tables[hash]
      if not t then
        t = {}
        hash_tables[hash] = t
      end
      t[#t + 1] = -1
    end
    if block_end >= block_start + 1 and dict_string_len >= 1 then
      hash =
        dict_string_table[dict_string_len] * 65536 + string_table[1] * 256 +
          string_table[2]
      local t = hash_tables[hash]
      if not t then
        t = {}
        hash_tables[hash] = t
      end
      t[#t + 1] = 0
    end
  end

  local dict_string_len_plus3 = dict_string_len + 3
  -- Loop invariant, used to turn a chain position into a string_table index
  -- that already points 3 bytes past the candidate start.
  local offset_minus_three = offset - 3

  -- Rolling hash primed with the first two bytes of the block; the third
  -- byte is shifted in at the top of every iteration.
  hash = (string_table[block_start - offset] or 0) * 256 +
           (string_table[block_start + 1 - offset] or 0)

  local lcodes = {}
  local lcode_tblsize = 0
  local lcodes_counts = {}
  local dcodes = {}
  local dcodes_tblsize = 0
  local dcodes_counts = {}

  local lextra_bits = {}
  local lextra_bits_tblsize = 0
  local dextra_bits = {}
  local dextra_bits_tblsize = 0

  local match_available = false
  local prev_len
  local prev_dist
  local cur_len = 0
  local cur_dist = 0

  local index = block_start
  -- In lazy mode the symbol for position index-1 is emitted while looking at
  -- position index, so one extra iteration is needed to flush the last one.
  local index_end = block_end + (config_use_lazy and 1 or 0)

  -- the zlib source code writes separate code for lazy evaluation and
  -- not lazy evaluation, which is easier to understand.
  -- Here both are handled by the same loop, which is a bit harder to
  -- understand but keeps a single code path to maintain.
  while (index <= index_end) do
    local string_table_index = index - offset
    prev_len = cur_len
    prev_dist = cur_dist
    cur_len = 0

    hash = (hash * 256 + (string_table[string_table_index + 2] or 0)) % 16777216

    -- Find the chain of earlier positions with the same hash. "cur_chain" is
    -- the chain that gets searched, "hash_chain" the one that receives the
    -- current position.
    local chain_index
    local cur_chain
    local hash_chain = hash_tables[hash]
    local chain_old_size
    if not hash_chain then
      chain_old_size = 0
      hash_chain = {}
      hash_tables[hash] = hash_chain
      if dict_hash_tables then
        cur_chain = dict_hash_tables[hash]
        chain_index = cur_chain and #cur_chain or 0
      else
        chain_index = 0
      end
    else
      chain_old_size = #hash_chain
      cur_chain = hash_chain
      chain_index = chain_old_size
    end

    if index <= block_end then hash_chain[chain_old_size + 1] = index end

    if (chain_index > 0 and index + 2 <= block_end and
      (not config_use_lazy or prev_len < config_max_lazy_match)) then

      local depth =
        (config_use_lazy and prev_len >= config_good_prev_length) and
          config_good_hash_chain or config_max_hash_chain

      -- Last string_table index a match starting here may cover: at most
      -- 258 bytes and never past block_end.
      local max_len_minus_one = block_end - index
      max_len_minus_one = (max_len_minus_one >= 257) and 257 or
                            max_len_minus_one
      max_len_minus_one = max_len_minus_one + string_table_index
      local string_table_index_plus_three = string_table_index + 3

      -- Walk the chain from the most recent position backwards.
      while chain_index >= 1 and depth > 0 do
        local prev = cur_chain[chain_index]

        if index - prev > 32768 then break end
        if prev < index then
          -- Equal hash means the first three bytes are equal, so the
          -- comparison starts at the fourth byte.
          local sj = string_table_index_plus_three

          if prev >= -257 then
            -- Candidate is readable through string_table.
            local pj = prev - offset_minus_three
            while (sj <= max_len_minus_one and string_table[pj] ==
              string_table[sj]) do
              sj = sj + 1
              pj = pj + 1
            end
          else
            -- Candidate lies further back, in the dictionary's own table.
            local pj = dict_string_len_plus3 + prev
            while (sj <= max_len_minus_one and dict_string_table[pj] ==
              string_table[sj]) do
              sj = sj + 1
              pj = pj + 1
            end
          end
          local j = sj - string_table_index
          if j > cur_len then
            cur_len = j
            cur_dist = index - prev
          end
          if cur_len >= config_nice_length then break end
        end

        chain_index = chain_index - 1
        depth = depth - 1
        -- The block's own chain is exhausted: continue in the dictionary's
        -- chain for the same hash, if there is one.
        if chain_index == 0 and prev > 0 and dict_hash_tables then
          cur_chain = dict_hash_tables[hash]
          chain_index = cur_chain and #cur_chain or 0
        end
      end
    end

    if not config_use_lazy then prev_len, prev_dist = cur_len, cur_dist end
    -- Emit a match when one is pending, it is worth encoding (3-byte matches
    -- only for distances below 4096) and the current position did not find
    -- a longer one.
    if ((not config_use_lazy or match_available) and
      (prev_len > 3 or (prev_len == 3 and prev_dist < 4096)) and cur_len <=
      prev_len) then
      local code = _length_to_deflate_code[prev_len]
      local length_extra_bits_bitlen = _length_to_deflate_extra_bitlen[prev_len]
      local dist_code, dist_extra_bits_bitlen, dist_extra_bits
      if prev_dist <= 256 then -- have cached code for small distance.
        dist_code = _dist256_to_deflate_code[prev_dist]
        dist_extra_bits = _dist256_to_deflate_extra_bits[prev_dist]
        dist_extra_bits_bitlen = _dist256_to_deflate_extra_bitlen[prev_dist]
      else
        -- Larger distances: step through the code pairs, doubling the range
        -- limits a and b and adding one extra bit per step.
        dist_code = 16
        dist_extra_bits_bitlen = 7
        local a = 384
        local b = 512

        while true do
          if prev_dist <= a then
            dist_extra_bits = (prev_dist - (b / 2) - 1) % (b / 4)
            break
          elseif prev_dist <= b then
            dist_extra_bits = (prev_dist - (b / 2) - 1) % (b / 4)
            dist_code = dist_code + 1
            break
          else
            dist_code = dist_code + 2
            dist_extra_bits_bitlen = dist_extra_bits_bitlen + 1
            a = a * 2
            b = b * 2
          end
        end
      end
      lcode_tblsize = lcode_tblsize + 1
      lcodes[lcode_tblsize] = code
      lcodes_counts[code] = (lcodes_counts[code] or 0) + 1

      dcodes_tblsize = dcodes_tblsize + 1
      dcodes[dcodes_tblsize] = dist_code
      dcodes_counts[dist_code] = (dcodes_counts[dist_code] or 0) + 1

      if length_extra_bits_bitlen > 0 then
        local lenExtraBits = _length_to_deflate_extra_bits[prev_len]
        lextra_bits_tblsize = lextra_bits_tblsize + 1
        lextra_bits[lextra_bits_tblsize] = lenExtraBits
      end
      if dist_extra_bits_bitlen > 0 then
        dextra_bits_tblsize = dextra_bits_tblsize + 1
        dextra_bits[dextra_bits_tblsize] = dist_extra_bits
      end

      -- Keep the rolling hash in step over the bytes covered by the match
      -- and, for short enough matches, register those positions as well.
      for i = index + 1, index + prev_len - (config_use_lazy and 2 or 1) do
        hash = (hash * 256 + (string_table[i - offset + 2] or 0)) % 16777216
        if prev_len <= config_max_insert_length then
          hash_chain = hash_tables[hash]
          if not hash_chain then
            hash_chain = {}
            hash_tables[hash] = hash_chain
          end
          hash_chain[#hash_chain + 1] = i
        end
      end
      index = index + prev_len - (config_use_lazy and 1 or 0)
      match_available = false
    elseif (not config_use_lazy) or match_available then
      -- Emit a literal: the previous byte in lazy mode, the current byte
      -- otherwise.
      local code = string_table[config_use_lazy and (string_table_index - 1) or
                     string_table_index]
      lcode_tblsize = lcode_tblsize + 1
      lcodes[lcode_tblsize] = code
      lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
      index = index + 1
    else
      -- Lazy mode, nothing pending yet: defer the decision by one position.
      match_available = true
      index = index + 1
    end
  end

  -- Write "end of block" symbol
  lcode_tblsize = lcode_tblsize + 1
  lcodes[lcode_tblsize] = 256
  lcodes_counts[256] = (lcodes_counts[256] or 0) + 1

  return lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits, dcodes_counts
end

-- Get the header data of dynamic block.
-- @param lcodes_count The count of each literal/LZ77_length codes.
-- @param dcodes_count The count of each Lz77 distance codes.
-- @return a lots of stuffs.
-- @see RFC1951 Page 12
local function GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
  -- Literal/length alphabet: symbols up to 285, bit length limit 15.
  local lcodes_huffman_bitlens, lcodes_huffman_codes, max_lcode =
    GetHuffmanBitlenAndCode(lcodes_counts, 15, 285)
  -- Distance alphabet: symbols up to 29, bit length limit 15.
  local dcodes_huffman_bitlens, dcodes_huffman_codes, max_dcode =
    GetHuffmanBitlenAndCode(dcodes_counts, 15, 29)

  -- Both bit length tables are run length encoded as one sequence ...
  local rle_deflate_codes, rle_extra_bits, rle_codes_counts =
    RunLengthEncodeHuffmanBitlen(lcodes_huffman_bitlens, max_lcode,
                                 dcodes_huffman_bitlens, max_dcode)

  -- ... and the rle codes (0..18) get a huffman code of their own, with a
  -- bit length limit of 7.
  local rle_codes_huffman_bitlens, rle_codes_huffman_codes =
    GetHuffmanBitlenAndCode(rle_codes_counts, 7, 18)

  -- Position, in transmission order, of the last rle code that is in use.
  local last_used = 0
  for i = 19, 1, -1 do
    local symbol = _rle_codes_huffman_bitlen_order[i]
    if (rle_codes_huffman_bitlens[symbol] or 0) ~= 0 then
      last_used = i
      break
    end
  end

  -- The header stores each count relative to its minimum.
  local HCLEN = last_used - 4
  local HLIT = max_lcode + 1 - 257
  local HDIST = max_dcode + 1 - 1
  if HDIST < 0 then HDIST = 0 end

  return HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens, rle_codes_huffman_codes,
         rle_deflate_codes, rle_extra_bits, lcodes_huffman_bitlens,
         lcodes_huffman_codes, dcodes_huffman_bitlens, dcodes_huffman_codes
end

-- Get the size of dynamic block without writing any bits into the writer.
-- @param ...
--          Read the source code of GetBlockDynamicHuffmanHeader()
-- @return the bit length of the dynamic block
local function GetDynamicHuffmanBlockSize(lcodes, dcodes, HCLEN,
                                          rle_codes_huffman_bitlens,
                                          rle_deflate_codes,
                                          lcodes_huffman_bitlens,
                                          dcodes_huffman_bitlens)
  -- Fixed header fields: 1+2+5+5+4 bits, then 3 bits for each of the
  -- (HCLEN + 4) transmitted rle code lengths.
  local total = 17
  total = total + (HCLEN + 4) * 3

  -- Run length encoded bit length tables.
  for i = 1, #rle_deflate_codes do
    local rle_code = rle_deflate_codes[i]
    total = total + rle_codes_huffman_bitlens[rle_code]
    if rle_code == 16 then
      total = total + 2
    elseif rle_code == 17 then
      total = total + 3
    elseif rle_code >= 16 then
      total = total + 7
    end
  end

  -- Payload. The n-th length code pairs with the n-th entry of dcodes.
  local lengths_seen = 0
  for i = 1, #lcodes do
    local symbol = lcodes[i]
    total = total + lcodes_huffman_bitlens[symbol]
    if symbol > 256 then -- Length code
      lengths_seen = lengths_seen + 1
      if symbol > 264 and symbol < 285 then -- Length code with extra bits
        total = total + _literal_deflate_code_to_extra_bitlen[symbol - 256]
      end

      local dist_symbol = dcodes[lengths_seen]
      total = total + dcodes_huffman_bitlens[dist_symbol]
      if dist_symbol > 3 then -- dist code with extra bits
        total = total + ((dist_symbol - dist_symbol % 2) / 2 - 1)
      end
    end
  end
  return total
end

-- Write dynamic block.
-- @param ...
--          Read the source code of GetBlockDynamicHuffmanHeader()
local function CompressDynamicHuffmanBlock(WriteBits, is_last_block, lcodes,
                                           lextra_bits, dcodes, dextra_bits,
                                           HLIT, HDIST, HCLEN,
                                           rle_codes_huffman_bitlens,
                                           rle_codes_huffman_codes,
                                           rle_deflate_codes, rle_extra_bits,
                                           lcodes_huffman_bitlens,
                                           lcodes_huffman_codes,
                                           dcodes_huffman_bitlens,
                                           dcodes_huffman_codes)
  -- Block header.
  WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
  WriteBits(2, 2) -- Dynamic Huffman block identifier
  WriteBits(HLIT, 5)
  WriteBits(HDIST, 5)
  WriteBits(HCLEN, 4)

  -- Bit lengths of the rle codes, 3 bits each, in the transmission order
  -- given by _rle_codes_huffman_bitlen_order.
  for i = 1, HCLEN + 4 do
    local rle_symbol = _rle_codes_huffman_bitlen_order[i]
    WriteBits(rle_codes_huffman_bitlens[rle_symbol] or 0, 3)
  end

  -- The run length encoded bit length tables. rle_extra_bits holds one entry
  -- per code 16/17/18, in order of appearance.
  local next_rle_extra = 1
  for i = 1, #rle_deflate_codes do
    local rle_code = rle_deflate_codes[i]
    WriteBits(rle_codes_huffman_codes[rle_code],
              rle_codes_huffman_bitlens[rle_code])
    if rle_code >= 16 then
      local extra_bitlen
      if rle_code == 16 then
        extra_bitlen = 2
      elseif rle_code == 17 then
        extra_bitlen = 3
      else
        extra_bitlen = 7
      end
      WriteBits(rle_extra_bits[next_rle_extra], extra_bitlen)
      next_rle_extra = next_rle_extra + 1
    end
  end

  -- The payload. dcodes is indexed by the running number of length codes;
  -- lextra_bits / dextra_bits only hold entries for codes that carry extra
  -- bits, so each has its own running index.
  local lengths_seen = 0
  local length_extras_seen = 0
  local dist_extras_seen = 0

  for i = 1, #lcodes do
    local symbol = lcodes[i]
    WriteBits(lcodes_huffman_codes[symbol], lcodes_huffman_bitlens[symbol])
    if symbol > 256 then -- Length code
      lengths_seen = lengths_seen + 1
      if symbol > 264 and symbol < 285 then
        -- Length code with extra bits
        length_extras_seen = length_extras_seen + 1
        WriteBits(lextra_bits[length_extras_seen],
                  _literal_deflate_code_to_extra_bitlen[symbol - 256])
      end

      -- Write distance code
      local dist_symbol = dcodes[lengths_seen]
      WriteBits(dcodes_huffman_codes[dist_symbol],
                dcodes_huffman_bitlens[dist_symbol])

      if dist_symbol > 3 then -- dist code with extra bits
        dist_extras_seen = dist_extras_seen + 1
        WriteBits(dextra_bits[dist_extras_seen],
                  (dist_symbol - dist_symbol % 2) / 2 - 1)
      end
    end
  end
end

-- Get the size of fixed block without writing any bits into the writer.
-- @param lcodes literal/LZ77_length deflate codes
-- @param dcodes LZ77 distance deflate codes
-- @return the bit length of the fixed block
local function GetFixedHuffmanBlockSize(lcodes, dcodes)
  local total = 3 -- block header: 1 + 2 bits
  local lengths_seen = 0
  for i = 1, #lcodes do
    local symbol = lcodes[i]
    total = total + _fix_block_literal_huffman_bitlen[symbol]
    if symbol > 256 then -- Length code
      lengths_seen = lengths_seen + 1
      if symbol > 264 and symbol < 285 then -- Length code with extra bits
        total = total + _literal_deflate_code_to_extra_bitlen[symbol - 256]
      end

      -- Every distance code takes 5 bits in a fixed block.
      local dist_symbol = dcodes[lengths_seen]
      total = total + 5
      if dist_symbol > 3 then -- dist code with extra bits
        total = total + ((dist_symbol - dist_symbol % 2) / 2 - 1)
      end
    end
  end
  return total
end

-- Write fixed block.
-- @param lcodes literal/LZ77_length deflate codes
-- @param dcodes LZ77 distance deflate codes
local function CompressFixedHuffmanBlock(WriteBits, is_last_block, lcodes,
                                         lextra_bits, dcodes, dextra_bits)
  WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
  WriteBits(1, 2) -- Fixed Huffman block identifier

  -- dcodes is indexed by the running number of length codes; lextra_bits /
  -- dextra_bits only hold entries for codes that carry extra bits, so each
  -- has its own running index.
  local lengths_seen = 0
  local length_extras_seen = 0
  local dist_extras_seen = 0

  for i = 1, #lcodes do
    local symbol = lcodes[i]
    WriteBits(_fix_block_literal_huffman_code[symbol],
              _fix_block_literal_huffman_bitlen[symbol])
    if symbol > 256 then -- Length code
      lengths_seen = lengths_seen + 1
      if symbol > 264 and symbol < 285 then
        -- Length code with extra bits
        length_extras_seen = length_extras_seen + 1
        WriteBits(lextra_bits[length_extras_seen],
                  _literal_deflate_code_to_extra_bitlen[symbol - 256])
      end

      -- Write distance code, always 5 bits in a fixed block.
      local dist_symbol = dcodes[lengths_seen]
      WriteBits(_fix_block_dist_huffman_code[dist_symbol], 5)

      if dist_symbol > 3 then -- dist code with extra bits
        dist_extras_seen = dist_extras_seen + 1
        WriteBits(dextra_bits[dist_extras_seen],
                  (dist_symbol - dist_symbol % 2) / 2 - 1)
      end
    end
  end
end

-- Get the size of store block without writing any bits into the writer.
-- @param block_start The start index of the origin input string
-- @param block_end The end index of the origin input string
-- @param total_bitlen Total bit lens had been written into the compressed
--        result before, because store block needs to shift to byte boundary.
-- @return the bit length of the store block
local function GetStoreBlockSize(block_start, block_end, total_bitlen)
  local byte_count = block_end - block_start + 1
  assert(byte_count <= 65535)
  -- 3 header bits, then padding up to the next byte boundary, then the
  -- 32 bits of size fields, then the raw bytes.
  local padding_bitlen = (8 - (total_bitlen + 3) % 8) % 8
  return 3 + padding_bitlen + 32 + byte_count * 8
end

-- Write the store block.
-- @param WriteBits Called as WriteBits(value, bitlen) for every bit field.
-- @param WriteString Called once with the raw bytes of the block.
-- @param is_last_block Whether to mark this block as the last one.
-- @param str The origin input string.
-- @param block_start The start index of the block in str.
-- @param block_end The end index of the block in str.
-- @param total_bitlen Total bit lens had been written before this block.
-- @return nil
local function CompressStoreBlock(WriteBits, WriteString, is_last_block, str,
                                  block_start, block_end, total_bitlen)
  local size = block_end - block_start + 1
  assert(size <= 65535)

  WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier.
  WriteBits(0, 2) -- Store block identifier.

  -- Fill up to the byte boundary (with one bits).
  local padding_bitlen = (8 - (total_bitlen + 3) % 8) % 8
  if padding_bitlen > 0 then
    WriteBits(_pow2[padding_bitlen] - 1, padding_bitlen)
  end

  WriteBits(size, 16)

  -- Write size's one's complement, computed byte by byte.
  local low = size % 256
  local high = (size - low) / 256
  WriteBits((255 - low) + (255 - high) * 256, 16)

  WriteString(str:sub(block_start, block_end))
end

-- Do the deflate
-- Currently using a simple way to determine the block size
-- (This is why the compression ratio is little bit worse than zlib when
-- the input size is very large
-- The first block is 64KB, the following block is 32KB.
-- After each block, there is a memory cleanup operation.
-- This is not a fast operation, but it is needed to save memory usage, so
-- the memory usage does not grow unboundly. If the data size is less than
-- 64KB, then memory cleanup won't happen.
-- This function determines whether to use store/fixed/dynamic blocks by
-- calculating the block size of each block type and chooses the smallest one.
local function Deflate(configs, WriteBits, WriteString, FlushWriter, str,
                       dictionary)
  -- Bits already sitting in the writer before this call.
  local total_bitlen = FlushWriter(_FLUSH_MODE_NO_FLUSH)
  local strlen = #str

  local level = configs and configs.level or nil
  local strategy = configs and configs.strategy or nil

  -- Default level depends on the input size.
  if not level then
    if strlen < 2048 then
      level = 7
    elseif strlen > 65536 then
      level = 3
    else
      level = 5
    end
  end

  local string_table = {}
  local hash_tables = {}
  local block_start, block_end, offset
  local is_last_block

  repeat
    -- First block covers 64K-1 bytes, every following block 32K bytes.
    if block_start then
      block_start = block_end + 1
      block_end = block_end + 32 * 1024
      offset = block_start - 32 * 1024 - 1
    else
      block_start = 1
      block_end = 64 * 1024 - 1
      offset = 0
    end

    is_last_block = block_end >= strlen
    if is_last_block then block_end = strlen end

    local lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits, dcodes_counts

    local HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens,
          rle_codes_huffman_codes, rle_deflate_codes, rle_extra_bits,
          lcodes_huffman_bitlens, lcodes_huffman_codes, dcodes_huffman_bitlens,
          dcodes_huffman_codes

    -- These two stay nil for level 0, where only a store block is possible.
    local dynamic_block_bitlen
    local fixed_block_bitlen

    if level ~= 0 then
      -- GetBlockLZ77 needs block_start to block_end+3 to be loaded.
      LoadStringToTable(str, string_table, block_start, block_end + 3, offset)
      if block_start == 1 and dictionary then
        -- Make the tail of the dictionary (at most 258 bytes) reachable at
        -- the non-positive indexes of string_table.
        local dict_string_table = dictionary.string_table
        local dict_strlen = dictionary.strlen
        local lowest = -dict_strlen + 1
        if lowest < -257 then lowest = -257 end
        for i = 0, lowest, -1 do
          string_table[i] = dict_string_table[dict_strlen + i]
        end
      end

      if strategy == "huffman_only" then
        -- No LZ77: every byte is a literal, followed by the end of block
        -- symbol.
        local symbol_total = block_end - block_start + 2
        lcodes = LoadStringToTable(str, {}, block_start, block_end,
                                   block_start - 1)
        lcodes[symbol_total] = 256 -- end of block
        lcodes_counts = {}
        for i = 1, symbol_total do
          local code = lcodes[i]
          lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
        end
        lextra_bits = {}
        dcodes = {}
        dextra_bits = {}
        dcodes_counts = {}
      else
        lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits, dcodes_counts =
          GetBlockLZ77Result(level, string_table, hash_tables, block_start,
                             block_end, offset, dictionary)
      end

      HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens, rle_codes_huffman_codes,
        rle_deflate_codes, rle_extra_bits, lcodes_huffman_bitlens,
        lcodes_huffman_codes, dcodes_huffman_bitlens, dcodes_huffman_codes =
        GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
      dynamic_block_bitlen = GetDynamicHuffmanBlockSize(lcodes, dcodes, HCLEN,
                                                        rle_codes_huffman_bitlens,
                                                        rle_deflate_codes,
                                                        lcodes_huffman_bitlens,
                                                        dcodes_huffman_bitlens)
      fixed_block_bitlen = GetFixedHuffmanBlockSize(lcodes, dcodes)
    end

    local store_block_bitlen = GetStoreBlockSize(block_start, block_end,
                                                 total_bitlen)

    -- Smallest of the sizes that were computed.
    local min_bitlen = store_block_bitlen
    if fixed_block_bitlen and fixed_block_bitlen < min_bitlen then
      min_bitlen = fixed_block_bitlen
    end
    if dynamic_block_bitlen and dynamic_block_bitlen < min_bitlen then
      min_bitlen = dynamic_block_bitlen
    end

    -- Level 0 always stores. The "fixed" and "dynamic" strategies force their
    -- block type. Otherwise the smallest block wins, with ties resolved in
    -- the order store, fixed, dynamic.
    local forced_fixed = (strategy == "fixed")
    local forced_dynamic = (strategy == "dynamic")
    local use_store = level == 0 or
                        (not forced_fixed and not forced_dynamic and
                          store_block_bitlen == min_bitlen)
    local use_fixed = not forced_dynamic and
                        (forced_fixed or fixed_block_bitlen == min_bitlen)
    local use_dynamic = forced_dynamic or dynamic_block_bitlen == min_bitlen

    if use_store then
      CompressStoreBlock(WriteBits, WriteString, is_last_block, str,
                         block_start, block_end, total_bitlen)
      total_bitlen = total_bitlen + store_block_bitlen
    elseif use_fixed then
      CompressFixedHuffmanBlock(WriteBits, is_last_block, lcodes, lextra_bits,
                                dcodes, dextra_bits)
      total_bitlen = total_bitlen + fixed_block_bitlen
    elseif use_dynamic then
      CompressDynamicHuffmanBlock(WriteBits, is_last_block, lcodes, lextra_bits,
                                  dcodes, dextra_bits, HLIT, HDIST, HCLEN,
                                  rle_codes_huffman_bitlens,
                                  rle_codes_huffman_codes, rle_deflate_codes,
                                  rle_extra_bits, lcodes_huffman_bitlens,
                                  lcodes_huffman_codes, dcodes_huffman_bitlens,
                                  dcodes_huffman_codes)
      total_bitlen = total_bitlen + dynamic_block_bitlen
    end

    local bitlen_written
    if is_last_block then
      bitlen_written = FlushWriter(_FLUSH_MODE_NO_FLUSH)
    else
      bitlen_written = FlushWriter(_FLUSH_MODE_MEMORY_CLEANUP)
    end

    -- The size predicted above must match what the writer actually holds.
    assert(bitlen_written == total_bitlen)

    -- Memory clean up, so memory consumption does not always grow linearly,
    -- even if input string is > 64K.
    -- Not a very efficient operation, but this operation won't happen
    -- when the input data size is less than 64K.
    if not is_last_block then
      -- Drop the dictionary bytes that were loaded at indexes 0, -1, -2, ...
      if dictionary and block_start == 1 then
        local j = 0
        while string_table[j] do
          string_table[j] = nil
          j = j - 1
        end
      end
      dictionary = nil

      -- Move the last 32K bytes of this block to the front of string_table,
      -- where the next block (with its new offset) expects its history.
      local dst = 1
      for src = block_end - 32767, block_end do
        string_table[dst] = string_table[src - offset]
        dst = dst + 1
      end

      -- Prune the hash chains whose oldest position fell out of the 32K
      -- window.
      for hash, positions in pairs(hash_tables) do
        local position_total = #positions
        if position_total > 0 and block_end + 1 - positions[1] > 32768 then
          if position_total == 1 then
            hash_tables[hash] = nil
          else
            local kept = {}
            local kept_total = 0
            for i = 2, position_total do
              local pos = positions[i]
              if block_end + 1 - pos <= 32768 then
                kept_total = kept_total + 1
                kept[kept_total] = pos
              end
            end
            hash_tables[hash] = kept
          end
        end
      end
    end
  until is_last_block
end

--- The description to compression configuration table.
+-- Any field can be nil to use its default.
-- Table with keys other than those below is an invalid table.
-- @class table
-- @name compression_configs
-- @field level The compression level ranged from 0 to 9. 0 is no compression.
-- 9 is the slowest but best compression. Use nil for default level.
-- @field strategy The compression strategy. "fixed" to only use fixed deflate
-- compression block. "dynamic" to only use dynamic block. "huffman_only" to
-- do no LZ77 compression. Only do huffman compression.

-- Shared implementation of the raw deflate compressors.
-- @return The compressed string.
-- @return The number of padding bits (0..7) in the last byte of the output.
-- @see LibDeflate:CompressDeflate(str, configs)
-- @see LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
local function CompressDeflateInternal(str, dictionary, configs)
  local WriteBits, WriteString, FlushWriter = CreateWriter()
  Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
  local bits_written, compressed = FlushWriter(_FLUSH_MODE_OUTPUT)
  -- Bits needed to reach the next byte boundary.
  return compressed, (8 - bits_written % 8) % 8
end

-- Shared implementation of the zlib compressors: a two byte header, an
-- optional dictionary checksum, the deflate stream, then the Adler-32 of the
-- uncompressed data.
-- @return The compressed string.
-- @return The number of padding bits in the last byte of the output.
-- @see LibDeflate:CompressZlib
-- @see LibDeflate:CompressZlibWithDict
local function CompressZlibInternal(str, dictionary, configs)
  local WriteBits, WriteString, FlushWriter = CreateWriter()

  -- Write a 32-bit value as four bytes, most significant byte first.
  local function write_uint32_be(value)
    local b0 = value % 256
    value = (value - b0) / 256
    local b1 = value % 256
    value = (value - b1) / 256
    local b2 = value % 256
    value = (value - b2) / 256
    local b3 = value % 256
    WriteBits(b3, 8)
    WriteBits(b2, 8)
    WriteBits(b1, 8)
    WriteBits(b0, 8)
  end

  -- CMF: CINFO = 7 (32K window) in the high nibble, CM = 8 in the low nibble.
  local cmf = 7 * 16 + 8
  WriteBits(cmf, 8)

  -- FLG: FLEVEL = 2 (default compression), FDICT set when a dictionary is
  -- used. FCHECK is chosen so that cmf * 256 + flg is a multiple of 31.
  local has_dictionary = dictionary and 1 or 0
  local flg = 2 * 64 + has_dictionary * 32
  flg = flg + (31 - (cmf * 256 + flg) % 31)
  WriteBits(flg, 8)

  if has_dictionary == 1 then
    write_uint32_be(dictionary.adler32)
  end

  Deflate(configs, WriteBits, WriteString, FlushWriter, str, dictionary)
  FlushWriter(_FLUSH_MODE_BYTE_BOUNDARY)

  write_uint32_be(LibDeflate:Adler32(str))

  local bits_written, compressed = FlushWriter(_FLUSH_MODE_OUTPUT)
  return compressed, (8 - bits_written % 8) % 8
end

--- Compress using the raw deflate format.
-- @param str [string] The data to be compressed.
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- 0 <= bits < 8
-- This means the most significant "bits" of the last byte of the returned
-- compressed data are padding bits and they don't affect decompression.
-- You don't need to use this value unless you want to do some postprocessing
-- to the compressed data.
-- @see compression_configs
-- @see LibDeflate:DecompressDeflate
function LibDeflate:CompressDeflate(str, configs)
  local ok, reason = IsValidArguments(str, false, nil, true, configs)
  if ok then
    return CompressDeflateInternal(str, nil, configs)
  end
  -- Level 2 reports the error at the caller's position.
  error("Usage: LibDeflate:CompressDeflate(str, configs): " .. reason, 2)
end

--- Compress using the raw deflate format with a preset dictionary.
-- @param str [string] The data to be compressed.
-- @param dictionary [table] The preset dictionary produced by
-- LibDeflate:CreateDictionary
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- 0 <= bits < 8
-- This means the most significant "bits" of the last byte of the returned
-- compressed data are padding bits and they don't affect decompression.
-- You don't need to use this value unless you want to do some postprocessing
-- to the compressed data.
-- @see compression_configs
-- @see LibDeflate:CreateDictionary
-- @see LibDeflate:DecompressDeflateWithDict
function LibDeflate:CompressDeflateWithDict(str, dictionary, configs)
  local ok, reason = IsValidArguments(str, true, dictionary, true, configs)
  if ok then
    return CompressDeflateInternal(str, dictionary, configs)
  end
  -- Level 2 reports the error at the caller's position.
  error("Usage: LibDeflate:CompressDeflateWithDict" ..
          "(str, dictionary, configs): " .. reason, 2)
end

--- Compress using the zlib format.
-- @param str [string] the data to be compressed.
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- Should always be 0.
-- Zlib formatted compressed data never has padding bits at the end.
-- @see compression_configs
-- @see LibDeflate:DecompressZlib
function LibDeflate:CompressZlib(str, configs)
  local ok, reason = IsValidArguments(str, false, nil, true, configs)
  if ok then
    return CompressZlibInternal(str, nil, configs)
  end
  error("Usage: LibDeflate:CompressZlib(str, configs): " .. reason, 2)
end

--- Compress using the zlib format with a preset dictionary.
-- @param str [string] the data to be compressed.
-- @param dictionary [table] A preset dictionary produced
-- by LibDeflate:CreateDictionary()
-- @param configs [table/nil] The configuration table to control the compression
-- . If nil, use the default configuration.
-- @return [string] The compressed data.
-- @return [integer] The number of bits padded at the end of output.
-- Should always be 0.
-- Zlib formatted compressed data never has padding bits at the end.
-- @see compression_configs
-- @see LibDeflate:CreateDictionary
-- @see LibDeflate:DecompressZlibWithDict
function LibDeflate:CompressZlibWithDict(str, dictionary, configs)
  local arg_valid, arg_err = IsValidArguments(str, true, dictionary, true,
                                              configs)
  if not arg_valid then
    error(("Usage: LibDeflate:CompressZlibWithDict" ..
            "(str, dictionary, configs): " .. arg_err), 2)
  end
  return CompressZlibInternal(str, dictionary, configs)
end

--[[ --------------------------------------------------------------------------
  Decompress code
--]] --------------------------------------------------------------------------

--[[
  Create a reader that consumes the input string in units of bits.
  Bits are taken least significant first from a numeric cache that is
  refilled four input bytes at a time.
  Return values:
  1. ReadBits(bitlen)
  2. ReadBytes(bytelen, buffer, buffer_size)
  3. Decode(huffman_bitlen_count, huffman_symbol, min_bitlen)
  4. ReaderBitlenLeft()
  5. SkipToByteBoundary()
--]]
local function CreateReader(input_string)
  local input = input_string
  local input_strlen = #input_string
  local input_next_byte_pos = 1 -- Index of the next input byte to load
  local cache_bitlen = 0 -- Number of valid bits currently held in "cache"
  local cache = 0 -- Bits loaded from the input but not consumed yet

  -- Read some bits.
  -- To improve speed, this function does not
  -- check if the input has been exhausted.
  -- Use ReaderBitlenLeft() < 0 to check it.
  -- @param bitlen the number of bits to read
  -- @return the bits read, as a non-negative number.
  local function ReadBits(bitlen)
    local rshift_mask = _pow2[bitlen]
    local code
    if bitlen <= cache_bitlen then
      -- Enough bits are cached: take the low "bitlen" bits.
      code = cache % rshift_mask
      cache = (cache - code) / rshift_mask
      cache_bitlen = cache_bitlen - bitlen
    else -- Whether input has been exhausted is not checked.
      local lshift_mask = _pow2[cache_bitlen]
      local byte1, byte2, byte3, byte4 =
        string_byte(input, input_next_byte_pos, input_next_byte_pos + 3)
      -- Place the next four bytes above the bits already cached; bytes past
      -- the end of the input count as 0. The cache can briefly hold more
      -- than 32 bits, so Lua numbers must be at least doubles.
      cache = cache +
                ((byte1 or 0) + (byte2 or 0) * 256 + (byte3 or 0) * 65536 +
                  (byte4 or 0) * 16777216) * lshift_mask
      -- The position advances by four even when fewer bytes were available,
      -- which is what lets ReaderBitlenLeft() become negative.
      input_next_byte_pos = input_next_byte_pos + 4
      cache_bitlen = cache_bitlen + 32 - bitlen
      code = cache % rshift_mask
      cache = (cache - code) / rshift_mask
    end
    return code
  end

  -- Read some bytes from the reader.
  -- The reader must already be on a byte boundary (this is asserted).
  -- @param bytelen The number of bytes to be read.
  -- @param buffer The bytes read are stored into this buffer as
  -- one-character strings.
  -- @param buffer_size The buffer will be modified starting from
  -- buffer[buffer_size+1], ending at buffer[buffer_size+bytelen]
  -- @return the new buffer_size, or -1 if the input holds too few bytes.
  local function ReadBytes(bytelen, buffer, buffer_size)
    assert(cache_bitlen % 8 == 0)

    -- Drain whole bytes that are still sitting in the cache first.
    local byte_from_cache =
      (cache_bitlen / 8 < bytelen) and (cache_bitlen / 8) or bytelen
    for _ = 1, byte_from_cache do
      local byte = cache % 256
      buffer_size = buffer_size + 1
      buffer[buffer_size] = string_char(byte)
      cache = (cache - byte) / 256
    end
    cache_bitlen = cache_bitlen - byte_from_cache * 8
    bytelen = bytelen - byte_from_cache
    if (input_strlen - input_next_byte_pos - bytelen + 1) * 8 + cache_bitlen < 0 then
      return -1 -- out of input
    end
    -- The remaining bytes are copied straight from the input string.
    for i = input_next_byte_pos, input_next_byte_pos + bytelen - 1 do
      buffer_size = buffer_size + 1
      buffer[buffer_size] = string_sub(input, i, i)
    end

    input_next_byte_pos = input_next_byte_pos + bytelen
    return buffer_size
  end

  -- Decode huffman code
  -- To improve speed, this function does not check
  -- if the input has been exhausted.
  -- Use ReaderBitlenLeft() < 0 to check it.
  -- Credits for Mark Adler. This code is from puff:Decode()
  -- @see puff:Decode(...)
  -- @param huffman_bitlen_counts The number of codes of each bit length.
  -- @param huffman_symbols The symbols ordered by their huffman code.
  -- @param min_bitlen The minimum huffman bit length of all symbols
  -- @return The decoded deflate code.
  -- -10 is returned if no code matches within 15 bits.
  local function Decode(huffman_bitlen_counts, huffman_symbols, min_bitlen)
    local code = 0
    local first = 0
    local index = 0
    local count
    if min_bitlen > 0 then
      -- Refill so that the cache holds at least the 15 bits a code can need.
      if cache_bitlen < 15 and input then
        local lshift_mask = _pow2[cache_bitlen]
        local byte1, byte2, byte3, byte4 =
          string_byte(input, input_next_byte_pos, input_next_byte_pos + 3)
        -- Same refill as in ReadBits; this requires Lua numbers to be at
        -- least doubles.
        cache = cache +
                  ((byte1 or 0) + (byte2 or 0) * 256 + (byte3 or 0) * 65536 +
                    (byte4 or 0) * 16777216) * lshift_mask
        input_next_byte_pos = input_next_byte_pos + 4
        cache_bitlen = cache_bitlen + 32
      end

      -- Consume the first min_bitlen bits in one step.
      local rshift_mask = _pow2[min_bitlen]
      cache_bitlen = cache_bitlen - min_bitlen
      code = cache % rshift_mask
      cache = (cache - code) / rshift_mask
      -- Reverse the bits
      code = _reverse_bits_tbl[min_bitlen][code]

      count = huffman_bitlen_counts[min_bitlen]
      if code < count then return huffman_symbols[code] end
      index = count
      first = count * 2
      code = code * 2
    end

    -- Extend the code one bit at a time up to the 15 bit maximum.
    for bitlen = min_bitlen + 1, 15 do
      local bit
      bit = cache % 2
      cache = (cache - bit) / 2
      cache_bitlen = cache_bitlen - 1

      code = (bit == 1) and (code + 1 - code % 2) or code
      count = huffman_bitlen_counts[bitlen] or 0
      local diff = code - first
      if diff < count then return huffman_symbols[index + diff] end
      index = index + count
      first = first + count
      first = first * 2
      code = code * 2
    end
    -- invalid literal/length or distance code
    -- in fixed or dynamic block (run out of code)
    return -10
  end

  -- @return The number of unread bits. Negative once more bits have been
  -- consumed than the input contains.
  local function ReaderBitlenLeft()
    return (input_strlen - input_next_byte_pos + 1) * 8 + cache_bitlen
  end

  -- Discard cached bits until the number of cached bits is a multiple of 8.
  local function SkipToByteBoundary()
    local skipped_bitlen = cache_bitlen % 8
    local rshift_mask = _pow2[skipped_bitlen]
    cache_bitlen = cache_bitlen - skipped_bitlen
    cache = (cache - cache % rshift_mask) / rshift_mask
  end

  return ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary
end

-- Create a deflate state, so I can pass in less arguments to functions.
-- @param str the whole string to be decompressed.
-- @param dictionary The preset dictionary. nil if not provided.
-- This dictionary should be produced by LibDeflate:CreateDictionary(str)
-- @return The decompress state.
local function CreateDecompressState(str, dictionary)
  local ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary =
    CreateReader(str)
  local state = {
    ReadBits = ReadBits,
    ReadBytes = ReadBytes,
    Decode = Decode,
    ReaderBitlenLeft = ReaderBitlenLeft,
    SkipToByteBoundary = SkipToByteBoundary,
    buffer_size = 0, -- Number of decoded characters currently in "buffer"
    buffer = {}, -- Decoded output not yet moved into result_buffer
    result_buffer = {}, -- Strings of already flushed output
    dictionary = dictionary
  }
  return state
end

-- Get the stuffs needed to decode huffman codes
-- @see puff.c:construct(...)
-- @param huffman_bitlen The huffman bit length of the huffman codes.
-- @param max_symbol The maximum symbol
-- @param max_bitlen The maximum huffman bit length of all codes
-- @return zero or positive for success, negative for failure.
-- @return The count of each huffman bit length.
-- @return A table to convert huffman codes to deflate codes.
-- @return The minimum huffman bit length.
local function GetHuffmanForDecode(huffman_bitlens, max_symbol, max_bitlen)
  -- Count how many symbols use each code length and remember the shortest
  -- non-zero length. Symbols without an entry count as length 0 (unused).
  local length_counts = {}
  local shortest = max_bitlen
  for sym = 0, max_symbol do
    local len = huffman_bitlens[sym] or 0
    if len > 0 and len < shortest then shortest = len end
    length_counts[len] = (length_counts[len] or 0) + 1
  end

  -- No symbol has a code: reported as complete, but decoding will fail.
  if length_counts[0] == max_symbol + 1 then
    return 0, length_counts, {}, 0
  end

  -- Track how many codes are still unassigned at each length.
  -- A negative value means the set is over-subscribed.
  local unused = 1
  for len = 1, max_bitlen do
    unused = unused * 2 - (length_counts[len] or 0)
    if unused < 0 then return unused end
  end

  -- next_slot[len] is where the next symbol of that length goes in the
  -- symbol table, which is ordered by code length, then by symbol value.
  local next_slot = {0}
  for len = 1, max_bitlen - 1 do
    next_slot[len + 1] = next_slot[len] + (length_counts[len] or 0)
  end

  local ordered_symbols = {}
  for sym = 0, max_symbol do
    local len = huffman_bitlens[sym] or 0
    if len ~= 0 then
      local slot = next_slot[len]
      ordered_symbols[slot] = sym
      next_slot[len] = slot + 1
    end
  end

  -- Zero for a complete set, positive for an incomplete set.
  return unused, length_counts, ordered_symbols, shortest
end

-- Decode a fixed or dynamic huffman block, excluding the last block identifier
-- and the block type identifier.
-- @see puff.c:codes()
-- @param state decompression state that will be modified by this function.
-- @see CreateDecompressState
-- @param ... Read the source code
-- @return 0 on success, other value on failure.
local function DecodeUntilEndOfBlock(state, lcodes_huffman_bitlens,
                                     lcodes_huffman_symbols,
                                     lcodes_huffman_min_bitlen,
                                     dcodes_huffman_bitlens,
                                     dcodes_huffman_symbols,
                                     dcodes_huffman_min_bitlen)
  local buffer, buffer_size, ReadBits, Decode, ReaderBitlenLeft, result_buffer =
    state.buffer, state.buffer_size, state.ReadBits, state.Decode,
    state.ReaderBitlenLeft, state.result_buffer
  local dictionary = state.dictionary
  local dict_string_table
  local dict_strlen

  -- Lowest buffer index a back reference may point to. Index 1 is the first
  -- decoded character; indices <= 0 address the preset dictionary.
  local buffer_end = 1
  if dictionary then
    -- The dictionary bounds are needed by every block of the stream, not
    -- only the first one, because later blocks may still refer back into
    -- the dictionary.
    dict_string_table = dictionary.string_table
    dict_strlen = dictionary.strlen
    buffer_end = -dict_strlen + 1
    if not buffer[0] then
      -- Copy the last 258 bytes of the dictionary into buffer[-257..0]
      -- to make the copy in the main loop quicker.
      -- This is done only once per decompression.
      for i = 0, (-dict_strlen + 1) < -257 and -257 or (-dict_strlen + 1), -1 do
        buffer[i] = _byte_to_char[dict_string_table[dict_strlen + i]]
      end
    end
  end

  repeat
    local symbol = Decode(lcodes_huffman_bitlens, lcodes_huffman_symbols,
                          lcodes_huffman_min_bitlen)
    if symbol < 0 or symbol > 285 then
      -- invalid literal/length or distance code in fixed or dynamic block
      return -10
    elseif symbol < 256 then -- Literal
      buffer_size = buffer_size + 1
      buffer[buffer_size] = _byte_to_char[symbol]
    elseif symbol > 256 then -- Length code
      symbol = symbol - 256
      -- "bitlen" holds the match length in bytes.
      local bitlen = _literal_deflate_code_to_base_len[symbol]
      bitlen = (symbol >= 8) and
                 (bitlen +
                   ReadBits(_literal_deflate_code_to_extra_bitlen[symbol])) or
                 bitlen
      symbol = Decode(dcodes_huffman_bitlens, dcodes_huffman_symbols,
                      dcodes_huffman_min_bitlen)
      if symbol < 0 or symbol > 29 then
        -- invalid literal/length or distance code in fixed or dynamic block
        return -10
      end
      local dist = _dist_deflate_code_to_base_dist[symbol]
      dist = (dist > 4) and
               (dist + ReadBits(_dist_deflate_code_to_extra_bitlen[symbol])) or
               dist

      local char_buffer_index = buffer_size - dist + 1
      if char_buffer_index < buffer_end then
        -- distance is too far back in fixed or dynamic block
        return -11
      end
      if char_buffer_index >= -257 then
        -- The source lies in the output or in the cached dictionary tail.
        for _ = 1, bitlen do
          buffer_size = buffer_size + 1
          buffer[buffer_size] = buffer[char_buffer_index]
          char_buffer_index = char_buffer_index + 1
        end
      else
        -- The source starts before the cached tail: read the dictionary
        -- directly. A match is at most 258 bytes, so it ends inside it.
        char_buffer_index = dict_strlen + char_buffer_index
        for _ = 1, bitlen do
          buffer_size = buffer_size + 1
          buffer[buffer_size] =
            _byte_to_char[dict_string_table[char_buffer_index]]
          char_buffer_index = char_buffer_index + 1
        end
      end
    end

    if ReaderBitlenLeft() < 0 then
      return 2 -- available inflate data did not terminate
    end

    if buffer_size >= 65536 then
      -- Flush the oldest 32K and keep the rest as the back reference window.
      result_buffer[#result_buffer + 1] = table_concat(buffer, "", 1, 32768)
      for i = 32769, buffer_size do buffer[i - 32768] = buffer[i] end
      buffer_size = buffer_size - 32768
      buffer[buffer_size + 1] = nil
      -- NOTE: buffer[32769..end] and buffer[-257..0] are not cleared.
      -- This is why "buffer_size" variable is needed.
    end
  until symbol == 256

  state.buffer_size = buffer_size

  return 0
end

-- Decompress a store block
-- @param state decompression state that will be modified by this function.
-- @return 0 if succeeds, other value if fails.
local function DecompressStoreBlock(state)
  local ReadBits, ReaderBitlenLeft = state.ReadBits, state.ReaderBitlenLeft

  -- The length fields of a store block start on a byte boundary.
  state.SkipToByteBoundary()
  local len = ReadBits(16)
  if ReaderBitlenLeft() < 0 then
    return 2 -- available inflate data did not terminate
  end
  local len_complement = ReadBits(16)
  if ReaderBitlenLeft() < 0 then
    return 2 -- available inflate data did not terminate
  end

  -- The second field must be the one's complement of the first.
  -- Both bytes are compared separately.
  local len_low, complement_low = len % 256, len_complement % 256
  local low_ok = (len_low + complement_low == 255)
  local high_ok = ((len - len_low) / 256 +
                    (len_complement - complement_low) / 256 == 255)
  if not (low_ok and high_ok) then
    return -2 -- Not one's complement
  end

  -- The reader is byte aligned here, so the data can be copied bytewise.
  local buffer = state.buffer
  local size = state.ReadBytes(len, buffer, state.buffer_size)
  if size < 0 then
    return 2 -- available inflate data did not terminate
  end

  -- Memory clean up: once 64K are buffered, move the oldest 32K into the
  -- result and shift the remainder to the front of the buffer.
  if size >= 65536 then
    local result_buffer = state.result_buffer
    result_buffer[#result_buffer + 1] = table_concat(buffer, "", 1, 32768)
    for i = 32769, size do buffer[i - 32768] = buffer[i] end
    size = size - 32768
    buffer[size + 1] = nil
  end
  state.buffer_size = size
  return 0
end

-- Decompress a fixed block
-- @param state decompression state that will be modified by this function.
-- @return 0 if succeeds other value if fails.
local function DecompressFixBlock(state)
  -- 7 and 5 are passed as the minimum code lengths of the fixed
  -- literal/length and distance codes.
  local literal_min_bitlen, dist_min_bitlen = 7, 5
  return DecodeUntilEndOfBlock(state, _fix_block_literal_huffman_bitlen_count,
                               _fix_block_literal_huffman_to_deflate_code,
                               literal_min_bitlen,
                               _fix_block_dist_huffman_bitlen_count,
                               _fix_block_dist_huffman_to_deflate_code,
                               dist_min_bitlen)
end

-- Decompress a dynamic block
-- @param state decompression state that will be modified by this function.
-- @return 0 if success, other value if fails.
local function DecompressDynamicBlock(state)
  local ReadBits, Decode = state.ReadBits, state.Decode
  -- Block header: the number of literal/length codes, distance codes and
  -- code length codes that follow.
  local nlen = ReadBits(5) + 257
  local ndist = ReadBits(5) + 1
  local ncode = ReadBits(4) + 4
  if nlen > 286 or ndist > 30 then
    -- dynamic block code description: too many length or distance codes
    return -3
  end

  local rle_codes_huffman_bitlens = {}

  -- The code length code lengths are 3 bits each and are stored in the
  -- permuted order given by _rle_codes_huffman_bitlen_order.
  for i = 1, ncode do
    rle_codes_huffman_bitlens[_rle_codes_huffman_bitlen_order[i]] = ReadBits(3)
  end

  local rle_codes_err, rle_codes_huffman_bitlen_counts,
        rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen =
    GetHuffmanForDecode(rle_codes_huffman_bitlens, 18, 7)
  if rle_codes_err ~= 0 then -- Require complete code set here
    -- dynamic block code description: code lengths codes incomplete
    return -4
  end

  local lcodes_huffman_bitlens = {}
  local dcodes_huffman_bitlens = {}
  -- Read length/literal and distance code length tables.
  -- Both tables are coded as one run of nlen + ndist lengths; entries with
  -- index >= nlen belong to the distance table.
  local index = 0
  while index < nlen + ndist do
    local symbol -- Decoded value
    local bitlen -- Last length to repeat

    symbol = Decode(rle_codes_huffman_bitlen_counts, rle_codes_huffman_symbols,
                    rle_codes_huffman_min_bitlen)

    if symbol < 0 then
      return symbol -- Invalid symbol
    elseif symbol < 16 then
      -- 0..15 is a literal code length.
      if index < nlen then
        lcodes_huffman_bitlens[index] = symbol
      else
        dcodes_huffman_bitlens[index - nlen] = symbol
      end
      index = index + 1
    else
      -- 16..18 are repeat instructions; "symbol" is reused below as the
      -- repeat count.
      bitlen = 0
      if symbol == 16 then -- Repeat the previous length 3..6 times
        if index == 0 then
          -- dynamic block code description: repeat lengths
          -- with no first length
          return -5
        end
        if index - 1 < nlen then
          bitlen = lcodes_huffman_bitlens[index - 1]
        else
          bitlen = dcodes_huffman_bitlens[index - nlen - 1]
        end
        symbol = 3 + ReadBits(2)
      elseif symbol == 17 then -- Repeat zero 3..10 times
        symbol = 3 + ReadBits(3)
      else -- == 18, repeat zero 11..138 times
        symbol = 11 + ReadBits(7)
      end
      if index + symbol > nlen + ndist then
        -- dynamic block code description:
        -- repeat more than specified lengths
        return -6
      end
      while symbol > 0 do -- Repeat last or zero symbol times
        symbol = symbol - 1
        if index < nlen then
          lcodes_huffman_bitlens[index] = bitlen
        else
          dcodes_huffman_bitlens[index - nlen] = bitlen
        end
        index = index + 1
      end
    end
  end

  if (lcodes_huffman_bitlens[256] or 0) == 0 then
    -- dynamic block code description: missing end-of-block code
    return -9
  end

  local lcodes_err, lcodes_huffman_bitlen_counts, lcodes_huffman_symbols,
        lcodes_huffman_min_bitlen = GetHuffmanForDecode(lcodes_huffman_bitlens,
                                                        nlen - 1, 15)
  -- dynamic block code description: invalid literal/length code lengths,
  -- Incomplete code ok only for single length 1 code
  if (lcodes_err ~= 0 and
    (lcodes_err < 0 or nlen ~= (lcodes_huffman_bitlen_counts[0] or 0) +
      (lcodes_huffman_bitlen_counts[1] or 0))) then return -7 end

  local dcodes_err, dcodes_huffman_bitlen_counts, dcodes_huffman_symbols,
        dcodes_huffman_min_bitlen = GetHuffmanForDecode(dcodes_huffman_bitlens,
                                                        ndist - 1, 15)
  -- dynamic block code description: invalid distance code lengths,
  -- Incomplete code ok only for single length 1 code
  if (dcodes_err ~= 0 and
    (dcodes_err < 0 or ndist ~= (dcodes_huffman_bitlen_counts[0] or 0) +
      (dcodes_huffman_bitlen_counts[1] or 0))) then return -8 end

  -- Both huffman tables are built: decode the block body with them.
  return DecodeUntilEndOfBlock(state, lcodes_huffman_bitlen_counts,
                               lcodes_huffman_symbols,
                               lcodes_huffman_min_bitlen,
                               dcodes_huffman_bitlen_counts,
                               dcodes_huffman_symbols, dcodes_huffman_min_bitlen)
end

-- Decompress a deflate stream
-- @param state: a decompression state
-- @return the decompressed string if succeeds. nil if fails.
local function Inflate(state)
  local ReadBits = state.ReadBits

  -- Every block starts with a 1 bit "last block" flag and a 2 bit type.
  local final_block
  repeat
    final_block = (ReadBits(1) == 1)
    local block_type = ReadBits(2)
    local status
    if block_type == 0 then
      status = DecompressStoreBlock(state)
    elseif block_type == 1 then
      status = DecompressFixBlock(state)
    elseif block_type == 2 then
      status = DecompressDynamicBlock(state)
    else
      return nil, -1 -- invalid block type (type == 3)
    end
    if status ~= 0 then return nil, status end
  until final_block

  -- Append what is still buffered to the already flushed pieces.
  local pieces = state.result_buffer
  pieces[#pieces + 1] = table_concat(state.buffer, "", 1, state.buffer_size)
  local result = table_concat(pieces)
  return result
end

-- Shared implementation of the raw deflate decompressors.
-- @return The decompressed string, or nil followed by a status code.
-- @return On success, the number of whole unread input bytes.
-- @see LibDeflate:DecompressDeflate(str)
-- @see LibDeflate:DecompressDeflateWithDict(str, dictionary)
local function DecompressDeflateInternal(str, dictionary)
  local state = CreateDecompressState(str, dictionary)
  local result, status = Inflate(state)
  if not result then return nil, status end

  local bits_left = state.ReaderBitlenLeft()
  return result, (bits_left - bits_left % 8) / 8
end

-- Shared implementation of the zlib decompressors: checks the two byte
-- header and the optional dictionary checksum, inflates the data, then
-- checks the trailing Adler-32.
-- @return The decompressed string, or nil followed by a status code.
-- @return On success, the number of whole unread input bytes.
-- @see LibDeflate:DecompressZlib(str)
-- @see LibDeflate:DecompressZlibWithDict(str)
local function DecompressZlibInternal(str, dictionary)
  local state = CreateDecompressState(str, dictionary)
  local ReadBits, BitsLeft = state.ReadBits, state.ReaderBitlenLeft

  -- Read four bytes as one number, most significant byte first.
  local function read_uint32_be()
    local b3 = ReadBits(8)
    local b2 = ReadBits(8)
    local b1 = ReadBits(8)
    local b0 = ReadBits(8)
    return b3 * 16777216 + b2 * 65536 + b1 * 256 + b0
  end

  local cmf = ReadBits(8)
  if BitsLeft() < 0 then
    return nil, 2 -- available inflate data did not terminate
  end
  local method = cmf % 16
  if method ~= 8 then
    return nil, -12 -- invalid compression method
  end
  if (cmf - method) / 16 > 7 then
    return nil, -13 -- invalid window size
  end

  local flg = ReadBits(8)
  if BitsLeft() < 0 then
    return nil, 2 -- available inflate data did not terminate
  end
  if (cmf * 256 + flg) % 31 ~= 0 then
    return nil, -14 -- invalid header checksum
  end

  -- Bit 5 of FLG is the preset dictionary flag. The compression level in
  -- bits 6..7 is not used here.
  if (flg - flg % 32) / 32 % 2 == 1 then
    if not dictionary then
      return nil, -16 -- need dictionary, but dictionary is not provided.
    end
    local stream_dict_adler32 = read_uint32_be()
    if BitsLeft() < 0 then
      return nil, 2 -- available inflate data did not terminate
    end
    if not IsEqualAdler32(stream_dict_adler32, dictionary.adler32) then
      return nil, -17 -- dictionary adler32 does not match
    end
  end

  local result, status = Inflate(state)
  if not result then return nil, status end
  state.SkipToByteBoundary()

  local stored_adler32 = read_uint32_be()
  if BitsLeft() < 0 then
    return nil, 2 -- available inflate data did not terminate
  end
  if not IsEqualAdler32(stored_adler32, LibDeflate:Adler32(result)) then
    return nil, -15 -- Adler32 checksum does not match
  end

  local bits_left = BitsLeft()
  return result, (bits_left - bits_left % 8) / 8
end

--- Decompress a raw deflate compressed data.
-- @param str [string] The data to be decompressed.
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitrary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.
-- If the decompression fails (The first return value of this function is nil),
-- this return value is undefined.
-- @see LibDeflate:CompressDeflate
function LibDeflate:DecompressDeflate(str)
  local ok, reason = IsValidArguments(str)
  if ok then
    return DecompressDeflateInternal(str)
  end
  -- Level 2 reports the error at the caller's position.
  error("Usage: LibDeflate:DecompressDeflate(str): " .. reason, 2)
end

--- Decompress a raw deflate compressed data with a preset dictionary.
-- @param str [string] The data to be decompressed.
-- @param dictionary [table] The preset dictionary used by
-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
-- Decompression and compression must use the same dictionary.
-- Otherwise wrong decompressed data could be produced without generating any
-- error.
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitrary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.
-- If the decompression fails (The first return value of this function is nil),
-- this return value is undefined.
-- @see LibDeflate:CompressDeflateWithDict
function LibDeflate:DecompressDeflateWithDict(str, dictionary)
  local ok, reason = IsValidArguments(str, true, dictionary)
  if ok then
    return DecompressDeflateInternal(str, dictionary)
  end
  -- Level 2 reports the error at the caller's position.
  error("Usage: LibDeflate:DecompressDeflateWithDict(str, dictionary): " ..
          reason, 2)
end

--- Decompress a zlib compressed data.
-- @param str [string] The data to be decompressed
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitrary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.
-- If the decompression fails (The first return value of this function is nil),
-- this return value is undefined.
-- @see LibDeflate:CompressZlib
function LibDeflate:DecompressZlib(str)
  local ok, reason = IsValidArguments(str)
  if ok then
    return DecompressZlibInternal(str)
  end
  -- Level 2 reports the error at the caller's position.
  error("Usage: LibDeflate:DecompressZlib(str): " .. reason, 2)
end

--- Decompress a zlib compressed data with a preset dictionary.
-- @param str [string] The data to be decompressed
-- @param dictionary [table] The preset dictionary used by
-- LibDeflate:CompressDeflateWithDict when the compressed data is produced.
-- Decompression and compression must use the same dictionary.
-- Otherwise wrong decompressed data could be produced without generating any
-- error.
-- @return [string/nil] If the decompression succeeds, return the decompressed
-- data. If the decompression fails, return nil. You should check if this return
-- value is non-nil to know if the decompression succeeds.
-- @return [integer] If the decompression succeeds, return the number of
-- unprocessed bytes in the input compressed data. This return value is a
-- positive integer if the input data is a valid compressed data appended by an
-- arbitrary non-empty string. This return value is 0 if the input data does not
-- contain any extra bytes.
+-- If the decompression fails (The first return value of this function is nil), +-- this return value is undefined. +-- @see LibDeflate:CompressZlibWithDict +function LibDeflate:DecompressZlibWithDict(str, dictionary) + local arg_valid, arg_err = IsValidArguments(str, true, dictionary) + if not arg_valid then + error(("Usage: LibDeflate:DecompressZlibWithDict(str, dictionary): " .. + arg_err), 2) + end + return DecompressZlibInternal(str, dictionary) +end + +-- Calculate the huffman code of fixed block +do + _fix_block_literal_huffman_bitlen = {} + for sym = 0, 143 do _fix_block_literal_huffman_bitlen[sym] = 8 end + for sym = 144, 255 do _fix_block_literal_huffman_bitlen[sym] = 9 end + for sym = 256, 279 do _fix_block_literal_huffman_bitlen[sym] = 7 end + for sym = 280, 287 do _fix_block_literal_huffman_bitlen[sym] = 8 end + + _fix_block_dist_huffman_bitlen = {} + for dist = 0, 31 do _fix_block_dist_huffman_bitlen[dist] = 5 end + local status + status, _fix_block_literal_huffman_bitlen_count, _fix_block_literal_huffman_to_deflate_code = + GetHuffmanForDecode(_fix_block_literal_huffman_bitlen, 287, 9) + assert(status == 0) + status, _fix_block_dist_huffman_bitlen_count, _fix_block_dist_huffman_to_deflate_code = + GetHuffmanForDecode(_fix_block_dist_huffman_bitlen, 31, 5) + assert(status == 0) + + _fix_block_literal_huffman_code = GetHuffmanCodeFromBitlen( + _fix_block_literal_huffman_bitlen_count, + _fix_block_literal_huffman_bitlen, 287, 9) + _fix_block_dist_huffman_code = GetHuffmanCodeFromBitlen( + _fix_block_dist_huffman_bitlen_count, + _fix_block_dist_huffman_bitlen, 31, 5) +end + +-- Prefix encoding algorithm +-- Credits to LibCompress. +-- The code has been rewritten by the author of LibDeflate. +------------------------------------------------------------------------------ + +-- to be able to match any requested byte value, the search +-- string must be preprocessed characters to escape with %: +-- ( ) . % + - * ? 
[ ] ^ $ +-- "illegal" byte values: +-- 0 is replaced by %z +local _gsub_escape_table = { + ["\000"] = "%z", + ["("] = "%(", + [")"] = "%)", + ["."] = "%.", + ["%"] = "%%", + ["+"] = "%+", + ["-"] = "%-", + ["*"] = "%*", + ["?"] = "%?", + ["["] = "%[", + ["]"] = "%]", + ["^"] = "%^", + ["$"] = "%$" +} + +local function escape_for_gsub(str) + return str:gsub("([%z%(%)%.%%%+%-%*%?%[%]%^%$])", _gsub_escape_table) +end + +--- Create a custom codec with encoder and decoder.
+-- This codec is used to convert an input string to make it not contain +-- some specific bytes. +-- This created codec and the parameters of this function do NOT take +-- localization into account. One byte (0-255) in the string is exactly one +-- character (0-255). +-- Credits to LibCompress. +-- The code has been rewritten by the author of LibDeflate.
+-- @param reserved_chars [string] The created encoder will ensure encoded +-- data does not contain any single character in reserved_chars. This parameter +-- should be non-empty. +-- @param escape_chars [string] The escape character(s) used in the created +-- codec. The codec converts any character included in reserved\_chars / +-- escape\_chars / map\_chars to (one escape char + one character not in +-- reserved\_chars / escape\_chars / map\_chars). +-- You usually only need to provide a length-1 string for this parameter. +-- Length-2 string is only needed when +-- reserved\_chars + escape\_chars + map\_chars is longer than 127. +-- This parameter should be non-empty. +-- @param map_chars [string] The created encoder will map every +-- reserved\_chars:sub(i, i) (1 <= i <= #map\_chars) to map\_chars:sub(i, i). +-- This parameter CAN be empty string. +-- @return [table/nil] If the codec cannot be created, return nil.
+-- If the codec can be created according to the given +-- parameters, return the codec, which is an encode/decode table. +-- The table contains two functions:
+-- t:Encode(str) returns the encoded string.
+-- t:Decode(str) returns the decoded string if succeeds. nil if fails. +-- @return [nil/string] If the codec is successfully created, return nil. +-- If not, return a string that describes the reason why the codec cannot be +-- created. +-- @usage +-- -- Create an encoder/decoder that maps all "\000" to "\003", +-- -- and escape "\001" (and "\002" and "\003") properly +-- local codec = LibDeflate:CreateCodec("\000\001", "\002", "\003") +-- +-- local encoded = codec:Encode(SOME_STRING) +-- -- "encoded" does not contain "\000" or "\001" +-- local decoded = codec:Decode(encoded) +-- -- assert(decoded == SOME_STRING) +function LibDeflate:CreateCodec(reserved_chars, escape_chars, map_chars) + if type(reserved_chars) ~= "string" or type(escape_chars) ~= "string" or + type(map_chars) ~= "string" then + error("Usage: LibDeflate:CreateCodec(reserved_chars," .. + " escape_chars, map_chars):" .. " All arguments must be string.", 2) + end + + if escape_chars == "" then return nil, "No escape characters supplied." end + if #reserved_chars < #map_chars then + return nil, "The number of reserved characters must be" .. + " at least as many as the number of mapped chars." + end + if reserved_chars == "" then return nil, "No characters to encode." end + + local encode_bytes = reserved_chars .. escape_chars .. map_chars + -- build list of bytes not available as a suffix to a prefix byte + local taken = {} + for i = 1, #encode_bytes do + local byte = string_byte(encode_bytes, i, i) + if taken[byte] then + return nil, "There must be no duplicate characters in the" .. + " concatenation of reserved_chars, escape_chars and" .. + " map_chars." 
+ end + taken[byte] = true + end + + local decode_patterns = {} + local decode_repls = {} + + -- the encoding can be a single gsub + -- , but the decoding can require multiple gsubs + local encode_search = {} + local encode_translate = {} + + -- map single byte to single byte + if #map_chars > 0 then + local decode_search = {} + local decode_translate = {} + for i = 1, #map_chars do + local from = string_sub(reserved_chars, i, i) + local to = string_sub(map_chars, i, i) + encode_translate[from] = to + encode_search[#encode_search + 1] = from + decode_translate[to] = from + decode_search[#decode_search + 1] = to + end + decode_patterns[#decode_patterns + 1] = + "([" .. escape_for_gsub(table_concat(decode_search)) .. "])" + decode_repls[#decode_repls + 1] = decode_translate + end + + local escape_char_index = 1 + local escape_char = string_sub(escape_chars, escape_char_index, + escape_char_index) + -- map single byte to double-byte + local r = 0 -- suffix char value to the escapeChar + + local decode_search = {} + local decode_translate = {} + for i = 1, #encode_bytes do + local c = string_sub(encode_bytes, i, i) + if not encode_translate[c] then + while r >= 256 or taken[r] do + r = r + 1 + if r > 255 then -- switch to next escapeChar + decode_patterns[#decode_patterns + 1] = + escape_for_gsub(escape_char) .. "([" .. + escape_for_gsub(table_concat(decode_search)) .. "])" + decode_repls[#decode_repls + 1] = decode_translate + + escape_char_index = escape_char_index + 1 + escape_char = string_sub(escape_chars, escape_char_index, + escape_char_index) + r = 0 + decode_search = {} + decode_translate = {} + + if not escape_char or escape_char == "" then + -- actually I don't need to check + -- "not ecape_char", but what if Lua changes + -- the behavior of string.sub() in the future? + -- we are out of escape chars and we need more! + return nil, "Out of escape characters." + end + end + end + + local char_r = _byte_to_char[r] + encode_translate[c] = escape_char .. 
char_r + encode_search[#encode_search + 1] = c + decode_translate[char_r] = c + decode_search[#decode_search + 1] = char_r + r = r + 1 + end + if i == #encode_bytes then + decode_patterns[#decode_patterns + 1] = + escape_for_gsub(escape_char) .. "([" .. + escape_for_gsub(table_concat(decode_search)) .. "])" + decode_repls[#decode_repls + 1] = decode_translate + end + end + + local codec = {} + + local encode_pattern = "([" .. escape_for_gsub(table_concat(encode_search)) .. + "])" + local encode_repl = encode_translate + + function codec:Encode(str) + if type(str) ~= "string" then + error( + ("Usage: codec:Encode(str):" .. " 'str' - string expected got '%s'."):format( + type(str)), 2) + end + return string_gsub(str, encode_pattern, encode_repl) + end + + local decode_tblsize = #decode_patterns + local decode_fail_pattern = "([" .. escape_for_gsub(reserved_chars) .. "])" + + function codec:Decode(str) + if type(str) ~= "string" then + error( + ("Usage: codec:Decode(str):" .. " 'str' - string expected got '%s'."):format( + type(str)), 2) + end + if string_find(str, decode_fail_pattern) then return nil end + for i = 1, decode_tblsize do + str = string_gsub(str, decode_patterns[i], decode_repls[i]) + end + return str + end + + return codec +end + +local _addon_channel_codec + +local function GenerateWoWAddonChannelCodec() + return LibDeflate:CreateCodec("\000", "\001", "") +end + +--- Encode the string to make it ready to be transmitted in World of +-- Warcraft addon channel.
+-- The encoded string is guaranteed to contain no NULL ("\000") character. +-- @param str [string] The string to be encoded. +-- @return The encoded string. +-- @see LibDeflate:DecodeForWoWAddonChannel +function LibDeflate:EncodeForWoWAddonChannel(str) + if type(str) ~= "string" then + error(("Usage: LibDeflate:EncodeForWoWAddonChannel(str):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + if not _addon_channel_codec then + _addon_channel_codec = GenerateWoWAddonChannelCodec() + end + return _addon_channel_codec:Encode(str) +end + +--- Decode the string produced by LibDeflate:EncodeForWoWAddonChannel +-- @param str [string] The string to be decoded. +-- @return [string/nil] The decoded string if succeeds. nil if fails. +-- @see LibDeflate:EncodeForWoWAddonChannel +function LibDeflate:DecodeForWoWAddonChannel(str) + if type(str) ~= "string" then + error(("Usage: LibDeflate:DecodeForWoWAddonChannel(str):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + if not _addon_channel_codec then + _addon_channel_codec = GenerateWoWAddonChannelCodec() + end + return _addon_channel_codec:Decode(str) +end + +-- For World of Warcraft Chat Channel Encoding +-- Credits to LibCompress. +-- The code has been rewritten by the author of LibDeflate.
+-- Following byte values are not allowed: +-- \000, s, S, \010, \013, \124, % +-- Because SendChatMessage will error +-- if an UTF8 multibyte character is incomplete, +-- all character values above 127 have to be encoded to avoid this. +-- This costs quite a bit of bandwidth (about 13-14%) +-- Also, because drunken status is unknown for the received +-- , strings used with SendChatMessage should be terminated with +-- an identifying byte value, after which the server MAY add "...hic!" +-- or as much as it can fit(!). +-- Pass the identifying byte as a reserved character to this function +-- to ensure the encoding doesn't contain that value. +-- or use this: local message, match = arg1:gsub("^(.*)\029.-$", "%1") +-- arg1 is message from channel, \029 is the string terminator +-- , but may be used in the encoded datastream as well. :-) +-- This encoding will expand data anywhere from: +-- 0% (average with pure ascii text) +-- 53.5% (average with random data valued zero to 255) +-- 100% (only encoding data that encodes to two bytes) +local function GenerateWoWChatChannelCodec() + local r = {} + for i = 128, 255 do r[#r + 1] = _byte_to_char[i] end + + local reserved_chars = "sS\000\010\013\124%" .. table_concat(r) + return LibDeflate:CreateCodec(reserved_chars, "\029\031", "\015\020") +end + +local _chat_channel_codec + +--- Encode the string to make it ready to be transmitted in World of +-- Warcraft chat channel.
+-- See also https://wow.gamepedia.com/ValidChatMessageCharacters +-- @param str [string] The string to be encoded. +-- @return [string] The encoded string. +-- @see LibDeflate:DecodeForWoWChatChannel +function LibDeflate:EncodeForWoWChatChannel(str) + if type(str) ~= "string" then + error(("Usage: LibDeflate:EncodeForWoWChatChannel(str):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + if not _chat_channel_codec then + _chat_channel_codec = GenerateWoWChatChannelCodec() + end + return _chat_channel_codec:Encode(str) +end + +--- Decode the string produced by LibDeflate:EncodeForWoWChatChannel. +-- @param str [string] The string to be decoded. +-- @return [string/nil] The decoded string if succeeds. nil if fails. +-- @see LibDeflate:EncodeForWoWChatChannel +function LibDeflate:DecodeForWoWChatChannel(str) + if type(str) ~= "string" then + error(("Usage: LibDeflate:DecodeForWoWChatChannel(str):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + if not _chat_channel_codec then + _chat_channel_codec = GenerateWoWChatChannelCodec() + end + return _chat_channel_codec:Decode(str) +end + +-- Credits to WeakAuras2 and Galmok for the 6 bit encoding algorithm. +-- The code has been rewritten by the author of LibDeflate. +-- The result of encoding will be 25% larger than the +-- origin string, but every single byte of the encoding result will be +-- printable characters as the following. 
+local _byte_to_6bit_char = { + [0] = "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "(", + ")" +} + +local _6bit_to_byte = { + [97] = 0, + [98] = 1, + [99] = 2, + [100] = 3, + [101] = 4, + [102] = 5, + [103] = 6, + [104] = 7, + [105] = 8, + [106] = 9, + [107] = 10, + [108] = 11, + [109] = 12, + [110] = 13, + [111] = 14, + [112] = 15, + [113] = 16, + [114] = 17, + [115] = 18, + [116] = 19, + [117] = 20, + [118] = 21, + [119] = 22, + [120] = 23, + [121] = 24, + [122] = 25, + [65] = 26, + [66] = 27, + [67] = 28, + [68] = 29, + [69] = 30, + [70] = 31, + [71] = 32, + [72] = 33, + [73] = 34, + [74] = 35, + [75] = 36, + [76] = 37, + [77] = 38, + [78] = 39, + [79] = 40, + [80] = 41, + [81] = 42, + [82] = 43, + [83] = 44, + [84] = 45, + [85] = 46, + [86] = 47, + [87] = 48, + [88] = 49, + [89] = 50, + [90] = 51, + [48] = 52, + [49] = 53, + [50] = 54, + [51] = 55, + [52] = 56, + [53] = 57, + [54] = 58, + [55] = 59, + [56] = 60, + [57] = 61, + [40] = 62, + [41] = 63 +} + +--- Encode the string to make it printable.
+-- +-- Credit to WeakAuras2, this function is equivalent to the implementation +-- it is using right now.
+-- The code has been rewritten by the author of LibDeflate.
+-- The encoded string will be about 33% larger than the original string (4 +-- characters per 3 input bytes). Every byte of it is one of 64 printable ASCII +-- characters, which can be copied, pasted and displayed more easily. +-- (26 lowercase letters, 26 uppercase letters, 10 digits, +-- left parenthesis, or right parenthesis) +-- @param str [string] The string to be encoded. +-- @return [string] The encoded string. +function LibDeflate:EncodeForPrint(str) + if type(str) ~= "string" then + error(("Usage: LibDeflate:EncodeForPrint(str):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + local strlen = #str + local strlenMinus2 = strlen - 2 + local i = 1 + local buffer = {} + local buffer_size = 0 + while i <= strlenMinus2 do + local x1, x2, x3 = string_byte(str, i, i + 2) + i = i + 3 + local cache = x1 + x2 * 256 + x3 * 65536 + local b1 = cache % 64 + cache = (cache - b1) / 64 + local b2 = cache % 64 + cache = (cache - b2) / 64 + local b3 = cache % 64 + local b4 = (cache - b3) / 64 + buffer_size = buffer_size + 1 + buffer[buffer_size] = _byte_to_6bit_char[b1] .. _byte_to_6bit_char[b2] .. + _byte_to_6bit_char[b3] .. _byte_to_6bit_char[b4] + end + + local cache = 0 + local cache_bitlen = 0 + while i <= strlen do + local x = string_byte(str, i, i) + cache = cache + x * _pow2[cache_bitlen] + cache_bitlen = cache_bitlen + 8 + i = i + 1 + end + while cache_bitlen > 0 do + local bit6 = cache % 64 + buffer_size = buffer_size + 1 + buffer[buffer_size] = _byte_to_6bit_char[bit6] + cache = (cache - bit6) / 64 + cache_bitlen = cache_bitlen - 6 + end + + return table_concat(buffer) +end + +--- Decode the printable string produced by LibDeflate:EncodeForPrint. +-- "str" will have its leading and trailing control characters or spaces +-- removed before it is decoded, so it is easier to use if "str" comes from +-- user copy and paste with some leading or trailing spaces. 
+-- Decoding then fails if the string contains any character that cannot be +-- produced by LibDeflate:EncodeForPrint, i.e. any character that is NOT one +-- of the 26 lowercase letters, 26 uppercase letters, +-- 10 digits, left parenthesis, or right parenthesis. +-- @param str [string] The string to be decoded. +-- @return [string/nil] The decoded string on success, nil on failure. +function LibDeflate:DecodeForPrint(str) + if type(str) ~= "string" then + error(("Usage: LibDeflate:DecodeForPrint(str):" .. + " 'str' - string expected got '%s'."):format(type(str)), 2) + end + str = str:gsub("^[%c ]+", "") + str = str:gsub("[%c ]+$", "") + + local strlen = #str + if strlen == 1 then return nil end + local strlenMinus3 = strlen - 3 + local i = 1 + local buffer = {} + local buffer_size = 0 + while i <= strlenMinus3 do + local x1, x2, x3, x4 = string_byte(str, i, i + 3) + x1 = _6bit_to_byte[x1] + x2 = _6bit_to_byte[x2] + x3 = _6bit_to_byte[x3] + x4 = _6bit_to_byte[x4] + if not (x1 and x2 and x3 and x4) then return nil end + i = i + 4 + local cache = x1 + x2 * 64 + x3 * 4096 + x4 * 262144 + local b1 = cache % 256 + cache = (cache - b1) / 256 + local b2 = cache % 256 + local b3 = (cache - b2) / 256 + buffer_size = buffer_size + 1 + buffer[buffer_size] = _byte_to_char[b1] .. _byte_to_char[b2] .. + _byte_to_char[b3] + end + + local cache = 0 + local cache_bitlen = 0 + while i <= strlen do + local x = string_byte(str, i, i) + x = _6bit_to_byte[x] + if not x then return nil end + cache = cache + x * _pow2[cache_bitlen] + cache_bitlen = cache_bitlen + 6 + i = i + 1 + end + + while cache_bitlen >= 8 do + local byte = cache % 256 + buffer_size = buffer_size + 1 + buffer[buffer_size] = _byte_to_char[byte] + cache = (cache - byte) / 256 + cache_bitlen = cache_bitlen - 8 + end + + return table_concat(buffer) +end + +local function InternalClearCache() + _chat_channel_codec = nil + _addon_channel_codec = nil +end + +-- For test. 
Don't use the functions in this table for real application. +-- Stuffs in this table is subject to change. +LibDeflate.internals = { + LoadStringToTable = LoadStringToTable, + IsValidDictionary = IsValidDictionary, + IsEqualAdler32 = IsEqualAdler32, + _byte_to_6bit_char = _byte_to_6bit_char, + _6bit_to_byte = _6bit_to_byte, + InternalClearCache = InternalClearCache +} + +--[[-- Commandline options +@class table +@name CommandlineOptions +@usage lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT] +\-0 store only. no compression. +\-1 fastest compression. +\-9 slowest and best compression. +\-d do decompression instead of compression. +\--dict specify the file that contains +the entire preset dictionary. +\-h give this help. +\--strategy specify a special compression strategy. +\-v print the version and copyright info. +\--zlib use zlib format instead of raw deflate. +]] + +-- currently no plan to support stdin and stdout. +-- Because Lua in Windows does not set stdout with binary mode. +if io and os and debug and _G.arg then + local io = io + local os = os + local debug = debug + local arg = _G.arg + local debug_info = debug.getinfo(1) + if debug_info.source == arg[0] or debug_info.short_src == arg[0] then + -- We are indeed runnning THIS file from the commandline. + local input + local output + local i = 1 + local status + local is_zlib = false + local is_decompress = false + local level + local strategy + local dictionary + while (arg[i]) do + local a = arg[i] + if a == "-h" then + print(LibDeflate._COPYRIGHT .. + "\nUsage: lua LibDeflate.lua [OPTION] [INPUT] [OUTPUT]\n" .. + " -0 store only. no compression.\n" .. + " -1 fastest compression.\n" .. + " -9 slowest and best compression.\n" .. + " -d do decompression instead of compression.\n" .. + " --dict specify the file that contains" .. + " the entire preset dictionary.\n" .. + " -h give this help.\n" .. + " --strategy " .. + " specify a special compression strategy.\n" .. 
+ " -v print the version and copyright info.\n" .. + " --zlib use zlib format instead of raw deflate.\n") + os.exit(0) + elseif a == "-v" then + print(LibDeflate._COPYRIGHT) + os.exit(0) + elseif a:find("^%-[0-9]$") then + level = tonumber(a:sub(2, 2)) + elseif a == "-d" then + is_decompress = true + elseif a == "--dict" then + i = i + 1 + local dict_filename = arg[i] + if not dict_filename then + io.stderr:write("You must speicify the dict filename") + os.exit(1) + end + local dict_file, dict_status = io.open(dict_filename, "rb") + if not dict_file then + io.stderr:write( + ("LibDeflate: Cannot read the dictionary file '%s': %s"):format( + dict_filename, dict_status)) + os.exit(1) + end + local dict_str = dict_file:read("*all") + dict_file:close() + -- In your lua program, you should pass in adler32 as a CONSTANT + -- , so it actually prevent you from modifying dictionary + -- unintentionally during the program development. I do this + -- here just because no convenient way to verify in commandline. + dictionary = LibDeflate:CreateDictionary(dict_str, #dict_str, + LibDeflate:Adler32(dict_str)) + elseif a == "--strategy" then + -- Not sure if I should check error here + -- If I do, redudant code. + i = i + 1 + strategy = arg[i] + elseif a == "--zlib" then + is_zlib = true + elseif a:find("^%-") then + io.stderr:write(("LibDeflate: Invalid argument: %s"):format(a)) + os.exit(1) + else + if not input then + input, status = io.open(a, "rb") + if not input then + io.stderr:write( + ("LibDeflate: Cannot read the file '%s': %s"):format(a, tostring( + status))) + os.exit(1) + end + elseif not output then + output, status = io.open(a, "wb") + if not output then + io.stderr:write( + ("LibDeflate: Cannot write the file '%s': %s"):format(a, tostring( + status))) + os.exit(1) + end + end + end + i = i + 1 + end -- while (arg[i]) + + if not input or not output then + io.stderr:write("LibDeflate:" .. 
+ " You must specify both input and output files.") + os.exit(1) + end + + local input_data = input:read("*all") + local configs = {level = level, strategy = strategy} + local output_data + if not is_decompress then + if not is_zlib then + if not dictionary then + output_data = LibDeflate:CompressDeflate(input_data, configs) + else + output_data = LibDeflate:CompressDeflateWithDict(input_data, + dictionary, configs) + end + else + if not dictionary then + output_data = LibDeflate:CompressZlib(input_data, configs) + else + output_data = LibDeflate:CompressZlibWithDict(input_data, dictionary, + configs) + end + end + else + if not is_zlib then + if not dictionary then + output_data = LibDeflate:DecompressDeflate(input_data) + else + output_data = LibDeflate:DecompressDeflateWithDict(input_data, + dictionary) + end + else + if not dictionary then + output_data = LibDeflate:DecompressZlib(input_data) + else + output_data = + LibDeflate:DecompressZlibWithDict(input_data, dictionary) + end + end + end + + if not output_data then + io.stderr:write("LibDeflate: Decompress fails.") + os.exit(1) + end + + output:write(output_data) + if input and input ~= io.stdin then input:close() end + if output and output ~= io.stdout then output:close() end + + io.stderr:write(("Successfully writes %d bytes"):format(output_data:len())) + os.exit(0) + end +end + +return LibDeflate diff --git a/mc/bufferstuff.lua b/mc/bufferstuff.lua index 9c8f5e9..c6a8f47 100644 --- a/mc/bufferstuff.lua +++ b/mc/bufferstuff.lua @@ -137,6 +137,12 @@ function bufferStuff.Reader:readByte() return value end +function bufferStuff.Reader:readBool() + local value = string.unpack(">b", string.sub(self.bufferData, self.offset, self.offset + 1)) + self.offset = self.offset + 1 + return value >= 1 +end + function bufferStuff.Reader:readUByte() local value = string.unpack(">B", string.sub(self.bufferData, self.offset, self.offset + 1)) self.offset = self.offset + 1 diff --git a/mc/chunk.lua b/mc/chunk.lua index 
e712b5d..b54fd77 100644 --- a/mc/chunk.lua +++ b/mc/chunk.lua @@ -1,14 +1,22 @@ local util = require("util") local nibbleArray = require("nibbleArray") +local bit32 = require("bit32") local MAX_HEIGHT = 128 local chunk = {} local defaultsTable = { - blocks = {} + blocks = {}, + metadata = {}, + blockLight = {}, + skyLight = {} } +function chunk:CreateCoordPair(x, z) + return bit32.bor((x >= 0 and 0 or 2147483648), bit32.bor(bit32.rshift(bit32.band(x, 0x7fff), -16), bit32.bor((z >= 0 and 0 or 0x8000), bit32.band(z, 0x7fff)))) +end + function chunk:new() local newTable = util.deepCopyTable(defaultsTable) or {} @@ -16,7 +24,11 @@ function chunk:new() for i = 1, blockBufferSize do table.insert(newTable.blocks, 0) end - print(#newTable.blocks) + for i = 1, blockBufferSize / 2 do + table.insert(newTable.metadata, 0) + table.insert(newTable.blockLight, 255) + table.insert(newTable.skyLight, 255) + end -- Set this table as this """class"""'s metatable setmetatable(newTable, self) @@ -25,4 +37,12 @@ function chunk:new() return newTable end +function chunk:setBlock(blockId, x, y, z) + self.blocks[bit32.bor(bit32.rshift(x, -11), bit32.bor(bit32.rshift(z, -7), y)) + 1] = blockId +end + +function chunk:getBlockId(x, y, z) + return self.blocks[bit32.bor(bit32.rshift(x, -11), bit32.bor(bit32.rshift(z, -7), y)) + 1] +end + return chunk \ No newline at end of file diff --git a/mc/server.lua b/mc/server.lua index f1caa36..1be36b5 100644 --- a/mc/server.lua +++ b/mc/server.lua @@ -1,63 +1,152 @@ --- local bufferStuff = require("bufferstuff") --- local writer = bufferStuff.Writer:new() --- writer:writeUByte(10) --- writer:writeUByte(67) --- writer:writeString16("Hello World!") - --- local reader = bufferStuff.Reader:new(writer:toBuffer()) --- print(reader:readUByte()) --- print(reader:readUByte()) --- print(reader:readString16()) - -local chunk = require("chunk") +--local chunk = require("chunk") local packet = require("packet") local bufferStuff = require("bufferstuff") +--local 
libDeflate = require("LibDeflate") -local tempChunk = chunk:new() +-- local tempChunk = chunk:new() +-- for x = 0, 15 do +-- for z = 0, 15 do +-- tempChunk:setBlock(2, x, 62, z) +-- end +-- end + +local ws = nil + +local playerX = 0 +local playerY = 0 +local playerZ = 0 +local playerYaw = 0 +local playerPitch = 0 +local onGround = false local function doWebsocket() - local ws = http.websocket("ws://localhost:25567") + ws = http.websocket("ws://localhost:25567") while true do local b, bv, data, bx = os.pullEvent("websocket_message") local reader = bufferStuff.Reader:new(data) local packetId = reader:readByte() - if packetId == packet.LoginRequest then + if packetId == packet.KeepAlive then + ws.send(data, true) + elseif packetId == packet.LoginRequest then local protocolVersion = reader:readInt() local username = reader:readString16() local mapSeed = reader:readLong() local dimension = reader:readByte() - print(username .. " is requesting login") local writer = bufferStuff.Writer:new() + -- Login packet writer:writeByte(packet.LoginRequest) -- Packet ID writer:writeInt(1) -- Entity ID writer:writeString16("") -- Login Token writer:writeLong(0) -- Seed - writer.writeByte(0) -- Dimension - ws.send(writer:toBuffer()) - print("Login granted") + writer:writeByte(0) -- Dimension + ws.send(writer:toBuffer(), true) + + -- teleport + -- local teleportWriter = bufferStuff.Writer:new() + -- teleportWriter:writeByte(packet.PlayerPositionLook) + -- teleportWriter:writeDouble(8) + -- teleportWriter:writeDouble(75) + -- teleportWriter:writeDouble(75.62) + -- teleportWriter:writeDouble(8) + -- teleportWriter:writeFloat(0) + -- teleportWriter:writeFloat(0) + -- teleportWriter:writeBool(true) + -- ws.send(teleportWriter:toBuffer(), true) + + -- pre chunk + local preChunkWriter = bufferStuff.Writer:new() + preChunkWriter:writeByte(packet.PreChunk) + preChunkWriter:writeInt(0) + preChunkWriter:writeInt(0) + preChunkWriter:writeBool(true) + ws.send(preChunkWriter:toBuffer(), true) + + 
local blockWriter = bufferStuff.Writer:new() + for x = 0, 15 do + for z = 0, 15 do + blockWriter:writeByte(packet.BlockChange) + blockWriter:writeInt(x) + blockWriter:writeByte(62) + blockWriter:writeInt(z) + blockWriter:writeByte(2) + blockWriter:writeByte(0) + end + end + ws.send(blockWriter:toBuffer(), true) + + -- chunk + -- local chunkWriter = bufferStuff.Writer:new() + -- local chunkData = bufferStuff.Writer:new() + -- for _, id in ipairs(tempChunk.blocks) do + -- chunkData:writeUByte(id) + -- end + -- for _, id in ipairs(tempChunk.metadata) do + -- chunkData:writeUByte(id) + -- end + -- for _, id in ipairs(tempChunk.blockLight) do + -- chunkData:writeUByte(id) + -- end + -- for _, id in ipairs(tempChunk.skyLight) do + -- chunkData:writeUByte(id) + -- end + -- chunkWriter:writeByte(packet.MapChunk) + -- chunkWriter:writeInt(0) + -- chunkWriter:writeShort(0) + -- chunkWriter:writeInt(0) + -- chunkWriter:writeByte(15) + -- chunkWriter:writeByte(127) + -- chunkWriter:writeByte(15) + + -- local compressedData = libDeflate:CompressDeflate(chunkData:toBuffer()) + + -- chunkWriter:writeInt(#compressedData) + -- local chunkPacket = chunkWriter:toBuffer() + -- ws.send(chunkPacket .. compressedData, true) elseif packetId == packet.Handshake then local username = reader:readString16() - print(username .. 
" is probing the server") local writer = bufferStuff.Writer:new() writer:writeByte(packet.Handshake) writer:writeString16("-") - ws.send(writer:toBuffer()) - print("Handshake ACK sent") + ws.send(writer:toBuffer(), true) + elseif packetId == packet.Player then + onGround = reader:readBool() + elseif packetId == packet.PlayerPosition then + playerX = reader:readDouble() + playerY = reader:readDouble() + reader:readDouble() + playerZ = reader:readDouble() + onGround = reader:readBool() + elseif packetId == packet.PlayerPositionLook then + playerX = reader:readDouble() + playerY = reader:readDouble() + reader:readDouble() + playerZ = reader:readDouble() + playerYaw = reader:readFloat() + playerPitch = reader:readFloat() + onGround = reader:readBool() + elseif packetId == packet.PlayerLook then + playerYaw = reader:readFloat() + playerPitch = reader:readFloat() + onGround = reader:readBool() end end end +local keepAliveWriter = bufferStuff.Writer:new() +keepAliveWriter:writeByte(packet.KeepAlive) +local keepAlivePacket = keepAliveWriter:toBuffer() + local tickCount = 0 local function serverTickLoop() while true do - - - os.sleep(0.05) + --print(playerX, playerY, playerZ, playerYaw, playerPitch, onGround) + + os.sleep(0.1) tickCount = tickCount + 1 end end diff --git a/theforwarder/index.ts b/theforwarder/index.ts index cd5df04..9f4e17f 100644 --- a/theforwarder/index.ts +++ b/theforwarder/index.ts @@ -36,7 +36,8 @@ server.on("connection", (socket) => { writeClient(data as Buffer); }); - socket.on("error", () => { + socket.on("error", (err) => { + console.log(err); clearInterval(interval); }); socket.on("close", () => {