[8ebc79b] | 1 | /* |
---|
| 2 | LZ4 HC - High Compression Mode of LZ4 |
---|
| 3 | Copyright (C) 2011-2015, Yann Collet. |
---|
| 4 | |
---|
| 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
---|
| 6 | |
---|
| 7 | Redistribution and use in source and binary forms, with or without |
---|
| 8 | modification, are permitted provided that the following conditions are |
---|
| 9 | met: |
---|
| 10 | |
---|
| 11 | * Redistributions of source code must retain the above copyright |
---|
| 12 | notice, this list of conditions and the following disclaimer. |
---|
| 13 | * Redistributions in binary form must reproduce the above |
---|
| 14 | copyright notice, this list of conditions and the following disclaimer |
---|
| 15 | in the documentation and/or other materials provided with the |
---|
| 16 | distribution. |
---|
| 17 | |
---|
| 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
---|
| 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
---|
| 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
---|
| 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
---|
| 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
| 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
---|
| 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
---|
| 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
---|
| 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
---|
| 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
---|
| 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
| 29 | |
---|
| 30 | You can contact the author at : |
---|
| 31 | - LZ4 source repository : https://github.com/Cyan4973/lz4 |
---|
| 32 | - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c |
---|
| 33 | */ |
---|
| 34 | |
---|
| 35 | |
---|
| 36 | |
---|
| 37 | /* ************************************* |
---|
| 38 | * Tuning Parameter |
---|
| 39 | ***************************************/ |
---|
| 40 | static const int LZ4HC_compressionLevel_default = 9; |
---|
| 41 | |
---|
| 42 | /*! |
---|
| 43 | * HEAPMODE : |
---|
| 44 | * Select how default compression function will allocate workplace memory, |
---|
| 45 | * in stack (0:fastest), or in heap (1:requires malloc()). |
---|
| 46 | * Since workplace is rather large, heap mode is recommended. |
---|
| 47 | */ |
---|
| 48 | #define LZ4HC_HEAPMODE 0 |
---|
| 49 | |
---|
| 50 | |
---|
| 51 | /* ************************************* |
---|
| 52 | * Includes |
---|
| 53 | ***************************************/ |
---|
| 54 | #include "lz4hc.h" |
---|
| 55 | |
---|
| 56 | |
---|
| 57 | /* ************************************* |
---|
| 58 | * Local Compiler Options |
---|
| 59 | ***************************************/ |
---|
| 60 | #if defined(__GNUC__) |
---|
| 61 | # pragma GCC diagnostic ignored "-Wunused-function" |
---|
| 62 | #endif |
---|
| 63 | |
---|
| 64 | #if defined (__clang__) |
---|
| 65 | # pragma clang diagnostic ignored "-Wunused-function" |
---|
| 66 | #endif |
---|
| 67 | |
---|
| 68 | |
---|
| 69 | /* ************************************* |
---|
| 70 | * Common LZ4 definition |
---|
| 71 | ***************************************/ |
---|
| 72 | #define LZ4_COMMONDEFS_ONLY |
---|
| 73 | #include "lz4.c" |
---|
| 74 | |
---|
| 75 | |
---|
| 76 | /* ************************************* |
---|
| 77 | * Local Constants |
---|
| 78 | ***************************************/ |
---|
| 79 | #define DICTIONARY_LOGSIZE 16 |
---|
| 80 | #define MAXD (1<<DICTIONARY_LOGSIZE) |
---|
| 81 | #define MAXD_MASK (MAXD - 1) |
---|
| 82 | |
---|
| 83 | #define HASH_LOG (DICTIONARY_LOGSIZE-1) |
---|
| 84 | #define HASHTABLESIZE (1 << HASH_LOG) |
---|
| 85 | #define HASH_MASK (HASHTABLESIZE - 1) |
---|
| 86 | |
---|
| 87 | #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) |
---|
| 88 | |
---|
| 89 | static const int g_maxCompressionLevel = 16; |
---|
| 90 | |
---|
| 91 | |
---|
| 92 | /************************************** |
---|
| 93 | * Local Types |
---|
| 94 | **************************************/ |
---|
| 95 | typedef struct |
---|
| 96 | { |
---|
| 97 | U32 hashTable[HASHTABLESIZE]; |
---|
| 98 | U16 chainTable[MAXD]; |
---|
| 99 | const BYTE* end; /* next block here to continue on current prefix */ |
---|
| 100 | const BYTE* base; /* All index relative to this position */ |
---|
| 101 | const BYTE* dictBase; /* alternate base for extDict */ |
---|
| 102 | BYTE* inputBuffer; /* deprecated */ |
---|
| 103 | U32 dictLimit; /* below that point, need extDict */ |
---|
| 104 | U32 lowLimit; /* below that point, no more dict */ |
---|
| 105 | U32 nextToUpdate; /* index from which to continue dictionary update */ |
---|
| 106 | U32 compressionLevel; |
---|
| 107 | } LZ4HC_Data_Structure; |
---|
| 108 | |
---|
| 109 | |
---|
| 110 | /************************************** |
---|
| 111 | * Local Macros |
---|
| 112 | **************************************/ |
---|
| 113 | #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) |
---|
| 114 | //#define DELTANEXTU16(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */ |
---|
| 115 | #define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */ |
---|
| 116 | |
---|
| 117 | static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } |
---|
| 118 | |
---|
| 119 | |
---|
| 120 | |
---|
| 121 | /************************************** |
---|
| 122 | * HC Compression |
---|
| 123 | **************************************/ |
---|
| 124 | static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start) |
---|
| 125 | { |
---|
| 126 | MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); |
---|
| 127 | MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); |
---|
| 128 | hc4->nextToUpdate = 64 KB; |
---|
| 129 | hc4->base = start - 64 KB; |
---|
| 130 | hc4->end = start; |
---|
| 131 | hc4->dictBase = start - 64 KB; |
---|
| 132 | hc4->dictLimit = 64 KB; |
---|
| 133 | hc4->lowLimit = 64 KB; |
---|
| 134 | } |
---|
| 135 | |
---|
| 136 | |
---|
| 137 | /* Update chains up to ip (excluded) */ |
---|
| 138 | FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) |
---|
| 139 | { |
---|
| 140 | U16* chainTable = hc4->chainTable; |
---|
| 141 | U32* HashTable = hc4->hashTable; |
---|
| 142 | const BYTE* const base = hc4->base; |
---|
| 143 | const U32 target = (U32)(ip - base); |
---|
| 144 | U32 idx = hc4->nextToUpdate; |
---|
| 145 | |
---|
| 146 | while(idx < target) |
---|
| 147 | { |
---|
| 148 | U32 h = LZ4HC_hashPtr(base+idx); |
---|
| 149 | size_t delta = idx - HashTable[h]; |
---|
| 150 | if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; |
---|
| 151 | DELTANEXTU16(idx) = (U16)delta; |
---|
| 152 | HashTable[h] = idx; |
---|
| 153 | idx++; |
---|
| 154 | } |
---|
| 155 | |
---|
| 156 | hc4->nextToUpdate = target; |
---|
| 157 | } |
---|
| 158 | |
---|
| 159 | |
---|
| 160 | FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, /* Index table will be updated */ |
---|
| 161 | const BYTE* ip, const BYTE* const iLimit, |
---|
| 162 | const BYTE** matchpos, |
---|
| 163 | const int maxNbAttempts) |
---|
| 164 | { |
---|
| 165 | U16* const chainTable = hc4->chainTable; |
---|
| 166 | U32* const HashTable = hc4->hashTable; |
---|
| 167 | const BYTE* const base = hc4->base; |
---|
| 168 | const BYTE* const dictBase = hc4->dictBase; |
---|
| 169 | const U32 dictLimit = hc4->dictLimit; |
---|
| 170 | const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); |
---|
| 171 | U32 matchIndex; |
---|
| 172 | const BYTE* match; |
---|
| 173 | int nbAttempts=maxNbAttempts; |
---|
| 174 | size_t ml=0; |
---|
| 175 | |
---|
| 176 | /* HC4 match finder */ |
---|
| 177 | LZ4HC_Insert(hc4, ip); |
---|
| 178 | matchIndex = HashTable[LZ4HC_hashPtr(ip)]; |
---|
| 179 | |
---|
| 180 | while ((matchIndex>=lowLimit) && (nbAttempts)) |
---|
| 181 | { |
---|
| 182 | nbAttempts--; |
---|
| 183 | if (matchIndex >= dictLimit) |
---|
| 184 | { |
---|
| 185 | match = base + matchIndex; |
---|
| 186 | if (*(match+ml) == *(ip+ml) |
---|
| 187 | && (LZ4_read32(match) == LZ4_read32(ip))) |
---|
| 188 | { |
---|
| 189 | size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; |
---|
| 190 | if (mlt > ml) { ml = mlt; *matchpos = match; } |
---|
| 191 | } |
---|
| 192 | } |
---|
| 193 | else |
---|
| 194 | { |
---|
| 195 | match = dictBase + matchIndex; |
---|
| 196 | if (LZ4_read32(match) == LZ4_read32(ip)) |
---|
| 197 | { |
---|
| 198 | size_t mlt; |
---|
| 199 | const BYTE* vLimit = ip + (dictLimit - matchIndex); |
---|
| 200 | if (vLimit > iLimit) vLimit = iLimit; |
---|
| 201 | mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; |
---|
| 202 | if ((ip+mlt == vLimit) && (vLimit < iLimit)) |
---|
| 203 | mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit); |
---|
| 204 | if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ |
---|
| 205 | } |
---|
| 206 | } |
---|
| 207 | matchIndex -= DELTANEXTU16(matchIndex); |
---|
| 208 | } |
---|
| 209 | |
---|
| 210 | return (int)ml; |
---|
| 211 | } |
---|
| 212 | |
---|
| 213 | |
---|
| 214 | FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( |
---|
| 215 | LZ4HC_Data_Structure* hc4, |
---|
| 216 | const BYTE* const ip, |
---|
| 217 | const BYTE* const iLowLimit, |
---|
| 218 | const BYTE* const iHighLimit, |
---|
| 219 | int longest, |
---|
| 220 | const BYTE** matchpos, |
---|
| 221 | const BYTE** startpos, |
---|
| 222 | const int maxNbAttempts) |
---|
| 223 | { |
---|
| 224 | U16* const chainTable = hc4->chainTable; |
---|
| 225 | U32* const HashTable = hc4->hashTable; |
---|
| 226 | const BYTE* const base = hc4->base; |
---|
| 227 | const U32 dictLimit = hc4->dictLimit; |
---|
| 228 | const BYTE* const lowPrefixPtr = base + dictLimit; |
---|
| 229 | const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1); |
---|
| 230 | const BYTE* const dictBase = hc4->dictBase; |
---|
| 231 | U32 matchIndex; |
---|
| 232 | int nbAttempts = maxNbAttempts; |
---|
| 233 | int delta = (int)(ip-iLowLimit); |
---|
| 234 | |
---|
| 235 | |
---|
| 236 | /* First Match */ |
---|
| 237 | LZ4HC_Insert(hc4, ip); |
---|
| 238 | matchIndex = HashTable[LZ4HC_hashPtr(ip)]; |
---|
| 239 | |
---|
| 240 | while ((matchIndex>=lowLimit) && (nbAttempts)) |
---|
| 241 | { |
---|
| 242 | nbAttempts--; |
---|
| 243 | if (matchIndex >= dictLimit) |
---|
| 244 | { |
---|
| 245 | const BYTE* matchPtr = base + matchIndex; |
---|
| 246 | if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) |
---|
| 247 | if (LZ4_read32(matchPtr) == LZ4_read32(ip)) |
---|
| 248 | { |
---|
| 249 | int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); |
---|
| 250 | int back = 0; |
---|
| 251 | |
---|
| 252 | while ((ip+back>iLowLimit) |
---|
| 253 | && (matchPtr+back > lowPrefixPtr) |
---|
| 254 | && (ip[back-1] == matchPtr[back-1])) |
---|
| 255 | back--; |
---|
| 256 | |
---|
| 257 | mlt -= back; |
---|
| 258 | |
---|
| 259 | if (mlt > longest) |
---|
| 260 | { |
---|
| 261 | longest = (int)mlt; |
---|
| 262 | *matchpos = matchPtr+back; |
---|
| 263 | *startpos = ip+back; |
---|
| 264 | } |
---|
| 265 | } |
---|
| 266 | } |
---|
| 267 | else |
---|
| 268 | { |
---|
| 269 | const BYTE* matchPtr = dictBase + matchIndex; |
---|
| 270 | if (LZ4_read32(matchPtr) == LZ4_read32(ip)) |
---|
| 271 | { |
---|
| 272 | size_t mlt; |
---|
| 273 | int back=0; |
---|
| 274 | const BYTE* vLimit = ip + (dictLimit - matchIndex); |
---|
| 275 | if (vLimit > iHighLimit) vLimit = iHighLimit; |
---|
| 276 | mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; |
---|
| 277 | if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) |
---|
| 278 | mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit); |
---|
| 279 | while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--; |
---|
| 280 | mlt -= back; |
---|
| 281 | if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } |
---|
| 282 | } |
---|
| 283 | } |
---|
| 284 | matchIndex -= DELTANEXTU16(matchIndex); |
---|
| 285 | } |
---|
| 286 | |
---|
| 287 | return longest; |
---|
| 288 | } |
---|
| 289 | |
---|
| 290 | |
---|
| 291 | typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; |
---|
| 292 | |
---|
| 293 | #define LZ4HC_DEBUG 0 |
---|
| 294 | #if LZ4HC_DEBUG |
---|
| 295 | static unsigned debug = 0; |
---|
| 296 | #endif |
---|
| 297 | |
---|
| 298 | FORCE_INLINE int LZ4HC_encodeSequence ( |
---|
| 299 | const BYTE** ip, |
---|
| 300 | BYTE** op, |
---|
| 301 | const BYTE** anchor, |
---|
| 302 | int matchLength, |
---|
| 303 | const BYTE* const match, |
---|
| 304 | limitedOutput_directive limitedOutputBuffer, |
---|
| 305 | BYTE* oend) |
---|
| 306 | { |
---|
| 307 | int length; |
---|
| 308 | BYTE* token; |
---|
| 309 | |
---|
| 310 | #if LZ4HC_DEBUG |
---|
| 311 | if (debug) printf("literal : %u -- match : %u -- offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match)); |
---|
| 312 | #endif |
---|
| 313 | |
---|
| 314 | /* Encode Literal length */ |
---|
| 315 | length = (int)(*ip - *anchor); |
---|
| 316 | token = (*op)++; |
---|
| 317 | if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ |
---|
| 318 | if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } |
---|
| 319 | else *token = (BYTE)(length<<ML_BITS); |
---|
| 320 | |
---|
| 321 | /* Copy Literals */ |
---|
| 322 | LZ4_wildCopy(*op, *anchor, (*op) + length); |
---|
| 323 | *op += length; |
---|
| 324 | |
---|
| 325 | /* Encode Offset */ |
---|
| 326 | LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; |
---|
| 327 | |
---|
| 328 | /* Encode MatchLength */ |
---|
| 329 | length = (int)(matchLength-MINMATCH); |
---|
| 330 | if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */ |
---|
| 331 | if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } |
---|
| 332 | else *token += (BYTE)(length); |
---|
| 333 | |
---|
| 334 | /* Prepare next loop */ |
---|
| 335 | *ip += matchLength; |
---|
| 336 | *anchor = *ip; |
---|
| 337 | |
---|
| 338 | return 0; |
---|
| 339 | } |
---|
| 340 | |
---|
| 341 | |
---|
| 342 | static int LZ4HC_compress_generic ( |
---|
| 343 | void* ctxvoid, |
---|
| 344 | const char* source, |
---|
| 345 | char* dest, |
---|
| 346 | int inputSize, |
---|
| 347 | int maxOutputSize, |
---|
| 348 | int compressionLevel, |
---|
| 349 | limitedOutput_directive limit |
---|
| 350 | ) |
---|
| 351 | { |
---|
| 352 | LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid; |
---|
| 353 | const BYTE* ip = (const BYTE*) source; |
---|
| 354 | const BYTE* anchor = ip; |
---|
| 355 | const BYTE* const iend = ip + inputSize; |
---|
| 356 | const BYTE* const mflimit = iend - MFLIMIT; |
---|
| 357 | const BYTE* const matchlimit = (iend - LASTLITERALS); |
---|
| 358 | |
---|
| 359 | BYTE* op = (BYTE*) dest; |
---|
| 360 | BYTE* const oend = op + maxOutputSize; |
---|
| 361 | |
---|
| 362 | unsigned maxNbAttempts; |
---|
| 363 | int ml, ml2, ml3, ml0; |
---|
| 364 | const BYTE* ref=NULL; |
---|
| 365 | const BYTE* start2=NULL; |
---|
| 366 | const BYTE* ref2=NULL; |
---|
| 367 | const BYTE* start3=NULL; |
---|
| 368 | const BYTE* ref3=NULL; |
---|
| 369 | const BYTE* start0; |
---|
| 370 | const BYTE* ref0; |
---|
| 371 | |
---|
| 372 | |
---|
| 373 | /* init */ |
---|
| 374 | if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel; |
---|
| 375 | if (compressionLevel < 1) compressionLevel = LZ4HC_compressionLevel_default; |
---|
| 376 | maxNbAttempts = 1 << (compressionLevel-1); |
---|
| 377 | ctx->end += inputSize; |
---|
| 378 | |
---|
| 379 | ip++; |
---|
| 380 | |
---|
| 381 | /* Main Loop */ |
---|
| 382 | while (ip < mflimit) |
---|
| 383 | { |
---|
| 384 | ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); |
---|
| 385 | if (!ml) { ip++; continue; } |
---|
| 386 | |
---|
| 387 | /* saved, in case we would skip too much */ |
---|
| 388 | start0 = ip; |
---|
| 389 | ref0 = ref; |
---|
| 390 | ml0 = ml; |
---|
| 391 | |
---|
| 392 | _Search2: |
---|
| 393 | if (ip+ml < mflimit) |
---|
| 394 | ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts); |
---|
| 395 | else ml2 = ml; |
---|
| 396 | |
---|
| 397 | if (ml2 == ml) /* No better match */ |
---|
| 398 | { |
---|
| 399 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; |
---|
| 400 | continue; |
---|
| 401 | } |
---|
| 402 | |
---|
| 403 | if (start0 < ip) |
---|
| 404 | { |
---|
| 405 | if (start2 < ip + ml0) /* empirical */ |
---|
| 406 | { |
---|
| 407 | ip = start0; |
---|
| 408 | ref = ref0; |
---|
| 409 | ml = ml0; |
---|
| 410 | } |
---|
| 411 | } |
---|
| 412 | |
---|
| 413 | /* Here, start0==ip */ |
---|
| 414 | if ((start2 - ip) < 3) /* First Match too small : removed */ |
---|
| 415 | { |
---|
| 416 | ml = ml2; |
---|
| 417 | ip = start2; |
---|
| 418 | ref =ref2; |
---|
| 419 | goto _Search2; |
---|
| 420 | } |
---|
| 421 | |
---|
| 422 | _Search3: |
---|
| 423 | /* |
---|
| 424 | * Currently we have : |
---|
| 425 | * ml2 > ml1, and |
---|
| 426 | * ip1+3 <= ip2 (usually < ip1+ml1) |
---|
| 427 | */ |
---|
| 428 | if ((start2 - ip) < OPTIMAL_ML) |
---|
| 429 | { |
---|
| 430 | int correction; |
---|
| 431 | int new_ml = ml; |
---|
| 432 | if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; |
---|
| 433 | if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; |
---|
| 434 | correction = new_ml - (int)(start2 - ip); |
---|
| 435 | if (correction > 0) |
---|
| 436 | { |
---|
| 437 | start2 += correction; |
---|
| 438 | ref2 += correction; |
---|
| 439 | ml2 -= correction; |
---|
| 440 | } |
---|
| 441 | } |
---|
| 442 | /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ |
---|
| 443 | |
---|
| 444 | if (start2 + ml2 < mflimit) |
---|
| 445 | ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts); |
---|
| 446 | else ml3 = ml2; |
---|
| 447 | |
---|
| 448 | if (ml3 == ml2) /* No better match : 2 sequences to encode */ |
---|
| 449 | { |
---|
| 450 | /* ip & ref are known; Now for ml */ |
---|
| 451 | if (start2 < ip+ml) ml = (int)(start2 - ip); |
---|
| 452 | /* Now, encode 2 sequences */ |
---|
| 453 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; |
---|
| 454 | ip = start2; |
---|
| 455 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; |
---|
| 456 | continue; |
---|
| 457 | } |
---|
| 458 | |
---|
| 459 | if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */ |
---|
| 460 | { |
---|
| 461 | if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ |
---|
| 462 | { |
---|
| 463 | if (start2 < ip+ml) |
---|
| 464 | { |
---|
| 465 | int correction = (int)(ip+ml - start2); |
---|
| 466 | start2 += correction; |
---|
| 467 | ref2 += correction; |
---|
| 468 | ml2 -= correction; |
---|
| 469 | if (ml2 < MINMATCH) |
---|
| 470 | { |
---|
| 471 | start2 = start3; |
---|
| 472 | ref2 = ref3; |
---|
| 473 | ml2 = ml3; |
---|
| 474 | } |
---|
| 475 | } |
---|
| 476 | |
---|
| 477 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; |
---|
| 478 | ip = start3; |
---|
| 479 | ref = ref3; |
---|
| 480 | ml = ml3; |
---|
| 481 | |
---|
| 482 | start0 = start2; |
---|
| 483 | ref0 = ref2; |
---|
| 484 | ml0 = ml2; |
---|
| 485 | goto _Search2; |
---|
| 486 | } |
---|
| 487 | |
---|
| 488 | start2 = start3; |
---|
| 489 | ref2 = ref3; |
---|
| 490 | ml2 = ml3; |
---|
| 491 | goto _Search3; |
---|
| 492 | } |
---|
| 493 | |
---|
| 494 | /* |
---|
| 495 | * OK, now we have 3 ascending matches; let's write at least the first one |
---|
| 496 | * ip & ref are known; Now for ml |
---|
| 497 | */ |
---|
| 498 | if (start2 < ip+ml) |
---|
| 499 | { |
---|
| 500 | if ((start2 - ip) < (int)ML_MASK) |
---|
| 501 | { |
---|
| 502 | int correction; |
---|
| 503 | if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; |
---|
| 504 | if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; |
---|
| 505 | correction = ml - (int)(start2 - ip); |
---|
| 506 | if (correction > 0) |
---|
| 507 | { |
---|
| 508 | start2 += correction; |
---|
| 509 | ref2 += correction; |
---|
| 510 | ml2 -= correction; |
---|
| 511 | } |
---|
| 512 | } |
---|
| 513 | else |
---|
| 514 | { |
---|
| 515 | ml = (int)(start2 - ip); |
---|
| 516 | } |
---|
| 517 | } |
---|
| 518 | if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; |
---|
| 519 | |
---|
| 520 | ip = start2; |
---|
| 521 | ref = ref2; |
---|
| 522 | ml = ml2; |
---|
| 523 | |
---|
| 524 | start2 = start3; |
---|
| 525 | ref2 = ref3; |
---|
| 526 | ml2 = ml3; |
---|
| 527 | |
---|
| 528 | goto _Search3; |
---|
| 529 | } |
---|
| 530 | |
---|
| 531 | /* Encode Last Literals */ |
---|
| 532 | { |
---|
| 533 | int lastRun = (int)(iend - anchor); |
---|
| 534 | if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ |
---|
| 535 | if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } |
---|
| 536 | else *op++ = (BYTE)(lastRun<<ML_BITS); |
---|
| 537 | memcpy(op, anchor, iend - anchor); |
---|
| 538 | op += iend-anchor; |
---|
| 539 | } |
---|
| 540 | |
---|
| 541 | /* End */ |
---|
| 542 | return (int) (((char*)op)-dest); |
---|
| 543 | } |
---|
| 544 | |
---|
| 545 | |
---|
| 546 | int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); } |
---|
| 547 | |
---|
| 548 | int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel) |
---|
| 549 | { |
---|
| 550 | if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ |
---|
| 551 | LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)src); |
---|
| 552 | if (maxDstSize < LZ4_compressBound(srcSize)) |
---|
| 553 | return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput); |
---|
| 554 | else |
---|
| 555 | return LZ4HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit); |
---|
| 556 | } |
---|
| 557 | |
---|
| 558 | int LZ4_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel) |
---|
| 559 | { |
---|
| 560 | #if LZ4HC_HEAPMODE==1 |
---|
| 561 | LZ4HC_Data_Structure* statePtr = malloc(sizeof(LZ4HC_Data_Structure)); |
---|
| 562 | #else |
---|
| 563 | LZ4HC_Data_Structure state; |
---|
| 564 | LZ4HC_Data_Structure* const statePtr = &state; |
---|
| 565 | #endif |
---|
| 566 | int cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel); |
---|
| 567 | #if LZ4HC_HEAPMODE==1 |
---|
| 568 | free(statePtr); |
---|
| 569 | #endif |
---|
| 570 | return cSize; |
---|
| 571 | } |
---|
| 572 | |
---|
| 573 | |
---|
| 574 | |
---|
| 575 | /************************************** |
---|
| 576 | * Streaming Functions |
---|
| 577 | **************************************/ |
---|
| 578 | /* allocation */ |
---|
| 579 | LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); } |
---|
| 580 | int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; } |
---|
| 581 | |
---|
| 582 | |
---|
| 583 | /* initialization */ |
---|
| 584 | void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) |
---|
| 585 | { |
---|
| 586 | LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= sizeof(LZ4_streamHC_t)); /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ |
---|
| 587 | ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL; |
---|
| 588 | ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel; |
---|
| 589 | } |
---|
| 590 | |
---|
| 591 | int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize) |
---|
| 592 | { |
---|
| 593 | LZ4HC_Data_Structure* ctxPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr; |
---|
| 594 | if (dictSize > 64 KB) |
---|
| 595 | { |
---|
| 596 | dictionary += dictSize - 64 KB; |
---|
| 597 | dictSize = 64 KB; |
---|
| 598 | } |
---|
| 599 | LZ4HC_init (ctxPtr, (const BYTE*)dictionary); |
---|
| 600 | if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3)); |
---|
| 601 | ctxPtr->end = (const BYTE*)dictionary + dictSize; |
---|
| 602 | return dictSize; |
---|
| 603 | } |
---|
| 604 | |
---|
| 605 | |
---|
| 606 | /* compression */ |
---|
| 607 | |
---|
| 608 | static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock) |
---|
| 609 | { |
---|
| 610 | if (ctxPtr->end >= ctxPtr->base + 4) |
---|
| 611 | LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ |
---|
| 612 | /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ |
---|
| 613 | ctxPtr->lowLimit = ctxPtr->dictLimit; |
---|
| 614 | ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base); |
---|
| 615 | ctxPtr->dictBase = ctxPtr->base; |
---|
| 616 | ctxPtr->base = newBlock - ctxPtr->dictLimit; |
---|
| 617 | ctxPtr->end = newBlock; |
---|
| 618 | ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ |
---|
| 619 | } |
---|
| 620 | |
---|
| 621 | static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr, |
---|
| 622 | const char* source, char* dest, |
---|
| 623 | int inputSize, int maxOutputSize, limitedOutput_directive limit) |
---|
| 624 | { |
---|
| 625 | /* auto-init if forgotten */ |
---|
| 626 | if (ctxPtr->base == NULL) |
---|
| 627 | LZ4HC_init (ctxPtr, (const BYTE*) source); |
---|
| 628 | |
---|
| 629 | /* Check overflow */ |
---|
| 630 | if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) |
---|
| 631 | { |
---|
| 632 | size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit; |
---|
| 633 | if (dictSize > 64 KB) dictSize = 64 KB; |
---|
| 634 | |
---|
| 635 | LZ4_loadDictHC((LZ4_streamHC_t*)ctxPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); |
---|
| 636 | } |
---|
| 637 | |
---|
| 638 | /* Check if blocks follow each other */ |
---|
| 639 | if ((const BYTE*)source != ctxPtr->end) |
---|
| 640 | LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source); |
---|
| 641 | |
---|
| 642 | /* Check overlapping input/dictionary space */ |
---|
| 643 | { |
---|
| 644 | const BYTE* sourceEnd = (const BYTE*) source + inputSize; |
---|
| 645 | const BYTE* dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit; |
---|
| 646 | const BYTE* dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit; |
---|
| 647 | if ((sourceEnd > dictBegin) && ((const BYTE*)source < dictEnd)) |
---|
| 648 | { |
---|
| 649 | if (sourceEnd > dictEnd) sourceEnd = dictEnd; |
---|
| 650 | ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); |
---|
| 651 | if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit; |
---|
| 652 | } |
---|
| 653 | } |
---|
| 654 | |
---|
| 655 | return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit); |
---|
| 656 | } |
---|
| 657 | |
---|
| 658 | int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize) |
---|
| 659 | { |
---|
| 660 | if (maxOutputSize < LZ4_compressBound(inputSize)) |
---|
| 661 | return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput); |
---|
| 662 | else |
---|
| 663 | return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit); |
---|
| 664 | } |
---|
| 665 | |
---|
| 666 | |
---|
| 667 | /* dictionary saving */ |
---|
| 668 | |
---|
| 669 | int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) |
---|
| 670 | { |
---|
| 671 | LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr; |
---|
| 672 | int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit)); |
---|
| 673 | if (dictSize > 64 KB) dictSize = 64 KB; |
---|
| 674 | if (dictSize < 4) dictSize = 0; |
---|
| 675 | if (dictSize > prefixSize) dictSize = prefixSize; |
---|
| 676 | memmove(safeBuffer, streamPtr->end - dictSize, dictSize); |
---|
| 677 | { |
---|
| 678 | U32 endIndex = (U32)(streamPtr->end - streamPtr->base); |
---|
| 679 | streamPtr->end = (const BYTE*)safeBuffer + dictSize; |
---|
| 680 | streamPtr->base = streamPtr->end - endIndex; |
---|
| 681 | streamPtr->dictLimit = endIndex - dictSize; |
---|
| 682 | streamPtr->lowLimit = endIndex - dictSize; |
---|
| 683 | if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit; |
---|
| 684 | } |
---|
| 685 | return dictSize; |
---|
| 686 | } |
---|
| 687 | |
---|
| 688 | |
---|
| 689 | /*********************************** |
---|
| 690 | * Deprecated Functions |
---|
| 691 | ***********************************/ |
---|
| 692 | /* Deprecated compression functions */ |
---|
| 693 | /* These functions are planned to start generate warnings by r131 approximately */ |
---|
| 694 | int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } |
---|
| 695 | int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } |
---|
| 696 | int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } |
---|
| 697 | int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } |
---|
| 698 | int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } |
---|
| 699 | int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } |
---|
| 700 | int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } |
---|
| 701 | int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } |
---|
| 702 | int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } |
---|
| 703 | int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } |
---|
| 704 | |
---|
| 705 | |
---|
| 706 | /* Deprecated streaming functions */ |
---|
| 707 | /* These functions currently generate deprecation warnings */ |
---|
| 708 | int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; } |
---|
| 709 | |
---|
| 710 | int LZ4_resetStreamStateHC(void* state, char* inputBuffer) |
---|
| 711 | { |
---|
| 712 | if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */ |
---|
| 713 | LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer); |
---|
| 714 | ((LZ4HC_Data_Structure*)state)->inputBuffer = (BYTE*)inputBuffer; |
---|
| 715 | return 0; |
---|
| 716 | } |
---|
| 717 | |
---|
| 718 | void* LZ4_createHC (char* inputBuffer) |
---|
| 719 | { |
---|
| 720 | void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure)); |
---|
| 721 | if (hc4 == NULL) return NULL; /* not enough memory */ |
---|
| 722 | LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer); |
---|
| 723 | ((LZ4HC_Data_Structure*)hc4)->inputBuffer = (BYTE*)inputBuffer; |
---|
| 724 | return hc4; |
---|
| 725 | } |
---|
| 726 | |
---|
| 727 | int LZ4_freeHC (void* LZ4HC_Data) |
---|
| 728 | { |
---|
| 729 | FREEMEM(LZ4HC_Data); |
---|
| 730 | return (0); |
---|
| 731 | } |
---|
| 732 | |
---|
| 733 | int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel) |
---|
| 734 | { |
---|
| 735 | return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit); |
---|
| 736 | } |
---|
| 737 | |
---|
| 738 | int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel) |
---|
| 739 | { |
---|
| 740 | return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput); |
---|
| 741 | } |
---|
| 742 | |
---|
| 743 | char* LZ4_slideInputBufferHC(void* LZ4HC_Data) |
---|
| 744 | { |
---|
| 745 | LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data; |
---|
| 746 | int dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB); |
---|
| 747 | return (char*)(hc4->inputBuffer + dictSize); |
---|
| 748 | } |
---|