[00587dc] | 1 | /********************************************************************* |
---|
| 2 | Blosc - Blocked Suffling and Compression Library |
---|
| 3 | |
---|
| 4 | Author: Francesc Alted <[email protected]> |
---|
| 5 | Creation date: 2009-05-20 |
---|
| 6 | |
---|
| 7 | See LICENSES/BLOSC.txt for details about copyright and rights to use. |
---|
| 8 | **********************************************************************/ |
---|
| 9 | |
---|
| 10 | /********************************************************************* |
---|
| 11 | The code in this file is heavily based on FastLZ, a lightning-fast |
---|
| 12 | lossless compression library. See LICENSES/FASTLZ.txt for details. |
---|
| 13 | **********************************************************************/ |
---|
| 14 | |
---|
| 15 | |
---|
| 16 | #include <stdlib.h> |
---|
| 17 | #include <stdio.h> |
---|
| 18 | #include <string.h> |
---|
| 19 | #include "blosclz.h" |
---|
| 20 | |
---|
| 21 | #if defined(_WIN32) && !defined(__MINGW32__) |
---|
| 22 | #include <windows.h> |
---|
| 23 | #include "win32/stdint-windows.h" |
---|
| 24 | #else |
---|
| 25 | #include <stdint.h> |
---|
| 26 | #endif /* _WIN32 */ |
---|
| 27 | |
---|
| 28 | |
---|
| 29 | /* |
---|
| 30 | * Prevent accessing more than 8-bit at once, except on x86 architectures. |
---|
| 31 | */ |
---|
| 32 | #if !defined(BLOSCLZ_STRICT_ALIGN) |
---|
| 33 | #define BLOSCLZ_STRICT_ALIGN |
---|
| 34 | #if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */ |
---|
| 35 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 36 | #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ |
---|
| 37 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 38 | #elif defined(_M_IX86) /* Intel, MSVC */ |
---|
| 39 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 40 | #elif defined(__386) |
---|
| 41 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 42 | #elif defined(_X86_) /* MinGW */ |
---|
| 43 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 44 | #elif defined(__I86__) /* Digital Mars */ |
---|
| 45 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 46 | #endif |
---|
| 47 | #endif |
---|
| 48 | |
---|
| 49 | /* |
---|
| 50 | * Always check for bound when decompressing. |
---|
| 51 | * Generally it is best to leave it defined. |
---|
| 52 | */ |
---|
| 53 | #define BLOSCLZ_SAFE |
---|
| 54 | |
---|
| 55 | /* |
---|
| 56 | * Give hints to the compiler for branch prediction optimization. |
---|
| 57 | */ |
---|
| 58 | #if defined(__GNUC__) && (__GNUC__ > 2) |
---|
| 59 | #define BLOSCLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) |
---|
| 60 | #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) |
---|
| 61 | #else |
---|
| 62 | #define BLOSCLZ_EXPECT_CONDITIONAL(c) (c) |
---|
| 63 | #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (c) |
---|
| 64 | #endif |
---|
| 65 | |
---|
| 66 | /* |
---|
| 67 | * Use inlined functions for supported systems. |
---|
| 68 | */ |
---|
| 69 | #if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C) |
---|
| 70 | #define BLOSCLZ_INLINE inline |
---|
| 71 | #elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__) |
---|
| 72 | #define BLOSCLZ_INLINE __inline |
---|
| 73 | #else |
---|
| 74 | #define BLOSCLZ_INLINE |
---|
| 75 | #endif |
---|
| 76 | |
---|
| 77 | #define MAX_COPY 32 |
---|
| 78 | #define MAX_LEN 264 /* 256 + 8 */ |
---|
| 79 | #define MAX_DISTANCE 8191 |
---|
| 80 | #define MAX_FARDISTANCE (65535+MAX_DISTANCE-1) |
---|
| 81 | |
---|
| 82 | #ifdef BLOSCLZ_STRICT_ALIGN |
---|
| 83 | #define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8) |
---|
| 84 | #else |
---|
| 85 | #define BLOSCLZ_READU16(p) *((const uint16_t*)(p)) |
---|
| 86 | #endif |
---|
| 87 | |
---|
| 88 | |
---|
| 89 | static BLOSCLZ_INLINE int32_t hash_function(uint8_t* p, uint8_t hash_log) |
---|
| 90 | { |
---|
| 91 | int32_t v; |
---|
| 92 | |
---|
| 93 | v = BLOSCLZ_READU16(p); |
---|
| 94 | v ^= BLOSCLZ_READU16(p+1)^(v>>(16-hash_log)); |
---|
| 95 | v &= (1 << hash_log) - 1; |
---|
| 96 | return v; |
---|
| 97 | } |
---|
| 98 | |
---|
| 99 | |
---|
| 100 | #define IP_BOUNDARY 2 |
---|
| 101 | |
---|
| 102 | int blosclz_compress(int opt_level, const void* input, |
---|
| 103 | int length, void* output, int maxout) |
---|
| 104 | { |
---|
| 105 | uint8_t* ip = (uint8_t*) input; |
---|
| 106 | uint8_t* ibase = (uint8_t*) input; |
---|
| 107 | uint8_t* ip_bound = ip + length - IP_BOUNDARY; |
---|
| 108 | uint8_t* ip_limit = ip + length - 12; |
---|
| 109 | uint8_t* op = (uint8_t*) output; |
---|
| 110 | |
---|
| 111 | /* Hash table depends on the opt level. Hash_log cannot be larger than 15. */ |
---|
| 112 | uint8_t hash_log_[10] = {-1, 8, 9, 9, 11, 11, 12, 13, 14, 15}; |
---|
| 113 | uint8_t hash_log = hash_log_[opt_level]; |
---|
| 114 | uint16_t hash_size = 1 << hash_log; |
---|
| 115 | uint16_t *htab; |
---|
| 116 | uint8_t* op_limit; |
---|
| 117 | |
---|
| 118 | int32_t hslot; |
---|
| 119 | int32_t hval; |
---|
| 120 | uint8_t copy; |
---|
| 121 | |
---|
| 122 | double maxlength_[10] = {-1, .1, .15, .2, .5, .7, .85, .925, .975, 1.0}; |
---|
| 123 | int32_t maxlength = (int32_t) (length * maxlength_[opt_level]); |
---|
| 124 | if (maxlength > (int32_t) maxout) { |
---|
| 125 | maxlength = (int32_t) maxout; |
---|
| 126 | } |
---|
| 127 | op_limit = op + maxlength; |
---|
| 128 | |
---|
| 129 | /* output buffer cannot be less than 66 bytes or we can get into problems. |
---|
| 130 | As output is usually the same length than input, we take input length. */ |
---|
| 131 | if (length < 66) { |
---|
| 132 | return 0; /* Mark this as uncompressible */ |
---|
| 133 | } |
---|
| 134 | |
---|
| 135 | htab = (uint16_t *) malloc(hash_size*sizeof(uint16_t)); |
---|
| 136 | |
---|
| 137 | /* sanity check */ |
---|
| 138 | if(BLOSCLZ_UNEXPECT_CONDITIONAL(length < 4)) { |
---|
| 139 | if(length) { |
---|
| 140 | /* create literal copy only */ |
---|
| 141 | *op++ = length-1; |
---|
| 142 | ip_bound++; |
---|
| 143 | while(ip <= ip_bound) |
---|
| 144 | *op++ = *ip++; |
---|
| 145 | free(htab); |
---|
| 146 | return length+1; |
---|
| 147 | } |
---|
| 148 | else goto out; |
---|
| 149 | } |
---|
| 150 | |
---|
| 151 | /* initializes hash table */ |
---|
| 152 | for (hslot = 0; hslot < hash_size; hslot++) |
---|
| 153 | htab[hslot] = 0; |
---|
| 154 | |
---|
| 155 | /* we start with literal copy */ |
---|
| 156 | copy = 2; |
---|
| 157 | *op++ = MAX_COPY-1; |
---|
| 158 | *op++ = *ip++; |
---|
| 159 | *op++ = *ip++; |
---|
| 160 | |
---|
| 161 | /* main loop */ |
---|
| 162 | while(BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) { |
---|
| 163 | const uint8_t* ref; |
---|
| 164 | int32_t distance; |
---|
| 165 | int32_t len = 3; /* minimum match length */ |
---|
| 166 | uint8_t* anchor = ip; /* comparison starting-point */ |
---|
| 167 | |
---|
| 168 | /* check for a run */ |
---|
| 169 | if(ip[0] == ip[-1] && BLOSCLZ_READU16(ip-1)==BLOSCLZ_READU16(ip+1)) { |
---|
| 170 | distance = 1; |
---|
| 171 | ip += 3; |
---|
| 172 | ref = anchor - 1 + 3; |
---|
| 173 | goto match; |
---|
| 174 | } |
---|
| 175 | |
---|
| 176 | /* find potential match */ |
---|
| 177 | hval = hash_function(ip, hash_log); |
---|
| 178 | ref = ibase + htab[hval]; |
---|
| 179 | /* update hash table */ |
---|
| 180 | htab[hval] = (uint16_t)(anchor - ibase); |
---|
| 181 | |
---|
| 182 | /* calculate distance to the match */ |
---|
| 183 | distance = (int32_t)(anchor - ref); |
---|
| 184 | |
---|
| 185 | /* is this a match? check the first 3 bytes */ |
---|
| 186 | if (distance==0 || (distance >= MAX_FARDISTANCE) || |
---|
| 187 | *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++) |
---|
| 188 | goto literal; |
---|
| 189 | |
---|
| 190 | /* far, needs at least 5-byte match */ |
---|
| 191 | if (distance >= MAX_DISTANCE) { |
---|
| 192 | if (*ip++ != *ref++ || *ip++ != *ref++) |
---|
| 193 | goto literal; |
---|
| 194 | len += 2; |
---|
| 195 | } |
---|
| 196 | |
---|
| 197 | match: |
---|
| 198 | |
---|
| 199 | /* last matched byte */ |
---|
| 200 | ip = anchor + len; |
---|
| 201 | |
---|
| 202 | /* distance is biased */ |
---|
| 203 | distance--; |
---|
| 204 | |
---|
| 205 | if(!distance) { |
---|
| 206 | /* zero distance means a run */ |
---|
| 207 | uint8_t x = ip[-1]; |
---|
| 208 | int64_t value, value2; |
---|
| 209 | /* Broadcast the value for every byte in a 64-bit register */ |
---|
| 210 | memset(&value, x, 8); |
---|
| 211 | /* safe because the outer check against ip limit */ |
---|
| 212 | while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) { |
---|
| 213 | value2 = ((int64_t *)ref)[0]; |
---|
| 214 | if (value != value2) { |
---|
| 215 | /* Find the byte that starts to differ */ |
---|
| 216 | while (ip < ip_bound) { |
---|
| 217 | if (*ref++ != x) break; else ip++; |
---|
| 218 | } |
---|
| 219 | break; |
---|
| 220 | } |
---|
| 221 | else { |
---|
| 222 | ip += 8; |
---|
| 223 | ref += 8; |
---|
| 224 | } |
---|
| 225 | } |
---|
| 226 | if (ip > ip_bound) { |
---|
| 227 | long l = (long)(ip - ip_bound); |
---|
| 228 | ip -= l; |
---|
| 229 | ref -= l; |
---|
| 230 | } /* End of optimization */ |
---|
| 231 | } |
---|
| 232 | else { |
---|
| 233 | for(;;) { |
---|
| 234 | /* safe because the outer check against ip limit */ |
---|
| 235 | while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) { |
---|
| 236 | if (*ref++ != *ip++) break; |
---|
| 237 | if (((int64_t *)ref)[0] != ((int64_t *)ip)[0]) { |
---|
| 238 | /* Find the byte that starts to differ */ |
---|
| 239 | while (ip < ip_bound) { |
---|
| 240 | if (*ref++ != *ip++) break; |
---|
| 241 | } |
---|
| 242 | break; |
---|
| 243 | } |
---|
| 244 | else { |
---|
| 245 | ip += 8; |
---|
| 246 | ref += 8; |
---|
| 247 | } |
---|
| 248 | } |
---|
| 249 | /* Last correction before exiting loop */ |
---|
| 250 | if (ip > ip_bound) { |
---|
| 251 | int32_t l = (int32_t)(ip - ip_bound); |
---|
| 252 | ip -= l; |
---|
| 253 | ref -= l; |
---|
| 254 | } /* End of optimization */ |
---|
| 255 | break; |
---|
| 256 | } |
---|
| 257 | } |
---|
| 258 | |
---|
| 259 | /* if we have copied something, adjust the copy count */ |
---|
| 260 | if (copy) |
---|
| 261 | /* copy is biased, '0' means 1 byte copy */ |
---|
| 262 | *(op-copy-1) = copy-1; |
---|
| 263 | else |
---|
| 264 | /* back, to overwrite the copy count */ |
---|
| 265 | op--; |
---|
| 266 | |
---|
| 267 | /* reset literal counter */ |
---|
| 268 | copy = 0; |
---|
| 269 | |
---|
| 270 | /* length is biased, '1' means a match of 3 bytes */ |
---|
| 271 | ip -= 3; |
---|
| 272 | len = (int32_t)(ip - anchor); |
---|
| 273 | |
---|
| 274 | /* check that we have space enough to encode the match for all the cases */ |
---|
| 275 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+(len/255)+6 > op_limit)) goto out; |
---|
| 276 | |
---|
| 277 | /* encode the match */ |
---|
| 278 | if(distance < MAX_DISTANCE) { |
---|
| 279 | if(len < 7) { |
---|
| 280 | *op++ = (len << 5) + (distance >> 8); |
---|
| 281 | *op++ = (distance & 255); |
---|
| 282 | } |
---|
| 283 | else { |
---|
| 284 | *op++ = (uint8_t)((7 << 5) + (distance >> 8)); |
---|
| 285 | for(len-=7; len >= 255; len-= 255) |
---|
| 286 | *op++ = 255; |
---|
| 287 | *op++ = len; |
---|
| 288 | *op++ = (distance & 255); |
---|
| 289 | } |
---|
| 290 | } |
---|
| 291 | else { |
---|
| 292 | /* far away, but not yet in the another galaxy... */ |
---|
| 293 | if(len < 7) { |
---|
| 294 | distance -= MAX_DISTANCE; |
---|
| 295 | *op++ = (uint8_t)((len << 5) + 31); |
---|
| 296 | *op++ = 255; |
---|
| 297 | *op++ = (uint8_t)(distance >> 8); |
---|
| 298 | *op++ = distance & 255; |
---|
| 299 | } |
---|
| 300 | else { |
---|
| 301 | distance -= MAX_DISTANCE; |
---|
| 302 | *op++ = (7 << 5) + 31; |
---|
| 303 | for(len-=7; len >= 255; len-= 255) |
---|
| 304 | *op++ = 255; |
---|
| 305 | *op++ = len; |
---|
| 306 | *op++ = 255; |
---|
| 307 | *op++ = (uint8_t)(distance >> 8); |
---|
| 308 | *op++ = distance & 255; |
---|
| 309 | } |
---|
| 310 | } |
---|
| 311 | |
---|
| 312 | /* update the hash at match boundary */ |
---|
| 313 | hval = hash_function(ip, hash_log); |
---|
| 314 | htab[hval] = (uint16_t)(ip++ - ibase); |
---|
| 315 | hval = hash_function(ip, hash_log); |
---|
| 316 | htab[hval] = (uint16_t)(ip++ - ibase); |
---|
| 317 | |
---|
| 318 | /* assuming literal copy */ |
---|
| 319 | *op++ = MAX_COPY-1; |
---|
| 320 | |
---|
| 321 | continue; |
---|
| 322 | |
---|
| 323 | literal: |
---|
| 324 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) goto out; |
---|
| 325 | *op++ = *anchor++; |
---|
| 326 | ip = anchor; |
---|
| 327 | copy++; |
---|
| 328 | if(BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { |
---|
| 329 | copy = 0; |
---|
| 330 | *op++ = MAX_COPY-1; |
---|
| 331 | } |
---|
| 332 | } |
---|
| 333 | |
---|
| 334 | /* left-over as literal copy */ |
---|
| 335 | ip_bound++; |
---|
| 336 | while(ip <= ip_bound) { |
---|
| 337 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) goto out; |
---|
| 338 | *op++ = *ip++; |
---|
| 339 | copy++; |
---|
| 340 | if(copy == MAX_COPY) { |
---|
| 341 | copy = 0; |
---|
| 342 | *op++ = MAX_COPY-1; |
---|
| 343 | } |
---|
| 344 | } |
---|
| 345 | |
---|
| 346 | /* if we have copied something, adjust the copy length */ |
---|
| 347 | if(copy) |
---|
| 348 | *(op-copy-1) = copy-1; |
---|
| 349 | else |
---|
| 350 | op--; |
---|
| 351 | |
---|
| 352 | /* marker for blosclz */ |
---|
| 353 | *(uint8_t*)output |= (1 << 5); |
---|
| 354 | |
---|
| 355 | free(htab); |
---|
| 356 | return (int)(op - (uint8_t*)output); |
---|
| 357 | |
---|
| 358 | out: |
---|
| 359 | free(htab); |
---|
| 360 | return 0; |
---|
| 361 | |
---|
| 362 | } |
---|
| 363 | |
---|
| 364 | |
---|
| 365 | int blosclz_decompress(const void* input, int length, void* output, int maxout) |
---|
| 366 | { |
---|
| 367 | const uint8_t* ip = (const uint8_t*) input; |
---|
| 368 | const uint8_t* ip_limit = ip + length; |
---|
| 369 | uint8_t* op = (uint8_t*) output; |
---|
| 370 | uint8_t* op_limit = op + maxout; |
---|
| 371 | int32_t ctrl = (*ip++) & 31; |
---|
| 372 | int32_t loop = 1; |
---|
| 373 | |
---|
| 374 | do { |
---|
| 375 | const uint8_t* ref = op; |
---|
| 376 | int32_t len = ctrl >> 5; |
---|
| 377 | int32_t ofs = (ctrl & 31) << 8; |
---|
| 378 | |
---|
| 379 | if(ctrl >= 32) { |
---|
| 380 | uint8_t code; |
---|
| 381 | len--; |
---|
| 382 | ref -= ofs; |
---|
| 383 | if (len == 7-1) |
---|
| 384 | do { |
---|
| 385 | code = *ip++; |
---|
| 386 | len += code; |
---|
| 387 | } while (code==255); |
---|
| 388 | code = *ip++; |
---|
| 389 | ref -= code; |
---|
| 390 | |
---|
| 391 | /* match from 16-bit distance */ |
---|
| 392 | if(BLOSCLZ_UNEXPECT_CONDITIONAL(code==255)) |
---|
| 393 | if(BLOSCLZ_EXPECT_CONDITIONAL(ofs==(31 << 8))) { |
---|
| 394 | ofs = (*ip++) << 8; |
---|
| 395 | ofs += *ip++; |
---|
| 396 | ref = op - ofs - MAX_DISTANCE; |
---|
| 397 | } |
---|
| 398 | |
---|
| 399 | #ifdef BLOSCLZ_SAFE |
---|
| 400 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) { |
---|
| 401 | return 0; |
---|
| 402 | } |
---|
| 403 | |
---|
| 404 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(ref-1 < (uint8_t *)output)) { |
---|
| 405 | return 0; |
---|
| 406 | } |
---|
| 407 | #endif |
---|
| 408 | |
---|
| 409 | if(BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) |
---|
| 410 | ctrl = *ip++; |
---|
| 411 | else |
---|
| 412 | loop = 0; |
---|
| 413 | |
---|
| 414 | if(ref == op) { |
---|
| 415 | /* optimize copy for a run */ |
---|
| 416 | uint8_t b = ref[-1]; |
---|
| 417 | memset(op, b, len+3); |
---|
| 418 | op += len+3; |
---|
| 419 | } |
---|
| 420 | else { |
---|
| 421 | /* copy from reference */ |
---|
| 422 | ref--; |
---|
| 423 | len += 3; |
---|
| 424 | if (abs((int32_t)(ref-op)) <= (int32_t)len) { |
---|
| 425 | /* src and dst do overlap: do a loop */ |
---|
| 426 | for(; len; --len) |
---|
| 427 | *op++ = *ref++; |
---|
| 428 | /* The memmove below does not work well (don't know why) */ |
---|
| 429 | /* memmove(op, ref, len); |
---|
| 430 | op += len; |
---|
| 431 | ref += len; |
---|
| 432 | len = 0; */ |
---|
| 433 | } |
---|
| 434 | else { |
---|
| 435 | memcpy(op, ref, len); |
---|
| 436 | op += len; |
---|
| 437 | ref += len; |
---|
| 438 | } |
---|
| 439 | } |
---|
| 440 | } |
---|
| 441 | else { |
---|
| 442 | ctrl++; |
---|
| 443 | #ifdef BLOSCLZ_SAFE |
---|
| 444 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) { |
---|
| 445 | return 0; |
---|
| 446 | } |
---|
| 447 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) { |
---|
| 448 | return 0; |
---|
| 449 | } |
---|
| 450 | #endif |
---|
| 451 | |
---|
| 452 | memcpy(op, ip, ctrl); |
---|
| 453 | ip += ctrl; |
---|
| 454 | op += ctrl; |
---|
| 455 | |
---|
| 456 | loop = (int32_t)BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit); |
---|
| 457 | if(loop) |
---|
| 458 | ctrl = *ip++; |
---|
| 459 | } |
---|
| 460 | } while(BLOSCLZ_EXPECT_CONDITIONAL(loop)); |
---|
| 461 | |
---|
| 462 | return (int)(op - (uint8_t*)output); |
---|
| 463 | } |
---|