[00587dc] | 1 | /********************************************************************* |
---|
[981e22c] | 2 | Blosc - Blocked Shuffling and Compression Library |
---|
[00587dc] | 3 | |
---|
[981e22c] | 4 | Author: Francesc Alted <[email protected]> |
---|
[00587dc] | 5 | Creation date: 2009-05-20 |
---|
| 6 | |
---|
| 7 | See LICENSES/BLOSC.txt for details about copyright and rights to use. |
---|
| 8 | **********************************************************************/ |
---|
| 9 | |
---|
| 10 | /********************************************************************* |
---|
| 11 | The code in this file is heavily based on FastLZ, a lightning-fast |
---|
| 12 | lossless compression library. See LICENSES/FASTLZ.txt for details. |
---|
| 13 | **********************************************************************/ |
---|
| 14 | |
---|
| 15 | |
---|
| 16 | #include <stdlib.h> |
---|
| 17 | #include <stdio.h> |
---|
| 18 | #include <string.h> |
---|
| 19 | #include "blosclz.h" |
---|
| 20 | |
---|
| 21 | #if defined(_WIN32) && !defined(__MINGW32__) |
---|
| 22 | #include <windows.h> |
---|
[981e22c] | 23 | |
---|
| 24 | /* stdint.h only available in VS2010 (VC++ 16.0) and newer */ |
---|
| 25 | #if defined(_MSC_VER) && _MSC_VER < 1600 |
---|
| 26 | #include "win32/stdint-windows.h" |
---|
| 27 | #else |
---|
| 28 | #include <stdint.h> |
---|
| 29 | #endif |
---|
| 30 | /* llabs only available in VS2013 (VC++ 18.0) and newer */ |
---|
| 31 | #if defined(_MSC_VER) && _MSC_VER < 1800 |
---|
| 32 | #define llabs(v) abs(v) |
---|
| 33 | #endif |
---|
[00587dc] | 34 | #else |
---|
| 35 | #include <stdint.h> |
---|
| 36 | #endif /* _WIN32 */ |
---|
| 37 | |
---|
| 38 | |
---|
| 39 | /* |
---|
| 40 | * Prevent accessing more than 8-bit at once, except on x86 architectures. |
---|
| 41 | */ |
---|
| 42 | #if !defined(BLOSCLZ_STRICT_ALIGN) |
---|
| 43 | #define BLOSCLZ_STRICT_ALIGN |
---|
| 44 | #if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */ |
---|
| 45 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 46 | #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ |
---|
| 47 | #undef BLOSCLZ_STRICT_ALIGN |
---|
[981e22c] | 48 | #elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */ |
---|
[00587dc] | 49 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 50 | #elif defined(__386) |
---|
| 51 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 52 | #elif defined(_X86_) /* MinGW */ |
---|
| 53 | #undef BLOSCLZ_STRICT_ALIGN |
---|
| 54 | #elif defined(__I86__) /* Digital Mars */ |
---|
| 55 | #undef BLOSCLZ_STRICT_ALIGN |
---|
[981e22c] | 56 | /* Seems like unaligned access in ARM (at least ARMv6) is pretty |
---|
| 57 | expensive, so we are going to always enforce strict aligment in ARM. |
---|
| 58 | If anybody suggest that newer ARMs are better, we can revisit this. */ |
---|
| 59 | /* #elif defined(__ARM_FEATURE_UNALIGNED) */ /* ARM, GNU C */ |
---|
| 60 | /* #undef BLOSCLZ_STRICT_ALIGN */ |
---|
[00587dc] | 61 | #endif |
---|
| 62 | #endif |
---|
| 63 | |
---|
| 64 | /* |
---|
| 65 | * Always check for bound when decompressing. |
---|
| 66 | * Generally it is best to leave it defined. |
---|
| 67 | */ |
---|
| 68 | #define BLOSCLZ_SAFE |
---|
| 69 | |
---|
| 70 | /* |
---|
| 71 | * Give hints to the compiler for branch prediction optimization. |
---|
| 72 | */ |
---|
| 73 | #if defined(__GNUC__) && (__GNUC__ > 2) |
---|
| 74 | #define BLOSCLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) |
---|
| 75 | #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) |
---|
| 76 | #else |
---|
| 77 | #define BLOSCLZ_EXPECT_CONDITIONAL(c) (c) |
---|
| 78 | #define BLOSCLZ_UNEXPECT_CONDITIONAL(c) (c) |
---|
| 79 | #endif |
---|
| 80 | |
---|
| 81 | /* |
---|
| 82 | * Use inlined functions for supported systems. |
---|
| 83 | */ |
---|
[981e22c] | 84 | #if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */ |
---|
| 85 | #define inline __inline /* Visual C is not C99, but supports some kind of inline */ |
---|
[00587dc] | 86 | #endif |
---|
| 87 | |
---|
| 88 | #define MAX_COPY 32 |
---|
| 89 | #define MAX_DISTANCE 8191 |
---|
| 90 | #define MAX_FARDISTANCE (65535+MAX_DISTANCE-1) |
---|
| 91 | |
---|
| 92 | #ifdef BLOSCLZ_STRICT_ALIGN |
---|
| 93 | #define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8) |
---|
| 94 | #else |
---|
| 95 | #define BLOSCLZ_READU16(p) *((const uint16_t*)(p)) |
---|
| 96 | #endif |
---|
| 97 | |
---|
| 98 | |
---|
[981e22c] | 99 | /* |
---|
| 100 | * Fast copy macros |
---|
| 101 | */ |
---|
| 102 | #if defined(_WIN32) |
---|
| 103 | #define CPYSIZE 32 |
---|
| 104 | #else |
---|
| 105 | #define CPYSIZE 8 |
---|
| 106 | #endif |
---|
| 107 | #define MCPY(d,s) { memcpy(d, s, CPYSIZE); d+=CPYSIZE; s+=CPYSIZE; } |
---|
| 108 | #define FASTCOPY(d,s,e) { do { MCPY(d,s) } while (d<e); } |
---|
| 109 | #define SAFECOPY(d,s,e) { while (d<e) { MCPY(d,s) } } |
---|
| 110 | |
---|
| 111 | /* Copy optimized for copying in blocks */ |
---|
| 112 | #define BLOCK_COPY(op, ref, len, op_limit) \ |
---|
| 113 | { int ilen = len % CPYSIZE; \ |
---|
| 114 | uint8_t *cpy = op + len; \ |
---|
| 115 | if (cpy + CPYSIZE - ilen <= op_limit) { \ |
---|
| 116 | FASTCOPY(op, ref, cpy); \ |
---|
| 117 | ref -= (op-cpy); op = cpy; \ |
---|
| 118 | } \ |
---|
| 119 | else { \ |
---|
| 120 | cpy -= ilen; \ |
---|
| 121 | SAFECOPY(op, ref, cpy); \ |
---|
| 122 | ref -= (op-cpy); op = cpy; \ |
---|
| 123 | for(; ilen; --ilen) \ |
---|
| 124 | *op++ = *ref++; \ |
---|
| 125 | } \ |
---|
| 126 | } |
---|
[00587dc] | 127 | |
---|
[981e22c] | 128 | #define SAFE_COPY(op, ref, len, op_limit) \ |
---|
| 129 | if (llabs(op-ref) < CPYSIZE) { \ |
---|
| 130 | for(; len; --len) \ |
---|
| 131 | *op++ = *ref++; \ |
---|
| 132 | } \ |
---|
| 133 | else BLOCK_COPY(op, ref, len, op_limit); |
---|
| 134 | |
---|
| 135 | /* Copy optimized for GCC 4.8. Seems like long copy loops are optimal. */ |
---|
| 136 | #define GCC_SAFE_COPY(op, ref, len, op_limit) \ |
---|
| 137 | if ((len > 32) || (llabs(op-ref) < CPYSIZE)) { \ |
---|
| 138 | for(; len; --len) \ |
---|
| 139 | *op++ = *ref++; \ |
---|
| 140 | } \ |
---|
| 141 | else BLOCK_COPY(op, ref, len, op_limit); |
---|
| 142 | |
---|
| 143 | /* Simple, but pretty effective hash function for 3-byte sequence */ |
---|
| 144 | #define HASH_FUNCTION(v, p, l) { \ |
---|
| 145 | v = BLOSCLZ_READU16(p); \ |
---|
| 146 | v ^= BLOSCLZ_READU16(p + 1) ^ ( v >> (16 - l)); \ |
---|
| 147 | v &= (1 << l) - 1; \ |
---|
[00587dc] | 148 | } |
---|
| 149 | |
---|
[981e22c] | 150 | /* Another version which seems to be a bit more effective than the above, |
---|
| 151 | * but a bit slower. Could be interesting for high opt_level. |
---|
| 152 | */ |
---|
| 153 | #define MINMATCH 3 |
---|
| 154 | #define HASH_FUNCTION2(v, p, l) { \ |
---|
| 155 | v = BLOSCLZ_READU16(p); \ |
---|
| 156 | v = (v * 2654435761U) >> ((MINMATCH * 8) - (l + 1)); \ |
---|
| 157 | v &= (1 << l) - 1; \ |
---|
| 158 | } |
---|
| 159 | |
---|
| 160 | #define LITERAL(ip, op, op_limit, anchor, copy) { \ |
---|
| 161 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) \ |
---|
| 162 | goto out; \ |
---|
| 163 | *op++ = *anchor++; \ |
---|
| 164 | ip = anchor; \ |
---|
| 165 | copy++; \ |
---|
| 166 | if(BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { \ |
---|
| 167 | copy = 0; \ |
---|
| 168 | *op++ = MAX_COPY-1; \ |
---|
| 169 | } \ |
---|
| 170 | continue; \ |
---|
| 171 | } |
---|
[00587dc] | 172 | |
---|
| 173 | #define IP_BOUNDARY 2 |
---|
| 174 | |
---|
[981e22c] | 175 | |
---|
| 176 | int blosclz_compress(const int opt_level, const void* input, int length, |
---|
| 177 | void* output, int maxout, int accel) |
---|
[00587dc] | 178 | { |
---|
| 179 | uint8_t* ip = (uint8_t*) input; |
---|
| 180 | uint8_t* ibase = (uint8_t*) input; |
---|
| 181 | uint8_t* ip_bound = ip + length - IP_BOUNDARY; |
---|
| 182 | uint8_t* ip_limit = ip + length - 12; |
---|
| 183 | uint8_t* op = (uint8_t*) output; |
---|
| 184 | |
---|
| 185 | /* Hash table depends on the opt level. Hash_log cannot be larger than 15. */ |
---|
[981e22c] | 186 | /* The parametrization below is made from playing with the bench suite, like: |
---|
| 187 | $ bench/bench blosclz single 4 |
---|
| 188 | $ bench/bench blosclz single 4 4194280 12 25 |
---|
| 189 | and taking the minimum times on a i5-3380M @ 2.90GHz. |
---|
| 190 | Curiously enough, values >= 14 does not always |
---|
| 191 | get maximum compression, even with large blocksizes. */ |
---|
| 192 | int8_t hash_log_[10] = {-1, 11, 11, 11, 12, 13, 13, 13, 13, 13}; |
---|
[00587dc] | 193 | uint8_t hash_log = hash_log_[opt_level]; |
---|
| 194 | uint16_t hash_size = 1 << hash_log; |
---|
| 195 | uint16_t *htab; |
---|
| 196 | uint8_t* op_limit; |
---|
| 197 | |
---|
| 198 | int32_t hval; |
---|
| 199 | uint8_t copy; |
---|
| 200 | |
---|
[981e22c] | 201 | double maxlength_[10] = {-1, .1, .15, .2, .3, .45, .6, .75, .9, 1.0}; |
---|
[00587dc] | 202 | int32_t maxlength = (int32_t) (length * maxlength_[opt_level]); |
---|
| 203 | if (maxlength > (int32_t) maxout) { |
---|
| 204 | maxlength = (int32_t) maxout; |
---|
| 205 | } |
---|
| 206 | op_limit = op + maxlength; |
---|
| 207 | |
---|
[981e22c] | 208 | /* output buffer cannot be less than 66 bytes or we can get into trouble */ |
---|
| 209 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(maxlength < 66 || length < 4)) { |
---|
| 210 | return 0; |
---|
[00587dc] | 211 | } |
---|
| 212 | |
---|
[981e22c] | 213 | /* prepare the acceleration to be used in condition */ |
---|
| 214 | accel = accel < 1 ? 1 : accel; |
---|
| 215 | accel -= 1; |
---|
[00587dc] | 216 | |
---|
[981e22c] | 217 | htab = (uint16_t *) calloc(hash_size, sizeof(uint16_t)); |
---|
[00587dc] | 218 | |
---|
| 219 | /* we start with literal copy */ |
---|
| 220 | copy = 2; |
---|
| 221 | *op++ = MAX_COPY-1; |
---|
| 222 | *op++ = *ip++; |
---|
| 223 | *op++ = *ip++; |
---|
| 224 | |
---|
| 225 | /* main loop */ |
---|
| 226 | while(BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) { |
---|
| 227 | const uint8_t* ref; |
---|
| 228 | int32_t distance; |
---|
| 229 | int32_t len = 3; /* minimum match length */ |
---|
| 230 | uint8_t* anchor = ip; /* comparison starting-point */ |
---|
| 231 | |
---|
| 232 | /* check for a run */ |
---|
| 233 | if(ip[0] == ip[-1] && BLOSCLZ_READU16(ip-1)==BLOSCLZ_READU16(ip+1)) { |
---|
| 234 | distance = 1; |
---|
| 235 | ip += 3; |
---|
| 236 | ref = anchor - 1 + 3; |
---|
| 237 | goto match; |
---|
| 238 | } |
---|
| 239 | |
---|
| 240 | /* find potential match */ |
---|
[981e22c] | 241 | HASH_FUNCTION(hval, ip, hash_log); |
---|
[00587dc] | 242 | ref = ibase + htab[hval]; |
---|
| 243 | |
---|
| 244 | /* calculate distance to the match */ |
---|
| 245 | distance = (int32_t)(anchor - ref); |
---|
| 246 | |
---|
[981e22c] | 247 | /* update hash table if necessary */ |
---|
| 248 | if ((distance & accel) == 0) |
---|
| 249 | htab[hval] = (uint16_t)(anchor - ibase); |
---|
| 250 | |
---|
[00587dc] | 251 | /* is this a match? check the first 3 bytes */ |
---|
| 252 | if (distance==0 || (distance >= MAX_FARDISTANCE) || |
---|
| 253 | *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++) |
---|
[981e22c] | 254 | LITERAL(ip, op, op_limit, anchor, copy); |
---|
[00587dc] | 255 | |
---|
| 256 | /* far, needs at least 5-byte match */ |
---|
[981e22c] | 257 | if (opt_level >= 5 && distance >= MAX_DISTANCE) { |
---|
[00587dc] | 258 | if (*ip++ != *ref++ || *ip++ != *ref++) |
---|
[981e22c] | 259 | LITERAL(ip, op, op_limit, anchor, copy); |
---|
[00587dc] | 260 | len += 2; |
---|
| 261 | } |
---|
| 262 | |
---|
| 263 | match: |
---|
| 264 | |
---|
| 265 | /* last matched byte */ |
---|
| 266 | ip = anchor + len; |
---|
| 267 | |
---|
| 268 | /* distance is biased */ |
---|
| 269 | distance--; |
---|
| 270 | |
---|
| 271 | if(!distance) { |
---|
| 272 | /* zero distance means a run */ |
---|
| 273 | uint8_t x = ip[-1]; |
---|
| 274 | int64_t value, value2; |
---|
| 275 | /* Broadcast the value for every byte in a 64-bit register */ |
---|
| 276 | memset(&value, x, 8); |
---|
| 277 | /* safe because the outer check against ip limit */ |
---|
| 278 | while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) { |
---|
[981e22c] | 279 | #if !defined(BLOSCLZ_STRICT_ALIGN) |
---|
[00587dc] | 280 | value2 = ((int64_t *)ref)[0]; |
---|
[981e22c] | 281 | #else |
---|
| 282 | memcpy(&value2, ref, 8); |
---|
| 283 | #endif |
---|
[00587dc] | 284 | if (value != value2) { |
---|
| 285 | /* Find the byte that starts to differ */ |
---|
| 286 | while (ip < ip_bound) { |
---|
| 287 | if (*ref++ != x) break; else ip++; |
---|
| 288 | } |
---|
| 289 | break; |
---|
| 290 | } |
---|
| 291 | else { |
---|
| 292 | ip += 8; |
---|
| 293 | ref += 8; |
---|
| 294 | } |
---|
| 295 | } |
---|
| 296 | if (ip > ip_bound) { |
---|
| 297 | long l = (long)(ip - ip_bound); |
---|
| 298 | ip -= l; |
---|
| 299 | ref -= l; |
---|
| 300 | } /* End of optimization */ |
---|
| 301 | } |
---|
| 302 | else { |
---|
| 303 | for(;;) { |
---|
| 304 | /* safe because the outer check against ip limit */ |
---|
| 305 | while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) { |
---|
[981e22c] | 306 | #if !defined(BLOSCLZ_STRICT_ALIGN) |
---|
[00587dc] | 307 | if (((int64_t *)ref)[0] != ((int64_t *)ip)[0]) { |
---|
[981e22c] | 308 | #endif |
---|
[00587dc] | 309 | /* Find the byte that starts to differ */ |
---|
| 310 | while (ip < ip_bound) { |
---|
| 311 | if (*ref++ != *ip++) break; |
---|
| 312 | } |
---|
| 313 | break; |
---|
[981e22c] | 314 | #if !defined(BLOSCLZ_STRICT_ALIGN) |
---|
| 315 | } else { ip += 8; ref += 8; } |
---|
| 316 | #endif |
---|
[00587dc] | 317 | } |
---|
| 318 | /* Last correction before exiting loop */ |
---|
| 319 | if (ip > ip_bound) { |
---|
| 320 | int32_t l = (int32_t)(ip - ip_bound); |
---|
| 321 | ip -= l; |
---|
| 322 | ref -= l; |
---|
| 323 | } /* End of optimization */ |
---|
| 324 | break; |
---|
| 325 | } |
---|
| 326 | } |
---|
| 327 | |
---|
| 328 | /* if we have copied something, adjust the copy count */ |
---|
| 329 | if (copy) |
---|
| 330 | /* copy is biased, '0' means 1 byte copy */ |
---|
| 331 | *(op-copy-1) = copy-1; |
---|
| 332 | else |
---|
| 333 | /* back, to overwrite the copy count */ |
---|
| 334 | op--; |
---|
| 335 | |
---|
| 336 | /* reset literal counter */ |
---|
| 337 | copy = 0; |
---|
| 338 | |
---|
| 339 | /* length is biased, '1' means a match of 3 bytes */ |
---|
| 340 | ip -= 3; |
---|
| 341 | len = (int32_t)(ip - anchor); |
---|
| 342 | |
---|
| 343 | /* check that we have space enough to encode the match for all the cases */ |
---|
| 344 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+(len/255)+6 > op_limit)) goto out; |
---|
| 345 | |
---|
| 346 | /* encode the match */ |
---|
| 347 | if(distance < MAX_DISTANCE) { |
---|
| 348 | if(len < 7) { |
---|
| 349 | *op++ = (len << 5) + (distance >> 8); |
---|
| 350 | *op++ = (distance & 255); |
---|
| 351 | } |
---|
| 352 | else { |
---|
| 353 | *op++ = (uint8_t)((7 << 5) + (distance >> 8)); |
---|
| 354 | for(len-=7; len >= 255; len-= 255) |
---|
| 355 | *op++ = 255; |
---|
| 356 | *op++ = len; |
---|
| 357 | *op++ = (distance & 255); |
---|
| 358 | } |
---|
| 359 | } |
---|
| 360 | else { |
---|
| 361 | /* far away, but not yet in the another galaxy... */ |
---|
| 362 | if(len < 7) { |
---|
| 363 | distance -= MAX_DISTANCE; |
---|
| 364 | *op++ = (uint8_t)((len << 5) + 31); |
---|
| 365 | *op++ = 255; |
---|
| 366 | *op++ = (uint8_t)(distance >> 8); |
---|
| 367 | *op++ = distance & 255; |
---|
| 368 | } |
---|
| 369 | else { |
---|
| 370 | distance -= MAX_DISTANCE; |
---|
| 371 | *op++ = (7 << 5) + 31; |
---|
| 372 | for(len-=7; len >= 255; len-= 255) |
---|
| 373 | *op++ = 255; |
---|
| 374 | *op++ = len; |
---|
| 375 | *op++ = 255; |
---|
| 376 | *op++ = (uint8_t)(distance >> 8); |
---|
| 377 | *op++ = distance & 255; |
---|
| 378 | } |
---|
| 379 | } |
---|
| 380 | |
---|
| 381 | /* update the hash at match boundary */ |
---|
[981e22c] | 382 | HASH_FUNCTION(hval, ip, hash_log); |
---|
[00587dc] | 383 | htab[hval] = (uint16_t)(ip++ - ibase); |
---|
[981e22c] | 384 | HASH_FUNCTION(hval, ip, hash_log); |
---|
[00587dc] | 385 | htab[hval] = (uint16_t)(ip++ - ibase); |
---|
| 386 | |
---|
| 387 | /* assuming literal copy */ |
---|
| 388 | *op++ = MAX_COPY-1; |
---|
| 389 | } |
---|
| 390 | |
---|
| 391 | /* left-over as literal copy */ |
---|
| 392 | ip_bound++; |
---|
| 393 | while(ip <= ip_bound) { |
---|
| 394 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) goto out; |
---|
| 395 | *op++ = *ip++; |
---|
| 396 | copy++; |
---|
| 397 | if(copy == MAX_COPY) { |
---|
| 398 | copy = 0; |
---|
| 399 | *op++ = MAX_COPY-1; |
---|
| 400 | } |
---|
| 401 | } |
---|
| 402 | |
---|
| 403 | /* if we have copied something, adjust the copy length */ |
---|
| 404 | if(copy) |
---|
| 405 | *(op-copy-1) = copy-1; |
---|
| 406 | else |
---|
| 407 | op--; |
---|
| 408 | |
---|
| 409 | /* marker for blosclz */ |
---|
| 410 | *(uint8_t*)output |= (1 << 5); |
---|
| 411 | |
---|
| 412 | free(htab); |
---|
| 413 | return (int)(op - (uint8_t*)output); |
---|
| 414 | |
---|
| 415 | out: |
---|
| 416 | free(htab); |
---|
| 417 | return 0; |
---|
| 418 | |
---|
| 419 | } |
---|
| 420 | |
---|
| 421 | int blosclz_decompress(const void* input, int length, void* output, int maxout) |
---|
| 422 | { |
---|
| 423 | const uint8_t* ip = (const uint8_t*) input; |
---|
| 424 | const uint8_t* ip_limit = ip + length; |
---|
| 425 | uint8_t* op = (uint8_t*) output; |
---|
| 426 | uint8_t* op_limit = op + maxout; |
---|
| 427 | int32_t ctrl = (*ip++) & 31; |
---|
| 428 | int32_t loop = 1; |
---|
| 429 | |
---|
| 430 | do { |
---|
[981e22c] | 431 | uint8_t* ref = op; |
---|
[00587dc] | 432 | int32_t len = ctrl >> 5; |
---|
| 433 | int32_t ofs = (ctrl & 31) << 8; |
---|
| 434 | |
---|
| 435 | if(ctrl >= 32) { |
---|
| 436 | uint8_t code; |
---|
| 437 | len--; |
---|
| 438 | ref -= ofs; |
---|
| 439 | if (len == 7-1) |
---|
| 440 | do { |
---|
| 441 | code = *ip++; |
---|
| 442 | len += code; |
---|
| 443 | } while (code==255); |
---|
| 444 | code = *ip++; |
---|
| 445 | ref -= code; |
---|
| 446 | |
---|
| 447 | /* match from 16-bit distance */ |
---|
| 448 | if(BLOSCLZ_UNEXPECT_CONDITIONAL(code==255)) |
---|
| 449 | if(BLOSCLZ_EXPECT_CONDITIONAL(ofs==(31 << 8))) { |
---|
| 450 | ofs = (*ip++) << 8; |
---|
| 451 | ofs += *ip++; |
---|
| 452 | ref = op - ofs - MAX_DISTANCE; |
---|
| 453 | } |
---|
| 454 | |
---|
| 455 | #ifdef BLOSCLZ_SAFE |
---|
| 456 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit)) { |
---|
| 457 | return 0; |
---|
| 458 | } |
---|
| 459 | |
---|
| 460 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(ref-1 < (uint8_t *)output)) { |
---|
| 461 | return 0; |
---|
| 462 | } |
---|
| 463 | #endif |
---|
| 464 | |
---|
| 465 | if(BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit)) |
---|
| 466 | ctrl = *ip++; |
---|
| 467 | else |
---|
| 468 | loop = 0; |
---|
| 469 | |
---|
| 470 | if(ref == op) { |
---|
| 471 | /* optimize copy for a run */ |
---|
| 472 | uint8_t b = ref[-1]; |
---|
| 473 | memset(op, b, len+3); |
---|
| 474 | op += len+3; |
---|
| 475 | } |
---|
| 476 | else { |
---|
| 477 | /* copy from reference */ |
---|
| 478 | ref--; |
---|
| 479 | len += 3; |
---|
[981e22c] | 480 | #if !defined(_WIN32) && ((defined(__GNUC__) || defined(__INTEL_COMPILER) || !defined(__clang__))) |
---|
| 481 | GCC_SAFE_COPY(op, ref, len, op_limit); |
---|
| 482 | #else |
---|
| 483 | SAFE_COPY(op, ref, len, op_limit); |
---|
| 484 | #endif |
---|
[00587dc] | 485 | } |
---|
| 486 | } |
---|
| 487 | else { |
---|
| 488 | ctrl++; |
---|
| 489 | #ifdef BLOSCLZ_SAFE |
---|
| 490 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit)) { |
---|
| 491 | return 0; |
---|
| 492 | } |
---|
| 493 | if (BLOSCLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit)) { |
---|
| 494 | return 0; |
---|
| 495 | } |
---|
| 496 | #endif |
---|
| 497 | |
---|
[981e22c] | 498 | BLOCK_COPY(op, ip, ctrl, op_limit); |
---|
[00587dc] | 499 | |
---|
| 500 | loop = (int32_t)BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit); |
---|
| 501 | if(loop) |
---|
| 502 | ctrl = *ip++; |
---|
| 503 | } |
---|
| 504 | } while(BLOSCLZ_EXPECT_CONDITIONAL(loop)); |
---|
| 505 | |
---|
| 506 | return (int)(op - (uint8_t*)output); |
---|
| 507 | } |
---|