Changeset 981e22c for thirdparty/blosc/blosclz.c
- Timestamp:
- 08/26/16 19:35:26 (8 years ago)
- Branches:
- master, pympi
- Children:
- 8ebc79b
- Parents:
- cda87e9
- git-author:
- Hal Finkel <hfinkel@…> (08/26/16 19:35:26)
- git-committer:
- Hal Finkel <hfinkel@…> (08/26/16 19:35:26)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
thirdparty/blosc/blosclz.c
r00587dc r981e22c 1 1 /********************************************************************* 2 Blosc - Blocked S uffling and Compression Library3 4 Author: Francesc Alted <f [email protected]>2 Blosc - Blocked Shuffling and Compression Library 3 4 Author: Francesc Alted <f[email protected]> 5 5 Creation date: 2009-05-20 6 6 … … 21 21 #if defined(_WIN32) && !defined(__MINGW32__) 22 22 #include <windows.h> 23 #include "win32/stdint-windows.h" 23 24 /* stdint.h only available in VS2010 (VC++ 16.0) and newer */ 25 #if defined(_MSC_VER) && _MSC_VER < 1600 26 #include "win32/stdint-windows.h" 27 #else 28 #include <stdint.h> 29 #endif 30 /* llabs only available in VS2013 (VC++ 18.0) and newer */ 31 #if defined(_MSC_VER) && _MSC_VER < 1800 32 #define llabs(v) abs(v) 33 #endif 24 34 #else 25 35 #include <stdint.h> … … 36 46 #elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ 37 47 #undef BLOSCLZ_STRICT_ALIGN 38 #elif defined(_M_IX86) /* Intel, MSVC */48 #elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */ 39 49 #undef BLOSCLZ_STRICT_ALIGN 40 50 #elif defined(__386) … … 44 54 #elif defined(__I86__) /* Digital Mars */ 45 55 #undef BLOSCLZ_STRICT_ALIGN 56 /* Seems like unaligned access in ARM (at least ARMv6) is pretty 57 expensive, so we are going to always enforce strict aligment in ARM. 58 If anybody suggest that newer ARMs are better, we can revisit this. */ 59 /* #elif defined(__ARM_FEATURE_UNALIGNED) */ /* ARM, GNU C */ 60 /* #undef BLOSCLZ_STRICT_ALIGN */ 46 61 #endif 47 62 #endif … … 67 82 * Use inlined functions for supported systems. 68 83 */ 69 #if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C) 70 #define BLOSCLZ_INLINE inline 71 #elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__) 72 #define BLOSCLZ_INLINE __inline 73 #else 74 #define BLOSCLZ_INLINE 84 #if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */ 85 #define inline __inline /* Visual C is not C99, but supports some kind of inline */ 75 86 #endif 76 87 77 88 #define MAX_COPY 32 78 #define MAX_LEN 264 /* 256 + 8 */79 89 #define MAX_DISTANCE 8191 80 90 #define MAX_FARDISTANCE (65535+MAX_DISTANCE-1) … … 87 97 88 98 89 static BLOSCLZ_INLINE int32_t hash_function(uint8_t* p, uint8_t hash_log) 90 { 91 int32_t v; 92 93 v = BLOSCLZ_READU16(p); 94 v ^= BLOSCLZ_READU16(p+1)^(v>>(16-hash_log)); 95 v &= (1 << hash_log) - 1; 96 return v; 99 /* 100 * Fast copy macros 101 */ 102 #if defined(_WIN32) 103 #define CPYSIZE 32 104 #else 105 #define CPYSIZE 8 106 #endif 107 #define MCPY(d,s) { memcpy(d, s, CPYSIZE); d+=CPYSIZE; s+=CPYSIZE; } 108 #define FASTCOPY(d,s,e) { do { MCPY(d,s) } while (d<e); } 109 #define SAFECOPY(d,s,e) { while (d<e) { MCPY(d,s) } } 110 111 /* Copy optimized for copying in blocks */ 112 #define BLOCK_COPY(op, ref, len, op_limit) \ 113 { int ilen = len % CPYSIZE; \ 114 uint8_t *cpy = op + len; \ 115 if (cpy + CPYSIZE - ilen <= op_limit) { \ 116 FASTCOPY(op, ref, cpy); \ 117 ref -= (op-cpy); op = cpy; \ 118 } \ 119 else { \ 120 cpy -= ilen; \ 121 SAFECOPY(op, ref, cpy); \ 122 ref -= (op-cpy); op = cpy; \ 123 for(; ilen; --ilen) \ 124 *op++ = *ref++; \ 125 } \ 97 126 } 98 127 128 #define SAFE_COPY(op, ref, len, op_limit) \ 129 if (llabs(op-ref) < CPYSIZE) { \ 130 for(; len; --len) \ 131 *op++ = *ref++; \ 132 } \ 133 else BLOCK_COPY(op, ref, len, op_limit); 134 135 /* Copy optimized for GCC 4.8. Seems like long copy loops are optimal. */ 136 #define GCC_SAFE_COPY(op, ref, len, op_limit) \ 137 if ((len > 32) || (llabs(op-ref) < CPYSIZE)) { \ 138 for(; len; --len) \ 139 *op++ = *ref++; \ 140 } \ 141 else BLOCK_COPY(op, ref, len, op_limit); 142 143 /* Simple, but pretty effective hash function for 3-byte sequence */ 144 #define HASH_FUNCTION(v, p, l) { \ 145 v = BLOSCLZ_READU16(p); \ 146 v ^= BLOSCLZ_READU16(p + 1) ^ ( v >> (16 - l)); \ 147 v &= (1 << l) - 1; \ 148 } 149 150 /* Another version which seems to be a bit more effective than the above, 151 * but a bit slower. Could be interesting for high opt_level. 152 */ 153 #define MINMATCH 3 154 #define HASH_FUNCTION2(v, p, l) { \ 155 v = BLOSCLZ_READU16(p); \ 156 v = (v * 2654435761U) >> ((MINMATCH * 8) - (l + 1)); \ 157 v &= (1 << l) - 1; \ 158 } 159 160 #define LITERAL(ip, op, op_limit, anchor, copy) { \ 161 if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) \ 162 goto out; \ 163 *op++ = *anchor++; \ 164 ip = anchor; \ 165 copy++; \ 166 if(BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) { \ 167 copy = 0; \ 168 *op++ = MAX_COPY-1; \ 169 } \ 170 continue; \ 171 } 99 172 100 173 #define IP_BOUNDARY 2 101 174 102 int blosclz_compress(int opt_level, const void* input, 103 int length, void* output, int maxout) 175 176 int blosclz_compress(const int opt_level, const void* input, int length, 177 void* output, int maxout, int accel) 104 178 { 105 179 uint8_t* ip = (uint8_t*) input; … … 110 184 111 185 /* Hash table depends on the opt level. Hash_log cannot be larger than 15. */ 112 uint8_t hash_log_[10] = {-1, 8, 9, 9, 11, 11, 12, 13, 14, 15}; 186 /* The parametrization below is made from playing with the bench suite, like: 187 $ bench/bench blosclz single 4 188 $ bench/bench blosclz single 4 4194280 12 25 189 and taking the minimum times on a i5-3380M @ 2.90GHz. 190 Curiously enough, values >= 14 does not always 191 get maximum compression, even with large blocksizes. */ 192 int8_t hash_log_[10] = {-1, 11, 11, 11, 12, 13, 13, 13, 13, 13}; 113 193 uint8_t hash_log = hash_log_[opt_level]; 114 194 uint16_t hash_size = 1 << hash_log; … … 116 196 uint8_t* op_limit; 117 197 118 int32_t hslot;119 198 int32_t hval; 120 199 uint8_t copy; 121 200 122 double maxlength_[10] = {-1, .1, .15, .2, . 5, .7, .85, .925, .975, 1.0};201 double maxlength_[10] = {-1, .1, .15, .2, .3, .45, .6, .75, .9, 1.0}; 123 202 int32_t maxlength = (int32_t) (length * maxlength_[opt_level]); 124 203 if (maxlength > (int32_t) maxout) { … … 127 206 op_limit = op + maxlength; 128 207 129 /* output buffer cannot be less than 66 bytes or we can get into problems. 130 As output is usually the same length than input, we take input length. */ 131 if (length < 66) { 132 return 0; /* Mark this as uncompressible */ 208 /* output buffer cannot be less than 66 bytes or we can get into trouble */ 209 if (BLOSCLZ_UNEXPECT_CONDITIONAL(maxlength < 66 || length < 4)) { 210 return 0; 133 211 } 134 212 135 htab = (uint16_t *) malloc(hash_size*sizeof(uint16_t)); 136 137 /* sanity check */ 138 if(BLOSCLZ_UNEXPECT_CONDITIONAL(length < 4)) { 139 if(length) { 140 /* create literal copy only */ 141 *op++ = length-1; 142 ip_bound++; 143 while(ip <= ip_bound) 144 *op++ = *ip++; 145 free(htab); 146 return length+1; 147 } 148 else goto out; 149 } 150 151 /* initializes hash table */ 152 for (hslot = 0; hslot < hash_size; hslot++) 153 htab[hslot] = 0; 213 /* prepare the acceleration to be used in condition */ 214 accel = accel < 1 ? 1 : accel; 215 accel -= 1; 216 217 htab = (uint16_t *) calloc(hash_size, sizeof(uint16_t)); 154 218 155 219 /* we start with literal copy */ … … 175 239 176 240 /* find potential match */ 177 hval = hash_function(ip, hash_log);241 HASH_FUNCTION(hval, ip, hash_log); 178 242 ref = ibase + htab[hval]; 179 /* update hash table */180 htab[hval] = (uint16_t)(anchor - ibase);181 243 182 244 /* calculate distance to the match */ 183 245 distance = (int32_t)(anchor - ref); 246 247 /* update hash table if necessary */ 248 if ((distance & accel) == 0) 249 htab[hval] = (uint16_t)(anchor - ibase); 184 250 185 251 /* is this a match? check the first 3 bytes */ 186 252 if (distance==0 || (distance >= MAX_FARDISTANCE) || 187 253 *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++) 188 goto literal;254 LITERAL(ip, op, op_limit, anchor, copy); 189 255 190 256 /* far, needs at least 5-byte match */ 191 if ( distance >= MAX_DISTANCE) {257 if (opt_level >= 5 && distance >= MAX_DISTANCE) { 192 258 if (*ip++ != *ref++ || *ip++ != *ref++) 193 goto literal;259 LITERAL(ip, op, op_limit, anchor, copy); 194 260 len += 2; 195 261 } … … 211 277 /* safe because the outer check against ip limit */ 212 278 while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) { 279 #if !defined(BLOSCLZ_STRICT_ALIGN) 213 280 value2 = ((int64_t *)ref)[0]; 281 #else 282 memcpy(&value2, ref, 8); 283 #endif 214 284 if (value != value2) { 215 285 /* Find the byte that starts to differ */ … … 234 304 /* safe because the outer check against ip limit */ 235 305 while (ip < (ip_bound - (sizeof(int64_t) - IP_BOUNDARY))) { 236 if (*ref++ != *ip++) break; 306 #if !defined(BLOSCLZ_STRICT_ALIGN) 237 307 if (((int64_t *)ref)[0] != ((int64_t *)ip)[0]) { 308 #endif 238 309 /* Find the byte that starts to differ */ 239 310 while (ip < ip_bound) { … … 241 312 } 242 313 break; 243 } 244 else { 245 ip += 8; 246 ref += 8; 247 } 314 #if !defined(BLOSCLZ_STRICT_ALIGN) 315 } else { ip += 8; ref += 8; } 316 #endif 248 317 } 249 318 /* Last correction before exiting loop */ … … 311 380 312 381 /* update the hash at match boundary */ 313 hval = hash_function(ip, hash_log);382 HASH_FUNCTION(hval, ip, hash_log); 314 383 htab[hval] = (uint16_t)(ip++ - ibase); 315 hval = hash_function(ip, hash_log);384 HASH_FUNCTION(hval, ip, hash_log); 316 385 htab[hval] = (uint16_t)(ip++ - ibase); 317 386 318 387 /* assuming literal copy */ 319 388 *op++ = MAX_COPY-1; 320 321 continue;322 323 literal:324 if (BLOSCLZ_UNEXPECT_CONDITIONAL(op+2 > op_limit)) goto out;325 *op++ = *anchor++;326 ip = anchor;327 copy++;328 if(BLOSCLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) {329 copy = 0;330 *op++ = MAX_COPY-1;331 }332 389 } 333 390 … … 362 419 } 363 420 364 365 421 int blosclz_decompress(const void* input, int length, void* output, int maxout) 366 422 { … … 373 429 374 430 do { 375 constuint8_t* ref = op;431 uint8_t* ref = op; 376 432 int32_t len = ctrl >> 5; 377 433 int32_t ofs = (ctrl & 31) << 8; … … 422 478 ref--; 423 479 len += 3; 424 if (abs((int32_t)(ref-op)) <= (int32_t)len) { 425 /* src and dst do overlap: do a loop */ 426 for(; len; --len) 427 *op++ = *ref++; 428 /* The memmove below does not work well (don't know why) */ 429 /* memmove(op, ref, len); 430 op += len; 431 ref += len; 432 len = 0; */ 433 } 434 else { 435 memcpy(op, ref, len); 436 op += len; 437 ref += len; 438 } 480 #if !defined(_WIN32) && ((defined(__GNUC__) || defined(__INTEL_COMPILER) || !defined(__clang__))) 481 GCC_SAFE_COPY(op, ref, len, op_limit); 482 #else 483 SAFE_COPY(op, ref, len, op_limit); 484 #endif 439 485 } 440 486 } … … 450 496 #endif 451 497 452 memcpy(op, ip, ctrl); 453 ip += ctrl; 454 op += ctrl; 498 BLOCK_COPY(op, ip, ctrl, op_limit); 455 499 456 500 loop = (int32_t)BLOSCLZ_EXPECT_CONDITIONAL(ip < ip_limit);
Note: See TracChangeset
for help on using the changeset viewer.