Changeset 9ee2ce3
- Timestamp:
- 09/28/18 16:32:55 (6 years ago)
- Branches:
- master, pympi
- Children:
- e6aa0eb
- Parents:
- abca157
- git-author:
- Hal Finkel <hfinkel@…> (09/28/18 16:32:55)
- git-committer:
- Hal Finkel <hfinkel@…> (09/28/18 16:32:55)
- Location:
- thirdparty/SZ
- Files:
-
- 1 added
- 55 edited
Legend:
- Unmodified
- Added
- Removed
-
TabularUnified thirdparty/SZ/COPYRIGHT.txt ¶
r2c47b73 r9ee2ce3 1 1 Copyright © 2016 , UChicago Argonne, LLC 2 2 All Rights Reserved 3 [SZ, Version 1. 3]3 [SZ, Version 1.4] 4 4 Sheng Di 5 5 Dingwen Tao 6 Xin Liang 6 7 Franck Cappello 7 8 Argonne National Laboratory -
TabularUnified thirdparty/SZ/sz/include/TightDataPointStorageD.h ¶
r2c47b73 r9ee2ce3 23 23 char reqLength; 24 24 char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression" 25 26 double minLogValue; 25 27 26 28 int stateNum; … … 53 55 unsigned char* pwrErrBoundBytes; 54 56 int pwrErrBoundBytes_size; 57 58 unsigned char* raBytes; 59 size_t raBytes_size; 60 55 61 } TightDataPointStorageD; 56 62 -
TabularUnified thirdparty/SZ/sz/include/TightDataPointStorageF.h ¶
r2c47b73 r9ee2ce3 35 35 size_t rtypeArray_size; 36 36 37 float minLogValue; 38 37 39 unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1) 38 40 size_t typeArray_size; … … 55 57 unsigned char* pwrErrBoundBytes; 56 58 int pwrErrBoundBytes_size; 59 60 unsigned char* raBytes; 61 size_t raBytes_size; 57 62 58 63 } TightDataPointStorageF; -
TabularUnified thirdparty/SZ/sz/include/TypeManager.h ¶
r2c47b73 r9ee2ce3 20 20 //TypeManager.c 21 21 size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result); 22 size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result); 22 23 void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); 23 24 size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result); 25 size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result); 24 26 void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray); 25 27 size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result); -
TabularUnified thirdparty/SZ/sz/include/callZlib.h ¶
r2c47b73 r9ee2ce3 19 19 20 20 #include <stdio.h> 21 22 int isZlibFormat(unsigned char magic1, unsigned char magic2); 21 23 22 24 //callZlib.c -
TabularUnified thirdparty/SZ/sz/include/dataCompression.h ¶
r2c47b73 r9ee2ce3 78 78 int initRandomAccessBytes(unsigned char* raBytes); 79 79 80 int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData); 81 int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 82 int reqLength, int reqBytesLength, int resiBitsLength, float medianValue); 83 84 void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData); 85 86 int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData); 87 int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 88 int reqLength, int reqBytesLength, int resiBitsLength, double medianValue); 89 90 void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData); 91 80 92 #ifdef __cplusplus 81 93 } -
TabularUnified thirdparty/SZ/sz/include/pastriD.h ¶
r2c47b73 r9ee2ce3 11 11 half.d=0.5; 12 12 13 //printf("pastri_double_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x))));14 //printf("sign(x):0x%lx\n", x);15 //printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));13 // //printf("pastri_double_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); 14 // //printf("sign(x):0x%lx\n", x); 15 // //printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); 16 16 half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); 17 //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));17 // //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); 18 18 return (int64_t)(x + half.d); 19 19 } … … 27 27 int i,sb; 28 28 for(i=0;i<p->bSize;i++){ 29 //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG29 // //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG 30 30 if(abs_FastD(data[i])>p->usedEb){ 31 31 bp->nonZeros++; 32 // if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG32 ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG 33 33 } 34 34 if(abs_FastD(data[i])>absExt){ … … 43 43 bp->binSize=2*p->usedEb; 44 44 45 // if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG46 // if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG47 48 // if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize) );} }//DEBUG45 ////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG 46 ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG 47 48 ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize) );} }//DEBUG 49 49 50 50 //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! 
… … 53 53 for(i=0;i<p->sbSize;i++){ 54 54 patternQ[i]=pastri_double_quantize(data[patternIdx+i],bp->binSize); 55 if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}55 //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} 56 56 } 57 57 … … 59 59 bp->scaleBits=bp->patternBits; 60 60 bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->scaleBits-1))-1); 61 // if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG62 // if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG63 if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG61 ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG 62 ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG 63 //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG 64 64 65 65 //Calculate Scales. … … 68 68 //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! 69 69 int patternExtZero=(patternExt==0); 70 // if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG70 ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG 71 71 for(sb=0;sb<p->sbNum;sb++){ 72 72 //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; … … 74 74 //assert(scales[sb]<=1); 75 75 scalesQ[sb]=pastri_double_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); 76 if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}76 //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} 77 77 } 78 // if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG78 ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG 79 79 80 80 //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. 
… … 93 93 if(absECQ > bp->ECQExt) 94 94 bp->ECQExt=absECQ; 95 // if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG95 ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG 96 96 switch (ECQ[_1DIdx]){ 97 97 case 0: … … 118 118 double decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; 119 119 if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ 120 printf("p->usedEb=%.6e\n",p->usedEb);121 printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));120 //printf("p->usedEb=%.6e\n",p->usedEb); 121 //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); 122 122 assert(0); 123 123 } … … 175 175 //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; 176 176 177 if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG178 if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG177 //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG 178 //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG 179 179 180 180 //**************************************************************************************** … … 184 184 //Uncompressed, Sparse bits. Just like the original GAMESS data. 
Includes: mode, indexOffsets, nonZeros, indexes, data 185 185 *numOutBytes=UCSparseBytes; 186 if(D_G){printf("UCSparse\n");} //DEBUG187 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG186 //if(D_G){printf("UCSparse\n");} //DEBUG 187 //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 188 188 outBuf[0]=0; //mode 189 189 … … 217 217 } 218 218 219 if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG219 //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG 220 220 221 221 //**************************************************************************************** … … 225 225 //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data 226 226 *numOutBytes=UCNonSparseBytes; 227 if(D_G){printf("UCNonSparse\n");} //DEBUG228 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG227 //if(D_G){printf("UCNonSparse\n");} //DEBUG 228 //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 229 229 outBuf[0]=1; //mode 230 230 … … 232 232 memcpy(&outBuf[1], data, p->bSize*p->dataSize); 233 233 234 if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG234 //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG 235 235 /* 236 236 for(i=0;i<UCNonSparseBytes-17;i++){ 237 printf("%d ",inBuf[p->bSize*8+i]);238 } 239 printf("\n");237 //printf("%d ",inBuf[p->bSize*8+i]); 238 } 239 //printf("\n"); 240 240 for(i=0;i<UCNonSparseBytes-17;i++){ 241 printf("%d ",outBuf[17+i]);242 } 243 printf("\n");241 //printf("%d ",outBuf[17+i]); 242 } 243 //printf("\n"); 244 244 */ 245 245 //**************************************************************************************** … … 249 249 //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} 250 250 *numOutBytes=CSparseBytes; 251 if(D_G){printf("CSparse\n");} //DEBUG252 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG253 // if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG251 //if(D_G){printf("CSparse\n");} //DEBUG 252 
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 253 ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG 254 254 outBuf[0]=2; //mode 255 255 … … 270 270 bitPos=9*8; 271 271 272 // if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG272 ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG 273 273 274 274 for(i=0;i<p->sbSize;i++){ 275 275 writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point 276 276 } 277 // if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG277 ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG 278 278 for(i=0;i<p->sbNum;i++){ 279 279 writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale 280 280 } 281 // if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG282 // if(DEBUG)printf("ECQBits:%d\n",ECQBits);281 ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG 282 ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); 283 283 switch(bp->ECQBits){ 284 284 case 2: … … 288 288 break; 289 289 case 1: 290 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG290 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG 291 291 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 292 292 //writeBits_Fast(outBuf,&bitPos,2,0x10); … … 296 296 break; 297 297 case -1: 298 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG298 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG 299 299 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 300 300 //writeBits_Fast(outBuf,&bitPos,2,0x11); … … 315 315 break; 316 316 case 1: 317 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG317 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG 318 318 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 319 319 //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 … … 323 323 break; 324 324 case -1: 325 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG325 ////if(DEBUG)printf("Index:%d ECQ:%ld 
Written:0x01\n",i,ECQ[i]); //DEBUG 326 326 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 327 327 //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 … … 331 331 break; 332 332 default: 333 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG333 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG 334 334 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 335 335 //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]); … … 344 344 } 345 345 346 // if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG347 if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG346 ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG 347 //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG 348 348 349 349 … … 352 352 *(uint32_t*)(&outBuf[1])=bytePos; 353 353 354 if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG354 //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG 355 355 if(D_G){assert(bitPos==CSparseBits);} 356 356 … … 360 360 //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} 361 361 *numOutBytes=CNonSparseBytes; 362 if(D_G){printf("CNonSparse\n");} //DEBUG363 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG364 // if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG362 //if(D_G){printf("CNonSparse\n");} //DEBUG 363 //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 364 ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG 365 365 outBuf[0]=3; //mode 366 366 … … 375 375 
bitPos=7*8; //Currently, we are at the end of 7th byte. 376 376 377 // if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG377 ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG 378 378 379 379 for(i=0;i<p->sbSize;i++){ 380 380 writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point 381 381 } 382 // if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG382 ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG 383 383 for(i=0;i<p->sbNum;i++){ 384 384 writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale 385 385 } 386 // if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG387 // if(DEBUG)printf("ECQBits:%d\n",ECQBits);386 ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG 387 ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); 388 388 switch(bp->ECQBits){ 389 389 case 2: … … 391 391 switch(ECQ[i]){ 392 392 case 0: 393 // if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG393 ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG 394 394 writeBits_Fast(outBuf,&bitPos,1,1);//0x1 395 395 break; 396 396 case 1: 397 // if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG397 ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG 398 398 //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 399 399 writeBits_Fast(outBuf,&bitPos,1,0); … … 401 401 break; 402 402 case -1: 403 // if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG403 ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG 404 404 //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 405 405 writeBits_Fast(outBuf,&bitPos,1,0); … … 413 413 break; 414 414 default: //ECQBits>2 415 // if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG415 ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG 416 416 for(i=0;i<p->bSize;i++){ 417 // if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG418 // if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); 
//DEBUG419 // if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG417 ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 418 ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG 419 ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG 420 420 switch(ECQ[i]){ 421 421 case 0: 422 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG423 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG422 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG 423 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 424 424 //temp1=bitPos; 425 425 writeBits_Fast(outBuf,&bitPos,1,1); //0x1 426 426 //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 427 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG427 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG 428 428 break; 429 429 case 1: 430 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG431 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG430 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG 431 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 432 432 //temp1=bitPos; 433 433 //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 … … 436 436 writeBits_Fast(outBuf,&bitPos,1,0); 437 437 //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 438 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG438 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; 
//DEBUG 439 439 break; 440 440 case -1: 441 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG442 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG441 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG 442 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 443 443 //temp1=bitPos; 444 444 //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 … … 447 447 writeBits_Fast(outBuf,&bitPos,1,1); 448 448 //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 449 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG449 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG 450 450 break; 451 451 default: 452 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG453 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG452 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG 453 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 454 454 //temp1=bitPos; 455 455 //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 … … 458 458 //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 459 459 writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); 460 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG460 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG 461 461 break; 462 462 } … … 465 465 } 466 466 467 // if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG468 if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d 
This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG467 ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG 468 //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG 469 469 470 470 … … 474 474 *(uint32_t*)(&outBuf[1])=bytePos; 475 475 476 if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG476 //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG 477 477 if(D_G){assert(bitPos==CNonSparseBits);} 478 478 479 479 } 480 // for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG480 ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG 481 481 482 482 } … … 484 484 pastri_blockParams bp; 485 485 486 if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG487 if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG488 if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG489 if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG486 //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG 487 //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG 488 //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG 489 //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG 490 490 491 491 int64_t patternQ[MAX_PS_SIZE]; … … 567 567 //R:UCSparse 
568 568 case 0: 569 if(D_G){printf("\nDC:UCSparse\n");} //DEBUG569 //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG 570 570 //bp->nonZeros=*(uint16_t*)(&inBuf[9]); 571 571 //bytePos=11; … … 592 592 bytePos+=8; 593 593 } 594 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG594 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 595 595 break; 596 596 //R:UCNonSparse 597 597 case 1: 598 if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG598 //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG 599 599 //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); 600 600 memcpy(data, &inBuf[1], p->bSize*8); 601 601 bytePos=p->bSize*8; 602 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG602 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 603 603 break; 604 604 //R:CSparse 605 605 case 2: 606 if(D_G){printf("\nDC:CSparse\n");} //DEBUG606 //if(D_G){printf("\nDC:CSparse\n");} //DEBUG 607 607 //for(j=0;j<p->bSize;j++){ 608 608 // data[j]=0; … … 615 615 bp->ECQBits=inBuf[6]; 616 616 617 if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG617 //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG 618 618 619 619 //bp->numOutliers=*(uint16_t*)(&inBuf[15]); … … 621 621 bp->numOutliers=*(uint16_t*)(&inBuf[7]); 622 622 bitPos=9*8; 623 if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG623 //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG 624 624 625 625 bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1); … … 627 627 bp->binSize=p->usedEb*2; 628 628 629 if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG629 //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG 630 630 
631 631 for(j=0;j<p->sbSize;j++){ 632 632 patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point 633 if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}633 //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} 634 634 } 635 635 for(j=0;j<p->sbNum;j++){ 636 636 scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale 637 if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}637 //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} 638 638 } 639 639 … … 649 649 case 2: 650 650 for(j=0;j<bp->numOutliers;j++){ 651 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG652 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG651 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG 652 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG 653 653 654 654 _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); 655 655 ECQTemp=readBits_I64(inBuf,&bitPos,1); 656 656 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; 657 // if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);657 ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); 658 658 //continue; 659 659 //sb=_1DIdx/p->sbSize; … … 663 663 ECQ[_1DIdx]=ECQTemp; 664 664 665 // if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG665 ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG 666 666 } 667 667 break; 668 668 default: //bp->ECQBits>2 669 if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG669 //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG 670 670 671 671 for(j=0;j<bp->numOutliers;j++){ … … 674 674 
//localIdx=_1DIdx%p->sbSize; 675 675 temp=readBits_UI64(inBuf,&bitPos,1); 676 // if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG676 ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG 677 677 switch(temp){ 678 678 case 0: //+-1 679 679 ECQTemp=readBits_I64(inBuf,&bitPos,1); 680 680 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; 681 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG682 // if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);681 ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 682 ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); 683 683 break; 684 684 case 1: //Others 685 685 ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); 686 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG687 // if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);686 ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 687 ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); 688 688 break; 689 689 //default: 690 // printf("ERROR: Bad 2-bit value: 0x%lx",temp);690 //// printf("ERROR: Bad 2-bit value: 0x%lx",temp); 691 691 // assert(0); //AMG 692 692 // break; … … 696 696 ECQ[_1DIdx]=ECQTemp; 697 697 698 // if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG698 ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG 699 699 } 700 700 break; … … 705 705 706 706 bytePos=(bitPos+7)/8; 707 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG707 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 708 708 709 709 //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) … … 713 713 //R:CNonSparse 714 714 case 3: 715 if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG715 //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG 716 716 717 717 //for(j=0;j<p->bSize;j++){ … … 725 725 bp->ECQBits=inBuf[6]; 726 726 727 if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG727 
//if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG 728 728 729 729 //bitPos=15*8; … … 733 733 bp->binSize=p->usedEb*2; 734 734 735 if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG735 //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG 736 736 737 737 for(j=0;j<p->sbSize;j++){ 738 738 patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point 739 if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}739 //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} 740 740 } 741 741 for(j=0;j<p->sbNum;j++){ 742 742 scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale 743 if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}743 //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} 744 744 } 745 745 /* //Splitting 746 746 for(j=0;j<p->bSize;j++){ 747 747 data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; 748 // if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}748 ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} 749 749 } 750 750 */ … … 752 752 case 2: 753 753 for(j=0;j<p->bSize;j++){ 754 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG755 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG754 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG 755 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG 756 756 //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); 757 757 temp=readBits_UI64(inBuf,&bitPos,1); … … 769 769 } 770 770 771 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG771 ////if(DEBUG){printf("_1DIdx:%ld 
ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 772 772 //continue; 773 773 //sb=_1DIdx/p->sbSize; … … 777 777 ECQ[j]=ECQTemp; 778 778 779 // if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG779 ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG 780 780 } 781 781 break; 782 782 default: //bp->ECQBits>2 783 // if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);783 ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); 784 784 785 785 for(j=0;j<p->bSize;j++){ 786 // if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG787 // if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);788 789 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG790 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG786 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 787 ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); 788 789 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG 790 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG 791 791 //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); 792 792 temp=readBits_UI64(inBuf,&bitPos,1); 793 // if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG793 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 794 794 switch(temp){ 795 795 case 0: 796 // if(DEBUG)printf("Read:0");796 ////if(DEBUG)printf("Read:0"); 797 797 temp2=readBits_UI64(inBuf,&bitPos,1); 798 798 switch(temp2){ 799 799 case 0: 800 // if(DEBUG)printf("0");800 ////if(DEBUG)printf("0"); 801 801 ECQTemp=readBits_I64(inBuf,&bitPos,1); 802 // if(DEBUG){printf("AMG_R3:bitPos:%ld 
buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG803 // if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);802 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 803 ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); 804 804 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; 805 // if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);805 ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); 806 806 break; 807 807 case 1: 808 // if(DEBUG)printf("1\n");808 ////if(DEBUG)printf("1\n"); 809 809 ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); 810 // if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG811 // if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);810 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 811 ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); 812 812 break; 813 813 default: … … 817 817 break; 818 818 case 1: 819 // if(DEBUG)printf("Read:1\n");819 ////if(DEBUG)printf("Read:1\n"); 820 820 ECQTemp=0; 821 // if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);821 ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); 822 822 break; 823 823 default: … … 826 826 } 827 827 828 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG828 ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 829 829 //continue; 830 830 //sb=_1DIdx/p->sbSize; … … 834 834 ECQ[j]=ECQTemp; 835 835 836 // if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG836 ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG 837 837 } 838 838 break; … … 842 842 //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); 843 843 bytePos=(bitPos+7)/8; 844 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG844 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 845 845 846 846 //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) … … 880 880 
for(i=0;i<p->bSize;i++){ 881 881 if(idx0[i]!=idx0_dc[i]){ 882 printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);882 //printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); 883 883 assert(0); 884 884 } 885 885 if(idx1[i]!=idx1_dc[i]){ 886 printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);886 //printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); 887 887 assert(0); 888 888 } 889 889 if(idx2[i]!=idx2_dc[i]){ 890 printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);890 //printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); 891 891 assert(0); 892 892 } 893 893 if(idx3[i]!=idx3_dc[i]){ 894 printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);894 //printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); 895 895 assert(0); 896 896 } … … 901 901 for(i=0;i<p->bSize;i++){ 902 902 if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ 903 printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);903 //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); 904 904 assert(0); 905 905 } -
TabularUnified thirdparty/SZ/sz/include/pastriF.h ¶
r2c47b73 r9ee2ce3 11 11 half.d=0.5; 12 12 13 // printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x))));14 // printf("sign(x):0x%lx\n", x);15 // printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));13 ////printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); 14 ////printf("sign(x):0x%lx\n", x); 15 ////printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); 16 16 half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); 17 // printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));17 ////printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); 18 18 return (int64_t)(x + half.d); 19 19 } … … 27 27 int i,sb; 28 28 for(i=0;i<p->bSize;i++){ 29 // printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG29 ////printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG 30 30 if(abs_FastD(data[i])>p->usedEb){ 31 31 bp->nonZeros++; 32 // if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG32 ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG 33 33 } 34 34 if(abs_FastD(data[i])>absExt){ … … 43 43 bp->binSize=2*p->usedEb; 44 44 45 // if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG46 // if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG47 48 // if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG45 ////if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG 46 ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG 47 48 ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG 49 49 50 50 //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! 
… … 53 53 for(i=0;i<p->sbSize;i++){ 54 54 patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize); 55 if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}55 //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} 56 56 } 57 57 … … 59 59 bp->scaleBits=bp->patternBits; 60 60 bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1); 61 // if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG62 // if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG63 if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG61 ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG 62 ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG 63 //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG 64 64 65 65 //Calculate Scales. … … 68 68 //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! 69 69 int patternExtZero=(patternExt==0); 70 // if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG70 ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG 71 71 for(sb=0;sb<p->sbNum;sb++){ 72 72 //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; … … 74 74 //assert(scales[sb]<=1); 75 75 scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); 76 if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}76 //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} 77 77 } 78 // if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG78 ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG 79 79 80 80 //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. 
… … 93 93 if(absECQ > bp->ECQExt) 94 94 bp->ECQExt=absECQ; 95 // if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG95 ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG 96 96 switch (ECQ[_1DIdx]){ 97 97 case 0: … … 118 118 float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; 119 119 if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ 120 printf("p->usedEb=%.6e\n",p->usedEb);121 printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));120 //printf("p->usedEb=%.6e\n",p->usedEb); 121 //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); 122 122 assert(0); 123 123 } … … 175 175 //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; 176 176 177 if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG178 if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG177 //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG 178 //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG 179 179 180 180 //**************************************************************************************** … … 184 184 //Uncompressed, Sparse bits. Just like the original GAMESS data. 
Includes: mode, indexOffsets, nonZeros, indexes, data 185 185 *numOutBytes=UCSparseBytes; 186 if(D_G){printf("UCSparse\n");} //DEBUG187 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG186 //if(D_G){printf("UCSparse\n");} //DEBUG 187 //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 188 188 outBuf[0]=0; //mode 189 189 … … 217 217 } 218 218 219 if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG219 //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG 220 220 221 221 //**************************************************************************************** … … 225 225 //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data 226 226 *numOutBytes=UCNonSparseBytes; 227 if(D_G){printf("UCNonSparse\n");} //DEBUG228 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG227 //if(D_G){printf("UCNonSparse\n");} //DEBUG 228 //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 229 229 outBuf[0]=1; //mode 230 230 … … 232 232 memcpy(&outBuf[1], data, p->bSize*p->dataSize); 233 233 234 if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG234 //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG 235 235 /* 236 236 for(i=0;i<UCNonSparseBytes-17;i++){ 237 printf("%d ",inBuf[p->bSize*8+i]);238 } 239 printf("\n");237 //printf("%d ",inBuf[p->bSize*8+i]); 238 } 239 //printf("\n"); 240 240 for(i=0;i<UCNonSparseBytes-17;i++){ 241 printf("%d ",outBuf[17+i]);242 } 243 printf("\n");241 //printf("%d ",outBuf[17+i]); 242 } 243 //printf("\n"); 244 244 */ 245 245 //**************************************************************************************** … … 249 249 //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} 250 250 *numOutBytes=CSparseBytes; 251 if(D_G){printf("CSparse\n");} //DEBUG252 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG253 // if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG251 //if(D_G){printf("CSparse\n");} //DEBUG 252 
//if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 253 ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG 254 254 outBuf[0]=2; //mode 255 255 … … 270 270 bitPos=9*8; 271 271 272 // if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG272 ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG 273 273 274 274 for(i=0;i<p->sbSize;i++){ 275 275 writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point 276 276 } 277 // if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG277 ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG 278 278 for(i=0;i<p->sbNum;i++){ 279 279 writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale 280 280 } 281 // if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG282 // if(DEBUG)printf("ECQBits:%d\n",ECQBits);281 ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG 282 ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); 283 283 switch(bp->ECQBits){ 284 284 case 2: … … 288 288 break; 289 289 case 1: 290 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG290 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG 291 291 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 292 292 //writeBits_Fast(outBuf,&bitPos,2,0x10); … … 296 296 break; 297 297 case -1: 298 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG298 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG 299 299 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 300 300 //writeBits_Fast(outBuf,&bitPos,2,0x11); … … 315 315 break; 316 316 case 1: 317 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG317 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG 318 318 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 319 319 //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 … … 323 323 break; 324 324 case -1: 325 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG325 ////if(DEBUG)printf("Index:%d ECQ:%ld 
Written:0x01\n",i,ECQ[i]); //DEBUG 326 326 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 327 327 //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 … … 331 331 break; 332 332 default: 333 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG333 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG 334 334 writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); 335 335 //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]); … … 344 344 } 345 345 346 // if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG347 if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG346 ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG 347 //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG 348 348 349 349 … … 352 352 *(uint32_t*)(&outBuf[1])=bytePos; 353 353 354 if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG354 //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG 355 355 if(D_G){assert(bitPos==CSparseBits);} 356 356 … … 360 360 //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} 361 361 *numOutBytes=CNonSparseBytes; 362 if(D_G){printf("CNonSparse\n");} //DEBUG363 if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG364 // if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG362 //if(D_G){printf("CNonSparse\n");} //DEBUG 363 //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG 364 ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG 365 365 outBuf[0]=3; //mode 366 366 … … 375 375 
bitPos=7*8; //Currently, we are at the end of 7th byte. 376 376 377 // if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG377 ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG 378 378 379 379 for(i=0;i<p->sbSize;i++){ 380 380 writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point 381 381 } 382 // if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG382 ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG 383 383 for(i=0;i<p->sbNum;i++){ 384 384 writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale 385 385 } 386 // if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG387 // if(DEBUG)printf("ECQBits:%d\n",ECQBits);386 ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG 387 ////if(DEBUG)printf("ECQBits:%d\n",ECQBits); 388 388 switch(bp->ECQBits){ 389 389 case 2: … … 391 391 switch(ECQ[i]){ 392 392 case 0: 393 // if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG393 ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG 394 394 writeBits_Fast(outBuf,&bitPos,1,1);//0x1 395 395 break; 396 396 case 1: 397 // if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG397 ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG 398 398 //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 399 399 writeBits_Fast(outBuf,&bitPos,1,0); … … 401 401 break; 402 402 case -1: 403 // if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG403 ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG 404 404 //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 405 405 writeBits_Fast(outBuf,&bitPos,1,0); … … 413 413 break; 414 414 default: //ECQBits>2 415 // if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG415 ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG 416 416 for(i=0;i<p->bSize;i++){ 417 // if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG418 // if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); 
//DEBUG419 // if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG417 ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 418 ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG 419 ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG 420 420 switch(ECQ[i]){ 421 421 case 0: 422 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG423 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG422 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG 423 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 424 424 //temp1=bitPos; 425 425 writeBits_Fast(outBuf,&bitPos,1,1); //0x1 426 426 //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 427 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG427 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG 428 428 break; 429 429 case 1: 430 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG431 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG430 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG 431 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 432 432 //temp1=bitPos; 433 433 //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 … … 436 436 writeBits_Fast(outBuf,&bitPos,1,0); 437 437 //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 438 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG438 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; 
//DEBUG 439 439 break; 440 440 case -1: 441 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG442 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG441 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG 442 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 443 443 //temp1=bitPos; 444 444 //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 … … 447 447 writeBits_Fast(outBuf,&bitPos,1,1); 448 448 //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 449 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG449 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG 450 450 break; 451 451 default: 452 // if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG453 // if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG452 ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG 453 ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG 454 454 //temp1=bitPos; 455 455 //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 … … 458 458 //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 459 459 writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); 460 // if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG460 ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG 461 461 break; 462 462 } … … 465 465 } 466 466 467 // if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG468 if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d 
This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG467 ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG 468 //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG 469 469 470 470 … … 474 474 *(uint32_t*)(&outBuf[1])=bytePos; 475 475 476 if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG476 //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG 477 477 if(D_G){assert(bitPos==CNonSparseBits);} 478 478 479 479 } 480 // for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG480 ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG 481 481 482 482 } … … 484 484 pastri_blockParams bp; 485 485 486 if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG487 if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG488 if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG489 if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG486 //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG 487 //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG 488 //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG 489 //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG 490 490 491 491 int64_t patternQ[MAX_PS_SIZE]; … … 567 567 //R:UCSparse 
568 568 case 0: 569 if(D_G){printf("\nDC:UCSparse\n");} //DEBUG569 //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG 570 570 //bp->nonZeros=*(uint16_t*)(&inBuf[9]); 571 571 //bytePos=11; … … 592 592 bytePos+=8; 593 593 } 594 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG594 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 595 595 break; 596 596 //R:UCNonSparse 597 597 case 1: 598 if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG598 //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG 599 599 //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); 600 600 memcpy(data, &inBuf[1], p->bSize*8); 601 601 bytePos=p->bSize*8; 602 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG602 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 603 603 break; 604 604 //R:CSparse 605 605 case 2: 606 if(D_G){printf("\nDC:CSparse\n");} //DEBUG606 //if(D_G){printf("\nDC:CSparse\n");} //DEBUG 607 607 //for(j=0;j<p->bSize;j++){ 608 608 // data[j]=0; … … 615 615 bp->ECQBits=inBuf[6]; 616 616 617 if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG617 //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG 618 618 619 619 //bp->numOutliers=*(uint16_t*)(&inBuf[15]); … … 621 621 bp->numOutliers=*(uint16_t*)(&inBuf[7]); 622 622 bitPos=9*8; 623 if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG623 //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG 624 624 625 625 bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); … … 627 627 bp->binSize=p->usedEb*2; 628 628 629 if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG629 //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG 630 630 
631 631 for(j=0;j<p->sbSize;j++){ 632 632 patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point 633 if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}633 //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} 634 634 } 635 635 for(j=0;j<p->sbNum;j++){ 636 636 scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale 637 if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}637 //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} 638 638 } 639 639 … … 649 649 case 2: 650 650 for(j=0;j<bp->numOutliers;j++){ 651 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG652 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG651 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG 652 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG 653 653 654 654 _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); 655 655 ECQTemp=readBits_I64(inBuf,&bitPos,1); 656 656 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; 657 // if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);657 ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); 658 658 //continue; 659 659 //sb=_1DIdx/p->sbSize; … … 663 663 ECQ[_1DIdx]=ECQTemp; 664 664 665 // if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG665 ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG 666 666 } 667 667 break; 668 668 default: //bp->ECQBits>2 669 if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG669 //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG 670 670 671 671 for(j=0;j<bp->numOutliers;j++){ … … 674 674 
//localIdx=_1DIdx%p->sbSize; 675 675 temp=readBits_UI64(inBuf,&bitPos,1); 676 // if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG676 ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG 677 677 switch(temp){ 678 678 case 0: //+-1 679 679 ECQTemp=readBits_I64(inBuf,&bitPos,1); 680 680 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; 681 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG682 // if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);681 ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 682 ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); 683 683 break; 684 684 case 1: //Others 685 685 ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); 686 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG687 // if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);686 ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 687 ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); 688 688 break; 689 689 //default: 690 // printf("ERROR: Bad 2-bit value: 0x%lx",temp);690 //// printf("ERROR: Bad 2-bit value: 0x%lx",temp); 691 691 // assert(0); //AMG 692 692 // break; … … 696 696 ECQ[_1DIdx]=ECQTemp; 697 697 698 // if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG698 ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG 699 699 } 700 700 break; … … 705 705 706 706 bytePos=(bitPos+7)/8; 707 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG707 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 708 708 709 709 //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) … … 713 713 //R:CNonSparse 714 714 case 3: 715 if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG715 //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG 716 716 717 717 //for(j=0;j<p->bSize;j++){ … … 725 725 bp->ECQBits=inBuf[6]; 726 726 727 if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG727 
//if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG 728 728 729 729 //bitPos=15*8; … … 733 733 bp->binSize=p->usedEb*2; 734 734 735 if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG735 //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG 736 736 737 737 for(j=0;j<p->sbSize;j++){ 738 738 patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point 739 if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}739 //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} 740 740 } 741 741 for(j=0;j<p->sbNum;j++){ 742 742 scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale 743 if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}743 //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} 744 744 } 745 745 /* //Splitting 746 746 for(j=0;j<p->bSize;j++){ 747 747 data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; 748 // if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}748 ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} 749 749 } 750 750 */ … … 752 752 case 2: 753 753 for(j=0;j<p->bSize;j++){ 754 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG755 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG754 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG 755 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG 756 756 //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); 757 757 temp=readBits_UI64(inBuf,&bitPos,1); … … 769 769 } 770 770 771 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG771 ////if(DEBUG){printf("_1DIdx:%ld 
ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 772 772 //continue; 773 773 //sb=_1DIdx/p->sbSize; … … 777 777 ECQ[j]=ECQTemp; 778 778 779 // if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG779 ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG 780 780 } 781 781 break; 782 782 default: //bp->ECQBits>2 783 // if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);783 ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); 784 784 785 785 for(j=0;j<p->bSize;j++){ 786 // if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG787 // if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);788 789 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG790 // if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG786 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 787 ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); 788 789 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG 790 ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG 791 791 //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); 792 792 temp=readBits_UI64(inBuf,&bitPos,1); 793 // if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG793 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 794 794 switch(temp){ 795 795 case 0: 796 // if(DEBUG)printf("Read:0");796 ////if(DEBUG)printf("Read:0"); 797 797 temp2=readBits_UI64(inBuf,&bitPos,1); 798 798 switch(temp2){ 799 799 case 0: 800 // if(DEBUG)printf("0");800 ////if(DEBUG)printf("0"); 801 801 ECQTemp=readBits_I64(inBuf,&bitPos,1); 802 // if(DEBUG){printf("AMG_R3:bitPos:%ld 
buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG803 // if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);802 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 803 ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); 804 804 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; 805 // if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);805 ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); 806 806 break; 807 807 case 1: 808 // if(DEBUG)printf("1\n");808 ////if(DEBUG)printf("1\n"); 809 809 ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); 810 // if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG811 // if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);810 ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG 811 ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); 812 812 break; 813 813 default: … … 817 817 break; 818 818 case 1: 819 // if(DEBUG)printf("Read:1\n");819 ////if(DEBUG)printf("Read:1\n"); 820 820 ECQTemp=0; 821 // if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);821 ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); 822 822 break; 823 823 default: … … 826 826 } 827 827 828 // if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG828 ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG 829 829 //continue; 830 830 //sb=_1DIdx/p->sbSize; … … 834 834 ECQ[j]=ECQTemp; 835 835 836 // if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG836 ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG 837 837 } 838 838 break; … … 842 842 //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); 843 843 bytePos=(bitPos+7)/8; 844 if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG844 //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG 845 845 846 846 //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) … … 880 880 
for(i=0;i<p->bSize;i++){ 881 881 if(idx0[i]!=idx0_dc[i]){ 882 printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);882 //printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); 883 883 assert(0); 884 884 } 885 885 if(idx1[i]!=idx1_dc[i]){ 886 printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);886 //printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); 887 887 assert(0); 888 888 } 889 889 if(idx2[i]!=idx2_dc[i]){ 890 printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);890 //printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); 891 891 assert(0); 892 892 } 893 893 if(idx3[i]!=idx3_dc[i]){ 894 printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);894 //printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); 895 895 assert(0); 896 896 } … … 901 901 for(i=0;i<p->bSize;i++){ 902 902 if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ 903 printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);903 //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); 904 904 assert(0); 905 905 } -
TabularUnified thirdparty/SZ/sz/include/sz.h ¶
r2c47b73 r9ee2ce3 55 55 #include "sz_float_ts.h" 56 56 #include "szd_float_ts.h" 57 #include "utility.h" 57 58 58 59 #ifdef _WIN32 … … 75 76 //typedef unsigned long uint64_t; 76 77 77 #define SZ_VERNUM 0x0 14078 #define SZ_VER_MAJOR 179 #define SZ_VER_MINOR 480 #define SZ_VER_BUILD 1381 #define SZ_VER_REVISION 578 #define SZ_VERNUM 0x0200 79 #define SZ_VER_MAJOR 2 80 #define SZ_VER_MINOR 0 81 #define SZ_VER_BUILD 2 82 #define SZ_VER_REVISION 0 82 83 83 84 #define PASTRI 103 … … 131 132 #define SZ_TEMPORAL_COMPRESSION 3 132 133 134 #define SZ_NO_REGRESSION 0 135 #define SZ_WITH_LINEAR_REGRESSION 1 136 133 137 #define SZ_PWR_MIN_TYPE 0 134 138 #define SZ_PWR_AVG_TYPE 1 … … 152 156 153 157 #define numOfBufferedSteps 1 //the number of time steps in the buffer 158 159 160 #define GZIP_COMPRESSOR 0 //i.e., ZLIB_COMPRSSOR 161 #define ZSTD_COMPRESSOR 1 154 162 155 163 //Note: the following setting should be consistent with stateNum in Huffman.h … … 237 245 unsigned int maxRangeRadius; 238 246 int sol_ID;// it's always SZ, unless the setting is PASTRI compression mode (./configure --enable-pastri) 247 int losslessCompressor; 239 248 int sampleDistance; //2 bytes 240 249 float predThreshold; // 2 bytes … … 279 288 char metadata_filename[256]; 280 289 FILE *metadata_file; 290 unsigned char* bit_array; //sihuan added 291 size_t intersect_size; //sihuan added 292 int64_t* hist_index; //sihuan added: prestep index 293 281 294 } sz_tsc_metadata; 282 295 … … 290 303 extern sz_params *confparams_dec; 291 304 extern sz_exedata *exe_params; 305 extern int sz_with_regression; 306 292 307 //------------------------------------------------ 293 308 extern SZ_VarSet* sz_varset; … … 357 372 size_t compute_total_batch_size(); 358 373 359 int isZlibFormat(unsigned char magic1, unsigned char magic2);360 361 374 void SZ_registerVar(char* varName, int dataType, void* data, 362 375 int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, -
TabularUnified thirdparty/SZ/sz/include/sz_double.h ¶
r2c47b73 r9ee2ce3 76 76 size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); 77 77 78 unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq); 79 unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq); 80 unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); 81 unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); 82 83 78 84 #ifdef __cplusplus 79 85 } -
TabularUnified thirdparty/SZ/sz/include/sz_double_pwr.h ¶
r2c47b73 r9ee2ce3 38 38 size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f, size_t *outSize); 39 39 40 void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t dataLength, size_t *outSize, double min, double max); 41 void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t *outSize, double min, double max); 42 void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max); 43 40 44 #ifdef __cplusplus 41 45 } -
TabularUnified thirdparty/SZ/sz/include/sz_float.h ¶
r2c47b73 r9ee2ce3 129 129 size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4); 130 130 131 132 unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq); 133 unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq); 134 135 unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); 136 unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); 137 unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); 138 131 139 #ifdef __cplusplus 132 140 } -
TabularUnified thirdparty/SZ/sz/include/sz_float_pwr.h ¶
r2c47b73 r9ee2ce3 45 45 size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f, size_t *outSize); 46 46 47 void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max); 48 void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max); 49 void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max); 50 47 51 #ifdef __cplusplus 48 52 } -
TabularUnified thirdparty/SZ/sz/include/szd_double.h ¶
r2c47b73 r9ee2ce3 25 25 void getSnapshotData_double_3D(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps, int errBoundMode); 26 26 void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageD* tdps, int errBoundMode); 27 void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data); 28 void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); 27 29 28 30 int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize); -
TabularUnified thirdparty/SZ/sz/include/szd_double_pwr.h ¶
r2c47b73 r9ee2ce3 22 22 23 23 void decompressDataSeries_double_1D_pwrgroup(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); 24 void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps); 25 void decompressDataSeries_double_2D_pwr_pre_log(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps); 26 void decompressDataSeries_double_3D_pwr_pre_log(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps); 27 24 28 #ifdef __cplusplus 25 29 } -
TabularUnified thirdparty/SZ/sz/include/szd_float.h ¶
r2c47b73 r9ee2ce3 32 32 33 33 size_t decompressDataSeries_float_3D_RA_block(float * data, float mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, float * unpredictable_data); 34 35 void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data); 36 void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); 37 void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); 38 34 39 #ifdef __cplusplus 35 40 } -
TabularUnified thirdparty/SZ/sz/include/szd_float_pwr.h ¶
r2c47b73 r9ee2ce3 23 23 char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength); 24 24 void decompressDataSeries_float_1D_pwrgroup(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); 25 void decompressDataSeries_float_1D_pwr_pre_log(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps); 26 void decompressDataSeries_float_2D_pwr_pre_log(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps); 27 void decompressDataSeries_float_3D_pwr_pre_log(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps); 25 28 26 29 #ifdef __cplusplus -
TabularUnified thirdparty/SZ/sz/src/ByteToolkit.c ¶
r2c47b73 r9ee2ce3 431 431 432 432 //the byte to input is in the big-endian format 433 float bytesToFloat(unsigned char* bytes)433 inline float bytesToFloat(unsigned char* bytes) 434 434 { 435 435 lfloat buf; … … 440 440 } 441 441 442 void floatToBytes(unsigned char *b, float num)442 inline void floatToBytes(unsigned char *b, float num) 443 443 { 444 444 lfloat buf; … … 450 450 451 451 //the byte to input is in the big-endian format 452 double bytesToDouble(unsigned char* bytes)452 inline double bytesToDouble(unsigned char* bytes) 453 453 { 454 454 ldouble buf; … … 459 459 } 460 460 461 void doubleToBytes(unsigned char *b, double num)461 inline void doubleToBytes(unsigned char *b, double num) 462 462 { 463 463 ldouble buf; … … 508 508 } 509 509 510 in t getMaskRightCode(int m) {510 inline int getMaskRightCode(int m) { 511 511 switch (m) { 512 512 case 1: … … 531 531 } 532 532 533 in t getLeftMovingCode(int kMod8)533 inline int getLeftMovingCode(int kMod8) 534 534 { 535 535 return getMaskRightCode(8 - kMod8); 536 536 } 537 537 538 in t getRightMovingSteps(int kMod8, int resiBitLength) {538 inline int getRightMovingSteps(int kMod8, int resiBitLength) { 539 539 return 8 - kMod8 - resiBitLength; 540 540 } 541 541 542 in t getRightMovingCode(int kMod8, int resiBitLength)542 inline int getRightMovingCode(int kMod8, int resiBitLength) 543 543 { 544 544 int rightMovingSteps = 8 - kMod8 - resiBitLength; … … 815 815 816 816 817 size_t bytesToSize(unsigned char* bytes)817 inline size_t bytesToSize(unsigned char* bytes) 818 818 { 819 819 size_t result = 0; … … 825 825 } 826 826 827 void sizeToBytes(unsigned char* outBytes, size_t size)827 inline void sizeToBytes(unsigned char* outBytes, size_t size) 828 828 { 829 829 if(exe_params->SZ_SIZE_TYPE==4) -
TabularUnified thirdparty/SZ/sz/src/DynamicDoubleArray.c ¶
r2c47b73 r9ee2ce3 22 22 void convertDDAtoDoubles(DynamicDoubleArray *dba, double **data) 23 23 { 24 int size = dba->size;24 size_t size = dba->size; 25 25 if(size>0) 26 26 *data = (double*)malloc(size * sizeof(double)); -
TabularUnified thirdparty/SZ/sz/src/DynamicFloatArray.c ¶
r2c47b73 r9ee2ce3 22 22 void convertDFAtoFloats(DynamicFloatArray *dfa, float **data) 23 23 { 24 int size = dfa->size;24 size_t size = dfa->size; 25 25 if(size>0) 26 26 *data = (float*)malloc(size * sizeof(float)); -
TabularUnified thirdparty/SZ/sz/src/DynamicIntArray.c ¶
r2c47b73 r9ee2ce3 22 22 void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data) 23 23 { 24 int size = dia->size;24 size_t size = dia->size; 25 25 if(size>0) 26 26 *data = (unsigned char*)malloc(size * sizeof(char)); -
TabularUnified thirdparty/SZ/sz/src/Huffman.c ¶
r2c47b73 r9ee2ce3 652 652 void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize) 653 653 { 654 size_t i, nodeCount = 0; 654 size_t i; 655 int nodeCount = 0; 655 656 unsigned char *treeBytes, buffer[4]; 656 657 657 658 init(huffmanTree, s, length); 658 659 for (i = 0; i < huffmanTree->stateNum; i++) 659 if (huffmanTree->code[i]) nodeCount++; 660 if (huffmanTree->code[i]) nodeCount++; 660 661 nodeCount = nodeCount*2-1; 661 662 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree,nodeCount, &treeBytes); … … 664 665 intToBytes_bigEndian(buffer, nodeCount); 665 666 memcpy(*out, buffer, 4); 666 memcpy(*out+4, treeBytes, treeByteSize); 667 intToBytes_bigEndian(buffer, huffmanTree->stateNum/2); //real number of intervals 668 memcpy(*out+4, buffer, 4); 669 memcpy(*out+8, treeBytes, treeByteSize); 667 670 free(treeBytes); 668 671 size_t enCodeSize = 0; 669 encode(huffmanTree, s, length, *out+ 4+treeByteSize, &enCodeSize);670 *outSize = 4+treeByteSize+enCodeSize;672 encode(huffmanTree, s, length, *out+8+treeByteSize, &enCodeSize); 673 *outSize = 8+treeByteSize+enCodeSize; 671 674 672 675 //unsigned short state[length]; … … 683 686 size_t encodeStartIndex; 684 687 size_t nodeCount = bytesToInt_bigEndian(s); 685 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+ 4, nodeCount);688 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+8, nodeCount); 686 689 687 690 //sdi: Debug … … 702 705 else 703 706 encodeStartIndex = 1+3*nodeCount*sizeof(unsigned int)+nodeCount*sizeof(unsigned char); 704 decode(s+ 4+encodeStartIndex, targetLength, root, out);707 decode(s+8+encodeStartIndex, targetLength, root, out); 705 708 } 706 709 -
TabularUnified thirdparty/SZ/sz/src/TightDataPointStorageD.c ¶
r2c47b73 r9ee2ce3 47 47 (*this)->pwrErrBoundBytes = NULL; 48 48 (*this)->pwrErrBoundBytes_size = 0; 49 50 (*this)->raBytes = NULL; 51 (*this)->raBytes_size = 0; 52 49 53 } 50 54 … … 84 88 int mode = confparams_dec->szMode; 85 89 int predictionMode = confparams_dec->predictionMode; 90 int losslessCompressor = confparams_dec->losslessCompressor; 86 91 if(confparams_dec!=NULL) 87 92 free(confparams_dec); 88 93 confparams_dec = params; 89 94 confparams_dec->szMode = mode; 95 confparams_dec->losslessCompressor = losslessCompressor; 96 90 97 if(mode==SZ_TEMPORAL_COMPRESSION) 91 98 { … … 94 101 } 95 102 index += MetaDataByteLength; 103 104 int isRandomAccess = (sameRByte >> 7) & 0x01; 96 105 97 106 unsigned char dsLengthBytes[8]; … … 122 131 (*this)->allSameData = 0; 123 132 133 if(isRandomAccess == 1) 134 { 135 (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE; 136 (*this)->raBytes = &(flatBytes[index]); 137 return errorBoundMode; 138 } 139 124 140 int rtype_ = sameRByte & 0x08; //1000 125 141 … … 205 221 (*this)->leadNumArray_size = (logicLeadNumBitsNum >> 3) + 1; 206 222 } 223 224 int minLogValueSize = 0; 225 if(errorBoundMode>=PW_REL) 226 minLogValueSize = 8; 207 227 208 228 if ((*this)->rtypeArray != NULL) … … 210 230 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 8 - 1 - 8 211 231 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - 8 - (*this)->rtypeArray_size 212 - (*this)->typeArray_size - (*this)->leadNumArray_size232 - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size 213 233 - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; 214 234 for (i = 0; i < (*this)->rtypeArray_size; i++) … … 218 238 { 219 239 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL 
- pwrErrBoundBytesL - 4 - 8 - 1 - 8 220 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - (*this)->typeArray_size240 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size 221 241 - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; 222 242 } 243 244 if(errorBoundMode >= PW_REL){ 245 (*this)->minLogValue = bytesToDouble(&flatBytes[index]); 246 index+=8; 247 } 223 248 224 249 (*this)->typeArray = &flatBytes[index]; … … 423 448 bytes[k++] = exactMidBytesLength[i]; 424 449 450 if(confparams_cpr->errorBoundMode>=PW_REL) 451 { 452 doubleToBytes(exactMidBytesLength, tdps->minLogValue); 453 for(i = 0;i < 8; i++) 454 bytes[k++] = exactMidBytesLength[i]; 455 } 456 425 457 memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); 426 458 k += tdps->typeArray_size; … … 522 554 memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size); 523 555 k += tdps->rtypeArray_size; 556 557 if(confparams_cpr->errorBoundMode>=PW_REL) 558 { 559 doubleToBytes(exactMidBytesLength, tdps->minLogValue); 560 for(i = 0;i < 8; i++) 561 bytes[k++] = exactMidBytesLength[i]; 562 } 563 524 564 memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); 525 565 k += tdps->typeArray_size; … … 584 624 size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 
0 : tdps->residualMidBits_size; 585 625 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; 626 627 int minLogValueSize = 0; 586 628 if(confparams_cpr->errorBoundMode>=PW_REL) 587 629 { … … 589 631 radExpoL = 1; 590 632 pwrBoundArrayL = 4; 633 minLogValueSize = 8; 591 634 } 592 635 593 636 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8 594 637 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE 638 + minLogValueSize /*max absolute log value*/ 595 639 + tdps->typeArray_size + tdps->leadNumArray_size 596 640 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; … … 606 650 size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size; 607 651 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; 652 int minLogValueSize = 0; 608 653 if(confparams_cpr->errorBoundMode>=PW_REL) 609 654 { … … 611 656 radExpoL = 1; 612 657 pwrBoundArrayL = 4; 658 minLogValueSize = 8; 613 659 } 614 660 615 661 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8 616 662 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 8 + tdps->rtypeArray_size 617 + tdps->typeArray_size + tdps->leadNumArray_size663 + minLogValueSize + tdps->typeArray_size + tdps->leadNumArray_size 618 664 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; 619 665 -
TabularUnified thirdparty/SZ/sz/src/TightDataPointStorageF.c ¶
r2c47b73 r9ee2ce3 47 47 (*this)->pwrErrBoundBytes = NULL; 48 48 (*this)->pwrErrBoundBytes_size = 0; 49 50 (*this)->raBytes = NULL; 51 (*this)->raBytes_size = 0; 49 52 } 50 53 … … 82 85 int mode = confparams_dec->szMode; 83 86 int predictionMode = confparams_dec->predictionMode; 87 int losslessCompressor = confparams_dec->losslessCompressor; 84 88 if(confparams_dec!=NULL) 85 89 free(confparams_dec); 86 90 confparams_dec = params; 87 91 confparams_dec->szMode = mode; 92 confparams_dec->losslessCompressor = losslessCompressor; 93 88 94 if(mode==SZ_TEMPORAL_COMPRESSION) 89 95 { … … 93 99 94 100 index += MetaDataByteLength; 101 102 int isRandomAccess = (sameRByte >> 7) & 0x01; 95 103 96 104 unsigned char dsLengthBytes[8]; … … 118 126 else 119 127 (*this)->allSameData = 0; 128 if(isRandomAccess == 1) 129 { 130 (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE; 131 (*this)->raBytes = &(flatBytes[index]); 132 return errorBoundMode; 133 } 120 134 121 135 int rtype_ = sameRByte & 0x08; //=00001000 … … 166 180 for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++) 167 181 byteBuf[i] = flatBytes[index++]; 168 (*this)->rtypeArray_size = bytesToSize(byteBuf);//(ST) 182 (*this)->rtypeArray_size = bytesToSize(byteBuf);//(ST) 169 183 } 170 184 else … … 200 214 } 201 215 216 int minLogValueSize = 0; 217 if(errorBoundMode>=PW_REL) 218 minLogValueSize = 4; 219 202 220 if ((*this)->rtypeArray != NULL) 203 221 { 204 222 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8 205 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - 4 - (*this)->rtypeArray_size206 - (*this)->typeArray_size - (*this)->leadNumArray_size223 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - exe_params->SZ_SIZE_TYPE - 4 - (*this)->rtypeArray_size 224 - 
minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size 207 225 - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; 208 226 for (i = 0; i < (*this)->rtypeArray_size; i++) … … 212 230 { 213 231 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8 214 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - (*this)->typeArray_size232 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size 215 233 - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size; 216 } 234 } 235 236 if(errorBoundMode>=PW_REL) 237 { 238 (*this)->minLogValue = bytesToFloat(&flatBytes[index]); 239 index+=4; 240 } 217 241 218 242 (*this)->typeArray = &flatBytes[index]; … … 419 443 bytes[k++] = exactMidBytesLength[i]; 420 444 445 if(confparams_cpr->errorBoundMode>=PW_REL) 446 { 447 floatToBytes(exactMidBytesLength, tdps->minLogValue); 448 for(i=0;i<4;i++) 449 bytes[k++] = exactMidBytesLength[i]; 450 } 451 421 452 memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); 422 453 k += tdps->typeArray_size; … … 520 551 memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size); 521 552 k += tdps->rtypeArray_size; 553 554 if(confparams_cpr->errorBoundMode>=PW_REL) 555 { 556 floatToBytes(exactMidBytesLength, tdps->minLogValue); 557 for(i=0;i<4;i++) 558 bytes[k++] = exactMidBytesLength[i]; 559 } 560 522 561 memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size); 523 562 k += tdps->typeArray_size; … … 575 614 for (i = 0; i < tdps->exactMidBytes_size; i++) 576 615 (*bytes)[k++] = tdps->exactMidBytes[i]; 616 617 *size = totalByteLength; 618 } 619 else if (tdps->rtypeArray == NULL) 620 { 621 size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 
0 : tdps->residualMidBits_size; 622 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; 623 int minLogValueSize = 0; 624 if(confparams_cpr->errorBoundMode>=PW_REL) 625 { 626 segmentL = exe_params->SZ_SIZE_TYPE; 627 radExpoL = 1; 628 pwrBoundArrayL = 4; 629 minLogValueSize = 4; 630 } 631 632 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8 633 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + minLogValueSize 634 + tdps->typeArray_size + tdps->leadNumArray_size 635 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; 636 637 *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); 638 639 convertTDPStoBytes_float(tdps, *bytes, dsLengthBytes, sameByte); 640 641 *size = totalByteLength; 642 } 643 else //the case with reserved value 644 { 645 size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size; 646 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0; 647 int minLogValueSize = 0; 648 if(confparams_cpr->errorBoundMode>=PW_REL) 649 { 650 segmentL = exe_params->SZ_SIZE_TYPE; 651 radExpoL = 1; 652 pwrBoundArrayL = 4; 653 minLogValueSize = 4; 654 } 655 656 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8 657 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4 + tdps->rtypeArray_size 658 + minLogValueSize + tdps->typeArray_size + tdps->leadNumArray_size 659 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; 660 661 sameByte = (unsigned char) (sameByte | 0x08); // 00001000, the 4th bit 662 // denotes whether it is 663 // with "reserved value" 664 665 if(confparams_cpr->errorBoundMode>=PW_REL) 666 sameByte = (unsigned char) (sameByte | 0x10); // 00001000, the 5th bit 667 668 *bytes = 
(unsigned char*)malloc(sizeof(unsigned char)*totalByteLength); 669 670 convertTDPStoBytes_float_reserve(tdps, *bytes, dsLengthBytes, sameByte); 671 672 *size = totalByteLength; 673 } 674 } 675 676 void convertTDPStoFlatBytes_float_args(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size) 677 { 678 size_t i, k = 0; 679 unsigned char dsLengthBytes[8]; 680 681 if(exe_params->SZ_SIZE_TYPE==4) 682 intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4 683 else 684 longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8 685 686 unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0; 687 sameByte = sameByte | (confparams_cpr->szMode << 1); 688 if(tdps->isLossless) 689 sameByte = (unsigned char) (sameByte | 0x10); 690 if(confparams_cpr->errorBoundMode>=PW_REL) 691 sameByte = (unsigned char) (sameByte | 0x20); // 00100000, the 5th bit 692 if(exe_params->SZ_SIZE_TYPE==8) 693 sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit 694 695 if(tdps->allSameData==1) 696 { 697 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size; 698 //*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength); 699 700 for (i = 0; i < 3; i++)//3 701 bytes[k++] = versionNumber[i]; 702 bytes[k++] = sameByte; 703 704 convertSZParamsToBytes(confparams_cpr, &(bytes[k])); 705 k = k + MetaDataByteLength; 706 707 for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++) 708 bytes[k++] = dsLengthBytes[i]; 709 for (i = 0; i < tdps->exactMidBytes_size; i++) 710 bytes[k++] = tdps->exactMidBytes[i]; 577 711 578 712 *size = totalByteLength; … … 594 728 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size; 595 729 596 *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength);597 598 convertTDPStoBytes_float(tdps, *bytes, dsLengthBytes, sameByte);599 600 *size = totalByteLength;601 }602 else //the case with reserved value603 {604 size_t 
residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;605 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;606 if(confparams_cpr->errorBoundMode>=PW_REL)607 {608 segmentL = exe_params->SZ_SIZE_TYPE;609 radExpoL = 1;610 pwrBoundArrayL = 4;611 }612 613 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8614 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4 + tdps->rtypeArray_size615 + tdps->typeArray_size + tdps->leadNumArray_size616 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;617 618 sameByte = (unsigned char) (sameByte | 0x08); // 00001000, the 4th bit619 // denotes whether it is620 // with "reserved value"621 622 if(confparams_cpr->errorBoundMode>=PW_REL)623 sameByte = (unsigned char) (sameByte | 0x10); // 00001000, the 5th bit624 625 *bytes = (unsigned char*)malloc(sizeof(unsigned char)*totalByteLength);626 627 convertTDPStoBytes_float_reserve(tdps, *bytes, dsLengthBytes, sameByte);628 629 *size = totalByteLength;630 }631 }632 633 void convertTDPStoFlatBytes_float_args(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size)634 {635 size_t i, k = 0;636 unsigned char dsLengthBytes[8];637 638 if(exe_params->SZ_SIZE_TYPE==4)639 intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4640 else641 longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8642 643 unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0;644 sameByte = sameByte | (confparams_cpr->szMode << 1);645 if(tdps->isLossless)646 sameByte = (unsigned char) (sameByte | 0x10);647 if(confparams_cpr->errorBoundMode>=PW_REL)648 sameByte = (unsigned char) (sameByte | 0x20); // 00100000, the 5th bit649 if(exe_params->SZ_SIZE_TYPE==8)650 sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit651 652 if(tdps->allSameData==1)653 
{654 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size;655 //*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength);656 657 for (i = 0; i < 3; i++)//3658 bytes[k++] = versionNumber[i];659 bytes[k++] = sameByte;660 661 convertSZParamsToBytes(confparams_cpr, &(bytes[k]));662 k = k + MetaDataByteLength;663 664 for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)665 bytes[k++] = dsLengthBytes[i];666 for (i = 0; i < tdps->exactMidBytes_size; i++)667 bytes[k++] = tdps->exactMidBytes[i];668 669 *size = totalByteLength;670 }671 else if (tdps->rtypeArray == NULL)672 {673 size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;674 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;675 if(confparams_cpr->errorBoundMode>=PW_REL)676 {677 segmentL = exe_params->SZ_SIZE_TYPE;678 radExpoL = 1;679 pwrBoundArrayL = 4;680 }681 682 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8683 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE684 + tdps->typeArray_size + tdps->leadNumArray_size685 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;686 687 730 convertTDPStoBytes_float(tdps, bytes, dsLengthBytes, sameByte); 688 731 … … 722 765 * */ 723 766 void free_TightDataPointStorageF(TightDataPointStorageF *tdps) 724 { 767 { 725 768 if(tdps->rtypeArray!=NULL) 726 769 free(tdps->rtypeArray); -
TabularUnified thirdparty/SZ/sz/src/TypeManager.c ¶
r2c47b73 r9ee2ce3 44 44 return byteLength; 45 45 } 46 46 47 size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result) 48 { 49 size_t byteLength = 0; 50 size_t i, j; 51 if(intArrayLength%8==0) 52 byteLength = intArrayLength/8; 53 else 54 byteLength = intArrayLength/8+1; 55 56 size_t n = 0; 57 int tmp, type; 58 for(i = 0;i<byteLength;i++) 59 { 60 tmp = 0; 61 for(j = 0;j<8&&n<intArrayLength;j++) 62 { 63 type = intArray[n]; 64 if(type == 1) 65 tmp = (tmp | (1 << (7-j))); 66 n++; 67 } 68 result[i] = (unsigned char)tmp; 69 } 70 return byteLength; 71 } 72 47 73 void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray) 48 74 { … … 149 175 } 150 176 177 size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result) 178 { 179 size_t i, j, byteLength = 0; 180 if(timeStepTypeLength%4==0) 181 byteLength = timeStepTypeLength*2/8; 182 else 183 byteLength = timeStepTypeLength*2/8+1; 184 185 size_t n = 0; 186 for(i = 0;i<byteLength;i++) 187 { 188 int tmp = 0; 189 for(j = 0;j<4&&n<timeStepTypeLength;j++) 190 { 191 int type = timeStepType[n]; 192 switch(type) 193 { 194 case 0: 195 196 break; 197 case 1: 198 tmp = (tmp | (1 << (6-j*2))); 199 break; 200 case 2: 201 tmp = (tmp | (2 << (6-j*2))); 202 break; 203 case 3: 204 tmp = (tmp | (3 << (6-j*2))); 205 break; 206 default: 207 printf("Error: wrong timestep type...: type[%zu]=%d\n", n, type); 208 exit(0); 209 } 210 n++; 211 } 212 result[i] = (unsigned char)tmp; 213 } 214 return byteLength; 215 } 216 151 217 void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray) 152 218 { … … 292 358 } 293 359 294 in t getLeftMovingSteps(size_t k, unsigned char resiBitLength)360 inline int getLeftMovingSteps(size_t k, unsigned char resiBitLength) 295 361 { 296 362 return 
8 - k%8 - resiBitLength; -
TabularUnified thirdparty/SZ/sz/src/callZlib.c ¶
r2c47b73 r9ee2ce3 26 26 return SZ_NSCS; \ 27 27 } \ 28 } 29 30 int isZlibFormat(unsigned char magic1, unsigned char magic2) 31 { 32 if(magic1==104&&magic2==5) //DC+BS 33 return 1; 34 if(magic1==104&&magic2==129) //DC+DC 35 return 1; 36 if(magic1==104&&magic2==222) //DC+BC 37 return 1; 38 if(magic1==120&&magic2==1) //BC+BS 39 return 1; 40 if(magic1==120&&magic2==94) //BC+? 41 return 1; 42 if(magic1==120&&magic2==156) //BC+DC 43 return 1; 44 if(magic1==120&&magic2==218) //BC+BS 45 return 1; 46 return 0; 28 47 } 29 48 … … 196 215 strm.opaque = Z_NULL; 197 216 ret = deflateInit(&strm, level); 217 //int windowBits = 15; 218 //ret = deflateInit2(&strm, level, Z_DEFLATED, windowBits, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);//Z_FIXED); //Z_DEFAULT_STRATEGY 219 198 220 if (ret != Z_OK) 199 221 return ret; -
TabularUnified thirdparty/SZ/sz/src/conf.c ¶
r2c47b73 r9ee2ce3 103 103 104 104 confparams_cpr->szMode = SZ_BEST_COMPRESSION; 105 106 confparams_cpr->gzipMode = 1; //fast mode 105 confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; //other option: GZIP_COMPRESSOR; 106 if(confparams_cpr->losslessCompressor==ZSTD_COMPRESSOR) 107 confparams_cpr->gzipMode = 3; //fast mode 108 else 109 confparams_cpr->gzipMode = 1; //high speed mode 107 110 108 111 confparams_cpr->errorBoundMode = PSNR; 109 112 confparams_cpr->psnr = 90; 113 confparams_cpr->absErrBound = 1E-4; 114 confparams_cpr->relBoundRatio = 1E-4; 110 115 111 116 confparams_cpr->pw_relBoundRatio = 1E-3; … … 115 120 116 121 confparams_cpr->snapshotCmprStep = 5; 122 123 sz_with_regression = SZ_WITH_LINEAR_REGRESSION; 117 124 118 125 return SZ_SCES; … … 213 220 } 214 221 215 modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", NULL); 222 modeBuf = iniparser_getstring(ini, "PARAMETER:losslessCompressor", "ZSTD_COMPRESSOR"); 223 if(strcmp(modeBuf, "GZIP_COMPRESSOR")==0) 224 confparams_cpr->losslessCompressor = GZIP_COMPRESSOR; 225 else if(strcmp(modeBuf, "ZSTD_COMPRESSOR")==0) 226 confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; 227 else 228 { 229 printf("[SZ] Error: Wrong losslessCompressor setting (please check sz.config file)\n");\ 230 printf("No Such a lossless compressor: %s\n", modeBuf); 231 iniparser_freedict(ini); 232 return SZ_NSCS; 233 } 234 235 modeBuf = iniparser_getstring(ini, "PARAMETER:withLinearRegression", "YES"); 236 if(strcmp(modeBuf, "YES")==0 || strcmp(modeBuf, "yes")==0) 237 sz_with_regression = SZ_WITH_LINEAR_REGRESSION; 238 else 239 sz_with_regression = SZ_NO_REGRESSION; 240 241 modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", "Gzip_BEST_SPEED"); 216 242 if(modeBuf==NULL) 217 243 { … … 233 259 return SZ_NSCS; 234 260 } 261 262 modeBuf = iniparser_getstring(ini, "PARAMETER:zstdMode", "Zstd_HIGH_SPEED"); 263 if(modeBuf==NULL) 264 { 265 printf("[SZ] Error: Null Zstd mode setting (please check sz.config file)\n"); 266 
iniparser_freedict(ini); 267 return SZ_NSCS; 268 } 269 else if(strcmp(modeBuf, "Zstd_BEST_SPEED")==0) 270 confparams_cpr->gzipMode = 1; 271 else if(strcmp(modeBuf, "Zstd_HIGH_SPEED")==0) 272 confparams_cpr->gzipMode = 3; 273 else if(strcmp(modeBuf, "Zstd_HIGH_COMPRESSION")==0) 274 confparams_cpr->gzipMode = 19; 275 else if(strcmp(modeBuf, "Zstd_BEST_COMPRESSION")==0) 276 confparams_cpr->gzipMode = 22; 277 else if(strcmp(modeBuf, "Zstd_DEFAULT_COMPRESSION")==0) 278 confparams_cpr->gzipMode = 3; 279 else 280 { 281 printf("[SZ] Error: Wrong zstd Mode (please check sz.config file)\n"); 282 return SZ_NSCS; 283 } 235 284 236 285 //TODO -
TabularUnified thirdparty/SZ/sz/src/dataCompression.c ¶
r2c47b73 r9ee2ce3 67 67 { 68 68 unsigned int* data = (unsigned int*)oriData; 69 int data_;69 unsigned int data_; 70 70 min = data[0], max = min; 71 71 computeMinMax(data); … … 74 74 { 75 75 int* data = (int*)oriData; 76 unsignedint data_;76 int data_; 77 77 min = data[0], max = min; 78 78 computeMinMax(data); … … 596 596 return k; 597 597 } 598 599 //The following functions are float-precision version of dealing with the unpredictable data points 600 int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData) 601 { 602 float valueRangeSize; 603 604 computeRangeSize_float(oriData, nbEle, &valueRangeSize, medianValue); 605 short radExpo = getExponent_float(valueRangeSize/2); 606 607 int reqLength; 608 computeReqLength_float(precision, radExpo, &reqLength, medianValue); 609 610 *reqBytesLength = reqLength/8; 611 *resiBitsLength = reqLength%8; 612 613 size_t i = 0; 614 for(i = 0;i < nbEle;i++) 615 { 616 float normValue = oriData[i] - *medianValue; 617 618 lfloat lfBuf; 619 lfBuf.value = normValue; 620 621 int ignBytesLength = 32 - reqLength; 622 if(ignBytesLength<0) 623 ignBytesLength = 0; 624 625 lfBuf.ivalue = (lfBuf.ivalue >> ignBytesLength) << ignBytesLength; 626 627 //float tmpValue = lfBuf.value; 628 629 decData[i] = lfBuf.value + *medianValue; 630 } 631 return reqLength; 632 } 633 634 /** 635 * @param float* oriData: inplace argument (input / output) 636 * 637 * */ 638 int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 639 int reqLength, int reqBytesLength, int resiBitsLength, float medianValue) 640 { 641 //allocate memory for coefficient compression arrays 642 DynamicIntArray *exactLeadNumArray; 643 new_DIA(&exactLeadNumArray, DynArrayInitLen); 644 DynamicByteArray *exactMidByteArray; 645 new_DBA(&exactMidByteArray, DynArrayInitLen); 646 DynamicIntArray 
*resiBitArray; 647 new_DIA(&resiBitArray, DynArrayInitLen); 648 unsigned char preDataBytes[4] = {0,0,0,0}; 649 650 //allocate memory for vce and lce 651 FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement)); 652 LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); 653 654 size_t i = 0; 655 for(i = 0;i < nbEle;i++) 656 { 657 compressSingleFloatValue(vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength); 658 updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); 659 memcpy(preDataBytes,vce->curBytes,4); 660 addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); 661 oriData[i] = vce->data; 662 } 663 convertDIAtoInts(exactLeadNumArray, leadArray); 664 convertDBAtoBytes(exactMidByteArray,midArray); 665 convertDIAtoInts(resiBitArray, resiArray); 666 667 size_t midArraySize = exactMidByteArray->size; 668 669 free(vce); 670 free(lce); 671 672 free_DIA(exactLeadNumArray); 673 free_DBA(exactMidByteArray); 674 free_DIA(resiBitArray); 675 676 return midArraySize; 677 } 678 679 void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData) 680 { 681 *decData = (float*)malloc(nbEle*sizeof(float)); 682 size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0; 683 float exactData = 0; 684 unsigned char preBytes[4] = {0,0,0,0}; 685 unsigned char curBytes[4]; 686 int resiBits; 687 unsigned char leadingNum; 688 689 int reqBytesLength = reqLength/8; 690 int resiBitsLength = reqLength%8; 691 692 for(i = 0; i<nbEle;i++) 693 { 694 // compute resiBits 695 resiBits = 0; 696 if (resiBitsLength != 0) { 697 int kMod8 = k % 8; 698 int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); 699 if (rightMovSteps > 0) { 700 int code = getRightMovingCode(kMod8, resiBitsLength); 701 resiBits 
= (residualMidBits[p] & code) >> rightMovSteps; 702 } else if (rightMovSteps < 0) { 703 int code1 = getLeftMovingCode(kMod8); 704 int code2 = getRightMovingCode(kMod8, resiBitsLength); 705 int leftMovSteps = -rightMovSteps; 706 rightMovSteps = 8 - leftMovSteps; 707 resiBits = (residualMidBits[p] & code1) << leftMovSteps; 708 p++; 709 resiBits = resiBits 710 | ((residualMidBits[p] & code2) >> rightMovSteps); 711 } else // rightMovSteps == 0 712 { 713 int code = getRightMovingCode(kMod8, resiBitsLength); 714 resiBits = (residualMidBits[p] & code); 715 p++; 716 } 717 k += resiBitsLength; 718 } 719 720 // recover the exact data 721 memset(curBytes, 0, 4); 722 leadingNum = leadNum[l++]; 723 memcpy(curBytes, preBytes, leadingNum); 724 for (j = leadingNum; j < reqBytesLength; j++) 725 curBytes[j] = exactMidBytes[curByteIndex++]; 726 if (resiBitsLength != 0) { 727 unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); 728 curBytes[reqBytesLength] = resiByte; 729 } 730 731 exactData = bytesToFloat(curBytes); 732 (*decData)[i] = exactData + medianValue; 733 memcpy(preBytes,curBytes,4); 734 } 735 } 736 737 //double-precision version of dealing with unpredictable data points in sz 2.0 738 int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData) 739 { 740 double valueRangeSize; 741 742 computeRangeSize_double(oriData, nbEle, &valueRangeSize, medianValue); 743 short radExpo = getExponent_double(valueRangeSize/2); 744 745 int reqLength; 746 computeReqLength_double(precision, radExpo, &reqLength, medianValue); 747 748 *reqBytesLength = reqLength/8; 749 *resiBitsLength = reqLength%8; 750 751 size_t i = 0; 752 for(i = 0;i < nbEle;i++) 753 { 754 double normValue = oriData[i] - *medianValue; 755 756 ldouble ldBuf; 757 ldBuf.value = normValue; 758 759 int ignBytesLength = 64 - reqLength; 760 if(ignBytesLength<0) 761 ignBytesLength = 0; 762 763 ldBuf.lvalue 
= (ldBuf.lvalue >> ignBytesLength) << ignBytesLength; 764 765 decData[i] = ldBuf.value + *medianValue; 766 } 767 return reqLength; 768 } 769 770 /** 771 * @param double* oriData: inplace argument (input / output) 772 * 773 * */ 774 int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 775 int reqLength, int reqBytesLength, int resiBitsLength, double medianValue) 776 { 777 //allocate memory for coefficient compression arrays 778 DynamicIntArray *exactLeadNumArray; 779 new_DIA(&exactLeadNumArray, DynArrayInitLen); 780 DynamicByteArray *exactMidByteArray; 781 new_DBA(&exactMidByteArray, DynArrayInitLen); 782 DynamicIntArray *resiBitArray; 783 new_DIA(&resiBitArray, DynArrayInitLen); 784 unsigned char preDataBytes[8] = {0,0,0,0,0,0,0,0}; 785 786 //allocate memory for vce and lce 787 DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement)); 788 LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement)); 789 790 size_t i = 0; 791 for(i = 0;i < nbEle;i++) 792 { 793 compressSingleDoubleValue(vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength); 794 updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce); 795 memcpy(preDataBytes,vce->curBytes,8); 796 addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce); 797 oriData[i] = vce->data; 798 } 799 convertDIAtoInts(exactLeadNumArray, leadArray); 800 convertDBAtoBytes(exactMidByteArray,midArray); 801 convertDIAtoInts(resiBitArray, resiArray); 802 803 size_t midArraySize = exactMidByteArray->size; 804 805 free(vce); 806 free(lce); 807 808 free_DIA(exactLeadNumArray); 809 free_DBA(exactMidByteArray); 810 free_DIA(resiBitArray); 811 812 return midArraySize; 813 } 814 815 void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* 
exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData) 816 { 817 *decData = (double*)malloc(nbEle*sizeof(double)); 818 size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0; 819 double exactData = 0; 820 unsigned char preBytes[8] = {0,0,0,0,0,0,0,0}; 821 unsigned char curBytes[8]; 822 int resiBits; 823 unsigned char leadingNum; 824 825 int reqBytesLength = reqLength/8; 826 int resiBitsLength = reqLength%8; 827 828 for(i = 0; i<nbEle;i++) 829 { 830 // compute resiBits 831 resiBits = 0; 832 if (resiBitsLength != 0) { 833 int kMod8 = k % 8; 834 int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength); 835 if (rightMovSteps > 0) { 836 int code = getRightMovingCode(kMod8, resiBitsLength); 837 resiBits = (residualMidBits[p] & code) >> rightMovSteps; 838 } else if (rightMovSteps < 0) { 839 int code1 = getLeftMovingCode(kMod8); 840 int code2 = getRightMovingCode(kMod8, resiBitsLength); 841 int leftMovSteps = -rightMovSteps; 842 rightMovSteps = 8 - leftMovSteps; 843 resiBits = (residualMidBits[p] & code1) << leftMovSteps; 844 p++; 845 resiBits = resiBits 846 | ((residualMidBits[p] & code2) >> rightMovSteps); 847 } else // rightMovSteps == 0 848 { 849 int code = getRightMovingCode(kMod8, resiBitsLength); 850 resiBits = (residualMidBits[p] & code); 851 p++; 852 } 853 k += resiBitsLength; 854 } 855 856 // recover the exact data 857 memset(curBytes, 0, 8); 858 leadingNum = leadNum[l++]; 859 memcpy(curBytes, preBytes, leadingNum); 860 for (j = leadingNum; j < reqBytesLength; j++) 861 curBytes[j] = exactMidBytes[curByteIndex++]; 862 if (resiBitsLength != 0) { 863 unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength)); 864 curBytes[reqBytesLength] = resiByte; 865 } 866 867 exactData = bytesToDouble(curBytes); 868 (*decData)[i] = exactData + medianValue; 869 memcpy(preBytes,curBytes,8); 870 } 871 } -
TabularUnified thirdparty/SZ/sz/src/sz.c ¶
r2c47b73 r9ee2ce3 23 23 #include "Huffman.h" 24 24 #include "conf.h" 25 #include "utility.h" 25 26 //#include "CurveFillingCompressStorage.h" 26 27 … … 36 37 37 38 sz_exedata *exe_params = NULL; 39 40 int sz_with_regression = SZ_WITH_LINEAR_REGRESSION; //SZ_NO_REGRESSION 38 41 39 42 /*following global variables are desgined for time-series based compression*/ … … 70 73 int SZ_Init_Params(sz_params *params) 71 74 { 72 int x = 1; 73 char *y = (char*)&x; 74 int endianType = BIG_ENDIAN_SYSTEM; 75 if(*y==1) endianType = LITTLE_ENDIAN_SYSTEM; 76 77 sysEndianType = endianType; 78 exe_params->SZ_SIZE_TYPE = sizeof(size_t); 79 80 // set default values 81 if(params->max_quant_intervals > 0) 75 SZ_Init(NULL); 76 77 if(params->losslessCompressor!=GZIP_COMPRESSOR && params->losslessCompressor!=ZSTD_COMPRESSOR) 78 params->losslessCompressor = ZSTD_COMPRESSOR; 79 80 if(params->max_quant_intervals > 0) 82 81 params->maxRangeRadius = params->max_quant_intervals/2; 83 else 84 params->max_quant_intervals = params->maxRangeRadius*2; 85 86 exe_params->intvCapacity = params->maxRangeRadius*2; 87 exe_params->intvRadius = params->maxRangeRadius; 88 89 if(params->quantization_intervals>0) 90 { 91 updateQuantizationInfo(params->quantization_intervals); 92 exe_params->optQuantMode = 0; 93 } 94 else 95 exe_params->optQuantMode = 1; 96 82 83 memcpy(confparams_cpr, params, sizeof(sz_params)); 97 84 98 85 if(params->quantization_intervals%2!=0) … … 101 88 return SZ_NSCS; 102 89 } 103 104 confparams_cpr = (sz_params*)malloc(sizeof(sz_params));105 memcpy(confparams_cpr, params, sizeof(sz_params));106 90 107 91 return SZ_SCES; … … 536 520 //confparams_dec->szMode = (sameRByte & 0x06)>>1; 537 521 isLossless = (sameRByte & 0x10)>>4; 522 523 int isRandomAccess = (sameRByte >> 7) & 0x01; 524 525 if(exe_params==NULL) 526 { 527 exe_params = (sz_exedata *)malloc(sizeof(struct sz_exedata)); 528 memset(exe_params, 0, sizeof(struct sz_exedata)); 529 } 538 530 exe_params->SZ_SIZE_TYPE = ((sameRByte & 
0x40)>>6)==1?8:4; 539 531 … … 548 540 dataSeriesLength = bytesToSize(&(bytes[index]));// 4 or 8 549 541 index += exe_params->SZ_SIZE_TYPE; 550 index += 4; //max_quant_intervals551 542 //index += 4; //max_quant_intervals 543 552 544 sz_metadata* metadata = (sz_metadata*)malloc(sizeof(struct sz_metadata)); 553 545 … … 565 557 if(isConstant==0 && isLossless==0) 566 558 { 567 int radExpoL = 0, segmentL = 0, pwrErrBoundBytesL = 0; 568 if(metadata->conf_params->errorBoundMode >= PW_REL) 559 if(isRandomAccess==1) 569 560 { 570 radExpoL = 1; 571 segmentL = exe_params->SZ_SIZE_TYPE; 572 pwrErrBoundBytesL = 4; 561 unsigned char* raBytes = &(bytes[index]); 562 defactoNBBins = bytesToInt_bigEndian(raBytes + sizeof(int) + sizeof(double)); 573 563 } 574 575 int offset_typearray = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + 4 + 1 + 8 576 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE; 577 size_t nodeCount = bytesToInt_bigEndian(bytes+offset_typearray); 578 defactoNBBins = (nodeCount+1)/2; 579 } 564 else 565 { 566 int radExpoL = 0, segmentL = 0, pwrErrBoundBytesL = 0; 567 if(metadata->conf_params->errorBoundMode >= PW_REL) 568 { 569 radExpoL = 1; 570 segmentL = exe_params->SZ_SIZE_TYPE; 571 pwrErrBoundBytesL = 4; 572 } 573 574 int offset_typearray = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + (4 + params->dataType*4) + 1 + 8 575 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4; 576 defactoNBBins = bytesToInt_bigEndian(bytes+offset_typearray); 577 } 578 579 } 580 580 581 581 metadata->defactoNBBins = defactoNBBins; … … 778 778 } 779 779 return totalSize; 780 }781 782 int isZlibFormat(unsigned char magic1, unsigned char magic2)783 {784 if(magic1==104&&magic2==5) //DC+BS785 return 1;786 if(magic1==104&&magic2==129) //DC+DC787 return 1;788 if(magic1==104&&magic2==222) //DC+BC789 return 1;790 
if(magic1==120&&magic2==1) //BC+BS791 return 1;792 if(magic1==120&&magic2==156) //BC+DC793 return 1;794 if(magic1==120&&magic2==218) //BC+BS795 return 1;796 return 0;797 780 } 798 781 -
TabularUnified thirdparty/SZ/sz/src/sz_double.c ¶
r2c47b73 r9ee2ce3 26 26 #include "rw.h" 27 27 #include "sz_double_ts.h" 28 #include "utility.h" 28 29 29 30 unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize) … … 329 330 pred = last3CmprsData[0]; 330 331 predAbsErr = fabs(curData - pred); 331 if(predAbsErr< =checkRadius)332 if(predAbsErr<checkRadius) 332 333 { 333 334 state = (predAbsErr/realPrecision+1)/2; … … 1517 1518 if(errBoundMode>=PW_REL) 1518 1519 { 1519 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, min, max);1520 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);1520 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); 1521 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); 1521 1522 } 1522 1523 else … … 1563 1564 return SZ_NSCS; 1564 1565 } 1565 } 1566 } 1566 1567 1567 1568 int status = SZ_SCES; … … 1601 1602 if(confparams_cpr->errorBoundMode>=PW_REL) 1602 1603 { 1603 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, min, max); 1604 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, 1605 valueRangeSize, medianValue, &tmpOutSize); 1604 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); 1605 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); 1606 1606 } 1607 1607 else 1608 1608 #ifdef HAVE_TIMECMPR 1609 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 1609 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 1610 1610 
multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1611 1611 else … … 1617 1617 { 1618 1618 if(confparams_cpr->errorBoundMode>=PW_REL) 1619 SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr (&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max);1619 SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); 1620 1620 else 1621 1621 #ifdef HAVE_TIMECMPR … … 1624 1624 else 1625 1625 #endif 1626 SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1626 { 1627 if(sz_with_regression == SZ_NO_REGRESSION) 1628 SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1629 else 1630 tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); 1631 } 1627 1632 } 1628 1633 else … … 1630 1635 { 1631 1636 if(confparams_cpr->errorBoundMode>=PW_REL) 1632 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max);1637 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); 1633 1638 else 1634 1639 #ifdef HAVE_TIMECMPR … … 1637 1642 else 1638 1643 #endif 1639 SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1644 { 1645 if(sz_with_regression == SZ_NO_REGRESSION) 1646 SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1647 else 1648 tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); 1649 } 1650 1651 1640 1652 } 1641 1653 
else … … 1643 1655 { 1644 1656 if(confparams_cpr->errorBoundMode>=PW_REL) 1645 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max);1657 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); 1646 1658 else 1647 1659 #ifdef HAVE_TIMECMPR … … 1649 1661 multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1650 1662 else 1651 #endif 1652 SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1663 #endif 1664 { 1665 if(sz_with_regression == SZ_NO_REGRESSION) 1666 SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1667 else 1668 tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); 1669 } 1670 1653 1671 } 1654 1672 else … … 1666 1684 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1667 1685 { 1668 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1686 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1669 1687 free(tmpByteData); 1670 1688 } … … 3122 3140 { 3123 3141 radiusIndex = confparams_cpr->maxRangeRadius - 1; 3124 //printf("radiusIndex=%d\n", radiusIndex);3125 3142 } 3126 3143 intervals[radiusIndex]++; 3127 // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData);3128 // fflush(stdout);3129 3144 offset_count += confparams_cpr->sampleDistance; 3130 3145 if(offset_count >= r3){ … … 3142 3157 else data_pos += confparams_cpr->sampleDistance; 3143 3158 } 3144 // 
printf("sample_count: %ld\n", sample_count);3145 // fflush(stdout);3146 // if(*max_freq < 0.15) *max_freq *= 2;3147 3159 //compute the appropriate number 3148 3160 size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; … … 3162 3174 powerOf2 = 32; 3163 3175 free(intervals); 3164 //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);3165 3176 return powerOf2; 3166 3177 } … … 3173 3184 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3174 3185 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3175 size_t totalSampleSize = 0; //(r1-1)*(r2-1)/confparams_cpr->sampleDistance;3186 size_t totalSampleSize = 0; 3176 3187 3177 3188 size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset … … 3227 3238 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3228 3239 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3229 size_t totalSampleSize = 0; //dataLength/confparams_cpr->sampleDistance;3240 size_t totalSampleSize = 0; 3230 3241 3231 3242 double * data_pos = oriData + 2; 3232 3243 while(data_pos - oriData < dataLength){ 3233 3244 totalSampleSize++; 3234 //pred_value = 2*data_pos[-1] - data_pos[-2];3235 3245 pred_value = data_pos[-1]; 3236 3246 pred_err = fabs(pred_value - *data_pos); … … 3261 3271 3262 3272 free(intervals); 3263 //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);3264 3273 return powerOf2; 3265 3274 } 3275 3276 /*The above code is for sz 1.4.13; the following code is for sz 2.0*/ 3277 unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) 3278 { 3279 double mean = 0.0; 3280 size_t len = r1 * r2; 3281 size_t mean_distance = (int) (sqrt(len)); 
3282 3283 double * data_pos = oriData; 3284 size_t mean_count = 0; 3285 while(data_pos - oriData < len){ 3286 mean += *data_pos; 3287 mean_count ++; 3288 data_pos += mean_distance; 3289 } 3290 if(mean_count > 0) mean /= mean_count; 3291 size_t range = 8192; 3292 size_t radius = 4096; 3293 size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); 3294 memset(freq_intervals, 0, range*sizeof(size_t)); 3295 3296 unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; 3297 int sampleDistance = confparams_cpr->sampleDistance; 3298 double predThreshold = confparams_cpr->predThreshold; 3299 3300 size_t i; 3301 size_t radiusIndex; 3302 double pred_value = 0, pred_err; 3303 size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); 3304 memset(intervals, 0, maxRangeRadius*sizeof(size_t)); 3305 3306 double mean_diff; 3307 ptrdiff_t freq_index; 3308 size_t freq_count = 0; 3309 size_t n1_count = 1; 3310 size_t offset_count = sampleDistance - 1; 3311 size_t offset_count_2 = 0; 3312 size_t sample_count = 0; 3313 data_pos = oriData + r2 + offset_count; 3314 while(data_pos - oriData < len){ 3315 pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; 3316 pred_err = fabs(pred_value - *data_pos); 3317 if(pred_err < realPrecision) freq_count ++; 3318 radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); 3319 if(radiusIndex>=maxRangeRadius) 3320 radiusIndex = maxRangeRadius - 1; 3321 intervals[radiusIndex]++; 3322 3323 mean_diff = *data_pos - mean; 3324 if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; 3325 else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; 3326 if(freq_index <= 0){ 3327 freq_intervals[0] ++; 3328 } 3329 else if(freq_index >= range){ 3330 freq_intervals[range - 1] ++; 3331 } 3332 else{ 3333 freq_intervals[freq_index] ++; 3334 } 3335 offset_count += sampleDistance; 3336 if(offset_count >= r2){ 3337 n1_count ++; 3338 offset_count_2 = n1_count % sampleDistance; 3339 data_pos += (r2 + 
sampleDistance - offset_count) + (sampleDistance - offset_count_2); 3340 offset_count = (sampleDistance - offset_count_2); 3341 if(offset_count == 0) offset_count ++; 3342 } 3343 else data_pos += sampleDistance; 3344 sample_count ++; 3345 } 3346 *max_freq = freq_count * 1.0/ sample_count; 3347 3348 //compute the appropriate number 3349 size_t targetCount = sample_count*predThreshold; 3350 size_t sum = 0; 3351 for(i=0;i<maxRangeRadius;i++) 3352 { 3353 sum += intervals[i]; 3354 if(sum>targetCount) 3355 break; 3356 } 3357 if(i>=maxRangeRadius) 3358 i = maxRangeRadius-1; 3359 unsigned int accIntervals = 2*(i+1); 3360 unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); 3361 3362 if(powerOf2<32) 3363 powerOf2 = 32; 3364 3365 // collect frequency 3366 size_t max_sum = 0; 3367 size_t max_index = 0; 3368 size_t tmp_sum; 3369 size_t * freq_pos = freq_intervals + 1; 3370 for(size_t i=1; i<range-2; i++){ 3371 tmp_sum = freq_pos[0] + freq_pos[1]; 3372 if(tmp_sum > max_sum){ 3373 max_sum = tmp_sum; 3374 max_index = i; 3375 } 3376 freq_pos ++; 3377 } 3378 *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); 3379 *mean_freq = max_sum * 1.0 / sample_count; 3380 3381 free(freq_intervals); 3382 free(intervals); 3383 return powerOf2; 3384 } 3385 3386 unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq) 3387 { 3388 double mean = 0.0; 3389 size_t len = r1 * r2 * r3; 3390 size_t mean_distance = (int) (sqrt(len)); 3391 double * data_pos = oriData; 3392 size_t offset_count = 0; 3393 size_t offset_count_2 = 0; 3394 size_t mean_count = 0; 3395 while(data_pos - oriData < len){ 3396 mean += *data_pos; 3397 mean_count ++; 3398 data_pos += mean_distance; 3399 offset_count += mean_distance; 3400 offset_count_2 += mean_distance; 3401 if(offset_count >= r3){ 3402 offset_count = 0; 3403 data_pos -= 1; 3404 } 3405 if(offset_count_2 >= 
r2 * r3){ 3406 offset_count_2 = 0; 3407 data_pos -= 1; 3408 } 3409 } 3410 if(mean_count > 0) mean /= mean_count; 3411 size_t range = 8192; 3412 size_t radius = 4096; 3413 size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); 3414 memset(freq_intervals, 0, range*sizeof(size_t)); 3415 3416 unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; 3417 int sampleDistance = confparams_cpr->sampleDistance; 3418 double predThreshold = confparams_cpr->predThreshold; 3419 3420 size_t i; 3421 size_t radiusIndex; 3422 size_t r23=r2*r3; 3423 double pred_value = 0, pred_err; 3424 size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); 3425 memset(intervals, 0, maxRangeRadius*sizeof(size_t)); 3426 3427 double mean_diff; 3428 ptrdiff_t freq_index; 3429 size_t freq_count = 0; 3430 size_t sample_count = 0; 3431 3432 offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset 3433 data_pos = oriData + r23 + r3 + offset_count; 3434 size_t n1_count = 1, n2_count = 1; // count i,j sum 3435 3436 while(data_pos - oriData < len){ 3437 3438 pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; 3439 pred_err = fabs(pred_value - *data_pos); 3440 if(pred_err < realPrecision) freq_count ++; 3441 radiusIndex = (pred_err/realPrecision+1)/2; 3442 if(radiusIndex>=maxRangeRadius) 3443 { 3444 radiusIndex = maxRangeRadius - 1; 3445 } 3446 intervals[radiusIndex]++; 3447 3448 mean_diff = *data_pos - mean; 3449 if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; 3450 else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; 3451 if(freq_index <= 0){ 3452 freq_intervals[0] ++; 3453 } 3454 else if(freq_index >= range){ 3455 freq_intervals[range - 1] ++; 3456 } 3457 else{ 3458 freq_intervals[freq_index] ++; 3459 } 3460 offset_count += sampleDistance; 3461 if(offset_count >= r3){ 3462 n2_count ++; 3463 if(n2_count == r2){ 3464 n1_count ++; 3465 
n2_count = 1; 3466 data_pos += r3; 3467 } 3468 offset_count_2 = (n1_count + n2_count) % sampleDistance; 3469 data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); 3470 offset_count = (sampleDistance - offset_count_2); 3471 if(offset_count == 0) offset_count ++; 3472 } 3473 else data_pos += sampleDistance; 3474 sample_count ++; 3475 } 3476 *max_freq = freq_count * 1.0/ sample_count; 3477 3478 //compute the appropriate number 3479 size_t targetCount = sample_count*predThreshold; 3480 size_t sum = 0; 3481 for(i=0;i<maxRangeRadius;i++) 3482 { 3483 sum += intervals[i]; 3484 if(sum>targetCount) 3485 break; 3486 } 3487 if(i>=maxRangeRadius) 3488 i = maxRangeRadius-1; 3489 unsigned int accIntervals = 2*(i+1); 3490 unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); 3491 3492 if(powerOf2<32) 3493 powerOf2 = 32; 3494 // collect frequency 3495 size_t max_sum = 0; 3496 size_t max_index = 0; 3497 size_t tmp_sum; 3498 size_t * freq_pos = freq_intervals + 1; 3499 for(size_t i=1; i<range-2; i++){ 3500 tmp_sum = freq_pos[0] + freq_pos[1]; 3501 if(tmp_sum > max_sum){ 3502 max_sum = tmp_sum; 3503 max_index = i; 3504 } 3505 freq_pos ++; 3506 } 3507 *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); 3508 *mean_freq = max_sum * 1.0 / sample_count; 3509 3510 free(freq_intervals); 3511 free(intervals); 3512 return powerOf2; 3513 } 3514 3515 #define MIN(a, b) a<b? 
a : b 3516 unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ 3517 3518 unsigned int quantization_intervals; 3519 double sz_sample_correct_freq = -1;//0.5; //-1 3520 double dense_pos; 3521 double mean_flush_freq; 3522 unsigned char use_mean = 0; 3523 3524 if(exe_params->optQuantMode==1) 3525 { 3526 quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 3527 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 3528 updateQuantizationInfo(quantization_intervals); 3529 } 3530 else{ 3531 quantization_intervals = exe_params->intvCapacity; 3532 } 3533 3534 // calculate block dims 3535 size_t num_x, num_y; 3536 size_t block_size = 16; 3537 3538 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 3539 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 3540 3541 size_t split_index_x, split_index_y; 3542 size_t early_blockcount_x, early_blockcount_y; 3543 size_t late_blockcount_x, late_blockcount_y; 3544 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 3545 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 3546 3547 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; 3548 size_t num_blocks = num_x * num_y; 3549 size_t num_elements = r1 * r2; 3550 3551 size_t dim0_offset = r2; 3552 3553 int * result_type = (int *) malloc(num_elements * sizeof(int)); 3554 size_t unpred_data_max_size = max_num_block_elements; 3555 double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); 3556 size_t total_unpred = 0; 3557 size_t unpredictable_count; 3558 double * data_pos = oriData; 3559 int * type = result_type; 3560 size_t offset_x, offset_y; 3561 size_t current_blockcount_x, 
current_blockcount_y; 3562 3563 double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); 3564 double * reg_params_pos = reg_params; 3565 // move regression part out 3566 size_t params_offset_b = num_blocks; 3567 size_t params_offset_c = 2*num_blocks; 3568 for(size_t i=0; i<num_x; i++){ 3569 for(size_t j=0; j<num_y; j++){ 3570 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3571 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3572 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3573 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3574 3575 data_pos = oriData + offset_x * dim0_offset + offset_y; 3576 3577 { 3578 double * cur_data_pos = data_pos; 3579 double fx = 0.0; 3580 double fy = 0.0; 3581 double f = 0; 3582 double sum_x; 3583 double curData; 3584 for(size_t i=0; i<current_blockcount_x; i++){ 3585 sum_x = 0; 3586 for(size_t j=0; j<current_blockcount_y; j++){ 3587 curData = *cur_data_pos; 3588 sum_x += curData; 3589 fy += curData * j; 3590 cur_data_pos ++; 3591 } 3592 fx += sum_x * i; 3593 f += sum_x; 3594 cur_data_pos += dim0_offset - current_blockcount_y; 3595 } 3596 double coeff = 1.0 / (current_blockcount_x * current_blockcount_y); 3597 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 3598 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 3599 reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2); 3600 } 3601 3602 reg_params_pos ++; 3603 } 3604 } 3605 3606 //Compress coefficient arrays 3607 double precision_a, precision_b, precision_c; 3608 double rel_param_err = 0.15/3; 3609 precision_a = rel_param_err * realPrecision / late_blockcount_x; 3610 
precision_b = rel_param_err * realPrecision / late_blockcount_y; 3611 precision_c = rel_param_err * realPrecision; 3612 3613 double mean = 0; 3614 use_mean = 0; 3615 if(use_mean){ 3616 // compute mean 3617 double sum = 0.0; 3618 size_t mean_count = 0; 3619 for(size_t i=0; i<num_elements; i++){ 3620 if(fabs(oriData[i] - dense_pos) < realPrecision){ 3621 sum += oriData[i]; 3622 mean_count ++; 3623 } 3624 } 3625 if(mean_count > 0) mean = sum / mean_count; 3626 } 3627 3628 3629 double tmp_realPrecision = realPrecision; 3630 3631 // use two prediction buffers for higher performance 3632 double * unpredictable_data = result_unpredictable_data; 3633 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 3634 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 3635 size_t reg_count = 0; 3636 size_t strip_dim_0 = early_blockcount_x + 1; 3637 size_t strip_dim_1 = r2 + 1; 3638 size_t strip_dim0_offset = strip_dim_1; 3639 unsigned char * indicator_pos = indicator; 3640 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); 3641 double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); 3642 memset(prediction_buffer_1, 0, prediction_buffer_size); 3643 double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size); 3644 memset(prediction_buffer_2, 0, prediction_buffer_size); 3645 double * cur_pb_buf = prediction_buffer_1; 3646 double * next_pb_buf = prediction_buffer_2; 3647 double * cur_pb_buf_pos; 3648 double * next_pb_buf_pos; 3649 int intvCapacity = exe_params->intvCapacity; 3650 int intvRadius = exe_params->intvRadius; 3651 int use_reg = 0; 3652 3653 reg_params_pos = reg_params; 3654 // compress the regression coefficients on the fly 3655 double last_coeffcients[3] = {0.0}; 3656 int coeff_intvCapacity_sz = 65536; 3657 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 3658 int * coeff_type[3]; 3659 int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); 3660 double * 
coeff_unpred_data[3]; 3661 double * coeff_unpredictable_data = (double *) malloc(num_blocks*3*sizeof(double)); 3662 double precision[3]; 3663 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; 3664 for(int i=0; i<3; i++){ 3665 coeff_type[i] = coeff_result_type + i * num_blocks; 3666 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 3667 } 3668 int coeff_index = 0; 3669 unsigned int coeff_unpredictable_count[3] = {0}; 3670 if(use_mean){ 3671 type = result_type; 3672 int intvCapacity_sz = intvCapacity - 2; 3673 for(size_t i=0; i<num_x; i++){ 3674 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3675 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3676 data_pos = oriData + offset_x * dim0_offset; 3677 3678 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 3679 next_pb_buf_pos = next_pb_buf + 1; 3680 double * pb_pos = cur_pb_buf_pos; 3681 double * next_pb_pos = next_pb_buf_pos; 3682 3683 for(size_t j=0; j<num_y; j++){ 3684 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3685 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 3686 3687 /*sampling: decide which predictor to use (regression or lorenzo)*/ 3688 { 3689 double * cur_data_pos; 3690 double curData; 3691 double pred_reg, pred_sz; 3692 double err_sz = 0.0, err_reg = 0.0; 3693 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 3694 // [1, 9] [3, 7] [7, 3] [9, 1] 3695 int count = 0; 3696 for(int i=1; i<current_blockcount_x; i+=2){ 3697 cur_data_pos = data_pos + i * dim0_offset + i; 3698 curData = *cur_data_pos; 3699 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 3700 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 3701 3702 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 3703 3704 err_reg += fabs(pred_reg - curData); 3705 3706 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 3707 curData = *cur_data_pos; 3708 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 3709 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 3710 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 3711 3712 err_reg += fabs(pred_reg - curData); 3713 3714 count += 2; 3715 } 3716 3717 use_reg = (err_reg < err_sz); 3718 } 3719 if(use_reg) 3720 { 3721 { 3722 /*predict coefficients in current block via previous reg_block*/ 3723 double cur_coeff; 3724 double diff, itvNum; 3725 for(int e=0; e<3; e++){ 3726 cur_coeff = reg_params_pos[e*num_blocks]; 3727 diff = cur_coeff - last_coeffcients[e]; 3728 itvNum = fabs(diff)/precision[e] + 1; 3729 if (itvNum < coeff_intvCapacity_sz){ 3730 if (diff < 0) itvNum = -itvNum; 3731 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 3732 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 3733 //ganrantee comporession error against the case of 
machine-epsilon 3734 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 3735 coeff_type[e][coeff_index] = 0; 3736 last_coeffcients[e] = cur_coeff; 3737 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 3738 } 3739 } 3740 else{ 3741 coeff_type[e][coeff_index] = 0; 3742 last_coeffcients[e] = cur_coeff; 3743 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 3744 } 3745 } 3746 coeff_index ++; 3747 } 3748 double curData; 3749 double pred; 3750 double itvNum; 3751 double diff; 3752 size_t index = 0; 3753 size_t block_unpredictable_count = 0; 3754 double * cur_data_pos = data_pos; 3755 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 3756 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 3757 curData = *cur_data_pos; 3758 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3759 diff = curData - pred; 3760 itvNum = fabs(diff)/realPrecision + 1; 3761 if (itvNum < intvCapacity){ 3762 if (diff < 0) itvNum = -itvNum; 3763 type[index] = (int) (itvNum/2) + intvRadius; 3764 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 3765 //ganrantee comporession error against the case of machine-epsilon 3766 if(fabs(curData - pred)>realPrecision){ 3767 type[index] = 0; 3768 pred = curData; 3769 unpredictable_data[block_unpredictable_count ++] = curData; 3770 } 3771 } 3772 else{ 3773 type[index] = 0; 3774 pred = curData; 3775 unpredictable_data[block_unpredictable_count ++] = curData; 3776 } 3777 index ++; 3778 cur_data_pos ++; 3779 } 3780 /*dealing with the last jj (boundary)*/ 3781 { 3782 size_t jj = current_blockcount_y - 1; 3783 curData = *cur_data_pos; 3784 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3785 diff = curData - pred; 3786 itvNum = fabs(diff)/realPrecision + 1; 3787 if (itvNum < intvCapacity){ 3788 if (diff < 0) itvNum = -itvNum; 3789 type[index] = (int) (itvNum/2) + intvRadius; 3790 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 3791 
//ganrantee comporession error against the case of machine-epsilon 3792 if(fabs(curData - pred)>realPrecision){ 3793 type[index] = 0; 3794 pred = curData; 3795 unpredictable_data[block_unpredictable_count ++] = curData; 3796 } 3797 } 3798 else{ 3799 type[index] = 0; 3800 pred = curData; 3801 unpredictable_data[block_unpredictable_count ++] = curData; 3802 } 3803 3804 // assign value to block surfaces 3805 pb_pos[ii * strip_dim0_offset + jj] = pred; 3806 index ++; 3807 cur_data_pos ++; 3808 } 3809 cur_data_pos += dim0_offset - current_blockcount_y; 3810 } 3811 /*dealing with the last ii (boundary)*/ 3812 { 3813 size_t ii = current_blockcount_x - 1; 3814 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 3815 curData = *cur_data_pos; 3816 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3817 diff = curData - pred; 3818 itvNum = fabs(diff)/realPrecision + 1; 3819 if (itvNum < intvCapacity){ 3820 if (diff < 0) itvNum = -itvNum; 3821 type[index] = (int) (itvNum/2) + intvRadius; 3822 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 3823 //ganrantee comporession error against the case of machine-epsilon 3824 if(fabs(curData - pred)>realPrecision){ 3825 type[index] = 0; 3826 pred = curData; 3827 unpredictable_data[block_unpredictable_count ++] = curData; 3828 } 3829 } 3830 else{ 3831 type[index] = 0; 3832 pred = curData; 3833 unpredictable_data[block_unpredictable_count ++] = curData; 3834 } 3835 // assign value to next prediction buffer 3836 next_pb_pos[jj] = pred; 3837 index ++; 3838 cur_data_pos ++; 3839 } 3840 /*dealing with the last jj (boundary)*/ 3841 { 3842 size_t jj = current_blockcount_y - 1; 3843 curData = *cur_data_pos; 3844 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 3845 diff = curData - pred; 3846 itvNum = fabs(diff)/realPrecision + 1; 3847 if (itvNum < intvCapacity){ 3848 if (diff < 0) itvNum = -itvNum; 3849 type[index] = (int) (itvNum/2) + intvRadius; 3850 pred = pred + 2 
* (type[index] - intvRadius) * realPrecision; 3851 //ganrantee comporession error against the case of machine-epsilon 3852 if(fabs(curData - pred)>realPrecision){ 3853 type[index] = 0; 3854 pred = curData; 3855 unpredictable_data[block_unpredictable_count ++] = curData; 3856 } 3857 } 3858 else{ 3859 type[index] = 0; 3860 pred = curData; 3861 unpredictable_data[block_unpredictable_count ++] = curData; 3862 } 3863 3864 // assign value to block surfaces 3865 pb_pos[ii * strip_dim0_offset + jj] = pred; 3866 // assign value to next prediction buffer 3867 next_pb_pos[jj] = pred; 3868 3869 index ++; 3870 cur_data_pos ++; 3871 } 3872 } // end ii == -1 3873 unpredictable_count = block_unpredictable_count; 3874 total_unpred += unpredictable_count; 3875 unpredictable_data += unpredictable_count; 3876 reg_count ++; 3877 }// end use_reg 3878 else{ 3879 // use SZ 3880 // SZ predication 3881 unpredictable_count = 0; 3882 double * cur_pb_pos = pb_pos; 3883 double * cur_data_pos = data_pos; 3884 double curData; 3885 double pred2D; 3886 double itvNum, diff; 3887 size_t index = 0; 3888 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 3889 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3890 curData = *cur_data_pos; 3891 if(fabs(curData - mean) <= realPrecision){ 3892 // adjust type[index] to intvRadius for coherence with freq in reg 3893 type[index] = intvRadius; 3894 *cur_pb_pos = mean; 3895 } 3896 else 3897 { 3898 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 3899 diff = curData - pred2D; 3900 itvNum = fabs(diff)/realPrecision + 1; 3901 if (itvNum < intvCapacity_sz){ 3902 if (diff < 0) itvNum = -itvNum; 3903 type[index] = (int) (itvNum/2) + intvRadius; 3904 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 3905 if(type[index] <= intvRadius) type[index] -= 1; 3906 //ganrantee comporession error against the case of machine-epsilon 3907 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 3908 type[index] = 
0; 3909 *cur_pb_pos = curData; 3910 unpredictable_data[unpredictable_count ++] = curData; 3911 } 3912 } 3913 else{ 3914 type[index] = 0; 3915 *cur_pb_pos = curData; 3916 unpredictable_data[unpredictable_count ++] = curData; 3917 } 3918 } 3919 index ++; 3920 cur_pb_pos ++; 3921 cur_data_pos ++; 3922 } 3923 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 3924 cur_data_pos += dim0_offset - current_blockcount_y; 3925 } 3926 /*dealing with the last ii (boundary)*/ 3927 { 3928 // ii == current_blockcount_x - 1 3929 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3930 curData = *cur_data_pos; 3931 if(fabs(curData - mean) <= realPrecision){ 3932 // adjust type[index] to intvRadius for coherence with freq in reg 3933 type[index] = intvRadius; 3934 *cur_pb_pos = mean; 3935 } 3936 else 3937 { 3938 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 3939 diff = curData - pred2D; 3940 itvNum = fabs(diff)/realPrecision + 1; 3941 if (itvNum < intvCapacity_sz){ 3942 if (diff < 0) itvNum = -itvNum; 3943 type[index] = (int) (itvNum/2) + intvRadius; 3944 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 3945 if(type[index] <= intvRadius) type[index] -= 1; 3946 //ganrantee comporession error against the case of machine-epsilon 3947 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 3948 type[index] = 0; 3949 *cur_pb_pos = curData; 3950 unpredictable_data[unpredictable_count ++] = curData; 3951 } 3952 } 3953 else{ 3954 type[index] = 0; 3955 *cur_pb_pos = curData; 3956 unpredictable_data[unpredictable_count ++] = curData; 3957 } 3958 } 3959 next_pb_pos[jj] = *cur_pb_pos; 3960 index ++; 3961 cur_pb_pos ++; 3962 cur_data_pos ++; 3963 } 3964 } 3965 total_unpred += unpredictable_count; 3966 unpredictable_data += unpredictable_count; 3967 // change indicator 3968 indicator_pos[j] = 1; 3969 }// end SZ 3970 reg_params_pos ++; 3971 data_pos += current_blockcount_y; 3972 pb_pos += current_blockcount_y; 3973 next_pb_pos 
+= current_blockcount_y; 3974 type += current_blockcount_x * current_blockcount_y; 3975 }// end j 3976 indicator_pos += num_y; 3977 double * tmp; 3978 tmp = cur_pb_buf; 3979 cur_pb_buf = next_pb_buf; 3980 next_pb_buf = tmp; 3981 }// end i 3982 }// end use mean 3983 else{ 3984 type = result_type; 3985 int intvCapacity_sz = intvCapacity - 2; 3986 for(size_t i=0; i<num_x; i++){ 3987 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3988 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3989 data_pos = oriData + offset_x * dim0_offset; 3990 3991 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 3992 next_pb_buf_pos = next_pb_buf + 1; 3993 double * pb_pos = cur_pb_buf_pos; 3994 double * next_pb_pos = next_pb_buf_pos; 3995 3996 for(size_t j=0; j<num_y; j++){ 3997 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3998 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3999 /*sampling*/ 4000 { 4001 // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i] 4002 double * cur_data_pos; 4003 double curData; 4004 double pred_reg, pred_sz; 4005 double err_sz = 0.0, err_reg = 0.0; 4006 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 4007 // [1, 9] [3, 7] [7, 3] [9, 1] 4008 int count = 0; 4009 for(int i=1; i<current_blockcount_x; i+=2){ 4010 cur_data_pos = data_pos + i * dim0_offset + i; 4011 curData = *cur_data_pos; 4012 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4013 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 4014 err_sz += fabs(pred_sz - curData); 4015 err_reg += fabs(pred_reg - curData); 4016 4017 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 4018 curData = *cur_data_pos; 4019 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4020 pred_reg = reg_params_pos[0] * i + 
reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 4021 err_sz += fabs(pred_sz - curData); 4022 err_reg += fabs(pred_reg - curData); 4023 4024 count += 2; 4025 } 4026 err_sz += realPrecision * count * 0.81; 4027 use_reg = (err_reg < err_sz); 4028 4029 } 4030 if(use_reg) 4031 { 4032 { 4033 /*predict coefficients in current block via previous reg_block*/ 4034 double cur_coeff; 4035 double diff, itvNum; 4036 for(int e=0; e<3; e++){ 4037 cur_coeff = reg_params_pos[e*num_blocks]; 4038 diff = cur_coeff - last_coeffcients[e]; 4039 itvNum = fabs(diff)/precision[e] + 1; 4040 if (itvNum < coeff_intvCapacity_sz){ 4041 if (diff < 0) itvNum = -itvNum; 4042 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4043 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4044 //ganrantee comporession error against the case of machine-epsilon 4045 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4046 coeff_type[e][coeff_index] = 0; 4047 last_coeffcients[e] = cur_coeff; 4048 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4049 } 4050 } 4051 else{ 4052 coeff_type[e][coeff_index] = 0; 4053 last_coeffcients[e] = cur_coeff; 4054 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4055 } 4056 } 4057 coeff_index ++; 4058 } 4059 double curData; 4060 double pred; 4061 double itvNum; 4062 double diff; 4063 size_t index = 0; 4064 size_t block_unpredictable_count = 0; 4065 double * cur_data_pos = data_pos; 4066 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4067 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4068 curData = *cur_data_pos; 4069 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4070 diff = curData - pred; 4071 itvNum = fabs(diff)/realPrecision + 1; 4072 if (itvNum < intvCapacity){ 4073 if (diff < 0) itvNum = -itvNum; 4074 type[index] = (int) (itvNum/2) + intvRadius; 4075 pred = pred + 2 * 
(type[index] - intvRadius) * realPrecision; 4076 //ganrantee comporession error against the case of machine-epsilon 4077 if(fabs(curData - pred)>realPrecision){ 4078 type[index] = 0; 4079 pred = curData; 4080 unpredictable_data[block_unpredictable_count ++] = curData; 4081 } 4082 } 4083 else{ 4084 type[index] = 0; 4085 pred = curData; 4086 unpredictable_data[block_unpredictable_count ++] = curData; 4087 } 4088 index ++; 4089 cur_data_pos ++; 4090 } 4091 /*dealing with the last jj (boundary)*/ 4092 { 4093 // jj == current_blockcount_y - 1 4094 size_t jj = current_blockcount_y - 1; 4095 curData = *cur_data_pos; 4096 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4097 diff = curData - pred; 4098 itvNum = fabs(diff)/realPrecision + 1; 4099 if (itvNum < intvCapacity){ 4100 if (diff < 0) itvNum = -itvNum; 4101 type[index] = (int) (itvNum/2) + intvRadius; 4102 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4103 //ganrantee comporession error against the case of machine-epsilon 4104 if(fabs(curData - pred)>realPrecision){ 4105 type[index] = 0; 4106 pred = curData; 4107 unpredictable_data[block_unpredictable_count ++] = curData; 4108 } 4109 } 4110 else{ 4111 type[index] = 0; 4112 pred = curData; 4113 unpredictable_data[block_unpredictable_count ++] = curData; 4114 } 4115 4116 // assign value to block surfaces 4117 pb_pos[ii * strip_dim0_offset + jj] = pred; 4118 index ++; 4119 cur_data_pos ++; 4120 } 4121 cur_data_pos += dim0_offset - current_blockcount_y; 4122 } 4123 /*dealing with the last ii (boundary)*/ 4124 { 4125 size_t ii = current_blockcount_x - 1; 4126 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4127 curData = *cur_data_pos; 4128 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4129 diff = curData - pred; 4130 itvNum = fabs(diff)/realPrecision + 1; 4131 if (itvNum < intvCapacity){ 4132 if (diff < 0) itvNum = -itvNum; 4133 type[index] = (int) (itvNum/2) + intvRadius; 4134 
pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4135 //ganrantee comporession error against the case of machine-epsilon 4136 if(fabs(curData - pred)>realPrecision){ 4137 type[index] = 0; 4138 pred = curData; 4139 unpredictable_data[block_unpredictable_count ++] = curData; 4140 } 4141 } 4142 else{ 4143 type[index] = 0; 4144 pred = curData; 4145 unpredictable_data[block_unpredictable_count ++] = curData; 4146 } 4147 // assign value to next prediction buffer 4148 next_pb_pos[jj] = pred; 4149 index ++; 4150 cur_data_pos ++; 4151 } 4152 /*dealing with the last jj (boundary)*/ 4153 { 4154 // jj == current_blockcount_y - 1 4155 size_t jj = current_blockcount_y - 1; 4156 curData = *cur_data_pos; 4157 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4158 diff = curData - pred; 4159 itvNum = fabs(diff)/realPrecision + 1; 4160 if (itvNum < intvCapacity){ 4161 if (diff < 0) itvNum = -itvNum; 4162 type[index] = (int) (itvNum/2) + intvRadius; 4163 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4164 //ganrantee comporession error against the case of machine-epsilon 4165 if(fabs(curData - pred)>realPrecision){ 4166 type[index] = 0; 4167 pred = curData; 4168 unpredictable_data[block_unpredictable_count ++] = curData; 4169 } 4170 } 4171 else{ 4172 type[index] = 0; 4173 pred = curData; 4174 unpredictable_data[block_unpredictable_count ++] = curData; 4175 } 4176 4177 // assign value to block surfaces 4178 pb_pos[ii * strip_dim0_offset + jj] = pred; 4179 // assign value to next prediction buffer 4180 next_pb_pos[jj] = pred; 4181 4182 index ++; 4183 cur_data_pos ++; 4184 } 4185 } // end ii == -1 4186 unpredictable_count = block_unpredictable_count; 4187 total_unpred += unpredictable_count; 4188 unpredictable_data += unpredictable_count; 4189 reg_count ++; 4190 }// end use_reg 4191 else{ 4192 // use SZ 4193 // SZ predication 4194 unpredictable_count = 0; 4195 double * cur_pb_pos = pb_pos; 4196 double * cur_data_pos = data_pos; 
4197 double curData; 4198 double pred2D; 4199 double itvNum, diff; 4200 size_t index = 0; 4201 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4202 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4203 curData = *cur_data_pos; 4204 4205 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4206 diff = curData - pred2D; 4207 itvNum = fabs(diff)/realPrecision + 1; 4208 if (itvNum < intvCapacity_sz){ 4209 if (diff < 0) itvNum = -itvNum; 4210 type[index] = (int) (itvNum/2) + intvRadius; 4211 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4212 //ganrantee comporession error against the case of machine-epsilon 4213 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4214 type[index] = 0; 4215 *cur_pb_pos = curData; 4216 unpredictable_data[unpredictable_count ++] = curData; 4217 } 4218 } 4219 else{ 4220 type[index] = 0; 4221 *cur_pb_pos = curData; 4222 unpredictable_data[unpredictable_count ++] = curData; 4223 } 4224 4225 index ++; 4226 cur_pb_pos ++; 4227 cur_data_pos ++; 4228 } 4229 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 4230 cur_data_pos += dim0_offset - current_blockcount_y; 4231 } 4232 /*dealing with the last ii (boundary)*/ 4233 { 4234 // ii == current_blockcount_x - 1 4235 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4236 curData = *cur_data_pos; 4237 4238 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4239 diff = curData - pred2D; 4240 itvNum = fabs(diff)/realPrecision + 1; 4241 if (itvNum < intvCapacity_sz){ 4242 if (diff < 0) itvNum = -itvNum; 4243 type[index] = (int) (itvNum/2) + intvRadius; 4244 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4245 //ganrantee comporession error against the case of machine-epsilon 4246 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4247 type[index] = 0; 4248 *cur_pb_pos = curData; 4249 unpredictable_data[unpredictable_count ++] = curData; 4250 } 4251 } 4252 
else{ 4253 type[index] = 0; 4254 *cur_pb_pos = curData; 4255 unpredictable_data[unpredictable_count ++] = curData; 4256 } 4257 next_pb_pos[jj] = *cur_pb_pos; 4258 index ++; 4259 cur_pb_pos ++; 4260 cur_data_pos ++; 4261 } 4262 } 4263 total_unpred += unpredictable_count; 4264 unpredictable_data += unpredictable_count; 4265 // change indicator 4266 indicator_pos[j] = 1; 4267 }// end SZ 4268 reg_params_pos ++; 4269 data_pos += current_blockcount_y; 4270 pb_pos += current_blockcount_y; 4271 next_pb_pos += current_blockcount_y; 4272 type += current_blockcount_x * current_blockcount_y; 4273 }// end j 4274 indicator_pos += num_y; 4275 double * tmp; 4276 tmp = cur_pb_buf; 4277 cur_pb_buf = next_pb_buf; 4278 next_pb_buf = tmp; 4279 }// end i 4280 } 4281 free(prediction_buffer_1); 4282 free(prediction_buffer_2); 4283 4284 int stateNum = 2*quantization_intervals; 4285 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4286 4287 size_t nodeCount = 0; 4288 size_t i = 0; 4289 init(huffmanTree, result_type, num_elements); 4290 for (i = 0; i < stateNum; i++) 4291 if (huffmanTree->code[i]) nodeCount++; 4292 nodeCount = nodeCount*2-1; 4293 4294 unsigned char *treeBytes; 4295 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4296 4297 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 4298 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 4299 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); 4300 unsigned char * result_pos = result; 4301 initRandomAccessBytes(result_pos); 4302 result_pos += meta_data_offset; 4303 4304 sizeToBytes(result_pos, num_elements); 4305 
result_pos += exe_params->SZ_SIZE_TYPE; 4306 4307 intToBytes_bigEndian(result_pos, block_size); 4308 result_pos += sizeof(int); 4309 doubleToBytes(result_pos, realPrecision); 4310 result_pos += sizeof(double); 4311 intToBytes_bigEndian(result_pos, quantization_intervals); 4312 result_pos += sizeof(int); 4313 intToBytes_bigEndian(result_pos, treeByteSize); 4314 result_pos += sizeof(int); 4315 intToBytes_bigEndian(result_pos, nodeCount); 4316 result_pos += sizeof(int); 4317 memcpy(result_pos, treeBytes, treeByteSize); 4318 result_pos += treeByteSize; 4319 free(treeBytes); 4320 4321 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 4322 result_pos += sizeof(unsigned char); 4323 memcpy(result_pos, &mean, sizeof(double)); 4324 result_pos += sizeof(double); 4325 4326 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 4327 result_pos += indicator_size; 4328 4329 //convert the lead/mid/resi to byte stream 4330 if(reg_count>0){ 4331 for(int e=0; e<3; e++){ 4332 int stateNum = 2*coeff_intvCapacity_sz; 4333 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4334 size_t nodeCount = 0; 4335 init(huffmanTree, coeff_type[e], reg_count); 4336 size_t i = 0; 4337 for (i = 0; i < huffmanTree->stateNum; i++) 4338 if (huffmanTree->code[i]) nodeCount++; 4339 nodeCount = nodeCount*2-1; 4340 unsigned char *treeBytes; 4341 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4342 doubleToBytes(result_pos, precision[e]); 4343 result_pos += sizeof(double); 4344 intToBytes_bigEndian(result_pos, coeff_intvRadius); 4345 result_pos += sizeof(int); 4346 intToBytes_bigEndian(result_pos, treeByteSize); 4347 result_pos += sizeof(int); 4348 intToBytes_bigEndian(result_pos, nodeCount); 4349 result_pos += sizeof(int); 4350 memcpy(result_pos, treeBytes, treeByteSize); 4351 result_pos += treeByteSize; 4352 free(treeBytes); 4353 size_t typeArray_size = 0; 4354 encode(huffmanTree, coeff_type[e], 
reg_count, result_pos + sizeof(size_t), &typeArray_size); 4355 sizeToBytes(result_pos, typeArray_size); 4356 result_pos += sizeof(size_t) + typeArray_size; 4357 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 4358 result_pos += sizeof(int); 4359 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); 4360 result_pos += coeff_unpredictable_count[e]*sizeof(double); 4361 SZ_ReleaseHuffman(huffmanTree); 4362 } 4363 } 4364 free(coeff_result_type); 4365 free(coeff_unpredictable_data); 4366 4367 //record the number of unpredictable data and also store them 4368 memcpy(result_pos, &total_unpred, sizeof(size_t)); 4369 result_pos += sizeof(size_t); 4370 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); 4371 result_pos += total_unpred * sizeof(double); 4372 size_t typeArray_size = 0; 4373 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 4374 result_pos += typeArray_size; 4375 4376 size_t totalEncodeSize = result_pos - result; 4377 free(indicator); 4378 free(result_unpredictable_data); 4379 free(result_type); 4380 free(reg_params); 4381 4382 SZ_ReleaseHuffman(huffmanTree); 4383 *comp_size = totalEncodeSize; 4384 4385 return result; 4386 } 4387 4388 unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ 4389 4390 unsigned int quantization_intervals; 4391 double sz_sample_correct_freq = -1;//0.5; //-1 4392 double dense_pos; 4393 double mean_flush_freq; 4394 unsigned char use_mean = 0; 4395 4396 // calculate block dims 4397 size_t num_x, num_y, num_z; 4398 size_t block_size = 6; 4399 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 4400 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 4401 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); 4402 4403 size_t split_index_x, split_index_y, split_index_z; 4404 size_t early_blockcount_x, 
early_blockcount_y, early_blockcount_z; 4405 size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; 4406 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 4407 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 4408 SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); 4409 4410 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; 4411 size_t num_blocks = num_x * num_y * num_z; 4412 size_t num_elements = r1 * r2 * r3; 4413 4414 size_t dim0_offset = r2 * r3; 4415 size_t dim1_offset = r3; 4416 4417 int * result_type = (int *) malloc(num_elements * sizeof(int)); 4418 size_t unpred_data_max_size = max_num_block_elements; 4419 double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); 4420 size_t total_unpred = 0; 4421 size_t unpredictable_count; 4422 size_t max_unpred_count = 0; 4423 double * data_pos = oriData; 4424 int * type = result_type; 4425 size_t type_offset; 4426 size_t offset_x, offset_y, offset_z; 4427 size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; 4428 4429 double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double)); 4430 double * reg_params_pos = reg_params; 4431 // move regression part out 4432 size_t params_offset_b = num_blocks; 4433 size_t params_offset_c = 2*num_blocks; 4434 size_t params_offset_d = 3*num_blocks; 4435 for(size_t i=0; i<num_x; i++){ 4436 for(size_t j=0; j<num_y; j++){ 4437 for(size_t k=0; k<num_z; k++){ 4438 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4439 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4440 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4441 offset_x = (i < split_index_x) ? 
i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4442 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4443 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4444 4445 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 4446 /*Calculate regression coefficients*/ 4447 { 4448 double * cur_data_pos = data_pos; 4449 double fx = 0.0; 4450 double fy = 0.0; 4451 double fz = 0.0; 4452 double f = 0; 4453 double sum_x, sum_y; 4454 double curData; 4455 for(size_t i=0; i<current_blockcount_x; i++){ 4456 sum_x = 0; 4457 for(size_t j=0; j<current_blockcount_y; j++){ 4458 sum_y = 0; 4459 for(size_t k=0; k<current_blockcount_z; k++){ 4460 curData = *cur_data_pos; 4461 // f += curData; 4462 // fx += curData * i; 4463 // fy += curData * j; 4464 // fz += curData * k; 4465 sum_y += curData; 4466 fz += curData * k; 4467 cur_data_pos ++; 4468 } 4469 fy += sum_y * j; 4470 sum_x += sum_y; 4471 cur_data_pos += dim1_offset - current_blockcount_z; 4472 } 4473 fx += sum_x * i; 4474 f += sum_x; 4475 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4476 } 4477 double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z); 4478 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 4479 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 4480 reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1); 4481 reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2); 4482 } 4483 reg_params_pos ++; 4484 } 4485 } 4486 } 4487 4488 //Compress coefficient arrays 4489 double 
precision_a, precision_b, precision_c, precision_d; 4490 double rel_param_err = 0.025; 4491 precision_a = rel_param_err * realPrecision / late_blockcount_x; 4492 precision_b = rel_param_err * realPrecision / late_blockcount_y; 4493 precision_c = rel_param_err * realPrecision / late_blockcount_z; 4494 precision_d = rel_param_err * realPrecision; 4495 4496 if(exe_params->optQuantMode==1) 4497 { 4498 quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 4499 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 4500 updateQuantizationInfo(quantization_intervals); 4501 } 4502 else{ 4503 quantization_intervals = exe_params->intvCapacity; 4504 } 4505 4506 double mean = 0; 4507 if(use_mean){ 4508 // compute mean 4509 double sum = 0.0; 4510 size_t mean_count = 0; 4511 for(size_t i=0; i<num_elements; i++){ 4512 if(fabs(oriData[i] - dense_pos) < realPrecision){ 4513 sum += oriData[i]; 4514 mean_count ++; 4515 } 4516 } 4517 if(mean_count > 0) mean = sum / mean_count; 4518 } 4519 4520 double tmp_realPrecision = realPrecision; 4521 4522 // use two prediction buffers for higher performance 4523 double * unpredictable_data = result_unpredictable_data; 4524 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 4525 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 4526 size_t reg_count = 0; 4527 size_t strip_dim_0 = early_blockcount_x + 1; 4528 size_t strip_dim_1 = r2 + 1; 4529 size_t strip_dim_2 = r3 + 1; 4530 size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; 4531 size_t strip_dim1_offset = strip_dim_2; 4532 unsigned char * indicator_pos = indicator; 4533 4534 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double); 4535 double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size); 4536 memset(prediction_buffer_1, 0, prediction_buffer_size); 4537 double 
* prediction_buffer_2 = (double *) malloc(prediction_buffer_size); 4538 memset(prediction_buffer_2, 0, prediction_buffer_size); 4539 double * cur_pb_buf = prediction_buffer_1; 4540 double * next_pb_buf = prediction_buffer_2; 4541 double * cur_pb_buf_pos; 4542 double * next_pb_buf_pos; 4543 int intvCapacity = exe_params->intvCapacity; 4544 int intvRadius = exe_params->intvRadius; 4545 int use_reg = 0; 4546 double noise = realPrecision * 1.22; 4547 4548 reg_params_pos = reg_params; 4549 // compress the regression coefficients on the fly 4550 double last_coeffcients[4] = {0.0}; 4551 int coeff_intvCapacity_sz = 65536; 4552 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 4553 int * coeff_type[4]; 4554 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 4555 double * coeff_unpred_data[4]; 4556 double * coeff_unpredictable_data = (double *) malloc(num_blocks*4*sizeof(double)); 4557 double precision[4]; 4558 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; 4559 for(int i=0; i<4; i++){ 4560 coeff_type[i] = coeff_result_type + i * num_blocks; 4561 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 4562 } 4563 int coeff_index = 0; 4564 unsigned int coeff_unpredictable_count[4] = {0}; 4565 4566 if(use_mean){ 4567 int intvCapacity_sz = intvCapacity - 2; 4568 for(size_t i=0; i<num_x; i++){ 4569 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4570 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4571 for(size_t j=0; j<num_y; j++){ 4572 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4573 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 4574 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 4575 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 4576 type = result_type + type_offset; 4577 4578 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 4579 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 4580 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 4581 4582 size_t current_blockcount_z; 4583 double * pb_pos = cur_pb_buf_pos; 4584 double * next_pb_pos = next_pb_buf_pos; 4585 size_t strip_unpredictable_count = 0; 4586 for(size_t k=0; k<num_z; k++){ 4587 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4588 4589 /*sampling and decide which predictor*/ 4590 { 4591 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 4592 double * cur_data_pos; 4593 double curData; 4594 double pred_reg, pred_sz; 4595 double err_sz = 0.0, err_reg = 0.0; 4596 int bmi = 0; 4597 if(i>0 && j>0 && k>0){ 4598 for(int i=0; i<block_size; i++){ 4599 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4600 curData = *cur_data_pos; 4601 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4602 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4603 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4604 err_reg += fabs(pred_reg - curData); 4605 4606 bmi = block_size - i; 4607 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4608 curData = *cur_data_pos; 4609 pred_sz = cur_data_pos[-1] + 
cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4610 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4611 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4612 err_reg += fabs(pred_reg - curData); 4613 4614 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4615 curData = *cur_data_pos; 4616 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4617 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4618 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4619 err_reg += fabs(pred_reg - curData); 4620 4621 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4622 curData = *cur_data_pos; 4623 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4624 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4625 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4626 err_reg += fabs(pred_reg - curData); 4627 } 4628 } 4629 else{ 4630 for(int i=1; i<block_size; i++){ 4631 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4632 curData = *cur_data_pos; 4633 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] 
- cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4634 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4635 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4636 err_reg += fabs(pred_reg - curData); 4637 4638 bmi = block_size - i; 4639 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4640 curData = *cur_data_pos; 4641 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4642 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4643 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4644 err_reg += fabs(pred_reg - curData); 4645 4646 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4647 curData = *cur_data_pos; 4648 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4649 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4650 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4651 err_reg += fabs(pred_reg - curData); 4652 4653 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4654 curData = *cur_data_pos; 4655 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 
1]; 4656 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4657 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 4658 err_reg += fabs(pred_reg - curData); 4659 4660 } 4661 } 4662 use_reg = (err_reg < err_sz); 4663 } 4664 if(use_reg){ 4665 { 4666 /*predict coefficients in current block via previous reg_block*/ 4667 double cur_coeff; 4668 double diff, itvNum; 4669 for(int e=0; e<4; e++){ 4670 cur_coeff = reg_params_pos[e*num_blocks]; 4671 diff = cur_coeff - last_coeffcients[e]; 4672 itvNum = fabs(diff)/precision[e] + 1; 4673 if (itvNum < coeff_intvCapacity_sz){ 4674 if (diff < 0) itvNum = -itvNum; 4675 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4676 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4677 //ganrantee comporession error against the case of machine-epsilon 4678 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4679 coeff_type[e][coeff_index] = 0; 4680 last_coeffcients[e] = cur_coeff; 4681 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4682 } 4683 } 4684 else{ 4685 coeff_type[e][coeff_index] = 0; 4686 last_coeffcients[e] = cur_coeff; 4687 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4688 } 4689 } 4690 coeff_index ++; 4691 } 4692 double curData; 4693 double pred; 4694 double itvNum; 4695 double diff; 4696 size_t index = 0; 4697 size_t block_unpredictable_count = 0; 4698 double * cur_data_pos = data_pos; 4699 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4700 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4701 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4702 curData = *cur_data_pos; 4703 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 4704 diff = curData - pred; 4705 itvNum = fabs(diff)/tmp_realPrecision + 1; 4706 if (itvNum < 
intvCapacity){ 4707 if (diff < 0) itvNum = -itvNum; 4708 type[index] = (int) (itvNum/2) + intvRadius; 4709 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4710 //ganrantee comporession error against the case of machine-epsilon 4711 if(fabs(curData - pred)>tmp_realPrecision){ 4712 type[index] = 0; 4713 pred = curData; 4714 unpredictable_data[block_unpredictable_count ++] = curData; 4715 } 4716 } 4717 else{ 4718 type[index] = 0; 4719 pred = curData; 4720 unpredictable_data[block_unpredictable_count ++] = curData; 4721 } 4722 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 4723 // assign value to block surfaces 4724 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 4725 } 4726 index ++; 4727 cur_data_pos ++; 4728 } 4729 cur_data_pos += dim1_offset - current_blockcount_z; 4730 } 4731 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4732 } 4733 /*dealing with the last ii (boundary)*/ 4734 { 4735 // ii == current_blockcount_x - 1 4736 size_t ii = current_blockcount_x - 1; 4737 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4738 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4739 curData = *cur_data_pos; 4740 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 4741 diff = curData - pred; 4742 itvNum = fabs(diff)/tmp_realPrecision + 1; 4743 if (itvNum < intvCapacity){ 4744 if (diff < 0) itvNum = -itvNum; 4745 type[index] = (int) (itvNum/2) + intvRadius; 4746 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4747 //ganrantee comporession error against the case of machine-epsilon 4748 if(fabs(curData - pred)>tmp_realPrecision){ 4749 type[index] = 0; 4750 pred = curData; 4751 unpredictable_data[block_unpredictable_count ++] = curData; 4752 } 4753 } 4754 else{ 4755 type[index] = 0; 4756 pred = curData; 4757 unpredictable_data[block_unpredictable_count ++] = curData; 4758 } 4759 4760 if((jj == current_blockcount_y - 1) || 
(kk == current_blockcount_z - 1)){ 4761 // assign value to block surfaces 4762 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 4763 } 4764 // assign value to next prediction buffer 4765 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 4766 index ++; 4767 cur_data_pos ++; 4768 } 4769 cur_data_pos += dim1_offset - current_blockcount_z; 4770 } 4771 } 4772 unpredictable_count = block_unpredictable_count; 4773 strip_unpredictable_count += unpredictable_count; 4774 unpredictable_data += unpredictable_count; 4775 4776 reg_count ++; 4777 } 4778 else{ 4779 // use SZ 4780 // SZ predication 4781 unpredictable_count = 0; 4782 double * cur_pb_pos = pb_pos; 4783 double * cur_data_pos = data_pos; 4784 double curData; 4785 double pred3D; 4786 double itvNum, diff; 4787 size_t index = 0; 4788 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4789 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4790 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4791 4792 curData = *cur_data_pos; 4793 if(fabs(curData - mean) <= realPrecision){ 4794 // adjust type[index] to intvRadius for coherence with freq in reg 4795 type[index] = intvRadius; 4796 *cur_pb_pos = mean; 4797 } 4798 else 4799 { 4800 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 4801 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 4802 diff = curData - pred3D; 4803 itvNum = fabs(diff)/realPrecision + 1; 4804 if (itvNum < intvCapacity_sz){ 4805 if (diff < 0) itvNum = -itvNum; 4806 type[index] = (int) (itvNum/2) + intvRadius; 4807 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4808 if(type[index] <= intvRadius) type[index] -= 1; 4809 //ganrantee comporession error against the case of machine-epsilon 4810 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4811 type[index] = 0; 4812 *cur_pb_pos = curData; 4813 
unpredictable_data[unpredictable_count ++] = curData; 4814 } 4815 } 4816 else{ 4817 type[index] = 0; 4818 *cur_pb_pos = curData; 4819 unpredictable_data[unpredictable_count ++] = curData; 4820 } 4821 } 4822 index ++; 4823 cur_pb_pos ++; 4824 cur_data_pos ++; 4825 } 4826 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 4827 cur_data_pos += dim1_offset - current_blockcount_z; 4828 } 4829 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 4830 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4831 } 4832 /*dealing with the last ii (boundary)*/ 4833 { 4834 // ii == current_blockcount_x - 1 4835 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4836 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4837 4838 curData = *cur_data_pos; 4839 if(fabs(curData - mean) <= realPrecision){ 4840 // adjust type[index] to intvRadius for coherence with freq in reg 4841 type[index] = intvRadius; 4842 *cur_pb_pos = mean; 4843 } 4844 else 4845 { 4846 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 4847 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 4848 diff = curData - pred3D; 4849 itvNum = fabs(diff)/realPrecision + 1; 4850 if (itvNum < intvCapacity_sz){ 4851 if (diff < 0) itvNum = -itvNum; 4852 type[index] = (int) (itvNum/2) + intvRadius; 4853 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4854 if(type[index] <= intvRadius) type[index] -= 1; 4855 //ganrantee comporession error against the case of machine-epsilon 4856 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4857 type[index] = 0; 4858 *cur_pb_pos = curData; 4859 unpredictable_data[unpredictable_count ++] = curData; 4860 } 4861 } 4862 else{ 4863 type[index] = 0; 4864 *cur_pb_pos = curData; 4865 unpredictable_data[unpredictable_count ++] = curData; 4866 } 4867 } 4868 
next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 4869 index ++; 4870 cur_pb_pos ++; 4871 cur_data_pos ++; 4872 } 4873 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 4874 cur_data_pos += dim1_offset - current_blockcount_z; 4875 } 4876 } 4877 strip_unpredictable_count += unpredictable_count; 4878 unpredictable_data += unpredictable_count; 4879 // change indicator 4880 indicator_pos[k] = 1; 4881 }// end SZ 4882 4883 reg_params_pos ++; 4884 data_pos += current_blockcount_z; 4885 pb_pos += current_blockcount_z; 4886 next_pb_pos += current_blockcount_z; 4887 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 4888 4889 } // end k 4890 4891 if(strip_unpredictable_count > max_unpred_count){ 4892 max_unpred_count = strip_unpredictable_count; 4893 } 4894 total_unpred += strip_unpredictable_count; 4895 indicator_pos += num_z; 4896 }// end j 4897 double * tmp; 4898 tmp = cur_pb_buf; 4899 cur_pb_buf = next_pb_buf; 4900 next_pb_buf = tmp; 4901 }// end i 4902 } 4903 else{ 4904 int intvCapacity_sz = intvCapacity - 2; 4905 for(size_t i=0; i<num_x; i++){ 4906 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4907 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4908 4909 for(size_t j=0; j<num_y; j++){ 4910 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4911 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 4912 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 4913 // copy bottom plane from plane buffer 4914 // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double)); 4915 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 4916 type = result_type + type_offset; 4917 4918 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 4919 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 4920 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 4921 4922 size_t current_blockcount_z; 4923 double * pb_pos = cur_pb_buf_pos; 4924 double * next_pb_pos = next_pb_buf_pos; 4925 size_t strip_unpredictable_count = 0; 4926 for(size_t k=0; k<num_z; k++){ 4927 current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; 4928 /*sampling*/ 4929 { 4930 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 4931 double * cur_data_pos; 4932 double curData; 4933 double pred_reg, pred_sz; 4934 double err_sz = 0.0, err_reg = 0.0; 4935 int bmi; 4936 if(i>0 && j>0 && k>0){ 4937 for(int i=0; i<block_size; i++){ 4938 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4939 curData = *cur_data_pos; 4940 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4941 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4942 err_sz += fabs(pred_sz - curData) + noise; 4943 err_reg += fabs(pred_reg - curData); 4944 4945 bmi = block_size - i; 4946 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4947 curData = *cur_data_pos; 4948 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4949 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4950 err_sz += fabs(pred_sz - curData) + noise; 4951 err_reg += fabs(pred_reg - curData); 4952 4953 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4954 curData = *cur_data_pos; 4955 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4956 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + 
reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4957 err_sz += fabs(pred_sz - curData) + noise; 4958 err_reg += fabs(pred_reg - curData); 4959 4960 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4961 curData = *cur_data_pos; 4962 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4963 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4964 err_sz += fabs(pred_sz - curData) + noise; 4965 err_reg += fabs(pred_reg - curData); 4966 } 4967 } 4968 else{ 4969 for(int i=1; i<block_size; i++){ 4970 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 4971 curData = *cur_data_pos; 4972 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4973 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4974 err_sz += fabs(pred_sz - curData) + noise; 4975 err_reg += fabs(pred_reg - curData); 4976 4977 bmi = block_size - i; 4978 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 4979 curData = *cur_data_pos; 4980 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4981 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4982 err_sz += fabs(pred_sz - curData) + noise; 4983 err_reg += 
fabs(pred_reg - curData); 4984 4985 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 4986 curData = *cur_data_pos; 4987 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4988 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 4989 err_sz += fabs(pred_sz - curData) + noise; 4990 err_reg += fabs(pred_reg - curData); 4991 4992 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 4993 curData = *cur_data_pos; 4994 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 4995 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 4996 err_sz += fabs(pred_sz - curData) + noise; 4997 err_reg += fabs(pred_reg - curData); 4998 } 4999 } 5000 use_reg = (err_reg < err_sz); 5001 5002 } 5003 if(use_reg) 5004 { 5005 { 5006 /*predict coefficients in current block via previous reg_block*/ 5007 double cur_coeff; 5008 double diff, itvNum; 5009 for(int e=0; e<4; e++){ 5010 cur_coeff = reg_params_pos[e*num_blocks]; 5011 diff = cur_coeff - last_coeffcients[e]; 5012 itvNum = fabs(diff)/precision[e] + 1; 5013 if (itvNum < coeff_intvCapacity_sz){ 5014 if (diff < 0) itvNum = -itvNum; 5015 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 5016 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 5017 //ganrantee comporession error against the case of machine-epsilon 5018 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 5019 
coeff_type[e][coeff_index] = 0; 5020 last_coeffcients[e] = cur_coeff; 5021 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5022 } 5023 } 5024 else{ 5025 coeff_type[e][coeff_index] = 0; 5026 last_coeffcients[e] = cur_coeff; 5027 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5028 } 5029 } 5030 coeff_index ++; 5031 } 5032 double curData; 5033 double pred; 5034 double itvNum; 5035 double diff; 5036 size_t index = 0; 5037 size_t block_unpredictable_count = 0; 5038 double * cur_data_pos = data_pos; 5039 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5040 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5041 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5042 5043 curData = *cur_data_pos; 5044 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5045 diff = curData - pred; 5046 itvNum = fabs(diff)/tmp_realPrecision + 1; 5047 if (itvNum < intvCapacity){ 5048 if (diff < 0) itvNum = -itvNum; 5049 type[index] = (int) (itvNum/2) + intvRadius; 5050 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5051 //ganrantee comporession error against the case of machine-epsilon 5052 if(fabs(curData - pred)>tmp_realPrecision){ 5053 type[index] = 0; 5054 pred = curData; 5055 unpredictable_data[block_unpredictable_count ++] = curData; 5056 } 5057 } 5058 else{ 5059 type[index] = 0; 5060 pred = curData; 5061 unpredictable_data[block_unpredictable_count ++] = curData; 5062 } 5063 5064 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5065 // assign value to block surfaces 5066 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5067 } 5068 index ++; 5069 cur_data_pos ++; 5070 } 5071 cur_data_pos += dim1_offset - current_blockcount_z; 5072 } 5073 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5074 } 5075 /*dealing with the last ii (boundary)*/ 5076 { 5077 // ii == current_blockcount_x - 1 5078 size_t ii = 
current_blockcount_x - 1; 5079 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5080 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5081 curData = *cur_data_pos; 5082 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5083 diff = curData - pred; 5084 itvNum = fabs(diff)/tmp_realPrecision + 1; 5085 if (itvNum < intvCapacity){ 5086 if (diff < 0) itvNum = -itvNum; 5087 type[index] = (int) (itvNum/2) + intvRadius; 5088 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5089 //ganrantee comporession error against the case of machine-epsilon 5090 if(fabs(curData - pred)>tmp_realPrecision){ 5091 type[index] = 0; 5092 pred = curData; 5093 unpredictable_data[block_unpredictable_count ++] = curData; 5094 } 5095 } 5096 else{ 5097 type[index] = 0; 5098 pred = curData; 5099 unpredictable_data[block_unpredictable_count ++] = curData; 5100 } 5101 5102 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5103 // assign value to block surfaces 5104 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5105 } 5106 // assign value to next prediction buffer 5107 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 5108 index ++; 5109 cur_data_pos ++; 5110 } 5111 cur_data_pos += dim1_offset - current_blockcount_z; 5112 } 5113 } 5114 unpredictable_count = block_unpredictable_count; 5115 strip_unpredictable_count += unpredictable_count; 5116 unpredictable_data += unpredictable_count; 5117 reg_count ++; 5118 } 5119 else{ 5120 // use SZ 5121 // SZ predication 5122 unpredictable_count = 0; 5123 double * cur_pb_pos = pb_pos; 5124 double * cur_data_pos = data_pos; 5125 double curData; 5126 double pred3D; 5127 double itvNum, diff; 5128 size_t index = 0; 5129 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5130 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5131 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5132 5133 curData = *cur_data_pos; 5134 pred3D = cur_pb_pos[-1] + 
cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5135 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5136 diff = curData - pred3D; 5137 itvNum = fabs(diff)/realPrecision + 1; 5138 if (itvNum < intvCapacity_sz){ 5139 if (diff < 0) itvNum = -itvNum; 5140 type[index] = (int) (itvNum/2) + intvRadius; 5141 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5142 //ganrantee comporession error against the case of machine-epsilon 5143 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5144 type[index] = 0; 5145 *cur_pb_pos = curData; 5146 unpredictable_data[unpredictable_count ++] = curData; 5147 } 5148 } 5149 else{ 5150 type[index] = 0; 5151 *cur_pb_pos = curData; 5152 unpredictable_data[unpredictable_count ++] = curData; 5153 } 5154 index ++; 5155 cur_pb_pos ++; 5156 cur_data_pos ++; 5157 } 5158 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5159 cur_data_pos += dim1_offset - current_blockcount_z; 5160 } 5161 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 5162 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5163 } 5164 /*dealing with the last ii (boundary)*/ 5165 { 5166 // ii == current_blockcount_x - 1 5167 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5168 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5169 5170 curData = *cur_data_pos; 5171 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5172 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5173 diff = curData - pred3D; 5174 itvNum = fabs(diff)/realPrecision + 1; 5175 if (itvNum < intvCapacity_sz){ 5176 if (diff < 0) itvNum = -itvNum; 5177 type[index] = (int) (itvNum/2) + intvRadius; 5178 *cur_pb_pos = pred3D + 
2 * (type[index] - intvRadius) * tmp_realPrecision; 5179 //ganrantee comporession error against the case of machine-epsilon 5180 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5181 type[index] = 0; 5182 *cur_pb_pos = curData; 5183 unpredictable_data[unpredictable_count ++] = curData; 5184 } 5185 } 5186 else{ 5187 type[index] = 0; 5188 *cur_pb_pos = curData; 5189 unpredictable_data[unpredictable_count ++] = curData; 5190 } 5191 // assign value to next prediction buffer 5192 next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 5193 index ++; 5194 cur_pb_pos ++; 5195 cur_data_pos ++; 5196 } 5197 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5198 cur_data_pos += dim1_offset - current_blockcount_z; 5199 } 5200 } 5201 strip_unpredictable_count += unpredictable_count; 5202 unpredictable_data += unpredictable_count; 5203 // change indicator 5204 indicator_pos[k] = 1; 5205 }// end SZ 5206 5207 reg_params_pos ++; 5208 data_pos += current_blockcount_z; 5209 pb_pos += current_blockcount_z; 5210 next_pb_pos += current_blockcount_z; 5211 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 5212 5213 } 5214 5215 if(strip_unpredictable_count > max_unpred_count){ 5216 max_unpred_count = strip_unpredictable_count; 5217 } 5218 total_unpred += strip_unpredictable_count; 5219 indicator_pos += num_z; 5220 } 5221 double * tmp; 5222 tmp = cur_pb_buf; 5223 cur_pb_buf = next_pb_buf; 5224 next_pb_buf = tmp; 5225 } 5226 } 5227 5228 free(prediction_buffer_1); 5229 free(prediction_buffer_2); 5230 5231 int stateNum = 2*quantization_intervals; 5232 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 5233 5234 size_t nodeCount = 0; 5235 init(huffmanTree, result_type, num_elements); 5236 size_t i = 0; 5237 for (i = 0; i < huffmanTree->stateNum; i++) 5238 if (huffmanTree->code[i]) nodeCount++; 5239 nodeCount = nodeCount*2-1; 5240 5241 unsigned char *treeBytes; 5242 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, 
&treeBytes); 5243 5244 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 5245 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 5246 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1); 5247 unsigned char * result_pos = result; 5248 initRandomAccessBytes(result_pos); 5249 5250 result_pos += meta_data_offset; 5251 5252 sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 5253 result_pos += exe_params->SZ_SIZE_TYPE; 5254 5255 intToBytes_bigEndian(result_pos, block_size); 5256 result_pos += sizeof(int); 5257 doubleToBytes(result_pos, realPrecision); 5258 result_pos += sizeof(double); 5259 intToBytes_bigEndian(result_pos, quantization_intervals); 5260 result_pos += sizeof(int); 5261 intToBytes_bigEndian(result_pos, treeByteSize); 5262 result_pos += sizeof(int); 5263 intToBytes_bigEndian(result_pos, nodeCount); 5264 result_pos += sizeof(int); 5265 memcpy(result_pos, treeBytes, treeByteSize); 5266 result_pos += treeByteSize; 5267 free(treeBytes); 5268 5269 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 5270 result_pos += sizeof(unsigned char); 5271 memcpy(result_pos, &mean, sizeof(double)); 5272 result_pos += sizeof(double); 5273 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 5274 result_pos += indicator_size; 5275 5276 //convert the lead/mid/resi to byte stream 5277 if(reg_count > 0){ 5278 for(int e=0; e<4; e++){ 5279 int stateNum = 2*coeff_intvCapacity_sz; 5280 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 5281 size_t nodeCount = 0; 5282 init(huffmanTree, coeff_type[e], reg_count); 5283 size_t i = 0; 5284 for (i = 0; i < 
huffmanTree->stateNum; i++) 5285 if (huffmanTree->code[i]) nodeCount++; 5286 nodeCount = nodeCount*2-1; 5287 unsigned char *treeBytes; 5288 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 5289 doubleToBytes(result_pos, precision[e]); 5290 result_pos += sizeof(double); 5291 intToBytes_bigEndian(result_pos, coeff_intvRadius); 5292 result_pos += sizeof(int); 5293 intToBytes_bigEndian(result_pos, treeByteSize); 5294 result_pos += sizeof(int); 5295 intToBytes_bigEndian(result_pos, nodeCount); 5296 result_pos += sizeof(int); 5297 memcpy(result_pos, treeBytes, treeByteSize); 5298 result_pos += treeByteSize; 5299 free(treeBytes); 5300 size_t typeArray_size = 0; 5301 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 5302 sizeToBytes(result_pos, typeArray_size); 5303 result_pos += sizeof(size_t) + typeArray_size; 5304 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 5305 result_pos += sizeof(int); 5306 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double)); 5307 result_pos += coeff_unpredictable_count[e]*sizeof(double); 5308 SZ_ReleaseHuffman(huffmanTree); 5309 } 5310 } 5311 free(coeff_result_type); 5312 free(coeff_unpredictable_data); 5313 5314 //record the number of unpredictable data and also store them 5315 memcpy(result_pos, &total_unpred, sizeof(size_t)); 5316 result_pos += sizeof(size_t); 5317 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double)); 5318 result_pos += total_unpred * sizeof(double); 5319 size_t typeArray_size = 0; 5320 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 5321 result_pos += typeArray_size; 5322 size_t totalEncodeSize = result_pos - result; 5323 free(indicator); 5324 free(result_unpredictable_data); 5325 free(result_type); 5326 free(reg_params); 5327 5328 5329 SZ_ReleaseHuffman(huffmanTree); 5330 *comp_size = totalEncodeSize; 5331 return result; 5332 } -
TabularUnified thirdparty/SZ/sz/src/sz_double_pwr.c ¶
r2c47b73 r9ee2ce3 24 24 #include "zlib.h" 25 25 #include "rw.h" 26 #include "utility.h" 26 27 27 28 void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, double* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision) … … 1773 1774 free_TightDataPointStorageD(tdps); 1774 1775 } 1776 1777 #include <stdbool.h> 1778 1779 void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, double min, double max){ 1780 1781 double * log_data = (double *) malloc(dataLength * sizeof(double)); 1782 1783 unsigned char * signs = (unsigned char *) malloc(dataLength); 1784 memset(signs, 0, dataLength); 1785 // preprocess 1786 double max_abs_log_data; 1787 if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); 1788 else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); 1789 else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); 1790 double min_log_data = max_abs_log_data; 1791 bool positive = true; 1792 for(size_t i=0; i<dataLength; i++){ 1793 if(oriData[i] < 0){ 1794 signs[i] = 1; 1795 log_data[i] = -oriData[i]; 1796 positive = false; 1797 } 1798 else 1799 log_data[i] = oriData[i]; 1800 if(log_data[i] > 0){ 1801 log_data[i] = log2(log_data[i]); 1802 if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; 1803 if(log_data[i] < min_log_data) min_log_data = log_data[i]; 1804 } 1805 } 1806 1807 double valueRangeSize, medianValue_f; 1808 computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); 1809 if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); 1810 double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; 1811 for(size_t i=0; i<dataLength; i++){ 1812 if(oriData[i] == 0){ 1813 log_data[i] = min_log_data - 2.0001*realPrecision; 1814 } 1815 } 1816 TightDataPointStorageD* tdps = 
SZ_compress_double_1D_MDQ(log_data, dataLength, realPrecision, valueRangeSize, medianValue_f); 1817 tdps->minLogValue = min_log_data - 1.0001*realPrecision; 1818 free(log_data); 1819 if(!positive){ 1820 unsigned char * comp_signs; 1821 // compress signs 1822 unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); 1823 tdps->pwrErrBoundBytes = comp_signs; 1824 tdps->pwrErrBoundBytes_size = signSize; 1825 } 1826 else{ 1827 tdps->pwrErrBoundBytes = NULL; 1828 tdps->pwrErrBoundBytes_size = 0; 1829 } 1830 free(signs); 1831 1832 convertTDPStoFlatBytes_double(tdps, newByteData, outSize); 1833 if(*outSize>dataLength*sizeof(double)) 1834 SZ_compress_args_double_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); 1835 1836 free_TightDataPointStorageD(tdps); 1837 } 1838 1839 void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, double min, double max){ 1840 1841 size_t dataLength = r1 * r2; 1842 double * log_data = (double *) malloc(dataLength * sizeof(double)); 1843 1844 unsigned char * signs = (unsigned char *) malloc(dataLength); 1845 memset(signs, 0, dataLength); 1846 // preprocess 1847 double max_abs_log_data; 1848 if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); 1849 else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); 1850 else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? 
fabs(log2(fabs(min))) : fabs(log2(fabs(max))); 1851 double min_log_data = max_abs_log_data; 1852 bool positive = true; 1853 for(size_t i=0; i<dataLength; i++){ 1854 if(oriData[i] < 0){ 1855 signs[i] = 1; 1856 log_data[i] = -oriData[i]; 1857 positive = false; 1858 } 1859 else 1860 log_data[i] = oriData[i]; 1861 if(log_data[i] > 0){ 1862 log_data[i] = log2(log_data[i]); 1863 if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; 1864 if(log_data[i] < min_log_data) min_log_data = log_data[i]; 1865 } 1866 } 1867 1868 double valueRangeSize, medianValue_f; 1869 computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); 1870 if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); 1871 double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; 1872 for(size_t i=0; i<dataLength; i++){ 1873 if(oriData[i] == 0){ 1874 log_data[i] = min_log_data - 2.0001*realPrecision; 1875 } 1876 } 1877 TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ(log_data, r1, r2, realPrecision, valueRangeSize, medianValue_f); 1878 tdps->minLogValue = min_log_data - 1.0001*realPrecision; 1879 free(log_data); 1880 1881 if(!positive){ 1882 unsigned char * comp_signs; 1883 // compress signs 1884 unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); 1885 tdps->pwrErrBoundBytes = comp_signs; 1886 tdps->pwrErrBoundBytes_size = signSize; 1887 } 1888 else{ 1889 tdps->pwrErrBoundBytes = NULL; 1890 tdps->pwrErrBoundBytes_size = 0; 1891 } 1892 free(signs); 1893 1894 convertTDPStoFlatBytes_double(tdps, newByteData, outSize); 1895 if(*outSize>dataLength*sizeof(double)) 1896 SZ_compress_args_double_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); 1897 1898 free_TightDataPointStorageD(tdps); 1899 } 1900 1901 void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, 
size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max){ 1902 1903 size_t dataLength = r1 * r2 * r3; 1904 double * log_data = (double *) malloc(dataLength * sizeof(double)); 1905 1906 unsigned char * signs = (unsigned char *) malloc(dataLength); 1907 memset(signs, 0, dataLength); 1908 // preprocess 1909 double max_abs_log_data; 1910 if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); 1911 else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); 1912 else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); 1913 double min_log_data = max_abs_log_data; 1914 bool positive = true; 1915 for(size_t i=0; i<dataLength; i++){ 1916 if(oriData[i] < 0){ 1917 signs[i] = 1; 1918 log_data[i] = -oriData[i]; 1919 positive = false; 1920 } 1921 else 1922 log_data[i] = oriData[i]; 1923 if(log_data[i] > 0){ 1924 log_data[i] = log2(log_data[i]); 1925 if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; 1926 if(log_data[i] < min_log_data) min_log_data = log_data[i]; 1927 } 1928 } 1929 1930 double valueRangeSize, medianValue_f; 1931 computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f); 1932 if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); 1933 double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16; 1934 for(size_t i=0; i<dataLength; i++){ 1935 if(oriData[i] == 0){ 1936 log_data[i] = min_log_data - 2.0001*realPrecision; 1937 } 1938 } 1939 TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ(log_data, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1940 tdps->minLogValue = min_log_data - 1.0001*realPrecision; 1941 free(log_data); 1942 if(!positive){ 1943 unsigned char * comp_signs; 1944 // compress signs 1945 unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); 1946 tdps->pwrErrBoundBytes = comp_signs; 1947 
tdps->pwrErrBoundBytes_size = signSize; 1948 } 1949 else{ 1950 tdps->pwrErrBoundBytes = NULL; 1951 tdps->pwrErrBoundBytes_size = 0; 1952 } 1953 free(signs); 1954 1955 convertTDPStoFlatBytes_double(tdps, newByteData, outSize); 1956 if(*outSize>dataLength*sizeof(double)) 1957 SZ_compress_args_double_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); 1958 1959 free_TightDataPointStorageD(tdps); 1960 } -
TabularUnified thirdparty/SZ/sz/src/sz_double_ts.c ¶
r2c47b73 r9ee2ce3 67 67 double realPrecision, double valueRangeSize, double medianValue_d) 68 68 { 69 double* preStepData = (double*)(multisteps->hist_data);69 double* preStepData = (double*)(multisteps->hist_data); 70 70 //store the decompressed data 71 71 double* decData = (double*)malloc(sizeof(double)*dataLength); -
TabularUnified thirdparty/SZ/sz/src/sz_float.c ¶
r2c47b73 r9ee2ce3 1 1 /** 2 2 * @file sz_float.c 3 * @author Sheng Di and Dingwen Tao3 * @author Sheng Di, Dingwen Tao, Xin Liang 4 4 * @date Aug, 2016 5 5 * @brief SZ_Init, Compression and Decompression functions … … 26 26 #include "rw.h" 27 27 #include "sz_float_ts.h" 28 #include "utility.h" 28 29 29 30 unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize) … … 406 407 pred = last3CmprsData[0]; 407 408 predAbsErr = fabs(curData - pred); 408 if(predAbsErr< =checkRadius)409 if(predAbsErr<checkRadius) 409 410 { 410 411 state = (predAbsErr/realPrecision+1)/2; … … 1357 1358 } 1358 1359 else 1359 { 1360 tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1360 { 1361 if(sz_with_regression == SZ_NO_REGRESSION) 1362 tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1363 else 1364 *newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize); 1361 1365 compressionType = 0; //snapshot-based compression 1362 1366 multisteps->lastSnapshotStep = timestep; … … 1367 1371 tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1368 1372 1369 1370 convertTDPStoFlatBytes_float(tdps, newByteData, outSize);1371 1372 if(*outSize>dataLength*sizeof(float))1373 SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);1374 1375 free_TightDataPointStorageF(tdps);1376 1373 if(tdps!=NULL) 1374 { 1375 convertTDPStoFlatBytes_float(tdps, newByteData, outSize); 1376 if(*outSize>dataLength*sizeof(float)) 1377 SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize); 1378 free_TightDataPointStorageF(tdps); 1379 } 1380 1377 1381 return compressionType; 1378 1382 } … … 1771 1775 if(errBoundMode>=PW_REL) 1772 1776 { 1773 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, 
min, max);1774 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);1777 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max); 1778 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize); 1775 1779 } 1776 1780 else … … 1780 1784 { 1781 1785 if(errBoundMode>=PW_REL) 1782 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr (newByteData, oriData, realPrecision, r2, r1, outSize, min, max);1786 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max); 1783 1787 else 1784 1788 SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); … … 1787 1791 { 1788 1792 if(errBoundMode>=PW_REL) 1789 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max);1793 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max); 1790 1794 else 1791 1795 SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); … … 1794 1798 { 1795 1799 if(errBoundMode>=PW_REL) 1796 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max);1800 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max); 1797 1801 else 1798 1802 SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue); … … 1855 1859 if(confparams_cpr->errorBoundMode>=PW_REL) 1856 1860 { 1857 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, 
min, max); 1858 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, 1859 valueRangeSize, medianValue, &tmpOutSize); 1861 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max); 1862 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize); 1860 1863 } 1861 1864 else … … 1871 1874 { 1872 1875 if(confparams_cpr->errorBoundMode>=PW_REL) 1873 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr (&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max);1876 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max); 1874 1877 else 1875 1878 #ifdef HAVE_TIMECMPR … … 1878 1881 else 1879 1882 #endif 1880 SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1883 { 1884 if(sz_with_regression == SZ_NO_REGRESSION) 1885 SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1886 else 1887 tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); 1888 } 1881 1889 } 1882 1890 else … … 1884 1892 { 1885 1893 if(confparams_cpr->errorBoundMode>=PW_REL) 1886 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max);1894 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max); 1887 1895 else 1888 1896 #ifdef HAVE_TIMECMPR 1889 1897 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 1890 multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, 
medianValue);1898 multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1891 1899 else 1892 1900 #endif 1893 SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1901 { 1902 if(sz_with_regression == SZ_NO_REGRESSION) 1903 SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1904 else 1905 tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); 1906 } 1894 1907 } 1895 1908 else … … 1897 1910 { 1898 1911 if(confparams_cpr->errorBoundMode>=PW_REL) 1899 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr (&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max);1912 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max); 1900 1913 //ToDO 1901 1914 //SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr(&tmpByteData, oriData, r4, r3, r2, r1, &tmpOutSize, min, max); … … 1906 1919 else 1907 1920 #endif 1908 SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1921 { 1922 if(sz_with_regression == SZ_NO_REGRESSION) 1923 SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue); 1924 else 1925 tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); 1926 } 1909 1927 } 1910 1928 else … … 1921 1939 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION || confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION) 1922 1940 { 1923 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, 
confparams_cpr->gzipMode);1941 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1924 1942 free(tmpByteData); 1925 1943 } … … 3375 3393 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3376 3394 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3377 size_t totalSampleSize = 0; //(r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance;3395 size_t totalSampleSize = 0; 3378 3396 3379 3397 size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset … … 3390 3408 { 3391 3409 radiusIndex = confparams_cpr->maxRangeRadius - 1; 3392 //printf("radiusIndex=%d\n", radiusIndex);3393 3410 } 3394 3411 intervals[radiusIndex]++; 3395 // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData);3396 // fflush(stdout);3397 3412 offset_count += confparams_cpr->sampleDistance; 3398 3413 if(offset_count >= r3){ … … 3410 3425 else data_pos += confparams_cpr->sampleDistance; 3411 3426 } 3412 // printf("sample_count: %ld\n", sample_count);3413 // fflush(stdout);3414 // if(*max_freq < 0.15) *max_freq *= 2;3415 3427 //compute the appropriate number 3416 3428 size_t targetCount = totalSampleSize*confparams_cpr->predThreshold; … … 3430 3442 powerOf2 = 32; 3431 3443 free(intervals); 3432 //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);3433 3444 return powerOf2; 3434 3445 } … … 3750 3761 size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t)); 3751 3762 memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t)); 3752 size_t totalSampleSize = 0;//(r1-1)*(r2-1)/confparams_cpr->sampleDistance; 3753 3754 //float max = oriData[0]; 3755 //float min = oriData[0]; 3763 size_t totalSampleSize = 0; 3756 3764 3757 3765 size_t offset_count = confparams_cpr->sampleDistance - 
1; // count r2 offset … … 3812 3820 while(data_pos - oriData < dataLength){ 3813 3821 totalSampleSize++; 3814 //pred_value = 2*data_pos[-1] - data_pos[-2];3815 3822 pred_value = data_pos[-1]; 3816 3823 pred_err = fabs(pred_value - *data_pos); … … 3841 3848 3842 3849 free(intervals); 3843 //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);3844 3850 return powerOf2; 3845 3851 } … … 4037 4043 } 4038 4044 4045 /*The above code is for sz 1.4.13; the following code is for sz 2.0*/ 4046 4047 unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq) 4048 { 4049 float mean = 0.0; 4050 size_t len = r1 * r2; 4051 size_t mean_distance = (int) (sqrt(len)); 4052 4053 float * data_pos = oriData; 4054 size_t mean_count = 0; 4055 while(data_pos - oriData < len){ 4056 mean += *data_pos; 4057 mean_count ++; 4058 data_pos += mean_distance; 4059 } 4060 if(mean_count > 0) mean /= mean_count; 4061 size_t range = 8192; 4062 size_t radius = 4096; 4063 size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t)); 4064 memset(freq_intervals, 0, range*sizeof(size_t)); 4065 4066 unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius; 4067 int sampleDistance = confparams_cpr->sampleDistance; 4068 float predThreshold = confparams_cpr->predThreshold; 4069 4070 size_t i; 4071 size_t radiusIndex; 4072 float pred_value = 0, pred_err; 4073 size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); 4074 memset(intervals, 0, maxRangeRadius*sizeof(size_t)); 4075 4076 float mean_diff; 4077 ptrdiff_t freq_index; 4078 size_t freq_count = 0; 4079 size_t n1_count = 1; 4080 size_t offset_count = sampleDistance - 1; 4081 size_t offset_count_2 = 0; 4082 size_t sample_count = 0; 4083 data_pos = oriData + r2 + offset_count; 4084 while(data_pos - oriData < len){ 4085 pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; 4086 pred_err = fabs(pred_value 
- *data_pos); 4087 if(pred_err < realPrecision) freq_count ++; 4088 radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2); 4089 if(radiusIndex>=maxRangeRadius) 4090 radiusIndex = maxRangeRadius - 1; 4091 intervals[radiusIndex]++; 4092 4093 mean_diff = *data_pos - mean; 4094 if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius; 4095 else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; 4096 if(freq_index <= 0){ 4097 freq_intervals[0] ++; 4098 } 4099 else if(freq_index >= range){ 4100 freq_intervals[range - 1] ++; 4101 } 4102 else{ 4103 freq_intervals[freq_index] ++; 4104 } 4105 offset_count += sampleDistance; 4106 if(offset_count >= r2){ 4107 n1_count ++; 4108 offset_count_2 = n1_count % sampleDistance; 4109 data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2); 4110 offset_count = (sampleDistance - offset_count_2); 4111 if(offset_count == 0) offset_count ++; 4112 } 4113 else data_pos += sampleDistance; 4114 sample_count ++; 4115 } 4116 *max_freq = freq_count * 1.0/ sample_count; 4117 4118 //compute the appropriate number 4119 size_t targetCount = sample_count*predThreshold; 4120 size_t sum = 0; 4121 for(i=0;i<maxRangeRadius;i++) 4122 { 4123 sum += intervals[i]; 4124 if(sum>targetCount) 4125 break; 4126 } 4127 if(i>=maxRangeRadius) 4128 i = maxRangeRadius-1; 4129 unsigned int accIntervals = 2*(i+1); 4130 unsigned int powerOf2 = roundUpToPowerOf2(accIntervals); 4131 4132 if(powerOf2<32) 4133 powerOf2 = 32; 4134 4135 // collect frequency 4136 size_t max_sum = 0; 4137 size_t max_index = 0; 4138 size_t tmp_sum; 4139 size_t * freq_pos = freq_intervals + 1; 4140 for(size_t i=1; i<range-2; i++){ 4141 tmp_sum = freq_pos[0] + freq_pos[1]; 4142 if(tmp_sum > max_sum){ 4143 max_sum = tmp_sum; 4144 max_index = i; 4145 } 4146 freq_pos ++; 4147 } 4148 *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius); 4149 *mean_freq = max_sum * 1.0 / sample_count; 4150 4151 free(freq_intervals); 4152 
free(intervals); 4153 return powerOf2; 4154 } 4155 4156 // 2D: modified for higher performance 4157 #define MIN(a, b) a<b? a : b 4158 unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ 4159 4160 unsigned int quantization_intervals; 4161 float sz_sample_correct_freq = -1;//0.5; //-1 4162 float dense_pos; 4163 float mean_flush_freq; 4164 unsigned char use_mean = 0; 4165 4166 if(exe_params->optQuantMode==1) 4167 { 4168 quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 4169 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 4170 updateQuantizationInfo(quantization_intervals); 4171 } 4172 else{ 4173 quantization_intervals = exe_params->intvCapacity; 4174 } 4175 4176 // calculate block dims 4177 size_t num_x, num_y; 4178 size_t block_size = 16; 4179 4180 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 4181 SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 4182 4183 size_t split_index_x, split_index_y; 4184 size_t early_blockcount_x, early_blockcount_y; 4185 size_t late_blockcount_x, late_blockcount_y; 4186 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 4187 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 4188 4189 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y; 4190 size_t num_blocks = num_x * num_y; 4191 size_t num_elements = r1 * r2; 4192 4193 size_t dim0_offset = r2; 4194 4195 int * result_type = (int *) malloc(num_elements * sizeof(int)); 4196 size_t unpred_data_max_size = max_num_block_elements; 4197 float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); 4198 size_t total_unpred = 0; 4199 size_t unpredictable_count; 4200 float * data_pos = oriData; 
4201 int * type = result_type; 4202 size_t offset_x, offset_y; 4203 size_t current_blockcount_x, current_blockcount_y; 4204 4205 float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); 4206 float * reg_params_pos = reg_params; 4207 // move regression part out 4208 size_t params_offset_b = num_blocks; 4209 size_t params_offset_c = 2*num_blocks; 4210 for(size_t i=0; i<num_x; i++){ 4211 for(size_t j=0; j<num_y; j++){ 4212 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4213 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4214 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4215 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4216 4217 data_pos = oriData + offset_x * dim0_offset + offset_y; 4218 4219 { 4220 float * cur_data_pos = data_pos; 4221 float fx = 0.0; 4222 float fy = 0.0; 4223 float f = 0; 4224 double sum_x; 4225 float curData; 4226 for(size_t i=0; i<current_blockcount_x; i++){ 4227 sum_x = 0; 4228 for(size_t j=0; j<current_blockcount_y; j++){ 4229 curData = *cur_data_pos; 4230 sum_x += curData; 4231 fy += curData * j; 4232 cur_data_pos ++; 4233 } 4234 fx += sum_x * i; 4235 f += sum_x; 4236 cur_data_pos += dim0_offset - current_blockcount_y; 4237 } 4238 float coeff = 1.0 / (current_blockcount_x * current_blockcount_y); 4239 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 4240 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 4241 reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2); 4242 } 4243 4244 reg_params_pos ++; 4245 } 4246 } 4247 4248 //Compress coefficient arrays 4249 double precision_a, precision_b, precision_c; 4250 float 
rel_param_err = 0.15/3; 4251 precision_a = rel_param_err * realPrecision / late_blockcount_x; 4252 precision_b = rel_param_err * realPrecision / late_blockcount_y; 4253 precision_c = rel_param_err * realPrecision; 4254 4255 float mean = 0; 4256 use_mean = 0; 4257 if(use_mean){ 4258 // compute mean 4259 double sum = 0.0; 4260 size_t mean_count = 0; 4261 for(size_t i=0; i<num_elements; i++){ 4262 if(fabs(oriData[i] - dense_pos) < realPrecision){ 4263 sum += oriData[i]; 4264 mean_count ++; 4265 } 4266 } 4267 if(mean_count > 0) mean = sum / mean_count; 4268 } 4269 4270 4271 double tmp_realPrecision = realPrecision; 4272 4273 // use two prediction buffers for higher performance 4274 float * unpredictable_data = result_unpredictable_data; 4275 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 4276 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 4277 size_t reg_count = 0; 4278 size_t strip_dim_0 = early_blockcount_x + 1; 4279 size_t strip_dim_1 = r2 + 1; 4280 size_t strip_dim0_offset = strip_dim_1; 4281 unsigned char * indicator_pos = indicator; 4282 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); 4283 float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); 4284 memset(prediction_buffer_1, 0, prediction_buffer_size); 4285 float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size); 4286 memset(prediction_buffer_2, 0, prediction_buffer_size); 4287 float * cur_pb_buf = prediction_buffer_1; 4288 float * next_pb_buf = prediction_buffer_2; 4289 float * cur_pb_buf_pos; 4290 float * next_pb_buf_pos; 4291 int intvCapacity = exe_params->intvCapacity; 4292 int intvRadius = exe_params->intvRadius; 4293 int use_reg = 0; 4294 4295 reg_params_pos = reg_params; 4296 // compress the regression coefficients on the fly 4297 float last_coeffcients[3] = {0.0}; 4298 int coeff_intvCapacity_sz = 65536; 4299 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 4300 int * coeff_type[3]; 4301 
int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); 4302 float * coeff_unpred_data[3]; 4303 float * coeff_unpredictable_data = (float *) malloc(num_blocks*3*sizeof(float)); 4304 double precision[3]; 4305 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; 4306 for(int i=0; i<3; i++){ 4307 coeff_type[i] = coeff_result_type + i * num_blocks; 4308 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 4309 } 4310 int coeff_index = 0; 4311 unsigned int coeff_unpredictable_count[3] = {0}; 4312 if(use_mean){ 4313 type = result_type; 4314 int intvCapacity_sz = intvCapacity - 2; 4315 for(size_t i=0; i<num_x; i++){ 4316 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4317 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4318 data_pos = oriData + offset_x * dim0_offset; 4319 4320 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 4321 next_pb_buf_pos = next_pb_buf + 1; 4322 float * pb_pos = cur_pb_buf_pos; 4323 float * next_pb_pos = next_pb_buf_pos; 4324 4325 for(size_t j=0; j<num_y; j++){ 4326 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4327 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 4328 4329 /*sampling: decide which predictor to use (regression or lorenzo)*/ 4330 { 4331 float * cur_data_pos; 4332 float curData; 4333 float pred_reg, pred_sz; 4334 float err_sz = 0.0, err_reg = 0.0; 4335 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 4336 // [1, 9] [3, 7] [7, 3] [9, 1] 4337 int count = 0; 4338 for(int i=1; i<current_blockcount_x; i+=2){ 4339 cur_data_pos = data_pos + i * dim0_offset + i; 4340 curData = *cur_data_pos; 4341 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4342 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 4343 4344 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 4345 4346 err_reg += fabs(pred_reg - curData); 4347 4348 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 4349 curData = *cur_data_pos; 4350 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4351 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 4352 err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData)); 4353 4354 err_reg += fabs(pred_reg - curData); 4355 4356 count += 2; 4357 } 4358 4359 use_reg = (err_reg < err_sz); 4360 } 4361 if(use_reg) 4362 { 4363 { 4364 /*predict coefficients in current block via previous reg_block*/ 4365 float cur_coeff; 4366 double diff, itvNum; 4367 for(int e=0; e<3; e++){ 4368 cur_coeff = reg_params_pos[e*num_blocks]; 4369 diff = cur_coeff - last_coeffcients[e]; 4370 itvNum = fabs(diff)/precision[e] + 1; 4371 if (itvNum < coeff_intvCapacity_sz){ 4372 if (diff < 0) itvNum = -itvNum; 4373 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4374 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4375 //ganrantee comporession error against the case of 
machine-epsilon 4376 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4377 coeff_type[e][coeff_index] = 0; 4378 last_coeffcients[e] = cur_coeff; 4379 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4380 } 4381 } 4382 else{ 4383 coeff_type[e][coeff_index] = 0; 4384 last_coeffcients[e] = cur_coeff; 4385 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4386 } 4387 } 4388 coeff_index ++; 4389 } 4390 float curData; 4391 float pred; 4392 double itvNum; 4393 double diff; 4394 size_t index = 0; 4395 size_t block_unpredictable_count = 0; 4396 float * cur_data_pos = data_pos; 4397 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4398 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4399 curData = *cur_data_pos; 4400 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4401 diff = curData - pred; 4402 itvNum = fabs(diff)/realPrecision + 1; 4403 if (itvNum < intvCapacity){ 4404 if (diff < 0) itvNum = -itvNum; 4405 type[index] = (int) (itvNum/2) + intvRadius; 4406 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4407 //ganrantee comporession error against the case of machine-epsilon 4408 if(fabs(curData - pred)>realPrecision){ 4409 type[index] = 0; 4410 pred = curData; 4411 unpredictable_data[block_unpredictable_count ++] = curData; 4412 } 4413 } 4414 else{ 4415 type[index] = 0; 4416 pred = curData; 4417 unpredictable_data[block_unpredictable_count ++] = curData; 4418 } 4419 index ++; 4420 cur_data_pos ++; 4421 } 4422 /*dealing with the last jj (boundary)*/ 4423 { 4424 size_t jj = current_blockcount_y - 1; 4425 curData = *cur_data_pos; 4426 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4427 diff = curData - pred; 4428 itvNum = fabs(diff)/realPrecision + 1; 4429 if (itvNum < intvCapacity){ 4430 if (diff < 0) itvNum = -itvNum; 4431 type[index] = (int) (itvNum/2) + intvRadius; 4432 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4433 
//ganrantee comporession error against the case of machine-epsilon 4434 if(fabs(curData - pred)>realPrecision){ 4435 type[index] = 0; 4436 pred = curData; 4437 unpredictable_data[block_unpredictable_count ++] = curData; 4438 } 4439 } 4440 else{ 4441 type[index] = 0; 4442 pred = curData; 4443 unpredictable_data[block_unpredictable_count ++] = curData; 4444 } 4445 4446 // assign value to block surfaces 4447 pb_pos[ii * strip_dim0_offset + jj] = pred; 4448 index ++; 4449 cur_data_pos ++; 4450 } 4451 cur_data_pos += dim0_offset - current_blockcount_y; 4452 } 4453 /*dealing with the last ii (boundary)*/ 4454 { 4455 size_t ii = current_blockcount_x - 1; 4456 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4457 curData = *cur_data_pos; 4458 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4459 diff = curData - pred; 4460 itvNum = fabs(diff)/realPrecision + 1; 4461 if (itvNum < intvCapacity){ 4462 if (diff < 0) itvNum = -itvNum; 4463 type[index] = (int) (itvNum/2) + intvRadius; 4464 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4465 //ganrantee comporession error against the case of machine-epsilon 4466 if(fabs(curData - pred)>realPrecision){ 4467 type[index] = 0; 4468 pred = curData; 4469 unpredictable_data[block_unpredictable_count ++] = curData; 4470 } 4471 } 4472 else{ 4473 type[index] = 0; 4474 pred = curData; 4475 unpredictable_data[block_unpredictable_count ++] = curData; 4476 } 4477 // assign value to next prediction buffer 4478 next_pb_pos[jj] = pred; 4479 index ++; 4480 cur_data_pos ++; 4481 } 4482 /*dealing with the last jj (boundary)*/ 4483 { 4484 size_t jj = current_blockcount_y - 1; 4485 curData = *cur_data_pos; 4486 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4487 diff = curData - pred; 4488 itvNum = fabs(diff)/realPrecision + 1; 4489 if (itvNum < intvCapacity){ 4490 if (diff < 0) itvNum = -itvNum; 4491 type[index] = (int) (itvNum/2) + intvRadius; 4492 pred = pred + 2 
* (type[index] - intvRadius) * realPrecision; 4493 //ganrantee comporession error against the case of machine-epsilon 4494 if(fabs(curData - pred)>realPrecision){ 4495 type[index] = 0; 4496 pred = curData; 4497 unpredictable_data[block_unpredictable_count ++] = curData; 4498 } 4499 } 4500 else{ 4501 type[index] = 0; 4502 pred = curData; 4503 unpredictable_data[block_unpredictable_count ++] = curData; 4504 } 4505 4506 // assign value to block surfaces 4507 pb_pos[ii * strip_dim0_offset + jj] = pred; 4508 // assign value to next prediction buffer 4509 next_pb_pos[jj] = pred; 4510 4511 index ++; 4512 cur_data_pos ++; 4513 } 4514 } // end ii == -1 4515 unpredictable_count = block_unpredictable_count; 4516 total_unpred += unpredictable_count; 4517 unpredictable_data += unpredictable_count; 4518 reg_count ++; 4519 }// end use_reg 4520 else{ 4521 // use SZ 4522 // SZ predication 4523 unpredictable_count = 0; 4524 float * cur_pb_pos = pb_pos; 4525 float * cur_data_pos = data_pos; 4526 float curData; 4527 float pred2D; 4528 double itvNum, diff; 4529 size_t index = 0; 4530 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4531 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4532 curData = *cur_data_pos; 4533 if(fabs(curData - mean) <= realPrecision){ 4534 // adjust type[index] to intvRadius for coherence with freq in reg 4535 type[index] = intvRadius; 4536 *cur_pb_pos = mean; 4537 } 4538 else 4539 { 4540 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4541 diff = curData - pred2D; 4542 itvNum = fabs(diff)/realPrecision + 1; 4543 if (itvNum < intvCapacity_sz){ 4544 if (diff < 0) itvNum = -itvNum; 4545 type[index] = (int) (itvNum/2) + intvRadius; 4546 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4547 if(type[index] <= intvRadius) type[index] -= 1; 4548 //ganrantee comporession error against the case of machine-epsilon 4549 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4550 type[index] = 0; 
4551 *cur_pb_pos = curData; 4552 unpredictable_data[unpredictable_count ++] = curData; 4553 } 4554 } 4555 else{ 4556 type[index] = 0; 4557 *cur_pb_pos = curData; 4558 unpredictable_data[unpredictable_count ++] = curData; 4559 } 4560 } 4561 index ++; 4562 cur_pb_pos ++; 4563 cur_data_pos ++; 4564 } 4565 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 4566 cur_data_pos += dim0_offset - current_blockcount_y; 4567 } 4568 /*dealing with the last ii (boundary)*/ 4569 { 4570 // ii == current_blockcount_x - 1 4571 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4572 curData = *cur_data_pos; 4573 if(fabs(curData - mean) <= realPrecision){ 4574 // adjust type[index] to intvRadius for coherence with freq in reg 4575 type[index] = intvRadius; 4576 *cur_pb_pos = mean; 4577 } 4578 else 4579 { 4580 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4581 diff = curData - pred2D; 4582 itvNum = fabs(diff)/realPrecision + 1; 4583 if (itvNum < intvCapacity_sz){ 4584 if (diff < 0) itvNum = -itvNum; 4585 type[index] = (int) (itvNum/2) + intvRadius; 4586 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4587 if(type[index] <= intvRadius) type[index] -= 1; 4588 //ganrantee comporession error against the case of machine-epsilon 4589 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4590 type[index] = 0; 4591 *cur_pb_pos = curData; 4592 unpredictable_data[unpredictable_count ++] = curData; 4593 } 4594 } 4595 else{ 4596 type[index] = 0; 4597 *cur_pb_pos = curData; 4598 unpredictable_data[unpredictable_count ++] = curData; 4599 } 4600 } 4601 next_pb_pos[jj] = *cur_pb_pos; 4602 index ++; 4603 cur_pb_pos ++; 4604 cur_data_pos ++; 4605 } 4606 } 4607 total_unpred += unpredictable_count; 4608 unpredictable_data += unpredictable_count; 4609 // change indicator 4610 indicator_pos[j] = 1; 4611 }// end SZ 4612 reg_params_pos ++; 4613 data_pos += current_blockcount_y; 4614 pb_pos += current_blockcount_y; 4615 next_pb_pos += 
current_blockcount_y; 4616 type += current_blockcount_x * current_blockcount_y; 4617 }// end j 4618 indicator_pos += num_y; 4619 float * tmp; 4620 tmp = cur_pb_buf; 4621 cur_pb_buf = next_pb_buf; 4622 next_pb_buf = tmp; 4623 }// end i 4624 }// end use mean 4625 else{ 4626 type = result_type; 4627 int intvCapacity_sz = intvCapacity - 2; 4628 for(size_t i=0; i<num_x; i++){ 4629 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4630 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4631 data_pos = oriData + offset_x * dim0_offset; 4632 4633 cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1; 4634 next_pb_buf_pos = next_pb_buf + 1; 4635 float * pb_pos = cur_pb_buf_pos; 4636 float * next_pb_pos = next_pb_buf_pos; 4637 4638 for(size_t j=0; j<num_y; j++){ 4639 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4640 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4641 /*sampling*/ 4642 { 4643 // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i] 4644 float * cur_data_pos; 4645 float curData; 4646 float pred_reg, pred_sz; 4647 float err_sz = 0.0, err_reg = 0.0; 4648 // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9] 4649 // [1, 9] [3, 7] [7, 3] [9, 1] 4650 int count = 0; 4651 for(int i=1; i<current_blockcount_x; i+=2){ 4652 cur_data_pos = data_pos + i * dim0_offset + i; 4653 curData = *cur_data_pos; 4654 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4655 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c]; 4656 err_sz += fabs(pred_sz - curData); 4657 err_reg += fabs(pred_reg - curData); 4658 4659 cur_data_pos = data_pos + i * dim0_offset + (block_size - i); 4660 curData = *cur_data_pos; 4661 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1]; 4662 pred_reg = reg_params_pos[0] * i + 
reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c]; 4663 err_sz += fabs(pred_sz - curData); 4664 err_reg += fabs(pred_reg - curData); 4665 4666 count += 2; 4667 } 4668 err_sz += realPrecision * count * 0.81; 4669 use_reg = (err_reg < err_sz); 4670 4671 } 4672 if(use_reg) 4673 { 4674 { 4675 /*predict coefficients in current block via previous reg_block*/ 4676 float cur_coeff; 4677 double diff, itvNum; 4678 for(int e=0; e<3; e++){ 4679 cur_coeff = reg_params_pos[e*num_blocks]; 4680 diff = cur_coeff - last_coeffcients[e]; 4681 itvNum = fabs(diff)/precision[e] + 1; 4682 if (itvNum < coeff_intvCapacity_sz){ 4683 if (diff < 0) itvNum = -itvNum; 4684 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 4685 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 4686 //ganrantee comporession error against the case of machine-epsilon 4687 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 4688 coeff_type[e][coeff_index] = 0; 4689 last_coeffcients[e] = cur_coeff; 4690 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4691 } 4692 } 4693 else{ 4694 coeff_type[e][coeff_index] = 0; 4695 last_coeffcients[e] = cur_coeff; 4696 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 4697 } 4698 } 4699 coeff_index ++; 4700 } 4701 float curData; 4702 float pred; 4703 double itvNum; 4704 double diff; 4705 size_t index = 0; 4706 size_t block_unpredictable_count = 0; 4707 float * cur_data_pos = data_pos; 4708 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4709 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4710 curData = *cur_data_pos; 4711 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4712 diff = curData - pred; 4713 itvNum = fabs(diff)/realPrecision + 1; 4714 if (itvNum < intvCapacity){ 4715 if (diff < 0) itvNum = -itvNum; 4716 type[index] = (int) (itvNum/2) + intvRadius; 4717 pred = pred + 2 * 
(type[index] - intvRadius) * realPrecision; 4718 //ganrantee comporession error against the case of machine-epsilon 4719 if(fabs(curData - pred)>realPrecision){ 4720 type[index] = 0; 4721 pred = curData; 4722 unpredictable_data[block_unpredictable_count ++] = curData; 4723 } 4724 } 4725 else{ 4726 type[index] = 0; 4727 pred = curData; 4728 unpredictable_data[block_unpredictable_count ++] = curData; 4729 } 4730 index ++; 4731 cur_data_pos ++; 4732 } 4733 /*dealing with the last jj (boundary)*/ 4734 { 4735 // jj == current_blockcount_y - 1 4736 size_t jj = current_blockcount_y - 1; 4737 curData = *cur_data_pos; 4738 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4739 diff = curData - pred; 4740 itvNum = fabs(diff)/realPrecision + 1; 4741 if (itvNum < intvCapacity){ 4742 if (diff < 0) itvNum = -itvNum; 4743 type[index] = (int) (itvNum/2) + intvRadius; 4744 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4745 //ganrantee comporession error against the case of machine-epsilon 4746 if(fabs(curData - pred)>realPrecision){ 4747 type[index] = 0; 4748 pred = curData; 4749 unpredictable_data[block_unpredictable_count ++] = curData; 4750 } 4751 } 4752 else{ 4753 type[index] = 0; 4754 pred = curData; 4755 unpredictable_data[block_unpredictable_count ++] = curData; 4756 } 4757 4758 // assign value to block surfaces 4759 pb_pos[ii * strip_dim0_offset + jj] = pred; 4760 index ++; 4761 cur_data_pos ++; 4762 } 4763 cur_data_pos += dim0_offset - current_blockcount_y; 4764 } 4765 /*dealing with the last ii (boundary)*/ 4766 { 4767 size_t ii = current_blockcount_x - 1; 4768 for(size_t jj=0; jj<current_blockcount_y - 1; jj++){ 4769 curData = *cur_data_pos; 4770 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4771 diff = curData - pred; 4772 itvNum = fabs(diff)/realPrecision + 1; 4773 if (itvNum < intvCapacity){ 4774 if (diff < 0) itvNum = -itvNum; 4775 type[index] = (int) (itvNum/2) + intvRadius; 4776 
pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4777 //ganrantee comporession error against the case of machine-epsilon 4778 if(fabs(curData - pred)>realPrecision){ 4779 type[index] = 0; 4780 pred = curData; 4781 unpredictable_data[block_unpredictable_count ++] = curData; 4782 } 4783 } 4784 else{ 4785 type[index] = 0; 4786 pred = curData; 4787 unpredictable_data[block_unpredictable_count ++] = curData; 4788 } 4789 // assign value to next prediction buffer 4790 next_pb_pos[jj] = pred; 4791 index ++; 4792 cur_data_pos ++; 4793 } 4794 /*dealing with the last jj (boundary)*/ 4795 { 4796 // jj == current_blockcount_y - 1 4797 size_t jj = current_blockcount_y - 1; 4798 curData = *cur_data_pos; 4799 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; 4800 diff = curData - pred; 4801 itvNum = fabs(diff)/realPrecision + 1; 4802 if (itvNum < intvCapacity){ 4803 if (diff < 0) itvNum = -itvNum; 4804 type[index] = (int) (itvNum/2) + intvRadius; 4805 pred = pred + 2 * (type[index] - intvRadius) * realPrecision; 4806 //ganrantee comporession error against the case of machine-epsilon 4807 if(fabs(curData - pred)>realPrecision){ 4808 type[index] = 0; 4809 pred = curData; 4810 unpredictable_data[block_unpredictable_count ++] = curData; 4811 } 4812 } 4813 else{ 4814 type[index] = 0; 4815 pred = curData; 4816 unpredictable_data[block_unpredictable_count ++] = curData; 4817 } 4818 4819 // assign value to block surfaces 4820 pb_pos[ii * strip_dim0_offset + jj] = pred; 4821 // assign value to next prediction buffer 4822 next_pb_pos[jj] = pred; 4823 4824 index ++; 4825 cur_data_pos ++; 4826 } 4827 } // end ii == -1 4828 unpredictable_count = block_unpredictable_count; 4829 total_unpred += unpredictable_count; 4830 unpredictable_data += unpredictable_count; 4831 reg_count ++; 4832 }// end use_reg 4833 else{ 4834 // use SZ 4835 // SZ predication 4836 unpredictable_count = 0; 4837 float * cur_pb_pos = pb_pos; 4838 float * cur_data_pos = data_pos; 
4839 float curData; 4840 float pred2D; 4841 double itvNum, diff; 4842 size_t index = 0; 4843 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 4844 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4845 curData = *cur_data_pos; 4846 4847 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4848 diff = curData - pred2D; 4849 itvNum = fabs(diff)/realPrecision + 1; 4850 if (itvNum < intvCapacity_sz){ 4851 if (diff < 0) itvNum = -itvNum; 4852 type[index] = (int) (itvNum/2) + intvRadius; 4853 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4854 //ganrantee comporession error against the case of machine-epsilon 4855 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4856 type[index] = 0; 4857 *cur_pb_pos = curData; 4858 unpredictable_data[unpredictable_count ++] = curData; 4859 } 4860 } 4861 else{ 4862 type[index] = 0; 4863 *cur_pb_pos = curData; 4864 unpredictable_data[unpredictable_count ++] = curData; 4865 } 4866 4867 index ++; 4868 cur_pb_pos ++; 4869 cur_data_pos ++; 4870 } 4871 cur_pb_pos += strip_dim0_offset - current_blockcount_y; 4872 cur_data_pos += dim0_offset - current_blockcount_y; 4873 } 4874 /*dealing with the last ii (boundary)*/ 4875 { 4876 // ii == current_blockcount_x - 1 4877 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4878 curData = *cur_data_pos; 4879 4880 pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; 4881 diff = curData - pred2D; 4882 itvNum = fabs(diff)/realPrecision + 1; 4883 if (itvNum < intvCapacity_sz){ 4884 if (diff < 0) itvNum = -itvNum; 4885 type[index] = (int) (itvNum/2) + intvRadius; 4886 *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 4887 //ganrantee comporession error against the case of machine-epsilon 4888 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 4889 type[index] = 0; 4890 *cur_pb_pos = curData; 4891 unpredictable_data[unpredictable_count ++] = curData; 4892 } 4893 } 4894 
else{ 4895 type[index] = 0; 4896 *cur_pb_pos = curData; 4897 unpredictable_data[unpredictable_count ++] = curData; 4898 } 4899 next_pb_pos[jj] = *cur_pb_pos; 4900 index ++; 4901 cur_pb_pos ++; 4902 cur_data_pos ++; 4903 } 4904 } 4905 total_unpred += unpredictable_count; 4906 unpredictable_data += unpredictable_count; 4907 // change indicator 4908 indicator_pos[j] = 1; 4909 }// end SZ 4910 reg_params_pos ++; 4911 data_pos += current_blockcount_y; 4912 pb_pos += current_blockcount_y; 4913 next_pb_pos += current_blockcount_y; 4914 type += current_blockcount_x * current_blockcount_y; 4915 }// end j 4916 indicator_pos += num_y; 4917 float * tmp; 4918 tmp = cur_pb_buf; 4919 cur_pb_buf = next_pb_buf; 4920 next_pb_buf = tmp; 4921 }// end i 4922 } 4923 free(prediction_buffer_1); 4924 free(prediction_buffer_2); 4925 4926 int stateNum = 2*quantization_intervals; 4927 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4928 4929 size_t nodeCount = 0; 4930 size_t i = 0; 4931 init(huffmanTree, result_type, num_elements); 4932 for (i = 0; i < stateNum; i++) 4933 if (huffmanTree->code[i]) nodeCount++; 4934 nodeCount = nodeCount*2-1; 4935 4936 unsigned char *treeBytes; 4937 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4938 4939 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 4940 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 4941 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); 4942 unsigned char * result_pos = result; 4943 initRandomAccessBytes(result_pos); 4944 result_pos += meta_data_offset; 4945 4946 sizeToBytes(result_pos, num_elements); 4947 
result_pos += exe_params->SZ_SIZE_TYPE; 4948 4949 intToBytes_bigEndian(result_pos, block_size); 4950 result_pos += sizeof(int); 4951 doubleToBytes(result_pos, realPrecision); 4952 result_pos += sizeof(double); 4953 intToBytes_bigEndian(result_pos, quantization_intervals); 4954 result_pos += sizeof(int); 4955 intToBytes_bigEndian(result_pos, treeByteSize); 4956 result_pos += sizeof(int); 4957 intToBytes_bigEndian(result_pos, nodeCount); 4958 result_pos += sizeof(int); 4959 memcpy(result_pos, treeBytes, treeByteSize); 4960 result_pos += treeByteSize; 4961 free(treeBytes); 4962 4963 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 4964 result_pos += sizeof(unsigned char); 4965 memcpy(result_pos, &mean, sizeof(float)); 4966 result_pos += sizeof(float); 4967 4968 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 4969 result_pos += indicator_size; 4970 4971 //convert the lead/mid/resi to byte stream 4972 if(reg_count>0){ 4973 for(int e=0; e<3; e++){ 4974 int stateNum = 2*coeff_intvCapacity_sz; 4975 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 4976 size_t nodeCount = 0; 4977 init(huffmanTree, coeff_type[e], reg_count); 4978 size_t i = 0; 4979 for (i = 0; i < huffmanTree->stateNum; i++) 4980 if (huffmanTree->code[i]) nodeCount++; 4981 nodeCount = nodeCount*2-1; 4982 unsigned char *treeBytes; 4983 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 4984 doubleToBytes(result_pos, precision[e]); 4985 result_pos += sizeof(double); 4986 intToBytes_bigEndian(result_pos, coeff_intvRadius); 4987 result_pos += sizeof(int); 4988 intToBytes_bigEndian(result_pos, treeByteSize); 4989 result_pos += sizeof(int); 4990 intToBytes_bigEndian(result_pos, nodeCount); 4991 result_pos += sizeof(int); 4992 memcpy(result_pos, treeBytes, treeByteSize); 4993 result_pos += treeByteSize; 4994 free(treeBytes); 4995 size_t typeArray_size = 0; 4996 encode(huffmanTree, coeff_type[e], 
reg_count, result_pos + sizeof(size_t), &typeArray_size); 4997 sizeToBytes(result_pos, typeArray_size); 4998 result_pos += sizeof(size_t) + typeArray_size; 4999 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 5000 result_pos += sizeof(int); 5001 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); 5002 result_pos += coeff_unpredictable_count[e]*sizeof(float); 5003 SZ_ReleaseHuffman(huffmanTree); 5004 } 5005 } 5006 free(coeff_result_type); 5007 free(coeff_unpredictable_data); 5008 5009 //record the number of unpredictable data and also store them 5010 memcpy(result_pos, &total_unpred, sizeof(size_t)); 5011 result_pos += sizeof(size_t); 5012 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); 5013 result_pos += total_unpred * sizeof(float); 5014 size_t typeArray_size = 0; 5015 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 5016 result_pos += typeArray_size; 5017 5018 size_t totalEncodeSize = result_pos - result; 5019 free(indicator); 5020 free(result_unpredictable_data); 5021 free(result_type); 5022 free(reg_params); 5023 5024 SZ_ReleaseHuffman(huffmanTree); 5025 *comp_size = totalEncodeSize; 5026 5027 return result; 5028 } 5029

/**
 * Sample a 3D float array to choose a quantization interval count and to
 * locate the densest value range around the sampled mean.
 *
 * The array is addressed as oriData[i*r2*r3 + j*r3 + k]. Two passes are made:
 *
 *   1. A sparse pass with stride (size_t)sqrt(len) that averages the visited
 *      values. The stride is shortened by one element each time the running
 *      offset reaches r3, and again each time it reaches r2*r3.
 *   2. A pass driven by confparams_cpr->sampleDistance. For each visited point
 *      it (a) evaluates the 7-neighbour predictor
 *      d[-1] + d[-r3] + d[-r23] - d[-1-r23] - d[-r3-1] - d[-r3-r23] + d[-r3-r23-1]
 *      and records the error in a histogram indexed by
 *      (err/realPrecision + 1)/2, clamped to maxRangeRadius-1, and
 *      (b) records the point's distance from the pass-1 mean in an 8192-bin
 *      histogram with bin width realPrecision, clamped at both ends.
 *
 * @param oriData        input values (r1*r2*r3 floats); only read
 * @param r1             slowest-varying dimension
 * @param r2             middle dimension
 * @param r3             fastest-varying dimension
 * @param realPrecision  absolute error bound; used as the histogram bin width
 * @param dense_pos      [out] mean + realPrecision * (max_index + 1 - 4096),
 *                       where max_index is the first bin of the fullest pair
 *                       of adjacent bins (the two clamp bins are excluded)
 * @param max_freq       [out] fraction of samples whose prediction error is
 *                       below realPrecision; 0 if nothing was sampled
 * @param mean_freq      [out] fraction of samples that fall in that fullest
 *                       pair of bins; 0 if nothing was sampled
 * @return 2*(i+1) passed through roundUpToPowerOf2() and raised to at least 32,
 *         where i is the first error-histogram index at which the cumulative
 *         count exceeds sample_count * confparams_cpr->predThreshold (or
 *         maxRangeRadius-1 if that never happens).
 *
 * NOTE(review): allocation failures are not detected here, in line with the
 * surrounding code; there is no error channel in the return value.
 * NOTE(review): sampleDistance is presumably expected to be >= 3 so that every
 * visited point has all seven predecessors inside the array -- confirm against
 * the configuration defaults.
 */
unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq)
{
    const size_t r23 = r2 * r3;
    const size_t len = r1 * r2 * r3;

    /* ---- Pass 1: sparse estimate of the mean ---- */
    float mean = 0.0;
    size_t mean_count = 0;
    {
        const size_t mean_distance = (size_t) sqrt((double) len);
        size_t row_run = 0;    /* elements advanced since the last r3 back-off */
        size_t plane_run = 0;  /* elements advanced since the last r2*r3 back-off */
        size_t pos = 0;

        while (pos < len) {
            mean += oriData[pos];
            mean_count++;

            row_run += mean_distance;
            plane_run += mean_distance;

            size_t backoff = 0;
            if (row_run >= r3) {
                row_run = 0;
                backoff++;
            }
            if (plane_run >= r23) {
                plane_run = 0;
                backoff++;
            }

            /* The stride minus the back-offs can be zero or negative when the
             * stride is 1 or 2 (very small arrays). Always move forward by at
             * least one element so the loop terminates and never steps before
             * the start of the array. */
            pos += (mean_distance > backoff) ? (mean_distance - backoff) : 1;
        }
    }
    if (mean_count > 0) {
        mean /= mean_count;
    }

    /* ---- Histograms ---- */
    const size_t range = 8192;   /* number of bins around the mean */
    const size_t radius = 4096;  /* bin that corresponds to the mean itself */
    size_t *freq_intervals = (size_t *) calloc(range, sizeof(size_t));

    const unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
    const int sampleDistance = confparams_cpr->sampleDistance;
    const float predThreshold = confparams_cpr->predThreshold;

    size_t *intervals = (size_t *) calloc(maxRangeRadius, sizeof(size_t));

    /* ---- Pass 2: prediction-error and value-density sampling ---- */
    size_t freq_count = 0;
    size_t sample_count = 0;

    size_t offset_count = confparams_cpr->sampleDistance - 2; /* offset along r3 */
    size_t pos = r23 + r3 + offset_count;
    size_t n1_count = 1, n2_count = 1; /* plane / row counters used to stagger samples */

    while (pos < len) {
        const float cur = oriData[pos];

        float pred_value = oriData[pos - 1] + oriData[pos - r3] + oriData[pos - r23]
                         - oriData[pos - 1 - r23] - oriData[pos - r3 - 1] - oriData[pos - r3 - r23]
                         + oriData[pos - r3 - r23 - 1];
        float pred_err = fabs(pred_value - cur);
        if (pred_err < realPrecision) {
            freq_count++;
        }

        size_t radiusIndex = (pred_err / realPrecision + 1) / 2;
        if (radiusIndex >= maxRangeRadius) {
            radiusIndex = maxRangeRadius - 1;
        }
        intervals[radiusIndex]++;

        /* Bin the distance from the mean; non-positive distances are shifted
         * down by one bin so that truncation toward zero does not merge the
         * two bins on either side of the mean. */
        float mean_diff = cur - mean;
        ptrdiff_t freq_index;
        if (mean_diff > 0) {
            freq_index = (ptrdiff_t)(mean_diff / realPrecision) + radius;
        } else {
            freq_index = (ptrdiff_t)(mean_diff / realPrecision) - 1 + radius;
        }
        if (freq_index <= 0) {
            freq_intervals[0]++;
        } else if ((size_t) freq_index >= range) {
            freq_intervals[range - 1]++;
        } else {
            freq_intervals[freq_index]++;
        }

        /* Advance to the next sample position. */
        offset_count += sampleDistance;
        if (offset_count >= r3) {
            n2_count++;
            if (n2_count == r2) {
                n1_count++;
                n2_count = 1;
                pos += r3;
            }
            size_t offset_count_2 = (n1_count + n2_count) % sampleDistance;
            pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
            offset_count = (sampleDistance - offset_count_2);
            if (offset_count == 0) {
                offset_count++;
            }
        } else {
            pos += sampleDistance;
        }
        sample_count++;
    }

    /* With no samples the ratios would be 0/0; report 0 instead of NaN. */
    *max_freq = (sample_count > 0) ? (freq_count * 1.0 / sample_count) : 0;

    /* ---- Choose the interval count ---- */
    size_t targetCount = sample_count * predThreshold;
    size_t sum = 0;
    size_t i;
    for (i = 0; i < maxRangeRadius; i++) {
        sum += intervals[i];
        if (sum > targetCount) {
            break;
        }
    }
    if (i >= maxRangeRadius) {
        i = maxRangeRadius - 1;
    }
    unsigned int accIntervals = 2 * (i + 1);
    unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
    if (powerOf2 < 32) {
        powerOf2 = 32;
    }

    /* ---- Find the fullest pair of adjacent bins (clamp bins excluded) ---- */
    size_t max_sum = 0;
    size_t max_index = 0;
    for (size_t bin = 1; bin < range - 2; bin++) {
        size_t pair_sum = freq_intervals[bin] + freq_intervals[bin + 1];
        if (pair_sum > max_sum) {
            max_sum = pair_sum;
            max_index = bin;
        }
    }
    *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius);
    *mean_freq = (sample_count > 0) ? (max_sum * 1.0 / sample_count) : 0;

    free(freq_intervals);
    free(intervals);
    return powerOf2;
}

5158 5159 5160 // 3D: modified for higher performance 5161 unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ 5162 5163 #ifdef HAVE_TIMECMPR 5164 float* decData = NULL; 5165 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5166 decData = (float*)(multisteps->hist_data); 5167 #endif 5168 5169 unsigned int quantization_intervals; 5170 float sz_sample_correct_freq = -1;//0.5; //-1 5171 float dense_pos; 5172 float mean_flush_freq; 5173 unsigned char use_mean = 0; 5174 5175 // calculate block dims 5176 
size_t num_x, num_y, num_z; 5177 size_t block_size = 6; 5178 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 5179 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 5180 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); 5181 5182 size_t split_index_x, split_index_y, split_index_z; 5183 size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; 5184 size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; 5185 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 5186 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 5187 SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); 5188 5189 size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; 5190 size_t num_blocks = num_x * num_y * num_z; 5191 size_t num_elements = r1 * r2 * r3; 5192 5193 size_t dim0_offset = r2 * r3; 5194 size_t dim1_offset = r3; 5195 5196 int * result_type = (int *) malloc(num_elements * sizeof(int)); 5197 size_t unpred_data_max_size = max_num_block_elements; 5198 float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); 5199 size_t total_unpred = 0; 5200 size_t unpredictable_count; 5201 size_t max_unpred_count = 0; 5202 float * data_pos = oriData; 5203 int * type = result_type; 5204 size_t type_offset; 5205 size_t offset_x, offset_y, offset_z; 5206 size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; 5207 5208 float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); 5209 float * reg_params_pos = reg_params; 5210 // move regression part out 5211 size_t params_offset_b = num_blocks; 5212 size_t params_offset_c = 2*num_blocks; 5213 size_t params_offset_d = 3*num_blocks; 5214 for(size_t i=0; i<num_x; i++){ 5215 for(size_t j=0; j<num_y; j++){ 5216 for(size_t k=0; k<num_z; k++){ 5217 current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; 5218 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 5219 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 5220 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 5221 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 5222 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 5223 5224 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 5225 /*Calculate regression coefficients*/ 5226 { 5227 float * cur_data_pos = data_pos; 5228 float fx = 0.0; 5229 float fy = 0.0; 5230 float fz = 0.0; 5231 float f = 0; 5232 float sum_x, sum_y; 5233 float curData; 5234 for(size_t i=0; i<current_blockcount_x; i++){ 5235 sum_x = 0; 5236 for(size_t j=0; j<current_blockcount_y; j++){ 5237 sum_y = 0; 5238 for(size_t k=0; k<current_blockcount_z; k++){ 5239 curData = *cur_data_pos; 5240 // f += curData; 5241 // fx += curData * i; 5242 // fy += curData * j; 5243 // fz += curData * k; 5244 sum_y += curData; 5245 fz += curData * k; 5246 cur_data_pos ++; 5247 } 5248 fy += sum_y * j; 5249 sum_x += sum_y; 5250 cur_data_pos += dim1_offset - current_blockcount_z; 5251 } 5252 fx += sum_x * i; 5253 f += sum_x; 5254 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5255 } 5256 float coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z); 5257 reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1); 5258 reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1); 5259 reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1); 5260 reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 
2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2); 5261 } 5262 reg_params_pos ++; 5263 } 5264 } 5265 } 5266 5267 //Compress coefficient arrays 5268 double precision_a, precision_b, precision_c, precision_d; 5269 float rel_param_err = 0.025; 5270 precision_a = rel_param_err * realPrecision / late_blockcount_x; 5271 precision_b = rel_param_err * realPrecision / late_blockcount_y; 5272 precision_c = rel_param_err * realPrecision / late_blockcount_z; 5273 precision_d = rel_param_err * realPrecision; 5274 5275 if(exe_params->optQuantMode==1) 5276 { 5277 quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 5278 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 5279 updateQuantizationInfo(quantization_intervals); 5280 } 5281 else{ 5282 quantization_intervals = exe_params->intvCapacity; 5283 } 5284 5285 float mean = 0; 5286 if(use_mean){ 5287 // compute mean 5288 double sum = 0.0; 5289 size_t mean_count = 0; 5290 for(size_t i=0; i<num_elements; i++){ 5291 if(fabs(oriData[i] - dense_pos) < realPrecision){ 5292 sum += oriData[i]; 5293 mean_count ++; 5294 } 5295 } 5296 if(mean_count > 0) mean = sum / mean_count; 5297 } 5298 5299 double tmp_realPrecision = realPrecision; 5300 5301 // use two prediction buffers for higher performance 5302 float * unpredictable_data = result_unpredictable_data; 5303 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 5304 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 5305 size_t reg_count = 0; 5306 size_t strip_dim_0 = early_blockcount_x + 1; 5307 size_t strip_dim_1 = r2 + 1; 5308 size_t strip_dim_2 = r3 + 1; 5309 size_t strip_dim0_offset = strip_dim_1 * strip_dim_2; 5310 size_t strip_dim1_offset = strip_dim_2; 5311 unsigned char * indicator_pos = indicator; 5312 
5313 size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float); 5314 float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size); 5315 memset(prediction_buffer_1, 0, prediction_buffer_size); 5316 float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size); 5317 memset(prediction_buffer_2, 0, prediction_buffer_size); 5318 float * cur_pb_buf = prediction_buffer_1; 5319 float * next_pb_buf = prediction_buffer_2; 5320 float * cur_pb_buf_pos; 5321 float * next_pb_buf_pos; 5322 int intvCapacity = exe_params->intvCapacity; 5323 int intvRadius = exe_params->intvRadius; 5324 int use_reg = 0; 5325 float noise = realPrecision * 1.22; 5326 5327 reg_params_pos = reg_params; 5328 // compress the regression coefficients on the fly 5329 float last_coeffcients[4] = {0.0}; 5330 int coeff_intvCapacity_sz = 65536; 5331 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 5332 int * coeff_type[4]; 5333 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 5334 float * coeff_unpred_data[4]; 5335 float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); 5336 double precision[4]; 5337 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; 5338 for(int i=0; i<4; i++){ 5339 coeff_type[i] = coeff_result_type + i * num_blocks; 5340 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 5341 } 5342 int coeff_index = 0; 5343 unsigned int coeff_unpredictable_count[4] = {0}; 5344 5345 if(use_mean){ 5346 int intvCapacity_sz = intvCapacity - 2; 5347 for(size_t i=0; i<num_x; i++){ 5348 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 5349 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 5350 for(size_t j=0; j<num_y; j++){ 5351 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 5352 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 5353 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 5354 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 5355 type = result_type + type_offset; 5356 5357 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 5358 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 5359 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 5360 5361 size_t current_blockcount_z; 5362 float * pb_pos = cur_pb_buf_pos; 5363 float * next_pb_pos = next_pb_buf_pos; 5364 size_t strip_unpredictable_count = 0; 5365 for(size_t k=0; k<num_z; k++){ 5366 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 5367 #ifdef HAVE_TIMECMPR 5368 size_t offset_z = 0; 5369 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 5370 size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 5371 #endif 5372 /*sampling and decide which predictor*/ 5373 { 5374 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 5375 float * cur_data_pos; 5376 float curData; 5377 float pred_reg, pred_sz; 5378 float err_sz = 0.0, err_reg = 0.0; 5379 int bmi = 0; 5380 if(i>0 && j>0 && k>0){ 5381 for(int i=0; i<block_size; i++){ 5382 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5383 curData = *cur_data_pos; 5384 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5385 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5386 err_sz += MIN(fabs(pred_sz - curData) + 
noise, fabs(mean - curData)); 5387 err_reg += fabs(pred_reg - curData); 5388 5389 bmi = block_size - i; 5390 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5391 curData = *cur_data_pos; 5392 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5393 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5394 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5395 err_reg += fabs(pred_reg - curData); 5396 5397 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5398 curData = *cur_data_pos; 5399 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5400 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5401 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5402 err_reg += fabs(pred_reg - curData); 5403 5404 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5405 curData = *cur_data_pos; 5406 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5407 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5408 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5409 err_reg += fabs(pred_reg - curData); 5410 } 5411 } 5412 else{ 5413 for(int i=1; 
i<block_size; i++){ 5414 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5415 curData = *cur_data_pos; 5416 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5417 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5418 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5419 err_reg += fabs(pred_reg - curData); 5420 5421 bmi = block_size - i; 5422 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5423 curData = *cur_data_pos; 5424 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5425 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5426 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5427 err_reg += fabs(pred_reg - curData); 5428 5429 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5430 curData = *cur_data_pos; 5431 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5432 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5433 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5434 err_reg += fabs(pred_reg - curData); 5435 5436 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5437 curData = *cur_data_pos; 5438 
pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5439 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5440 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 5441 err_reg += fabs(pred_reg - curData); 5442 5443 } 5444 } 5445 use_reg = (err_reg < err_sz); 5446 } 5447 if(use_reg){ 5448 { 5449 /*predict coefficients in current block via previous reg_block*/ 5450 float cur_coeff; 5451 double diff, itvNum; 5452 for(int e=0; e<4; e++){ 5453 cur_coeff = reg_params_pos[e*num_blocks]; 5454 diff = cur_coeff - last_coeffcients[e]; 5455 itvNum = fabs(diff)/precision[e] + 1; 5456 if (itvNum < coeff_intvCapacity_sz){ 5457 if (diff < 0) itvNum = -itvNum; 5458 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 5459 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 5460 //ganrantee comporession error against the case of machine-epsilon 5461 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 5462 coeff_type[e][coeff_index] = 0; 5463 last_coeffcients[e] = cur_coeff; 5464 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5465 } 5466 } 5467 else{ 5468 coeff_type[e][coeff_index] = 0; 5469 last_coeffcients[e] = cur_coeff; 5470 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5471 } 5472 } 5473 coeff_index ++; 5474 } 5475 float curData; 5476 float pred; 5477 double itvNum; 5478 double diff; 5479 size_t index = 0; 5480 size_t block_unpredictable_count = 0; 5481 float * cur_data_pos = data_pos; 5482 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5483 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5484 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5485 curData = 
*cur_data_pos; 5486 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5487 diff = curData - pred; 5488 itvNum = fabs(diff)/tmp_realPrecision + 1; 5489 if (itvNum < intvCapacity){ 5490 if (diff < 0) itvNum = -itvNum; 5491 type[index] = (int) (itvNum/2) + intvRadius; 5492 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5493 //ganrantee comporession error against the case of machine-epsilon 5494 if(fabs(curData - pred)>tmp_realPrecision){ 5495 type[index] = 0; 5496 pred = curData; 5497 unpredictable_data[block_unpredictable_count ++] = curData; 5498 } 5499 } 5500 else{ 5501 type[index] = 0; 5502 pred = curData; 5503 unpredictable_data[block_unpredictable_count ++] = curData; 5504 } 5505 5506 #ifdef HAVE_TIMECMPR 5507 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5508 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5509 decData[block_offset + point_offset] = pred; 5510 #endif 5511 5512 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5513 // assign value to block surfaces 5514 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5515 } 5516 index ++; 5517 cur_data_pos ++; 5518 } 5519 cur_data_pos += dim1_offset - current_blockcount_z; 5520 } 5521 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5522 } 5523 /*dealing with the last ii (boundary)*/ 5524 { 5525 // ii == current_blockcount_x - 1 5526 size_t ii = current_blockcount_x - 1; 5527 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5528 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5529 curData = *cur_data_pos; 5530 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5531 diff = curData - pred; 5532 itvNum = fabs(diff)/tmp_realPrecision + 1; 5533 if (itvNum < intvCapacity){ 5534 if (diff < 0) itvNum = -itvNum; 5535 type[index] = (int) (itvNum/2) + intvRadius; 5536 pred = pred + 2 * (type[index] - 
intvRadius) * tmp_realPrecision; 5537 //ganrantee comporession error against the case of machine-epsilon 5538 if(fabs(curData - pred)>tmp_realPrecision){ 5539 type[index] = 0; 5540 pred = curData; 5541 unpredictable_data[block_unpredictable_count ++] = curData; 5542 } 5543 } 5544 else{ 5545 type[index] = 0; 5546 pred = curData; 5547 unpredictable_data[block_unpredictable_count ++] = curData; 5548 } 5549 5550 #ifdef HAVE_TIMECMPR 5551 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5552 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5553 decData[block_offset + point_offset] = pred; 5554 #endif 5555 5556 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5557 // assign value to block surfaces 5558 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5559 } 5560 // assign value to next prediction buffer 5561 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 5562 index ++; 5563 cur_data_pos ++; 5564 } 5565 cur_data_pos += dim1_offset - current_blockcount_z; 5566 } 5567 } 5568 unpredictable_count = block_unpredictable_count; 5569 strip_unpredictable_count += unpredictable_count; 5570 unpredictable_data += unpredictable_count; 5571 5572 reg_count ++; 5573 } 5574 else{ 5575 // use SZ 5576 // SZ predication 5577 unpredictable_count = 0; 5578 float * cur_pb_pos = pb_pos; 5579 float * cur_data_pos = data_pos; 5580 float curData; 5581 float pred3D; 5582 double itvNum, diff; 5583 size_t index = 0; 5584 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5585 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5586 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5587 5588 curData = *cur_data_pos; 5589 if(fabs(curData - mean) <= realPrecision){ 5590 // adjust type[index] to intvRadius for coherence with freq in reg 5591 type[index] = intvRadius; 5592 *cur_pb_pos = mean; 5593 } 5594 else 5595 { 5596 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 
5597 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5598 diff = curData - pred3D; 5599 itvNum = fabs(diff)/realPrecision + 1; 5600 if (itvNum < intvCapacity_sz){ 5601 if (diff < 0) itvNum = -itvNum; 5602 type[index] = (int) (itvNum/2) + intvRadius; 5603 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5604 if(type[index] <= intvRadius) type[index] -= 1; 5605 //ganrantee comporession error against the case of machine-epsilon 5606 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5607 type[index] = 0; 5608 *cur_pb_pos = curData; 5609 unpredictable_data[unpredictable_count ++] = curData; 5610 } 5611 } 5612 else{ 5613 type[index] = 0; 5614 *cur_pb_pos = curData; 5615 unpredictable_data[unpredictable_count ++] = curData; 5616 } 5617 } 5618 #ifdef HAVE_TIMECMPR 5619 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5620 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5621 decData[block_offset + point_offset] = *cur_pb_pos; 5622 #endif 5623 5624 index ++; 5625 cur_pb_pos ++; 5626 cur_data_pos ++; 5627 } 5628 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5629 cur_data_pos += dim1_offset - current_blockcount_z; 5630 } 5631 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 5632 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5633 } 5634 /*dealing with the last ii (boundary)*/ 5635 { 5636 // ii == current_blockcount_x - 1 5637 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5638 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5639 5640 curData = *cur_data_pos; 5641 if(fabs(curData - mean) <= realPrecision){ 5642 // adjust type[index] to intvRadius for coherence with freq in reg 5643 type[index] = intvRadius; 5644 *cur_pb_pos = mean; 5645 } 5646 else 5647 { 5648 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset 
- 1] 5649 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5650 diff = curData - pred3D; 5651 itvNum = fabs(diff)/realPrecision + 1; 5652 if (itvNum < intvCapacity_sz){ 5653 if (diff < 0) itvNum = -itvNum; 5654 type[index] = (int) (itvNum/2) + intvRadius; 5655 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5656 if(type[index] <= intvRadius) type[index] -= 1; 5657 //ganrantee comporession error against the case of machine-epsilon 5658 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5659 type[index] = 0; 5660 *cur_pb_pos = curData; 5661 unpredictable_data[unpredictable_count ++] = curData; 5662 } 5663 } 5664 else{ 5665 type[index] = 0; 5666 *cur_pb_pos = curData; 5667 unpredictable_data[unpredictable_count ++] = curData; 5668 } 5669 } 5670 #ifdef HAVE_TIMECMPR 5671 size_t ii = current_blockcount_x - 1; 5672 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5673 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5674 decData[block_offset + point_offset] = *cur_pb_pos; 5675 #endif 5676 5677 next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 5678 index ++; 5679 cur_pb_pos ++; 5680 cur_data_pos ++; 5681 } 5682 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5683 cur_data_pos += dim1_offset - current_blockcount_z; 5684 } 5685 } 5686 strip_unpredictable_count += unpredictable_count; 5687 unpredictable_data += unpredictable_count; 5688 // change indicator 5689 indicator_pos[k] = 1; 5690 }// end SZ 5691 5692 reg_params_pos ++; 5693 data_pos += current_blockcount_z; 5694 pb_pos += current_blockcount_z; 5695 next_pb_pos += current_blockcount_z; 5696 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 5697 5698 } // end k 5699 5700 if(strip_unpredictable_count > max_unpred_count){ 5701 max_unpred_count = strip_unpredictable_count; 5702 } 5703 total_unpred += strip_unpredictable_count; 5704 
indicator_pos += num_z; 5705 }// end j 5706 float * tmp; 5707 tmp = cur_pb_buf; 5708 cur_pb_buf = next_pb_buf; 5709 next_pb_buf = tmp; 5710 }// end i 5711 } 5712 else{ 5713 int intvCapacity_sz = intvCapacity - 2; 5714 for(size_t i=0; i<num_x; i++){ 5715 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 5716 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 5717 5718 for(size_t j=0; j<num_y; j++){ 5719 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 5720 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 5721 data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset; 5722 // copy bottom plane from plane buffer 5723 // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(float)); 5724 type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset; 5725 type = result_type + type_offset; 5726 5727 // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1) 5728 cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1; 5729 next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1; 5730 5731 size_t current_blockcount_z; 5732 float * pb_pos = cur_pb_buf_pos; 5733 float * next_pb_pos = next_pb_buf_pos; 5734 size_t strip_unpredictable_count = 0; 5735 for(size_t k=0; k<num_z; k++){ 5736 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 5737 #ifdef HAVE_TIMECMPR 5738 size_t offset_z = 0; 5739 offset_z = (k < split_index_z) ? 
k * early_blockcount_z : k * late_blockcount_z + split_index_z; 5740 size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 5741 #endif 5742 /*sampling*/ 5743 { 5744 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 5745 float * cur_data_pos; 5746 float curData; 5747 float pred_reg, pred_sz; 5748 float err_sz = 0.0, err_reg = 0.0; 5749 int bmi; 5750 if(i>0 && j>0 && k>0){ 5751 for(int i=0; i<block_size; i++){ 5752 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5753 curData = *cur_data_pos; 5754 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5755 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5756 err_sz += fabs(pred_sz - curData) + noise; 5757 err_reg += fabs(pred_reg - curData); 5758 5759 bmi = block_size - i; 5760 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5761 curData = *cur_data_pos; 5762 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5763 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5764 err_sz += fabs(pred_sz - curData) + noise; 5765 err_reg += fabs(pred_reg - curData); 5766 5767 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5768 curData = *cur_data_pos; 5769 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + 
cur_data_pos[-dim0_offset - dim1_offset - 1]; 5770 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5771 err_sz += fabs(pred_sz - curData) + noise; 5772 err_reg += fabs(pred_reg - curData); 5773 5774 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5775 curData = *cur_data_pos; 5776 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5777 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5778 err_sz += fabs(pred_sz - curData) + noise; 5779 err_reg += fabs(pred_reg - curData); 5780 } 5781 } 5782 else{ 5783 for(int i=1; i<block_size; i++){ 5784 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i; 5785 curData = *cur_data_pos; 5786 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5787 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5788 err_sz += fabs(pred_sz - curData) + noise; 5789 err_reg += fabs(pred_reg - curData); 5790 5791 bmi = block_size - i; 5792 cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi; 5793 curData = *cur_data_pos; 5794 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5795 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + 
reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5796 err_sz += fabs(pred_sz - curData) + noise; 5797 err_reg += fabs(pred_reg - curData); 5798 5799 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i; 5800 curData = *cur_data_pos; 5801 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5802 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 5803 err_sz += fabs(pred_sz - curData) + noise; 5804 err_reg += fabs(pred_reg - curData); 5805 5806 cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi; 5807 curData = *cur_data_pos; 5808 pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1]; 5809 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 5810 err_sz += fabs(pred_sz - curData) + noise; 5811 err_reg += fabs(pred_reg - curData); 5812 } 5813 } 5814 use_reg = (err_reg < err_sz); 5815 5816 } 5817 if(use_reg) 5818 { 5819 { 5820 /*predict coefficients in current block via previous reg_block*/ 5821 float cur_coeff; 5822 double diff, itvNum; 5823 for(int e=0; e<4; e++){ 5824 cur_coeff = reg_params_pos[e*num_blocks]; 5825 diff = cur_coeff - last_coeffcients[e]; 5826 itvNum = fabs(diff)/precision[e] + 1; 5827 if (itvNum < coeff_intvCapacity_sz){ 5828 if (diff < 0) itvNum = -itvNum; 5829 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 5830 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 5831 
//ganrantee comporession error against the case of machine-epsilon 5832 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 5833 coeff_type[e][coeff_index] = 0; 5834 last_coeffcients[e] = cur_coeff; 5835 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5836 } 5837 } 5838 else{ 5839 coeff_type[e][coeff_index] = 0; 5840 last_coeffcients[e] = cur_coeff; 5841 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 5842 } 5843 } 5844 coeff_index ++; 5845 } 5846 float curData; 5847 float pred; 5848 double itvNum; 5849 double diff; 5850 size_t index = 0; 5851 size_t block_unpredictable_count = 0; 5852 float * cur_data_pos = data_pos; 5853 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5854 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5855 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5856 5857 curData = *cur_data_pos; 5858 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5859 diff = curData - pred; 5860 itvNum = fabs(diff)/tmp_realPrecision + 1; 5861 if (itvNum < intvCapacity){ 5862 if (diff < 0) itvNum = -itvNum; 5863 type[index] = (int) (itvNum/2) + intvRadius; 5864 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5865 //ganrantee comporession error against the case of machine-epsilon 5866 if(fabs(curData - pred)>tmp_realPrecision){ 5867 type[index] = 0; 5868 pred = curData; 5869 unpredictable_data[block_unpredictable_count ++] = curData; 5870 } 5871 } 5872 else{ 5873 type[index] = 0; 5874 pred = curData; 5875 unpredictable_data[block_unpredictable_count ++] = curData; 5876 } 5877 5878 #ifdef HAVE_TIMECMPR 5879 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5880 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5881 decData[block_offset + point_offset] = pred; 5882 #endif 5883 5884 5885 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5886 // assign value to block surfaces 5887 pb_pos[ii * strip_dim0_offset 
+ jj * strip_dim1_offset + kk] = pred; 5888 } 5889 index ++; 5890 cur_data_pos ++; 5891 } 5892 cur_data_pos += dim1_offset - current_blockcount_z; 5893 } 5894 cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 5895 } 5896 /*dealing with the last ii (boundary)*/ 5897 { 5898 // ii == current_blockcount_x - 1 5899 size_t ii = current_blockcount_x - 1; 5900 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5901 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5902 curData = *cur_data_pos; 5903 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 5904 diff = curData - pred; 5905 itvNum = fabs(diff)/tmp_realPrecision + 1; 5906 if (itvNum < intvCapacity){ 5907 if (diff < 0) itvNum = -itvNum; 5908 type[index] = (int) (itvNum/2) + intvRadius; 5909 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5910 //ganrantee comporession error against the case of machine-epsilon 5911 if(fabs(curData - pred)>tmp_realPrecision){ 5912 type[index] = 0; 5913 pred = curData; 5914 unpredictable_data[block_unpredictable_count ++] = curData; 5915 } 5916 } 5917 else{ 5918 type[index] = 0; 5919 pred = curData; 5920 unpredictable_data[block_unpredictable_count ++] = curData; 5921 } 5922 5923 #ifdef HAVE_TIMECMPR 5924 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5925 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5926 decData[block_offset + point_offset] = pred; 5927 #endif 5928 5929 if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){ 5930 // assign value to block surfaces 5931 pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred; 5932 } 5933 // assign value to next prediction buffer 5934 next_pb_pos[jj * strip_dim1_offset + kk] = pred; 5935 index ++; 5936 cur_data_pos ++; 5937 } 5938 cur_data_pos += dim1_offset - current_blockcount_z; 5939 } 5940 } 5941 unpredictable_count = block_unpredictable_count; 5942 strip_unpredictable_count += unpredictable_count; 
5943 unpredictable_data += unpredictable_count; 5944 reg_count ++; 5945 } 5946 else{ 5947 // use SZ 5948 // SZ predication 5949 unpredictable_count = 0; 5950 float * cur_pb_pos = pb_pos; 5951 float * cur_data_pos = data_pos; 5952 float curData; 5953 float pred3D; 5954 double itvNum, diff; 5955 size_t index = 0; 5956 for(size_t ii=0; ii<current_blockcount_x - 1; ii++){ 5957 for(size_t jj=0; jj<current_blockcount_y; jj++){ 5958 for(size_t kk=0; kk<current_blockcount_z; kk++){ 5959 5960 curData = *cur_data_pos; 5961 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 5962 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 5963 diff = curData - pred3D; 5964 itvNum = fabs(diff)/realPrecision + 1; 5965 if (itvNum < intvCapacity_sz){ 5966 if (diff < 0) itvNum = -itvNum; 5967 type[index] = (int) (itvNum/2) + intvRadius; 5968 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 5969 //ganrantee comporession error against the case of machine-epsilon 5970 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 5971 type[index] = 0; 5972 *cur_pb_pos = curData; 5973 unpredictable_data[unpredictable_count ++] = curData; 5974 } 5975 } 5976 else{ 5977 type[index] = 0; 5978 *cur_pb_pos = curData; 5979 unpredictable_data[unpredictable_count ++] = curData; 5980 } 5981 5982 #ifdef HAVE_TIMECMPR 5983 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 5984 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 5985 decData[block_offset + point_offset] = *cur_pb_pos; 5986 #endif 5987 index ++; 5988 cur_pb_pos ++; 5989 cur_data_pos ++; 5990 } 5991 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 5992 cur_data_pos += dim1_offset - current_blockcount_z; 5993 } 5994 cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset; 5995 cur_data_pos += dim0_offset - 
current_blockcount_y * dim1_offset; 5996 } 5997 /*dealing with the last ii (boundary)*/ 5998 { 5999 // ii == current_blockcount_x - 1 6000 for(size_t jj=0; jj<current_blockcount_y; jj++){ 6001 for(size_t kk=0; kk<current_blockcount_z; kk++){ 6002 6003 curData = *cur_data_pos; 6004 pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] 6005 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6006 diff = curData - pred3D; 6007 itvNum = fabs(diff)/realPrecision + 1; 6008 if (itvNum < intvCapacity_sz){ 6009 if (diff < 0) itvNum = -itvNum; 6010 type[index] = (int) (itvNum/2) + intvRadius; 6011 *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6012 //ganrantee comporession error against the case of machine-epsilon 6013 if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ 6014 type[index] = 0; 6015 *cur_pb_pos = curData; 6016 unpredictable_data[unpredictable_count ++] = curData; 6017 } 6018 } 6019 else{ 6020 type[index] = 0; 6021 *cur_pb_pos = curData; 6022 unpredictable_data[unpredictable_count ++] = curData; 6023 } 6024 6025 #ifdef HAVE_TIMECMPR 6026 size_t ii = current_blockcount_x - 1; 6027 size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk; 6028 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION) 6029 decData[block_offset + point_offset] = *cur_pb_pos; 6030 #endif 6031 6032 // assign value to next prediction buffer 6033 next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos; 6034 index ++; 6035 cur_pb_pos ++; 6036 cur_data_pos ++; 6037 } 6038 cur_pb_pos += strip_dim1_offset - current_blockcount_z; 6039 cur_data_pos += dim1_offset - current_blockcount_z; 6040 } 6041 } 6042 strip_unpredictable_count += unpredictable_count; 6043 unpredictable_data += unpredictable_count; 6044 // change indicator 6045 indicator_pos[k] = 1; 6046 }// end SZ 6047 6048 reg_params_pos ++; 6049 
data_pos += current_blockcount_z; 6050 pb_pos += current_blockcount_z; 6051 next_pb_pos += current_blockcount_z; 6052 type += current_blockcount_x * current_blockcount_y * current_blockcount_z; 6053 6054 } 6055 6056 if(strip_unpredictable_count > max_unpred_count){ 6057 max_unpred_count = strip_unpredictable_count; 6058 } 6059 total_unpred += strip_unpredictable_count; 6060 indicator_pos += num_z; 6061 } 6062 float * tmp; 6063 tmp = cur_pb_buf; 6064 cur_pb_buf = next_pb_buf; 6065 next_pb_buf = tmp; 6066 } 6067 } 6068 6069 free(prediction_buffer_1); 6070 free(prediction_buffer_2); 6071 6072 int stateNum = 2*quantization_intervals; 6073 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6074 6075 size_t nodeCount = 0; 6076 init(huffmanTree, result_type, num_elements); 6077 size_t i = 0; 6078 for (i = 0; i < huffmanTree->stateNum; i++) 6079 if (huffmanTree->code[i]) nodeCount++; 6080 nodeCount = nodeCount*2-1; 6081 6082 unsigned char *treeBytes; 6083 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6084 6085 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 6086 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 6087 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); 6088 unsigned char * result_pos = result; 6089 initRandomAccessBytes(result_pos); 6090 6091 result_pos += meta_data_offset; 6092 6093 sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 6094 result_pos += exe_params->SZ_SIZE_TYPE; 6095 6096 intToBytes_bigEndian(result_pos, block_size); 6097 result_pos += sizeof(int); 6098 doubleToBytes(result_pos, realPrecision); 6099 result_pos 
+= sizeof(double); 6100 intToBytes_bigEndian(result_pos, quantization_intervals); 6101 result_pos += sizeof(int); 6102 intToBytes_bigEndian(result_pos, treeByteSize); 6103 result_pos += sizeof(int); 6104 intToBytes_bigEndian(result_pos, nodeCount); 6105 result_pos += sizeof(int); 6106 memcpy(result_pos, treeBytes, treeByteSize); 6107 result_pos += treeByteSize; 6108 free(treeBytes); 6109 6110 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 6111 result_pos += sizeof(unsigned char); 6112 memcpy(result_pos, &mean, sizeof(float)); 6113 result_pos += sizeof(float); 6114 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 6115 result_pos += indicator_size; 6116 6117 //convert the lead/mid/resi to byte stream 6118 if(reg_count > 0){ 6119 for(int e=0; e<4; e++){ 6120 int stateNum = 2*coeff_intvCapacity_sz; 6121 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6122 size_t nodeCount = 0; 6123 init(huffmanTree, coeff_type[e], reg_count); 6124 size_t i = 0; 6125 for (i = 0; i < huffmanTree->stateNum; i++) 6126 if (huffmanTree->code[i]) nodeCount++; 6127 nodeCount = nodeCount*2-1; 6128 unsigned char *treeBytes; 6129 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6130 doubleToBytes(result_pos, precision[e]); 6131 result_pos += sizeof(double); 6132 intToBytes_bigEndian(result_pos, coeff_intvRadius); 6133 result_pos += sizeof(int); 6134 intToBytes_bigEndian(result_pos, treeByteSize); 6135 result_pos += sizeof(int); 6136 intToBytes_bigEndian(result_pos, nodeCount); 6137 result_pos += sizeof(int); 6138 memcpy(result_pos, treeBytes, treeByteSize); 6139 result_pos += treeByteSize; 6140 free(treeBytes); 6141 size_t typeArray_size = 0; 6142 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 6143 sizeToBytes(result_pos, typeArray_size); 6144 result_pos += sizeof(size_t) + typeArray_size; 6145 intToBytes_bigEndian(result_pos, 
coeff_unpredictable_count[e]); 6146 result_pos += sizeof(int); 6147 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); 6148 result_pos += coeff_unpredictable_count[e]*sizeof(float); 6149 SZ_ReleaseHuffman(huffmanTree); 6150 } 6151 } 6152 free(coeff_result_type); 6153 free(coeff_unpredictable_data); 6154 6155 //record the number of unpredictable data and also store them 6156 memcpy(result_pos, &total_unpred, sizeof(size_t)); 6157 result_pos += sizeof(size_t); 6158 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); 6159 result_pos += total_unpred * sizeof(float); 6160 size_t typeArray_size = 0; 6161 encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size); 6162 result_pos += typeArray_size; 6163 size_t totalEncodeSize = result_pos - result; 6164 free(indicator); 6165 free(result_unpredictable_data); 6166 free(result_type); 6167 free(reg_params); 6168 6169 6170 SZ_ReleaseHuffman(huffmanTree); 6171 *comp_size = totalEncodeSize; 6172 return result; 6173 } 6174 6175 unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ 6176 6177 unsigned int quantization_intervals; 6178 float sz_sample_correct_freq = -1;//0.5; //-1 6179 float dense_pos; 6180 float mean_flush_freq; 6181 unsigned char use_mean = 0; 6182 6183 // calculate block dims 6184 size_t num_x, num_y, num_z; 6185 size_t block_size = 6; 6186 num_x = (r1 - 1) / block_size + 1; 6187 num_y = (r2 - 1) / block_size + 1; 6188 num_z = (r3 - 1) / block_size + 1; 6189 6190 size_t max_num_block_elements = block_size * block_size * block_size; 6191 size_t num_blocks = num_x * num_y * num_z; 6192 size_t num_elements = r1 * r2 * r3; 6193 6194 size_t dim0_offset = r2 * r3; 6195 size_t dim1_offset = r3; 6196 6197 int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int)); 6198 size_t unpred_data_max_size = 
max_num_block_elements; 6199 float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks); 6200 size_t total_unpred = 0; 6201 size_t unpredictable_count; 6202 float * data_pos = oriData; 6203 int * type = result_type; 6204 float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float)); 6205 float * reg_params_pos = reg_params; 6206 // move regression part out 6207 size_t params_offset_b = num_blocks; 6208 size_t params_offset_c = 2*num_blocks; 6209 size_t params_offset_d = 3*num_blocks; 6210 float * pred_buffer = (float *) malloc((block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); 6211 float * pred_buffer_pos = NULL; 6212 float * block_data_pos_x = NULL; 6213 float * block_data_pos_y = NULL; 6214 float * block_data_pos_z = NULL; 6215 for(size_t i=0; i<num_x; i++){ 6216 for(size_t j=0; j<num_y; j++){ 6217 for(size_t k=0; k<num_z; k++){ 6218 data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; 6219 pred_buffer_pos = pred_buffer; 6220 block_data_pos_x = data_pos; 6221 // use the buffer as block_size*block_size*block_size 6222 for(int ii=0; ii<block_size; ii++){ 6223 block_data_pos_y = block_data_pos_x; 6224 for(int jj=0; jj<block_size; jj++){ 6225 block_data_pos_z = block_data_pos_y; 6226 for(int kk=0; kk<block_size; kk++){ 6227 *pred_buffer_pos = *block_data_pos_z; 6228 if(k*block_size + kk + 1 < r3) block_data_pos_z ++; 6229 pred_buffer_pos ++; 6230 } 6231 if(j*block_size + jj + 1 < r2) block_data_pos_y += dim1_offset; 6232 } 6233 if(i*block_size + ii + 1 < r1) block_data_pos_x += dim0_offset; 6234 } 6235 /*Calculate regression coefficients*/ 6236 { 6237 float * cur_data_pos = pred_buffer; 6238 float fx = 0.0; 6239 float fy = 0.0; 6240 float fz = 0.0; 6241 float f = 0; 6242 float sum_x, sum_y; 6243 float curData; 6244 for(size_t i=0; i<block_size; i++){ 6245 sum_x = 0; 6246 for(size_t j=0; j<block_size; j++){ 6247 sum_y = 0; 6248 for(size_t k=0; k<block_size; k++){ 
6249 curData = *cur_data_pos; 6250 sum_y += curData; 6251 fz += curData * k; 6252 cur_data_pos ++; 6253 } 6254 fy += sum_y * j; 6255 sum_x += sum_y; 6256 } 6257 fx += sum_x * i; 6258 f += sum_x; 6259 } 6260 float coeff = 1.0 / (block_size * block_size * block_size); 6261 reg_params_pos[0] = (2 * fx / (block_size - 1) - f) * 6 * coeff / (block_size + 1); 6262 reg_params_pos[params_offset_b] = (2 * fy / (block_size - 1) - f) * 6 * coeff / (block_size + 1); 6263 reg_params_pos[params_offset_c] = (2 * fz / (block_size - 1) - f) * 6 * coeff / (block_size + 1); 6264 reg_params_pos[params_offset_d] = f * coeff - ((block_size - 1) * reg_params_pos[0] / 2 + (block_size - 1) * reg_params_pos[params_offset_b] / 2 + (block_size - 1) * reg_params_pos[params_offset_c] / 2); 6265 } 6266 reg_params_pos ++; 6267 } 6268 } 6269 } 6270 6271 //Compress coefficient arrays 6272 double precision_a, precision_b, precision_c, precision_d; 6273 float rel_param_err = 0.025; 6274 precision_a = rel_param_err * realPrecision / block_size; 6275 precision_b = rel_param_err * realPrecision / block_size; 6276 precision_c = rel_param_err * realPrecision / block_size; 6277 precision_d = rel_param_err * realPrecision; 6278 6279 if(exe_params->optQuantMode==1) 6280 { 6281 quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq); 6282 if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1; 6283 updateQuantizationInfo(quantization_intervals); 6284 } 6285 else{ 6286 quantization_intervals = exe_params->intvCapacity; 6287 } 6288 6289 float mean = 0; 6290 if(use_mean){ 6291 // compute mean 6292 double sum = 0.0; 6293 size_t mean_count = 0; 6294 for(size_t i=0; i<num_elements; i++){ 6295 if(fabs(oriData[i] - dense_pos) < realPrecision){ 6296 sum += oriData[i]; 6297 mean_count ++; 6298 } 6299 } 6300 if(mean_count > 0) mean = sum / mean_count; 6301 } 6302 6303 double 
tmp_realPrecision = realPrecision; 6304 6305 // use two prediction buffers for higher performance 6306 float * unpredictable_data = result_unpredictable_data; 6307 unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); 6308 memset(indicator, 0, num_blocks * sizeof(unsigned char)); 6309 size_t reg_count = 0; 6310 unsigned char * indicator_pos = indicator; 6311 6312 int intvCapacity = exe_params->intvCapacity; 6313 int intvRadius = exe_params->intvRadius; 6314 int use_reg = 0; 6315 float noise = realPrecision * 1.22; 6316 6317 reg_params_pos = reg_params; 6318 // compress the regression coefficients on the fly 6319 float last_coeffcients[4] = {0.0}; 6320 int coeff_intvCapacity_sz = 65536; 6321 int coeff_intvRadius = coeff_intvCapacity_sz / 2; 6322 int * coeff_type[4]; 6323 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 6324 float * coeff_unpred_data[4]; 6325 float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float)); 6326 double precision[4]; 6327 precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; 6328 for(int i=0; i<4; i++){ 6329 coeff_type[i] = coeff_result_type + i * num_blocks; 6330 coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; 6331 } 6332 int coeff_index = 0; 6333 unsigned int coeff_unpredictable_count[4] = {0}; 6334 6335 memset(pred_buffer, 0, (block_size+1)*(block_size+1)*(block_size+1)*sizeof(float)); 6336 int pred_buffer_block_size = block_size + 1; 6337 int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size; 6338 int strip_dim1_offset = pred_buffer_block_size; 6339 6340 if(use_mean){ 6341 int intvCapacity_sz = intvCapacity - 2; 6342 type = result_type; 6343 for(size_t i=0; i<num_x; i++){ 6344 for(size_t j=0; j<num_y; j++){ 6345 for(size_t k=0; k<num_z; k++){ 6346 data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; 6347 // add 1 in x, y, z offset 
6348 pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6349 block_data_pos_x = data_pos; 6350 for(int ii=0; ii<block_size; ii++){ 6351 block_data_pos_y = block_data_pos_x; 6352 for(int jj=0; jj<block_size; jj++){ 6353 block_data_pos_z = block_data_pos_y; 6354 for(int kk=0; kk<block_size; kk++){ 6355 *pred_buffer_pos = *block_data_pos_z; 6356 if(k*block_size + kk + 1< r3) block_data_pos_z ++; 6357 pred_buffer_pos ++; 6358 } 6359 // add 1 in z offset 6360 pred_buffer_pos ++; 6361 if(j*block_size + jj + 1< r2) block_data_pos_y += dim1_offset; 6362 } 6363 // add 1 in y offset 6364 pred_buffer_pos += pred_buffer_block_size; 6365 if(i*block_size + ii + 1< r1) block_data_pos_x += dim0_offset; 6366 } 6367 /*sampling and decide which predictor*/ 6368 { 6369 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 6370 float * cur_data_pos; 6371 float curData; 6372 float pred_reg, pred_sz; 6373 float err_sz = 0.0, err_reg = 0.0; 6374 int bmi = 0; 6375 for(int i=2; i<=block_size; i++){ 6376 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i; 6377 curData = *cur_data_pos; 6378 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6379 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6380 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6381 err_reg += fabs(pred_reg - curData); 6382 6383 bmi = block_size - i; 6384 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi; 6385 curData = *cur_data_pos; 6386 pred_sz = 
cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6387 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6388 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6389 err_reg += fabs(pred_reg - curData); 6390 6391 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i; 6392 curData = *cur_data_pos; 6393 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6394 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6395 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6396 err_reg += fabs(pred_reg - curData); 6397 6398 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi; 6399 curData = *cur_data_pos; 6400 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6401 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6402 err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData)); 6403 err_reg += fabs(pred_reg - curData); 6404 } 6405 6406 
use_reg = (err_reg < err_sz); 6407 } 6408 if(use_reg){ 6409 { 6410 /*predict coefficients in current block via previous reg_block*/ 6411 float cur_coeff; 6412 double diff, itvNum; 6413 for(int e=0; e<4; e++){ 6414 cur_coeff = reg_params_pos[e*num_blocks]; 6415 diff = cur_coeff - last_coeffcients[e]; 6416 itvNum = fabs(diff)/precision[e] + 1; 6417 if (itvNum < coeff_intvCapacity_sz){ 6418 if (diff < 0) itvNum = -itvNum; 6419 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 6420 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 6421 //ganrantee comporession error against the case of machine-epsilon 6422 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 6423 coeff_type[e][coeff_index] = 0; 6424 last_coeffcients[e] = cur_coeff; 6425 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6426 } 6427 } 6428 else{ 6429 coeff_type[e][coeff_index] = 0; 6430 last_coeffcients[e] = cur_coeff; 6431 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6432 } 6433 } 6434 coeff_index ++; 6435 } 6436 float curData; 6437 float pred; 6438 double itvNum; 6439 double diff; 6440 size_t index = 0; 6441 size_t block_unpredictable_count = 0; 6442 float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6443 for(size_t ii=0; ii<block_size; ii++){ 6444 for(size_t jj=0; jj<block_size; jj++){ 6445 for(size_t kk=0; kk<block_size; kk++){ 6446 curData = *cur_data_pos; 6447 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 6448 diff = curData - pred; 6449 itvNum = fabs(diff)/tmp_realPrecision + 1; 6450 if (itvNum < intvCapacity){ 6451 if (diff < 0) itvNum = -itvNum; 6452 type[index] = (int) (itvNum/2) + intvRadius; 6453 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6454 //ganrantee comporession error against the case of machine-epsilon 6455 if(fabs(curData 
- pred)>tmp_realPrecision){ 6456 type[index] = 0; 6457 pred = curData; 6458 unpredictable_data[block_unpredictable_count ++] = curData; 6459 } 6460 } 6461 else{ 6462 type[index] = 0; 6463 pred = curData; 6464 unpredictable_data[block_unpredictable_count ++] = curData; 6465 } 6466 index ++; 6467 cur_data_pos ++; 6468 } 6469 cur_data_pos ++; 6470 } 6471 cur_data_pos += pred_buffer_block_size; 6472 } 6473 6474 total_unpred += block_unpredictable_count; 6475 unpredictable_data += block_unpredictable_count; 6476 reg_count ++; 6477 } 6478 else{ 6479 // use SZ 6480 // SZ predication 6481 unpredictable_count = 0; 6482 float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6483 float curData; 6484 float pred3D; 6485 double itvNum, diff; 6486 size_t index = 0; 6487 for(size_t ii=0; ii<block_size; ii++){ 6488 for(size_t jj=0; jj<block_size; jj++){ 6489 for(size_t kk=0; kk<block_size; kk++){ 6490 6491 curData = *cur_data_pos; 6492 if(fabs(curData - mean) <= realPrecision){ 6493 type[index] = 1; 6494 *cur_data_pos = mean; 6495 } 6496 else 6497 { 6498 pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] 6499 - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6500 diff = curData - pred3D; 6501 itvNum = fabs(diff)/realPrecision + 1; 6502 if (itvNum < intvCapacity_sz){ 6503 if (diff < 0) itvNum = -itvNum; 6504 type[index] = (int) (itvNum/2) + intvRadius; 6505 *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6506 //ganrantee comporession error against the case of machine-epsilon 6507 if(fabs(curData - *cur_data_pos)>tmp_realPrecision){ 6508 type[index] = 0; 6509 *cur_data_pos = curData; 6510 unpredictable_data[unpredictable_count ++] = curData; 6511 } 6512 } 6513 else{ 6514 type[index] = 0; 6515 *cur_data_pos = 
curData; 6516 unpredictable_data[unpredictable_count ++] = curData; 6517 } 6518 } 6519 index ++; 6520 cur_data_pos ++; 6521 } 6522 cur_data_pos ++; 6523 } 6524 cur_data_pos += pred_buffer_block_size; 6525 } 6526 total_unpred += unpredictable_count; 6527 unpredictable_data += unpredictable_count; 6528 // change indicator 6529 indicator_pos[k] = 1; 6530 }// end SZ 6531 reg_params_pos ++; 6532 type += block_size * block_size * block_size; 6533 } // end k 6534 indicator_pos += num_z; 6535 }// end j 6536 }// end i 6537 } 6538 else{ 6539 int intvCapacity_sz = intvCapacity - 2; 6540 type = result_type; 6541 for(size_t i=0; i<num_x; i++){ 6542 for(size_t j=0; j<num_y; j++){ 6543 for(size_t k=0; k<num_z; k++){ 6544 data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size; 6545 // add 1 in x, y, z offset 6546 pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6547 block_data_pos_x = data_pos; 6548 for(int ii=0; ii<block_size; ii++){ 6549 block_data_pos_y = block_data_pos_x; 6550 for(int jj=0; jj<block_size; jj++){ 6551 block_data_pos_z = block_data_pos_y; 6552 for(int kk=0; kk<block_size; kk++){ 6553 *pred_buffer_pos = *block_data_pos_z; 6554 if(k*block_size + kk < r3) block_data_pos_z ++; 6555 pred_buffer_pos ++; 6556 } 6557 // add 1 in z offset 6558 pred_buffer_pos ++; 6559 if(j*block_size + jj < r2) block_data_pos_y += dim1_offset; 6560 } 6561 // add 1 in y offset 6562 pred_buffer_pos += pred_buffer_block_size; 6563 if(i*block_size + ii < r1) block_data_pos_x += dim0_offset; 6564 } 6565 /*sampling*/ 6566 { 6567 // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4] 6568 float * cur_data_pos; 6569 float curData; 6570 float pred_reg, pred_sz; 6571 float err_sz = 0.0, err_reg = 0.0; 6572 int bmi; 6573 for(int i=2; i<=block_size; i++){ 6574 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + 
i; 6575 curData = *cur_data_pos; 6576 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6577 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6578 err_sz += fabs(pred_sz - curData) + noise; 6579 err_reg += fabs(pred_reg - curData); 6580 6581 bmi = block_size - i; 6582 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi; 6583 curData = *cur_data_pos; 6584 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6585 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6586 err_sz += fabs(pred_sz - curData) + noise; 6587 err_reg += fabs(pred_reg - curData); 6588 6589 cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i; 6590 curData = *cur_data_pos; 6591 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6592 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d]; 6593 err_sz += fabs(pred_sz - curData) + noise; 6594 err_reg += fabs(pred_reg - curData); 6595 6596 cur_data_pos = 
pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi; 6597 curData = *cur_data_pos; 6598 pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6599 pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d]; 6600 err_sz += fabs(pred_sz - curData) + noise; 6601 err_reg += fabs(pred_reg - curData); 6602 } 6603 6604 use_reg = (err_reg < err_sz); 6605 6606 } 6607 if(use_reg) 6608 { 6609 { 6610 /*predict coefficients in current block via previous reg_block*/ 6611 float cur_coeff; 6612 double diff, itvNum; 6613 for(int e=0; e<4; e++){ 6614 cur_coeff = reg_params_pos[e*num_blocks]; 6615 diff = cur_coeff - last_coeffcients[e]; 6616 itvNum = fabs(diff)/precision[e] + 1; 6617 if (itvNum < coeff_intvCapacity_sz){ 6618 if (diff < 0) itvNum = -itvNum; 6619 coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; 6620 last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e]; 6621 //ganrantee comporession error against the case of machine-epsilon 6622 if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){ 6623 coeff_type[e][coeff_index] = 0; 6624 last_coeffcients[e] = cur_coeff; 6625 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6626 } 6627 } 6628 else{ 6629 coeff_type[e][coeff_index] = 0; 6630 last_coeffcients[e] = cur_coeff; 6631 coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff; 6632 } 6633 } 6634 coeff_index ++; 6635 } 6636 float curData; 6637 float pred; 6638 double itvNum; 6639 double diff; 6640 size_t index = 0; 6641 size_t block_unpredictable_count = 0; 6642 float * cur_data_pos = pred_buffer + 
pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6643 for(size_t ii=0; ii<block_size; ii++){ 6644 for(size_t jj=0; jj<block_size; jj++){ 6645 for(size_t kk=0; kk<block_size; kk++){ 6646 curData = *cur_data_pos; 6647 pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; 6648 diff = curData - pred; 6649 itvNum = fabs(diff)/tmp_realPrecision + 1; 6650 if (itvNum < intvCapacity){ 6651 if (diff < 0) itvNum = -itvNum; 6652 type[index] = (int) (itvNum/2) + intvRadius; 6653 pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6654 //ganrantee comporession error against the case of machine-epsilon 6655 if(fabs(curData - pred)>tmp_realPrecision){ 6656 type[index] = 0; 6657 pred = curData; 6658 unpredictable_data[block_unpredictable_count ++] = curData; 6659 } 6660 } 6661 else{ 6662 type[index] = 0; 6663 pred = curData; 6664 unpredictable_data[block_unpredictable_count ++] = curData; 6665 } 6666 index ++; 6667 cur_data_pos ++; 6668 } 6669 cur_data_pos ++; 6670 } 6671 cur_data_pos += pred_buffer_block_size; 6672 } 6673 total_unpred += block_unpredictable_count; 6674 unpredictable_data += block_unpredictable_count; 6675 reg_count ++; 6676 } 6677 else{ 6678 // use SZ 6679 // SZ predication 6680 unpredictable_count = 0; 6681 float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1; 6682 float curData; 6683 float pred3D; 6684 double itvNum, diff; 6685 size_t index = 0; 6686 for(size_t ii=0; ii<block_size; ii++){ 6687 for(size_t jj=0; jj<block_size; jj++){ 6688 for(size_t kk=0; kk<block_size; kk++){ 6689 curData = *cur_data_pos; 6690 pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] 6691 - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1]; 6692 diff = 
curData - pred3D; 6693 itvNum = fabs(diff)/realPrecision + 1; 6694 if (itvNum < intvCapacity_sz){ 6695 if (diff < 0) itvNum = -itvNum; 6696 type[index] = (int) (itvNum/2) + intvRadius; 6697 *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; 6698 //ganrantee comporession error against the case of machine-epsilon 6699 if(fabs(curData - *cur_data_pos)>tmp_realPrecision){ 6700 type[index] = 0; 6701 *cur_data_pos = curData; 6702 unpredictable_data[unpredictable_count ++] = curData; 6703 } 6704 } 6705 else{ 6706 type[index] = 0; 6707 *cur_data_pos = curData; 6708 unpredictable_data[unpredictable_count ++] = curData; 6709 } 6710 index ++; 6711 cur_data_pos ++; 6712 } 6713 cur_data_pos ++; 6714 } 6715 cur_data_pos += pred_buffer_block_size; 6716 } 6717 total_unpred += unpredictable_count; 6718 unpredictable_data += unpredictable_count; 6719 // change indicator 6720 indicator_pos[k] = 1; 6721 }// end SZ 6722 reg_params_pos ++; 6723 type += block_size * block_size * block_size; 6724 } 6725 indicator_pos += num_z; 6726 } 6727 } 6728 } 6729 free(pred_buffer); 6730 int stateNum = 2*quantization_intervals; 6731 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6732 6733 size_t nodeCount = 0; 6734 init(huffmanTree, result_type, num_blocks*max_num_block_elements); 6735 size_t i = 0; 6736 for (i = 0; i < huffmanTree->stateNum; i++) 6737 if (huffmanTree->code[i]) nodeCount++; 6738 nodeCount = nodeCount*2-1; 6739 6740 unsigned char *treeBytes; 6741 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6742 6743 unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; 6744 // total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements 6745 unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + 
num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1); 6746 unsigned char * result_pos = result; 6747 initRandomAccessBytes(result_pos); 6748 6749 result_pos += meta_data_offset; 6750 6751 sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8 6752 result_pos += exe_params->SZ_SIZE_TYPE; 6753 6754 intToBytes_bigEndian(result_pos, block_size); 6755 result_pos += sizeof(int); 6756 doubleToBytes(result_pos, realPrecision); 6757 result_pos += sizeof(double); 6758 intToBytes_bigEndian(result_pos, quantization_intervals); 6759 result_pos += sizeof(int); 6760 intToBytes_bigEndian(result_pos, treeByteSize); 6761 result_pos += sizeof(int); 6762 intToBytes_bigEndian(result_pos, nodeCount); 6763 result_pos += sizeof(int); 6764 memcpy(result_pos, treeBytes, treeByteSize); 6765 result_pos += treeByteSize; 6766 free(treeBytes); 6767 6768 memcpy(result_pos, &use_mean, sizeof(unsigned char)); 6769 result_pos += sizeof(unsigned char); 6770 memcpy(result_pos, &mean, sizeof(float)); 6771 result_pos += sizeof(float); 6772 size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos); 6773 result_pos += indicator_size; 6774 6775 //convert the lead/mid/resi to byte stream 6776 if(reg_count > 0){ 6777 for(int e=0; e<4; e++){ 6778 int stateNum = 2*coeff_intvCapacity_sz; 6779 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 6780 size_t nodeCount = 0; 6781 init(huffmanTree, coeff_type[e], reg_count); 6782 size_t i = 0; 6783 for (i = 0; i < huffmanTree->stateNum; i++) 6784 if (huffmanTree->code[i]) nodeCount++; 6785 nodeCount = nodeCount*2-1; 6786 unsigned char *treeBytes; 6787 unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); 6788 doubleToBytes(result_pos, precision[e]); 6789 result_pos += sizeof(double); 6790 intToBytes_bigEndian(result_pos, coeff_intvRadius); 6791 result_pos += sizeof(int); 6792 
intToBytes_bigEndian(result_pos, treeByteSize); 6793 result_pos += sizeof(int); 6794 intToBytes_bigEndian(result_pos, nodeCount); 6795 result_pos += sizeof(int); 6796 memcpy(result_pos, treeBytes, treeByteSize); 6797 result_pos += treeByteSize; 6798 free(treeBytes); 6799 size_t typeArray_size = 0; 6800 encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size); 6801 sizeToBytes(result_pos, typeArray_size); 6802 result_pos += sizeof(size_t) + typeArray_size; 6803 intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]); 6804 result_pos += sizeof(int); 6805 memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float)); 6806 result_pos += coeff_unpredictable_count[e]*sizeof(float); 6807 SZ_ReleaseHuffman(huffmanTree); 6808 } 6809 } 6810 free(coeff_result_type); 6811 free(coeff_unpredictable_data); 6812 6813 //record the number of unpredictable data and also store them 6814 memcpy(result_pos, &total_unpred, sizeof(size_t)); 6815 result_pos += sizeof(size_t); 6816 memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float)); 6817 result_pos += total_unpred * sizeof(float); 6818 size_t typeArray_size = 0; 6819 encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size); 6820 result_pos += typeArray_size; 6821 size_t totalEncodeSize = result_pos - result; 6822 free(indicator); 6823 free(result_unpredictable_data); 6824 free(result_type); 6825 free(reg_params); 6826 6827 6828 SZ_ReleaseHuffman(huffmanTree); 6829 *comp_size = totalEncodeSize; 6830 return result; 6831 } -
TabularUnified thirdparty/SZ/sz/src/sz_float_pwr.c ¶
r2c47b73 r9ee2ce3 24 24 #include "zlib.h" 25 25 #include "rw.h" 26 #include "utility.h" 26 27 27 28 void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, float* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision) … … 1781 1782 free_TightDataPointStorageF(tdps); 1782 1783 } 1784 1785 #include <stdbool.h> 1786 1787 void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max){ 1788 1789 float * log_data = (float *) malloc(dataLength * sizeof(float)); 1790 1791 unsigned char * signs = (unsigned char *) malloc(dataLength); 1792 memset(signs, 0, dataLength); 1793 // preprocess 1794 float max_abs_log_data; 1795 if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); 1796 else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); 1797 else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); 1798 float min_log_data = max_abs_log_data; 1799 bool positive = true; 1800 for(size_t i=0; i<dataLength; i++){ 1801 if(oriData[i] < 0){ 1802 signs[i] = 1; 1803 log_data[i] = -oriData[i]; 1804 positive = false; 1805 } 1806 else 1807 log_data[i] = oriData[i]; 1808 if(log_data[i] > 0){ 1809 log_data[i] = log2(log_data[i]); 1810 if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; 1811 if(log_data[i] < min_log_data) min_log_data = log_data[i]; 1812 } 1813 } 1814 1815 float valueRangeSize, medianValue_f; 1816 computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f); 1817 if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); 1818 double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7; 1819 for(size_t i=0; i<dataLength; i++){ 1820 if(oriData[i] == 0){ 1821 log_data[i] = min_log_data - 2.0001*realPrecision; 1822 } 1823 } 1824 1825 TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ(log_data, 
dataLength, realPrecision, valueRangeSize, medianValue_f); 1826 tdps->minLogValue = min_log_data - 1.0001*realPrecision; 1827 free(log_data); 1828 if(!positive){ 1829 unsigned char * comp_signs; 1830 // compress signs 1831 unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); 1832 tdps->pwrErrBoundBytes = comp_signs; 1833 tdps->pwrErrBoundBytes_size = signSize; 1834 } 1835 else{ 1836 tdps->pwrErrBoundBytes = NULL; 1837 tdps->pwrErrBoundBytes_size = 0; 1838 } 1839 free(signs); 1840 1841 convertTDPStoFlatBytes_float(tdps, newByteData, outSize); 1842 if(*outSize>dataLength*sizeof(float)) 1843 SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); 1844 1845 free_TightDataPointStorageF(tdps); 1846 } 1847 1848 void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max){ 1849 1850 size_t dataLength = r1 * r2; 1851 float * log_data = (float *) malloc(dataLength * sizeof(float)); 1852 1853 unsigned char * signs = (unsigned char *) malloc(dataLength); 1854 memset(signs, 0, dataLength); 1855 // preprocess 1856 float max_abs_log_data; 1857 if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); 1858 else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); 1859 else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? 
fabs(log2(fabs(min))) : fabs(log2(fabs(max))); 1860 float min_log_data = max_abs_log_data; 1861 bool positive = true; 1862 for(size_t i=0; i<dataLength; i++){ 1863 if(oriData[i] < 0){ 1864 signs[i] = 1; 1865 log_data[i] = -oriData[i]; 1866 positive = false; 1867 } 1868 else 1869 log_data[i] = oriData[i]; 1870 if(log_data[i] > 0){ 1871 log_data[i] = log2(log_data[i]); 1872 if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; 1873 if(log_data[i] < min_log_data) min_log_data = log_data[i]; 1874 } 1875 } 1876 1877 float valueRangeSize, medianValue_f; 1878 computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f); 1879 if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); 1880 double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7; 1881 for(size_t i=0; i<dataLength; i++){ 1882 if(oriData[i] == 0){ 1883 log_data[i] = min_log_data - 2.0001*realPrecision; 1884 } 1885 } 1886 1887 TightDataPointStorageF* tdps = SZ_compress_float_2D_MDQ(log_data, r1, r2, realPrecision, valueRangeSize, medianValue_f); 1888 tdps->minLogValue = min_log_data - 1.0001*realPrecision; 1889 free(log_data); 1890 if(!positive){ 1891 unsigned char * comp_signs; 1892 // compress signs 1893 unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); 1894 tdps->pwrErrBoundBytes = comp_signs; 1895 tdps->pwrErrBoundBytes_size = signSize; 1896 } 1897 else{ 1898 tdps->pwrErrBoundBytes = NULL; 1899 tdps->pwrErrBoundBytes_size = 0; 1900 } 1901 free(signs); 1902 1903 convertTDPStoFlatBytes_float(tdps, newByteData, outSize); 1904 if(*outSize>dataLength*sizeof(float)) 1905 SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); 1906 1907 free_TightDataPointStorageF(tdps); 1908 } 1909 1910 void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, 
size_t r2, size_t r3, size_t *outSize, float min, float max){ 1911 1912 size_t dataLength = r1 * r2 * r3; 1913 float * log_data = (float *) malloc(dataLength * sizeof(float)); 1914 1915 unsigned char * signs = (unsigned char *) malloc(dataLength); 1916 memset(signs, 0, dataLength); 1917 // preprocess 1918 float max_abs_log_data; 1919 if(min == 0) max_abs_log_data = fabs(log2(fabs(max))); 1920 else if(max == 0) max_abs_log_data = fabs(log2(fabs(min))); 1921 else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max))); 1922 float min_log_data = max_abs_log_data; 1923 bool positive = true; 1924 for(size_t i=0; i<dataLength; i++){ 1925 if(oriData[i] < 0){ 1926 signs[i] = 1; 1927 log_data[i] = -oriData[i]; 1928 positive = false; 1929 } 1930 else 1931 log_data[i] = oriData[i]; 1932 if(log_data[i] > 0){ 1933 log_data[i] = log2(log_data[i]); 1934 if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i]; 1935 if(log_data[i] < min_log_data) min_log_data = log_data[i]; 1936 } 1937 } 1938 1939 float valueRangeSize, medianValue_f; 1940 computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f); 1941 if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data); 1942 double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7; 1943 for(size_t i=0; i<dataLength; i++){ 1944 if(oriData[i] == 0){ 1945 log_data[i] = min_log_data - 2.0001*realPrecision; 1946 } 1947 } 1948 1949 TightDataPointStorageF* tdps = SZ_compress_float_3D_MDQ(log_data, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f); 1950 tdps->minLogValue = min_log_data - 1.0001*realPrecision; 1951 free(log_data); 1952 if(!positive){ 1953 unsigned char * comp_signs; 1954 // compress signs 1955 unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs); 1956 tdps->pwrErrBoundBytes = comp_signs; 1957 
tdps->pwrErrBoundBytes_size = signSize; 1958 } 1959 else{ 1960 tdps->pwrErrBoundBytes = NULL; 1961 tdps->pwrErrBoundBytes_size = 0; 1962 } 1963 free(signs); 1964 1965 convertTDPStoFlatBytes_float(tdps, newByteData, outSize); 1966 if(*outSize>dataLength*sizeof(float)) 1967 SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize); 1968 1969 free_TightDataPointStorageF(tdps); 1970 } -
TabularUnified thirdparty/SZ/sz/src/sz_int16.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_int16.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, double realPrecision) … … 267 268 pred = last3CmprsData[0]; 268 269 predAbsErr = llabs(curData - pred); 269 if(predAbsErr< =checkRadius)270 if(predAbsErr<checkRadius) 270 271 { 271 272 state = (predAbsErr/realPrecision+1)/2; … … 1370 1371 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1371 1372 { 1372 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1373 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1373 1374 free(tmpByteData); 1374 1375 } -
TabularUnified thirdparty/SZ/sz/src/sz_int32.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_int32.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_int32_1D(int32_t *oriData, size_t dataLength, double realPrecision) … … 269 270 pred = last3CmprsData[0]; 270 271 predAbsErr = llabs(curData - pred); 271 if(predAbsErr< =checkRadius)272 if(predAbsErr<checkRadius) 272 273 { 273 274 state = (predAbsErr/realPrecision+1)/2; … … 1254 1255 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1255 1256 { 1256 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1257 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1257 1258 free(tmpByteData); 1258 1259 } -
TabularUnified thirdparty/SZ/sz/src/sz_int64.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_int64.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, double realPrecision) … … 270 271 pred = last3CmprsData[0]; 271 272 predAbsErr = llabs(curData - pred); 272 if(predAbsErr< =checkRadius)273 if(predAbsErr<checkRadius) 273 274 { 274 275 state = (predAbsErr/realPrecision+1)/2; … … 1255 1256 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1256 1257 { 1257 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1258 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1258 1259 free(tmpByteData); 1259 1260 } -
TabularUnified thirdparty/SZ/sz/src/sz_int8.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_int8.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, double realPrecision) … … 267 268 pred = last3CmprsData[0]; 268 269 predAbsErr = llabs(curData - pred); 269 if(predAbsErr< =checkRadius)270 if(predAbsErr<checkRadius) 270 271 { 271 272 state = (predAbsErr/realPrecision+1)/2; … … 1371 1372 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1372 1373 { 1373 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1374 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1374 1375 free(tmpByteData); 1375 1376 } -
TabularUnified thirdparty/SZ/sz/src/sz_uint16.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_uint16.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength, double realPrecision) … … 267 268 pred = last3CmprsData[0]; 268 269 predAbsErr = llabs(curData - pred); 269 if(predAbsErr< =checkRadius)270 if(predAbsErr<checkRadius) 270 271 { 271 272 state = (predAbsErr/realPrecision+1)/2; … … 1370 1371 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1371 1372 { 1372 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1373 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1373 1374 free(tmpByteData); 1374 1375 } -
TabularUnified thirdparty/SZ/sz/src/sz_uint32.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_uint32.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength, double realPrecision) … … 269 270 pred = last3CmprsData[0]; 270 271 predAbsErr = llabs(curData - pred); 271 if(predAbsErr< =checkRadius)272 if(predAbsErr<checkRadius) 272 273 { 273 274 state = (predAbsErr/realPrecision+1)/2; … … 1254 1255 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1255 1256 { 1256 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1257 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1257 1258 free(tmpByteData); 1258 1259 } -
TabularUnified thirdparty/SZ/sz/src/sz_uint64.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_uint64.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength, double realPrecision) … … 269 270 pred = last3CmprsData[0]; 270 271 predAbsErr = llabs(curData - pred); 271 if(predAbsErr< =checkRadius)272 if(predAbsErr<checkRadius) 272 273 { 273 274 state = (predAbsErr/realPrecision+1)/2; … … 1254 1255 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1255 1256 { 1256 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1257 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1257 1258 free(tmpByteData); 1258 1259 } -
TabularUnified thirdparty/SZ/sz/src/sz_uint8.c ¶
r2c47b73 r9ee2ce3 22 22 #include "TightDataPointStorageI.h" 23 23 #include "sz_uint8.h" 24 #include "utility.h" 24 25 25 26 unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, double realPrecision) … … 267 268 pred = last3CmprsData[0]; 268 269 predAbsErr = llabs(curData - pred); 269 if(predAbsErr< =checkRadius)270 if(predAbsErr<checkRadius) 270 271 { 271 272 state = (predAbsErr/realPrecision+1)/2; … … 1371 1372 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION) 1372 1373 { 1373 *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);1374 *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData); 1374 1375 free(tmpByteData); 1375 1376 } -
TabularUnified thirdparty/SZ/sz/src/szd_double.c ¶
r2c47b73 r9ee2ce3 17 17 #include "szd_double_pwr.h" 18 18 #include "szd_double_ts.h" 19 #include "utility.h" 19 20 20 21 int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize) … … 30 31 if(cmpSize!=12+4+MetaDataByteLength && cmpSize!=12+8+MetaDataByteLength) 31 32 { 32 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);33 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 33 34 if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION) 34 35 { 35 if( isZlib)36 if(confparams_dec->losslessCompressor!=-1) 36 37 confparams_dec->szMode = SZ_BEST_COMPRESSION; 37 38 else … … 47 48 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 48 49 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 49 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);50 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE); 50 51 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 51 52 //memcpy(szTmpBytes, tmpBytes, tmpSize); … … 81 82 } 82 83 } 83 else if (dim == 1) 84 getSnapshotData_double_1D(newData,r1,tdps, errBoundMode); 85 else 86 if (dim == 2) 87 getSnapshotData_double_2D(newData,r2,r1,tdps, errBoundMode); 88 else 89 if (dim == 3) 90 getSnapshotData_double_3D(newData,r3,r2,r1,tdps, errBoundMode); 91 else 92 if (dim == 4) 93 getSnapshotData_double_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); 94 else 84 else 95 85 { 96 printf("Error: currently support only at most 4 dimensions!\n"); 97 status = SZ_DERR; 98 } 86 if(tdps->raBytes_size > 0) //v2.0 87 { 88 if (dim == 1) 89 getSnapshotData_double_1D(newData,r1,tdps, errBoundMode); 90 else if(dim == 2) 91 
decompressDataSeries_double_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes); 92 else if(dim == 3) 93 decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes); 94 else if(dim == 4) 95 decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes); 96 else 97 { 98 printf("Error: currently support only at most 4 dimensions!\n"); 99 status = SZ_DERR; 100 } 101 } 102 else //1.4.13 103 { 104 if (dim == 1) 105 getSnapshotData_double_1D(newData,r1,tdps, errBoundMode); 106 else 107 if (dim == 2) 108 getSnapshotData_double_2D(newData,r2,r1,tdps, errBoundMode); 109 else 110 if (dim == 3) 111 getSnapshotData_double_3D(newData,r3,r2,r1,tdps, errBoundMode); 112 else 113 if (dim == 4) 114 getSnapshotData_double_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); 115 else 116 { 117 printf("Error: currently support only at most 4 dimensions!\n"); 118 status = SZ_DERR; 119 } 120 } 121 } 122 99 123 free_TightDataPointStorageD2(tdps); 100 124 if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=12+MetaDataByteLength+exe_params->SZ_SIZE_TYPE) … … 1648 1672 else 1649 1673 { 1650 //decompressDataSeries_double_1D_pwr(data, dataSeriesLength, tdps);1651 decompressDataSeries_double_1D_pwrgroup(data, dataSeriesLength, tdps);1674 decompressDataSeries_double_1D_pwr_pre_log(data, dataSeriesLength, tdps); 1675 //decompressDataSeries_double_1D_pwrgroup(data, dataSeriesLength, tdps); 1652 1676 } 1653 1677 return; … … 1672 1696 decompressDataSeries_double_1D(&decmpData, dataSeriesLength, tdps); 1673 1697 else 1674 decompressDataSeries_double_1D_pwr(&decmpData, dataSeriesLength, tdps); 1698 //decompressDataSeries_double_1D_pwr(&decmpData, dataSeriesLength, tdps); 1699 decompressDataSeries_double_1D_pwr_pre_log(&decmpData, dataSeriesLength, tdps); 1675 1700 // insert the decompressed data 1676 1701 size_t k = 0; … … 1712 1737 } 1713 1738 else 1714 decompressDataSeries_double_2D_pwr(data, r1, r2, tdps); 
1739 //decompressDataSeries_double_2D_pwr(data, r1, r2, tdps); 1740 decompressDataSeries_double_2D_pwr_pre_log(data, r1, r2, tdps); 1715 1741 return; 1716 1742 } else { … … 1734 1760 decompressDataSeries_double_2D(&decmpData, r1, r2, tdps); 1735 1761 else 1736 decompressDataSeries_double_2D_pwr(&decmpData, r1, r2, tdps); 1762 //decompressDataSeries_double_2D_pwr(&decmpData, r1, r2, tdps); 1763 decompressDataSeries_double_2D_pwr_pre_log(&decmpData, r1, r2, tdps); 1737 1764 // insert the decompressed data 1738 1765 size_t k = 0; … … 1774 1801 } 1775 1802 else 1776 decompressDataSeries_double_3D_pwr(data, r1, r2, r3, tdps); 1803 //decompressDataSeries_double_3D_pwr(data, r1, r2, r3, tdps); 1804 decompressDataSeries_double_3D_pwr_pre_log(data, r1, r2, r3, tdps); 1777 1805 return; 1778 1806 } else { … … 1796 1824 decompressDataSeries_double_3D(&decmpData, r1, r2, r3, tdps); 1797 1825 else 1798 decompressDataSeries_double_3D_pwr(&decmpData, r1, r2, r3, tdps); 1826 //decompressDataSeries_double_3D_pwr(&decmpData, r1, r2, r3, tdps); 1827 decompressDataSeries_double_3D_pwr_pre_log(&decmpData, r1, r2, r3, tdps); 1799 1828 // insert the decompressed data 1800 1829 size_t k = 0; … … 1837 1866 else 1838 1867 { 1839 decompressDataSeries_double_3D_pwr(data, r1*r2, r3, r4, tdps); 1868 //decompressDataSeries_double_3D_pwr(data, r1*r2, r3, r4, tdps); 1869 decompressDataSeries_double_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps); 1840 1870 //ToDO 1841 1871 //decompressDataSeries_double_4D_pwr(data, r1, r2, r3, r4, tdps); … … 1859 1889 decompressDataSeries_double_4D(&decmpData, r1, r2, r3, r4, tdps); 1860 1890 else 1861 decompressDataSeries_double_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); 1891 //decompressDataSeries_double_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); 1892 decompressDataSeries_double_3D_pwr_pre_log(&decmpData, r1*r2, r3, r4, tdps); 1862 1893 //ToDo 1863 1894 //decompressDataSeries_double_4D_pwr(&decmpData, r1, r2, r3, r4, tdps); … … 1874 1905 } 1875 1906 } 1907 1908 void 
decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data){ 1909 1910 size_t dim0_offset = r2; 1911 size_t num_elements = r1 * r2; 1912 1913 *data = (double*)malloc(sizeof(double)*num_elements); 1914 1915 unsigned char * comp_data_pos = comp_data; 1916 1917 size_t block_size = bytesToInt_bigEndian(comp_data_pos); 1918 comp_data_pos += sizeof(int); 1919 // calculate block dims 1920 size_t num_x, num_y; 1921 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 1922 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 1923 1924 size_t split_index_x, split_index_y; 1925 size_t early_blockcount_x, early_blockcount_y; 1926 size_t late_blockcount_x, late_blockcount_y; 1927 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 1928 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 1929 1930 size_t num_blocks = num_x * num_y; 1931 1932 double realPrecision = bytesToDouble(comp_data_pos); 1933 comp_data_pos += sizeof(double); 1934 unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); 1935 comp_data_pos += sizeof(int); 1936 1937 updateQuantizationInfo(intervals); 1938 1939 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 1940 comp_data_pos += sizeof(int); 1941 1942 int stateNum = 2*intervals; 1943 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 1944 1945 int nodeCount = bytesToInt_bigEndian(comp_data_pos); 1946 1947 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); 1948 comp_data_pos += sizeof(int) + tree_size; 1949 1950 double mean; 1951 unsigned char use_mean; 1952 memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); 1953 comp_data_pos += sizeof(unsigned char); 1954 memcpy(&mean, comp_data_pos, sizeof(double)); 1955 comp_data_pos += sizeof(double); 1956 size_t reg_count = 0; 1957 1958 unsigned char * indicator; 1959 size_t indicator_bitlength = 
(num_blocks - 1)/8 + 1; 1960 convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); 1961 comp_data_pos += indicator_bitlength; 1962 for(size_t i=0; i<num_blocks; i++){ 1963 if(!indicator[i]) reg_count ++; 1964 } 1965 //printf("reg_count: %ld\n", reg_count); 1966 1967 int coeff_intvRadius[3]; 1968 int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); 1969 int * coeff_type[3]; 1970 double precision[3]; 1971 double * coeff_unpred_data[3]; 1972 if(reg_count > 0){ 1973 for(int i=0; i<3; i++){ 1974 precision[i] = bytesToDouble(comp_data_pos); 1975 comp_data_pos += sizeof(double); 1976 coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); 1977 comp_data_pos += sizeof(int); 1978 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 1979 comp_data_pos += sizeof(int); 1980 int stateNum = 2*coeff_intvRadius[i]*2; 1981 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 1982 int nodeCount = bytesToInt_bigEndian(comp_data_pos); 1983 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); 1984 comp_data_pos += sizeof(int) + tree_size; 1985 1986 coeff_type[i] = coeff_result_type + i * num_blocks; 1987 size_t typeArray_size = bytesToSize(comp_data_pos); 1988 decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); 1989 comp_data_pos += sizeof(size_t) + typeArray_size; 1990 int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); 1991 comp_data_pos += sizeof(int); 1992 coeff_unpred_data[i] = (double *) comp_data_pos; 1993 comp_data_pos += coeff_unpred_count * sizeof(double); 1994 SZ_ReleaseHuffman(huffmanTree); 1995 } 1996 } 1997 double last_coefficients[3] = {0.0}; 1998 int coeff_unpred_data_count[3] = {0}; 1999 int coeff_index = 0; 2000 updateQuantizationInfo(intervals); 2001 2002 size_t total_unpred; 2003 memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); 2004 comp_data_pos += sizeof(size_t); 2005 double * unpred_data = (double *) 
comp_data_pos; 2006 comp_data_pos += total_unpred * sizeof(double); 2007 2008 int * result_type = (int *) malloc(num_elements * sizeof(int)); 2009 decode(comp_data_pos, num_elements, root, result_type); 2010 SZ_ReleaseHuffman(huffmanTree); 2011 2012 int intvRadius = exe_params->intvRadius; 2013 2014 int * type; 2015 2016 double * data_pos = *data; 2017 size_t offset_x, offset_y; 2018 size_t current_blockcount_x, current_blockcount_y; 2019 size_t cur_unpred_count; 2020 2021 unsigned char * indicator_pos = indicator; 2022 if(use_mean){ 2023 type = result_type; 2024 for(size_t i=0; i<num_x; i++){ 2025 for(size_t j=0; j<num_y; j++){ 2026 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 2027 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 2028 data_pos = *data + offset_x * dim0_offset + offset_y; 2029 2030 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 2031 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 2032 2033 size_t current_block_elements = current_blockcount_x * current_blockcount_y; 2034 if(*indicator_pos){ 2035 // decompress by SZ 2036 2037 double * block_data_pos = data_pos; 2038 double pred; 2039 size_t index = 0; 2040 int type_; 2041 // d11 is current data 2042 size_t unpredictable_count = 0; 2043 double d00, d01, d10; 2044 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2045 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2046 type_ = type[index]; 2047 if(type_ == intvRadius){ 2048 *block_data_pos = mean; 2049 } 2050 else if(type_ == 0){ 2051 *block_data_pos = unpred_data[unpredictable_count ++]; 2052 } 2053 else{ 2054 d00 = d01 = d10 = 1; 2055 if(i == 0 && ii == 0){ 2056 d00 = d01 = 0; 2057 } 2058 if(j == 0 && jj == 0){ 2059 d00 = d10 = 0; 2060 } 2061 if(d00){ 2062 d00 = block_data_pos[- dim0_offset - 1]; 2063 } 2064 if(d01){ 2065 d01 = block_data_pos[- dim0_offset]; 2066 } 2067 if(d10){ 2068 d10 = block_data_pos[- 1]; 2069 } 2070 if(type_ < intvRadius) type_ += 1; 2071 pred = d10 + d01 - d00; 2072 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2073 } 2074 index ++; 2075 block_data_pos ++; 2076 } 2077 block_data_pos += dim0_offset - current_blockcount_y; 2078 } 2079 cur_unpred_count = unpredictable_count; 2080 } 2081 else{ 2082 // decompress by regression 2083 { 2084 //restore regression coefficients 2085 double pred; 2086 int type_; 2087 for(int e=0; e<3; e++){ 2088 type_ = coeff_type[e][coeff_index]; 2089 if (type_ != 0){ 2090 pred = last_coefficients[e]; 2091 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2092 } 2093 else{ 2094 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2095 coeff_unpred_data_count[e] ++; 2096 } 2097 } 2098 coeff_index ++; 2099 } 2100 { 2101 double * block_data_pos = data_pos; 2102 double pred; 2103 int type_; 2104 size_t index = 0; 2105 size_t unpredictable_count = 0; 2106 for(size_t ii=0; 
ii<current_blockcount_x; ii++){ 2107 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2108 type_ = type[index]; 2109 if (type_ != 0){ 2110 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; 2111 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2112 } 2113 else{ 2114 *block_data_pos = unpred_data[unpredictable_count ++]; 2115 } 2116 2117 index ++; 2118 block_data_pos ++; 2119 } 2120 block_data_pos += dim0_offset - current_blockcount_y; 2121 } 2122 cur_unpred_count = unpredictable_count; 2123 } 2124 } 2125 2126 type += current_block_elements; 2127 indicator_pos ++; 2128 unpred_data += cur_unpred_count; 2129 } 2130 } 2131 } 2132 else{ 2133 type = result_type; 2134 for(size_t i=0; i<num_x; i++){ 2135 for(size_t j=0; j<num_y; j++){ 2136 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 2137 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 2138 data_pos = *data + offset_x * dim0_offset + offset_y; 2139 2140 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 2141 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 2142 2143 size_t current_block_elements = current_blockcount_x * current_blockcount_y; 2144 if(*indicator_pos){ 2145 // decompress by SZ 2146 2147 double * block_data_pos = data_pos; 2148 double pred; 2149 size_t index = 0; 2150 int type_; 2151 // d11 is current data 2152 size_t unpredictable_count = 0; 2153 double d00, d01, d10; 2154 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2155 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2156 type_ = type[index]; 2157 if(type_ == 0){ 2158 *block_data_pos = unpred_data[unpredictable_count ++]; 2159 } 2160 else{ 2161 d00 = d01 = d10 = 1; 2162 if(i == 0 && ii == 0){ 2163 d00 = d01 = 0; 2164 } 2165 if(j == 0 && jj == 0){ 2166 d00 = d10 = 0; 2167 } 2168 if(d00){ 2169 d00 = block_data_pos[- dim0_offset - 1]; 2170 } 2171 if(d01){ 2172 d01 = block_data_pos[- dim0_offset]; 2173 } 2174 if(d10){ 2175 d10 = block_data_pos[- 1]; 2176 } 2177 pred = d10 + d01 - d00; 2178 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2179 } 2180 index ++; 2181 block_data_pos ++; 2182 } 2183 block_data_pos += dim0_offset - current_blockcount_y; 2184 } 2185 cur_unpred_count = unpredictable_count; 2186 } 2187 else{ 2188 // decompress by regression 2189 { 2190 //restore regression coefficients 2191 double pred; 2192 int type_; 2193 for(int e=0; e<3; e++){ 2194 type_ = coeff_type[e][coeff_index]; 2195 if (type_ != 0){ 2196 pred = last_coefficients[e]; 2197 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2198 } 2199 else{ 2200 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2201 coeff_unpred_data_count[e] ++; 2202 } 2203 } 2204 coeff_index ++; 2205 } 2206 { 2207 double * block_data_pos = data_pos; 2208 double pred; 2209 int type_; 2210 size_t index = 0; 2211 size_t unpredictable_count = 0; 2212 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2213 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2214 type_ = type[index]; 2215 if (type_ != 
0){ 2216 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; 2217 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2218 } 2219 else{ 2220 *block_data_pos = unpred_data[unpredictable_count ++]; 2221 } 2222 index ++; 2223 block_data_pos ++; 2224 } 2225 block_data_pos += dim0_offset - current_blockcount_y; 2226 } 2227 cur_unpred_count = unpredictable_count; 2228 } 2229 } 2230 2231 type += current_block_elements; 2232 indicator_pos ++; 2233 unpred_data += cur_unpred_count; 2234 } 2235 } 2236 } 2237 free(coeff_result_type); 2238 2239 free(indicator); 2240 free(result_type); 2241 } 2242 2243 2244 void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ 2245 2246 size_t dim0_offset = r2 * r3; 2247 size_t dim1_offset = r3; 2248 size_t num_elements = r1 * r2 * r3; 2249 2250 *data = (double*)malloc(sizeof(double)*num_elements); 2251 2252 unsigned char * comp_data_pos = comp_data; 2253 2254 size_t block_size = bytesToInt_bigEndian(comp_data_pos); 2255 comp_data_pos += sizeof(int); 2256 // calculate block dims 2257 size_t num_x, num_y, num_z; 2258 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 2259 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 2260 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); 2261 2262 size_t split_index_x, split_index_y, split_index_z; 2263 size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; 2264 size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; 2265 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 2266 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 2267 SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); 2268 2269 size_t num_blocks = num_x * num_y * num_z; 2270 2271 double realPrecision = bytesToDouble(comp_data_pos); 2272 comp_data_pos += 
sizeof(double); 2273 unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); 2274 comp_data_pos += sizeof(int); 2275 2276 updateQuantizationInfo(intervals); 2277 2278 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 2279 comp_data_pos += sizeof(int); 2280 2281 int stateNum = 2*intervals; 2282 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 2283 2284 int nodeCount = bytesToInt_bigEndian(comp_data_pos); 2285 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+4, nodeCount); 2286 comp_data_pos += sizeof(int) + tree_size; 2287 2288 double mean; 2289 unsigned char use_mean; 2290 memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); 2291 comp_data_pos += sizeof(unsigned char); 2292 memcpy(&mean, comp_data_pos, sizeof(double)); 2293 comp_data_pos += sizeof(double); 2294 size_t reg_count = 0; 2295 2296 unsigned char * indicator; 2297 size_t indicator_bitlength = (num_blocks - 1)/8 + 1; 2298 convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); 2299 comp_data_pos += indicator_bitlength; 2300 for(size_t i=0; i<num_blocks; i++){ 2301 if(!indicator[i]) reg_count ++; 2302 } 2303 2304 int coeff_intvRadius[4]; 2305 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 2306 int * coeff_type[4]; 2307 double precision[4]; 2308 double * coeff_unpred_data[4]; 2309 if(reg_count > 0){ 2310 for(int i=0; i<4; i++){ 2311 precision[i] = bytesToDouble(comp_data_pos); 2312 comp_data_pos += sizeof(double); 2313 coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); 2314 comp_data_pos += sizeof(int); 2315 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 2316 comp_data_pos += sizeof(int); 2317 int stateNum = 2*coeff_intvRadius[i]*2; 2318 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 2319 int nodeCount = bytesToInt_bigEndian(comp_data_pos); 2320 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+4, nodeCount); 2321 comp_data_pos 
+= sizeof(int) + tree_size; 2322 2323 coeff_type[i] = coeff_result_type + i * num_blocks; 2324 size_t typeArray_size = bytesToSize(comp_data_pos); 2325 decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); 2326 comp_data_pos += sizeof(size_t) + typeArray_size; 2327 int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); 2328 comp_data_pos += sizeof(int); 2329 coeff_unpred_data[i] = (double *) comp_data_pos; 2330 comp_data_pos += coeff_unpred_count * sizeof(double); 2331 SZ_ReleaseHuffman(huffmanTree); 2332 } 2333 } 2334 double last_coefficients[4] = {0.0}; 2335 int coeff_unpred_data_count[4] = {0}; 2336 int coeff_index = 0; 2337 updateQuantizationInfo(intervals); 2338 2339 size_t total_unpred; 2340 memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); 2341 comp_data_pos += sizeof(size_t); 2342 double * unpred_data = (double *) comp_data_pos; 2343 comp_data_pos += total_unpred * sizeof(double); 2344 2345 int * result_type = (int *) malloc(num_elements * sizeof(int)); 2346 decode(comp_data_pos, num_elements, root, result_type); 2347 SZ_ReleaseHuffman(huffmanTree); 2348 2349 int intvRadius = exe_params->intvRadius; 2350 2351 int * type; 2352 double * data_pos = *data; 2353 size_t offset_x, offset_y, offset_z; 2354 size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; 2355 size_t cur_unpred_count; 2356 unsigned char * indicator_pos = indicator; 2357 if(use_mean){ 2358 // type = result_type; 2359 2360 // for(size_t i=0; i<num_x; i++){ 2361 // for(size_t j=0; j<num_y; j++){ 2362 // for(size_t k=0; k<num_z; k++){ 2363 // offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 2364 // offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 2365 // offset_z = (k < split_index_z) ? 
k * early_blockcount_z : k * late_blockcount_z + split_index_z; 2366 // data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 2367 2368 // current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 2369 // current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 2370 // current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 2371 2372 // // type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset + offset_z * current_blockcount_x * current_blockcount_y; 2373 // // type = result_type + type_offset; 2374 // size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 2375 // // index = i * num_y * num_z + j * num_z + k; 2376 2377 // // printf("i j k: %ld %ld %ld\toffset: %ld %ld %ld\tindicator: %ld\n", i, j, k, offset_x, offset_y, offset_z, indicator[index]); 2378 // if(*indicator_pos){ 2379 // // decompress by SZ 2380 // // cur_unpred_count = decompressDataSeries_double_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data); 2381 // double * block_data_pos = data_pos; 2382 // double pred; 2383 // size_t index = 0; 2384 // int type_; 2385 // // d111 is current data 2386 // size_t unpredictable_count = 0; 2387 // double d000, d001, d010, d011, d100, d101, d110; 2388 // for(size_t ii=0; ii<current_blockcount_x; ii++){ 2389 // for(size_t jj=0; jj<current_blockcount_y; jj++){ 2390 // for(size_t kk=0; kk<current_blockcount_z; kk++){ 2391 // type_ = type[index]; 2392 // if(type_ == intvRadius){ 2393 // *block_data_pos = mean; 2394 // } 2395 // else if(type_ == 0){ 2396 // *block_data_pos = unpred_data[unpredictable_count ++]; 2397 // } 2398 // else{ 2399 // d000 = d001 = d010 = d011 = d100 = d101 = d110 = 1; 2400 // if(i == 0 && ii == 0){ 2401 // d000 = d001 = d010 = d011 = 0; 2402 // } 2403 // if(j == 
0 && jj == 0){ 2404 // d000 = d001 = d100 = d101 = 0; 2405 // } 2406 // if(k == 0 && kk == 0){ 2407 // d000 = d010 = d100 = d110 = 0; 2408 // } 2409 // if(d000){ 2410 // d000 = block_data_pos[- dim0_offset - dim1_offset - 1]; 2411 // } 2412 // if(d001){ 2413 // d001 = block_data_pos[- dim0_offset - dim1_offset]; 2414 // } 2415 // if(d010){ 2416 // d010 = block_data_pos[- dim0_offset - 1]; 2417 // } 2418 // if(d011){ 2419 // d011 = block_data_pos[- dim0_offset]; 2420 // } 2421 // if(d100){ 2422 // d100 = block_data_pos[- dim1_offset - 1]; 2423 // } 2424 // if(d101){ 2425 // d101 = block_data_pos[- dim1_offset]; 2426 // } 2427 // if(d110){ 2428 // d110 = block_data_pos[- 1]; 2429 // } 2430 // if(type_ < intvRadius) type_ += 1; 2431 // pred = d110 + d101 + d011 - d100 - d010 - d001 + d000; 2432 // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2433 // } 2434 // index ++; 2435 // block_data_pos ++; 2436 // } 2437 // block_data_pos += dim1_offset - current_blockcount_z; 2438 // } 2439 // block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2440 // } 2441 // cur_unpred_count = unpredictable_count; 2442 // } 2443 // else{ 2444 // // decompress by regression 2445 // { 2446 // //restore regression coefficients 2447 // double pred; 2448 // int type_; 2449 // for(int e=0; e<4; e++){ 2450 // // if(i == 0 && j == 0 && k == 19){ 2451 // // printf("~\n"); 2452 // // } 2453 // type_ = coeff_type[e][coeff_index]; 2454 // if (type_ != 0){ 2455 // pred = last_coefficients[e]; 2456 // last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2457 // } 2458 // else{ 2459 // last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2460 // coeff_unpred_data_count[e] ++; 2461 // } 2462 // if(fabs(last_coefficients[e]) > 10000){ 2463 // printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]); 2464 // exit(0); 2465 // } 2466 // } 2467 
// coeff_index ++; 2468 // } 2469 // { 2470 // double * block_data_pos = data_pos; 2471 // double pred; 2472 // int type_; 2473 // size_t index = 0; 2474 // size_t unpredictable_count = 0; 2475 // for(size_t ii=0; ii<current_blockcount_x; ii++){ 2476 // for(size_t jj=0; jj<current_blockcount_y; jj++){ 2477 // for(size_t kk=0; kk<current_blockcount_z; kk++){ 2478 // if(block_data_pos - (*data) == 19470788){ 2479 // printf("dec stop\n"); 2480 // } 2481 2482 // type_ = type[index]; 2483 // if (type_ != 0){ 2484 // pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 2485 // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2486 // } 2487 // else{ 2488 // *block_data_pos = unpred_data[unpredictable_count ++]; 2489 // } 2490 // index ++; 2491 // block_data_pos ++; 2492 // } 2493 // block_data_pos += dim1_offset - current_blockcount_z; 2494 // } 2495 // block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2496 // } 2497 // cur_unpred_count = unpredictable_count; 2498 // } 2499 // } 2500 2501 // type += current_block_elements; 2502 // indicator_pos ++; 2503 // unpred_data += cur_unpred_count; 2504 // // decomp_unpred += cur_unpred_count; 2505 // // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data); 2506 // // fflush(stdout); 2507 // } 2508 // } 2509 // } 2510 2511 type = result_type; 2512 // i == 0 2513 { 2514 // j == 0 2515 { 2516 // k == 0 2517 { 2518 data_pos = *data; 2519 2520 current_blockcount_x = early_blockcount_x; 2521 current_blockcount_y = early_blockcount_y; 2522 current_blockcount_z = early_blockcount_z; 2523 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 2524 if(*indicator_pos){ 2525 // decompress by SZ 2526 double * block_data_pos = data_pos; 2527 double pred; 2528 size_t index = 0; 2529 int type_; 2530 size_t unpredictable_count = 0; 2531 // ii == 0 2532 { 2533 
// jj == 0 2534 { 2535 { 2536 // kk == 0 2537 type_ = type[index]; 2538 if(type_ == intvRadius){ 2539 *block_data_pos = mean; 2540 } 2541 else if(type_ == 0){ 2542 *block_data_pos = unpred_data[unpredictable_count ++]; 2543 } 2544 else{ 2545 if(type_ < intvRadius) type_ += 1; 2546 pred = 0; 2547 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2548 } 2549 index ++; 2550 block_data_pos ++; 2551 } 2552 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2553 type_ = type[index]; 2554 if(type_ == intvRadius){ 2555 *block_data_pos = mean; 2556 } 2557 else if(type_ == 0){ 2558 *block_data_pos = unpred_data[unpredictable_count ++]; 2559 } 2560 else{ 2561 if(type_ < intvRadius) type_ += 1; 2562 pred = block_data_pos[- 1]; 2563 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2564 } 2565 index ++; 2566 block_data_pos ++; 2567 } 2568 block_data_pos += dim1_offset - current_blockcount_z; 2569 } 2570 for(size_t jj=1; jj<current_blockcount_y; jj++){ 2571 { 2572 // kk == 0 2573 type_ = type[index]; 2574 if(type_ == intvRadius){ 2575 *block_data_pos = mean; 2576 } 2577 else if(type_ == 0){ 2578 *block_data_pos = unpred_data[unpredictable_count ++]; 2579 } 2580 else{ 2581 if(type_ < intvRadius) type_ += 1; 2582 pred = block_data_pos[- dim1_offset]; 2583 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2584 } 2585 index ++; 2586 block_data_pos ++; 2587 } 2588 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2589 type_ = type[index]; 2590 if(type_ == intvRadius){ 2591 *block_data_pos = mean; 2592 } 2593 else if(type_ == 0){ 2594 *block_data_pos = unpred_data[unpredictable_count ++]; 2595 } 2596 else{ 2597 if(type_ < intvRadius) type_ += 1; 2598 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 2599 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2600 } 2601 index ++; 2602 block_data_pos ++; 2603 } 2604 block_data_pos += dim1_offset - current_blockcount_z; 2605 } 2606 
block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2607 } 2608 for(size_t ii=1; ii<current_blockcount_x; ii++){ 2609 // jj == 0 2610 { 2611 { 2612 // kk == 0 2613 type_ = type[index]; 2614 if(type_ == intvRadius){ 2615 *block_data_pos = mean; 2616 } 2617 else if(type_ == 0){ 2618 *block_data_pos = unpred_data[unpredictable_count ++]; 2619 } 2620 else{ 2621 if(type_ < intvRadius) type_ += 1; 2622 pred = block_data_pos[- dim0_offset]; 2623 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2624 } 2625 index ++; 2626 block_data_pos ++; 2627 } 2628 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2629 type_ = type[index]; 2630 if(type_ == intvRadius){ 2631 *block_data_pos = mean; 2632 } 2633 else if(type_ == 0){ 2634 *block_data_pos = unpred_data[unpredictable_count ++]; 2635 } 2636 else{ 2637 if(type_ < intvRadius) type_ += 1; 2638 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 2639 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2640 } 2641 index ++; 2642 block_data_pos ++; 2643 } 2644 block_data_pos += dim1_offset - current_blockcount_z; 2645 } 2646 for(size_t jj=1; jj<current_blockcount_y; jj++){ 2647 { 2648 // kk == 0 2649 type_ = type[index]; 2650 if(type_ == intvRadius){ 2651 *block_data_pos = mean; 2652 } 2653 else if(type_ == 0){ 2654 *block_data_pos = unpred_data[unpredictable_count ++]; 2655 } 2656 else{ 2657 if(type_ < intvRadius) type_ += 1; 2658 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 2659 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2660 } 2661 index ++; 2662 block_data_pos ++; 2663 } 2664 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2665 type_ = type[index]; 2666 if(type_ == intvRadius){ 2667 *block_data_pos = mean; 2668 } 2669 else if(type_ == 0){ 2670 *block_data_pos = unpred_data[unpredictable_count ++]; 2671 } 2672 else{ 2673 if(type_ < intvRadius) 
type_ += 1; 2674 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 2675 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2676 } 2677 index ++; 2678 block_data_pos ++; 2679 } 2680 block_data_pos += dim1_offset - current_blockcount_z; 2681 } 2682 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2683 } 2684 cur_unpred_count = unpredictable_count; 2685 } 2686 else{ 2687 // decompress by regression 2688 { 2689 //restore regression coefficients 2690 double pred; 2691 int type_; 2692 for(int e=0; e<4; e++){ 2693 type_ = coeff_type[e][coeff_index]; 2694 if (type_ != 0){ 2695 pred = last_coefficients[e]; 2696 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2697 } 2698 else{ 2699 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2700 coeff_unpred_data_count[e] ++; 2701 } 2702 } 2703 coeff_index ++; 2704 } 2705 { 2706 double * block_data_pos = data_pos; 2707 double pred; 2708 int type_; 2709 size_t index = 0; 2710 size_t unpredictable_count = 0; 2711 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2712 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2713 for(size_t kk=0; kk<current_blockcount_z; kk++){ 2714 type_ = type[index]; 2715 if (type_ != 0){ 2716 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 2717 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2718 } 2719 else{ 2720 *block_data_pos = unpred_data[unpredictable_count ++]; 2721 } 2722 index ++; 2723 block_data_pos ++; 2724 } 2725 block_data_pos += dim1_offset - current_blockcount_z; 2726 } 2727 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2728 } 2729 cur_unpred_count = unpredictable_count; 2730 } 2731 } 2732 
indicator_pos ++; 2733 type += current_block_elements; 2734 unpred_data += cur_unpred_count; 2735 } // end k == 0 2736 // i == 0 j == 0 k != 0 2737 for(size_t k=1; k<num_z; k++){ 2738 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 2739 data_pos = *data + offset_z; 2740 2741 current_blockcount_x = early_blockcount_x; 2742 current_blockcount_y = early_blockcount_y; 2743 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 2744 2745 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 2746 if(*indicator_pos){ 2747 // decompress by SZ 2748 double * block_data_pos = data_pos; 2749 double pred; 2750 size_t index = 0; 2751 int type_; 2752 size_t unpredictable_count = 0; 2753 // ii == 0 2754 { 2755 // jj == 0 2756 { 2757 for(size_t kk=0; kk<current_blockcount_z; kk++){ 2758 type_ = type[index]; 2759 if(type_ == intvRadius){ 2760 *block_data_pos = mean; 2761 } 2762 else if(type_ == 0){ 2763 *block_data_pos = unpred_data[unpredictable_count ++]; 2764 } 2765 else{ 2766 if(type_ < intvRadius) type_ += 1; 2767 pred = block_data_pos[- 1]; 2768 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2769 } 2770 index ++; 2771 block_data_pos ++; 2772 } 2773 block_data_pos += dim1_offset - current_blockcount_z; 2774 } 2775 for(size_t jj=1; jj<current_blockcount_y; jj++){ 2776 for(size_t kk=0; kk<current_blockcount_z; kk++){ 2777 type_ = type[index]; 2778 if(type_ == intvRadius){ 2779 *block_data_pos = mean; 2780 } 2781 else if(type_ == 0){ 2782 *block_data_pos = unpred_data[unpredictable_count ++]; 2783 } 2784 else{ 2785 if(type_ < intvRadius) type_ += 1; 2786 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 2787 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2788 } 2789 index ++; 2790 block_data_pos ++; 2791 } 2792 block_data_pos += dim1_offset - current_blockcount_z; 2793 } 
2794 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2795 } 2796 for(size_t ii=1; ii<current_blockcount_x; ii++){ 2797 // jj == 0 2798 { 2799 for(size_t kk=0; kk<current_blockcount_z; kk++){ 2800 type_ = type[index]; 2801 if(type_ == intvRadius){ 2802 *block_data_pos = mean; 2803 } 2804 else if(type_ == 0){ 2805 *block_data_pos = unpred_data[unpredictable_count ++]; 2806 } 2807 else{ 2808 if(type_ < intvRadius) type_ += 1; 2809 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 2810 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2811 } 2812 index ++; 2813 block_data_pos ++; 2814 } 2815 block_data_pos += dim1_offset - current_blockcount_z; 2816 } 2817 for(size_t jj=1; jj<current_blockcount_y; jj++){ 2818 for(size_t kk=0; kk<current_blockcount_z; kk++){ 2819 type_ = type[index]; 2820 if(type_ == intvRadius){ 2821 *block_data_pos = mean; 2822 } 2823 else if(type_ == 0){ 2824 *block_data_pos = unpred_data[unpredictable_count ++]; 2825 } 2826 else{ 2827 if(type_ < intvRadius) type_ += 1; 2828 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 2829 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2830 } 2831 index ++; 2832 block_data_pos ++; 2833 } 2834 block_data_pos += dim1_offset - current_blockcount_z; 2835 } 2836 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2837 } 2838 cur_unpred_count = unpredictable_count; 2839 } 2840 else{ 2841 // decompress by regression 2842 { 2843 //restore regression coefficients 2844 double pred; 2845 int type_; 2846 for(int e=0; e<4; e++){ 2847 type_ = coeff_type[e][coeff_index]; 2848 if (type_ != 0){ 2849 pred = last_coefficients[e]; 2850 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * 
precision[e]; 2851 } 2852 else{ 2853 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2854 coeff_unpred_data_count[e] ++; 2855 } 2856 } 2857 coeff_index ++; 2858 } 2859 { 2860 double * block_data_pos = data_pos; 2861 double pred; 2862 int type_; 2863 size_t index = 0; 2864 size_t unpredictable_count = 0; 2865 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2866 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2867 for(size_t kk=0; kk<current_blockcount_z; kk++){ 2868 type_ = type[index]; 2869 if (type_ != 0){ 2870 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 2871 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2872 } 2873 else{ 2874 *block_data_pos = unpred_data[unpredictable_count ++]; 2875 } 2876 index ++; 2877 block_data_pos ++; 2878 } 2879 block_data_pos += dim1_offset - current_blockcount_z; 2880 } 2881 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2882 } 2883 cur_unpred_count = unpredictable_count; 2884 } 2885 } 2886 indicator_pos ++; 2887 type += current_block_elements; 2888 unpred_data += cur_unpred_count; 2889 } 2890 }// end j==0 2891 for(size_t j=1; j<num_y; j++){ 2892 // k == 0 2893 { 2894 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 2895 data_pos = *data + offset_y * dim1_offset; 2896 2897 current_blockcount_x = early_blockcount_x; 2898 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 2899 current_blockcount_z = early_blockcount_z; 2900 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 2901 if(*indicator_pos){ 2902 // decompress by SZ 2903 double * block_data_pos = data_pos; 2904 double pred; 2905 size_t index = 0; 2906 int type_; 2907 size_t unpredictable_count = 0; 2908 // ii == 0 2909 { 2910 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2911 { 2912 // kk == 0 2913 type_ = type[index]; 2914 if(type_ == intvRadius){ 2915 *block_data_pos = mean; 2916 } 2917 else if(type_ == 0){ 2918 *block_data_pos = unpred_data[unpredictable_count ++]; 2919 } 2920 else{ 2921 if(type_ < intvRadius) type_ += 1; 2922 pred = block_data_pos[- dim1_offset]; 2923 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2924 } 2925 index ++; 2926 block_data_pos ++; 2927 } 2928 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2929 type_ = type[index]; 2930 if(type_ == intvRadius){ 2931 *block_data_pos = mean; 2932 } 2933 else if(type_ == 0){ 2934 *block_data_pos = unpred_data[unpredictable_count ++]; 2935 } 2936 else{ 2937 if(type_ < intvRadius) type_ += 1; 2938 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 2939 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2940 } 2941 index ++; 2942 block_data_pos ++; 2943 } 2944 block_data_pos += dim1_offset - current_blockcount_z; 2945 } 2946 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2947 } 2948 for(size_t ii=1; ii<current_blockcount_x; ii++){ 2949 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2950 { 2951 // kk == 0 2952 type_ = type[index]; 2953 if(type_ == intvRadius){ 2954 *block_data_pos = mean; 2955 } 2956 else if(type_ == 0){ 2957 *block_data_pos = unpred_data[unpredictable_count ++]; 2958 } 2959 else{ 2960 if(type_ < intvRadius) type_ += 1; 2961 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - 
block_data_pos[- dim0_offset - dim1_offset]; 2962 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2963 } 2964 index ++; 2965 block_data_pos ++; 2966 } 2967 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2968 type_ = type[index]; 2969 if(type_ == intvRadius){ 2970 *block_data_pos = mean; 2971 } 2972 else if(type_ == 0){ 2973 *block_data_pos = unpred_data[unpredictable_count ++]; 2974 } 2975 else{ 2976 if(type_ < intvRadius) type_ += 1; 2977 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 2978 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2979 } 2980 index ++; 2981 block_data_pos ++; 2982 } 2983 block_data_pos += dim1_offset - current_blockcount_z; 2984 } 2985 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2986 } 2987 cur_unpred_count = unpredictable_count; 2988 } 2989 else{ 2990 // decompress by regression 2991 { 2992 //restore regression coefficients 2993 double pred; 2994 int type_; 2995 for(int e=0; e<4; e++){ 2996 type_ = coeff_type[e][coeff_index]; 2997 if (type_ != 0){ 2998 pred = last_coefficients[e]; 2999 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3000 } 3001 else{ 3002 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3003 coeff_unpred_data_count[e] ++; 3004 } 3005 } 3006 coeff_index ++; 3007 } 3008 { 3009 double * block_data_pos = data_pos; 3010 double pred; 3011 int type_; 3012 size_t index = 0; 3013 size_t unpredictable_count = 0; 3014 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3015 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3016 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3017 type_ = type[index]; 3018 if (type_ != 0){ 3019 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * 
kk + last_coefficients[3]; 3020 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3021 } 3022 else{ 3023 *block_data_pos = unpred_data[unpredictable_count ++]; 3024 } 3025 index ++; 3026 block_data_pos ++; 3027 } 3028 block_data_pos += dim1_offset - current_blockcount_z; 3029 } 3030 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3031 } 3032 cur_unpred_count = unpredictable_count; 3033 } 3034 } 3035 indicator_pos ++; 3036 type += current_block_elements; 3037 unpred_data += cur_unpred_count; 3038 } // end k == 0 3039 for(size_t k=1; k<num_z; k++){ 3040 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3041 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3042 data_pos = *data + offset_y * dim1_offset + offset_z; 3043 3044 current_blockcount_x = early_blockcount_x; 3045 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3046 current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; 3047 3048 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3049 if(*indicator_pos){ 3050 // decompress by SZ 3051 double * block_data_pos = data_pos; 3052 double pred; 3053 size_t index = 0; 3054 int type_; 3055 size_t unpredictable_count = 0; 3056 // ii == 0 3057 { 3058 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3059 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3060 type_ = type[index]; 3061 if(type_ == intvRadius){ 3062 *block_data_pos = mean; 3063 } 3064 else if(type_ == 0){ 3065 *block_data_pos = unpred_data[unpredictable_count ++]; 3066 } 3067 else{ 3068 if(type_ < intvRadius) type_ += 1; 3069 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3070 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3071 } 3072 index ++; 3073 block_data_pos ++; 3074 } 3075 block_data_pos += dim1_offset - current_blockcount_z; 3076 } 3077 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3078 } 3079 for(size_t ii=1; ii<current_blockcount_x; ii++){ 3080 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3081 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3082 type_ = type[index]; 3083 if(type_ == intvRadius){ 3084 *block_data_pos = mean; 3085 } 3086 else if(type_ == 0){ 3087 *block_data_pos = unpred_data[unpredictable_count ++]; 3088 } 3089 else{ 3090 if(type_ < intvRadius) type_ += 1; 3091 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3092 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3093 } 3094 index ++; 3095 block_data_pos ++; 3096 } 3097 block_data_pos += dim1_offset - current_blockcount_z; 3098 } 3099 block_data_pos += dim0_offset - current_blockcount_y * 
dim1_offset; 3100 } 3101 cur_unpred_count = unpredictable_count; 3102 } 3103 else{ 3104 // decompress by regression 3105 { 3106 //restore regression coefficients 3107 double pred; 3108 int type_; 3109 for(int e=0; e<4; e++){ 3110 type_ = coeff_type[e][coeff_index]; 3111 if (type_ != 0){ 3112 pred = last_coefficients[e]; 3113 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3114 } 3115 else{ 3116 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3117 coeff_unpred_data_count[e] ++; 3118 } 3119 } 3120 coeff_index ++; 3121 } 3122 { 3123 double * block_data_pos = data_pos; 3124 double pred; 3125 int type_; 3126 size_t index = 0; 3127 size_t unpredictable_count = 0; 3128 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3129 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3130 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3131 type_ = type[index]; 3132 if (type_ != 0){ 3133 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3134 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3135 } 3136 else{ 3137 *block_data_pos = unpred_data[unpredictable_count ++]; 3138 } 3139 index ++; 3140 block_data_pos ++; 3141 } 3142 block_data_pos += dim1_offset - current_blockcount_z; 3143 } 3144 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3145 } 3146 cur_unpred_count = unpredictable_count; 3147 } 3148 } 3149 indicator_pos ++; 3150 type += current_block_elements; 3151 unpred_data += cur_unpred_count; 3152 } 3153 } 3154 } // end i==0 3155 for(size_t i=1; i<num_x; i++){ 3156 // j == 0 3157 { 3158 // k == 0 3159 { 3160 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3161 data_pos = *data + offset_x * dim0_offset; 3162 3163 current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; 3164 current_blockcount_y = early_blockcount_y; 3165 current_blockcount_z = early_blockcount_z; 3166 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3167 if(*indicator_pos){ 3168 // decompress by SZ 3169 double * block_data_pos = data_pos; 3170 double pred; 3171 size_t index = 0; 3172 int type_; 3173 size_t unpredictable_count = 0; 3174 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3175 // jj == 0 3176 { 3177 { 3178 // kk == 0 3179 type_ = type[index]; 3180 if(type_ == intvRadius){ 3181 *block_data_pos = mean; 3182 } 3183 else if(type_ == 0){ 3184 *block_data_pos = unpred_data[unpredictable_count ++]; 3185 } 3186 else{ 3187 if(type_ < intvRadius) type_ += 1; 3188 pred = block_data_pos[- dim0_offset]; 3189 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3190 } 3191 index ++; 3192 block_data_pos ++; 3193 } 3194 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3195 type_ = type[index]; 3196 if(type_ == intvRadius){ 3197 *block_data_pos = mean; 3198 } 3199 else if(type_ == 0){ 3200 *block_data_pos = unpred_data[unpredictable_count ++]; 3201 } 3202 else{ 3203 if(type_ < intvRadius) type_ += 1; 3204 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 3205 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3206 } 3207 index ++; 3208 block_data_pos ++; 3209 } 3210 block_data_pos += dim1_offset - current_blockcount_z; 3211 } 3212 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3213 { 3214 // kk == 0 3215 type_ = type[index]; 3216 if(type_ == intvRadius){ 3217 *block_data_pos = mean; 3218 } 3219 else if(type_ == 0){ 3220 *block_data_pos = unpred_data[unpredictable_count ++]; 3221 } 3222 else{ 3223 if(type_ < intvRadius) type_ += 1; 3224 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 3225 *block_data_pos = pred + 2 * (type_ - 
intvRadius) * realPrecision; 3226 } 3227 index ++; 3228 block_data_pos ++; 3229 } 3230 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3231 type_ = type[index]; 3232 if(type_ == intvRadius){ 3233 *block_data_pos = mean; 3234 } 3235 else if(type_ == 0){ 3236 *block_data_pos = unpred_data[unpredictable_count ++]; 3237 } 3238 else{ 3239 if(type_ < intvRadius) type_ += 1; 3240 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3241 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3242 } 3243 index ++; 3244 block_data_pos ++; 3245 } 3246 block_data_pos += dim1_offset - current_blockcount_z; 3247 } 3248 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3249 } 3250 cur_unpred_count = unpredictable_count; 3251 } 3252 else{ 3253 // decompress by regression 3254 { 3255 //restore regression coefficients 3256 double pred; 3257 int type_; 3258 for(int e=0; e<4; e++){ 3259 type_ = coeff_type[e][coeff_index]; 3260 if (type_ != 0){ 3261 pred = last_coefficients[e]; 3262 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3263 } 3264 else{ 3265 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3266 coeff_unpred_data_count[e] ++; 3267 } 3268 } 3269 coeff_index ++; 3270 } 3271 { 3272 double * block_data_pos = data_pos; 3273 double pred; 3274 int type_; 3275 size_t index = 0; 3276 size_t unpredictable_count = 0; 3277 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3278 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3279 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3280 type_ = type[index]; 3281 if (type_ != 0){ 3282 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3283 *block_data_pos = pred + 2 * (type_ - intvRadius) * 
realPrecision; 3284 } 3285 else{ 3286 *block_data_pos = unpred_data[unpredictable_count ++]; 3287 } 3288 index ++; 3289 block_data_pos ++; 3290 } 3291 block_data_pos += dim1_offset - current_blockcount_z; 3292 } 3293 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3294 } 3295 cur_unpred_count = unpredictable_count; 3296 } 3297 } 3298 indicator_pos ++; 3299 type += current_block_elements; 3300 unpred_data += cur_unpred_count; 3301 } // end k == 0 3302 for(size_t k=1; k<num_z; k++){ 3303 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3304 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3305 data_pos = *data + offset_x * dim0_offset + offset_z; 3306 3307 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3308 current_blockcount_y = early_blockcount_y; 3309 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 3310 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3311 if(*indicator_pos){ 3312 // decompress by SZ 3313 double * block_data_pos = data_pos; 3314 double pred; 3315 size_t index = 0; 3316 int type_; 3317 size_t unpredictable_count = 0; 3318 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3319 // jj == 0 3320 { 3321 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3322 type_ = type[index]; 3323 if(type_ == intvRadius){ 3324 *block_data_pos = mean; 3325 } 3326 else if(type_ == 0){ 3327 *block_data_pos = unpred_data[unpredictable_count ++]; 3328 } 3329 else{ 3330 if(type_ < intvRadius) type_ += 1; 3331 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 3332 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3333 } 3334 index ++; 3335 block_data_pos ++; 3336 } 3337 block_data_pos += dim1_offset - current_blockcount_z; 3338 } 3339 for(size_t jj=1; jj<current_blockcount_y; 
jj++){ 3340 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3341 type_ = type[index]; 3342 if(type_ == intvRadius){ 3343 *block_data_pos = mean; 3344 } 3345 else if(type_ == 0){ 3346 *block_data_pos = unpred_data[unpredictable_count ++]; 3347 } 3348 else{ 3349 if(type_ < intvRadius) type_ += 1; 3350 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3351 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3352 } 3353 index ++; 3354 block_data_pos ++; 3355 } 3356 block_data_pos += dim1_offset - current_blockcount_z; 3357 } 3358 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3359 } 3360 cur_unpred_count = unpredictable_count; 3361 } 3362 else{ 3363 // decompress by regression 3364 { 3365 //restore regression coefficients 3366 double pred; 3367 int type_; 3368 for(int e=0; e<4; e++){ 3369 type_ = coeff_type[e][coeff_index]; 3370 if (type_ != 0){ 3371 pred = last_coefficients[e]; 3372 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3373 } 3374 else{ 3375 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3376 coeff_unpred_data_count[e] ++; 3377 } 3378 } 3379 coeff_index ++; 3380 } 3381 { 3382 double * block_data_pos = data_pos; 3383 double pred; 3384 int type_; 3385 size_t index = 0; 3386 size_t unpredictable_count = 0; 3387 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3388 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3389 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3390 type_ = type[index]; 3391 if (type_ != 0){ 3392 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3393 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3394 } 3395 else{ 3396 *block_data_pos = 
unpred_data[unpredictable_count ++]; 3397 } 3398 index ++; 3399 block_data_pos ++; 3400 } 3401 block_data_pos += dim1_offset - current_blockcount_z; 3402 } 3403 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3404 } 3405 cur_unpred_count = unpredictable_count; 3406 } 3407 } 3408 indicator_pos ++; 3409 type += current_block_elements; 3410 unpred_data += cur_unpred_count; 3411 } 3412 }// end j = 0 3413 for(size_t j=1; j<num_y; j++){ 3414 // k == 0 3415 { 3416 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3417 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3418 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; 3419 3420 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3421 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3422 current_blockcount_z = early_blockcount_z; 3423 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3424 if(*indicator_pos){ 3425 // decompress by SZ 3426 double * block_data_pos = data_pos; 3427 double pred; 3428 size_t index = 0; 3429 int type_; 3430 size_t unpredictable_count = 0; 3431 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3432 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3433 { 3434 // kk == 0 3435 type_ = type[index]; 3436 if(type_ == intvRadius){ 3437 *block_data_pos = mean; 3438 } 3439 else if(type_ == 0){ 3440 *block_data_pos = unpred_data[unpredictable_count ++]; 3441 } 3442 else{ 3443 if(type_ < intvRadius) type_ += 1; 3444 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 3445 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3446 } 3447 index ++; 3448 block_data_pos ++; 3449 } 3450 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3451 type_ = type[index]; 3452 if(type_ == intvRadius){ 
3453 *block_data_pos = mean; 3454 } 3455 else if(type_ == 0){ 3456 *block_data_pos = unpred_data[unpredictable_count ++]; 3457 } 3458 else{ 3459 if(type_ < intvRadius) type_ += 1; 3460 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3461 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3462 } 3463 index ++; 3464 block_data_pos ++; 3465 } 3466 block_data_pos += dim1_offset - current_blockcount_z; 3467 } 3468 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3469 } 3470 cur_unpred_count = unpredictable_count; 3471 } 3472 else{ 3473 // decompress by regression 3474 { 3475 //restore regression coefficients 3476 double pred; 3477 int type_; 3478 for(int e=0; e<4; e++){ 3479 type_ = coeff_type[e][coeff_index]; 3480 if (type_ != 0){ 3481 pred = last_coefficients[e]; 3482 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3483 } 3484 else{ 3485 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3486 coeff_unpred_data_count[e] ++; 3487 } 3488 } 3489 coeff_index ++; 3490 } 3491 { 3492 double * block_data_pos = data_pos; 3493 double pred; 3494 int type_; 3495 size_t index = 0; 3496 size_t unpredictable_count = 0; 3497 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3498 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3499 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3500 type_ = type[index]; 3501 if (type_ != 0){ 3502 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3503 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3504 } 3505 else{ 3506 *block_data_pos = unpred_data[unpredictable_count ++]; 3507 } 3508 index ++; 3509 block_data_pos ++; 3510 } 3511 block_data_pos += dim1_offset - 
current_blockcount_z; 3512 } 3513 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3514 } 3515 cur_unpred_count = unpredictable_count; 3516 } 3517 } 3518 indicator_pos ++; 3519 type += current_block_elements; 3520 unpred_data += cur_unpred_count; 3521 } // end k == 0 3522 for(size_t k=1; k<num_z; k++){ 3523 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3524 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3525 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3526 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 3527 3528 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3529 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3530 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 3531 3532 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3533 if(*indicator_pos){ 3534 // decompress by SZ 3535 double * block_data_pos = data_pos; 3536 double pred; 3537 size_t index = 0; 3538 int type_; 3539 size_t unpredictable_count = 0; 3540 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3541 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3542 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3543 type_ = type[index]; 3544 if(type_ == intvRadius){ 3545 *block_data_pos = mean; 3546 } 3547 else if(type_ == 0){ 3548 *block_data_pos = unpred_data[unpredictable_count ++]; 3549 } 3550 else{ 3551 if(type_ < intvRadius) type_ += 1; 3552 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3553 *block_data_pos = pred + 2 * (type_ - 
intvRadius) * realPrecision; 3554 } 3555 index ++; 3556 block_data_pos ++; 3557 } 3558 block_data_pos += dim1_offset - current_blockcount_z; 3559 } 3560 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3561 } 3562 cur_unpred_count = unpredictable_count; 3563 } 3564 else{ 3565 // decompress by regression 3566 { 3567 //restore regression coefficients 3568 double pred; 3569 int type_; 3570 for(int e=0; e<4; e++){ 3571 type_ = coeff_type[e][coeff_index]; 3572 if (type_ != 0){ 3573 pred = last_coefficients[e]; 3574 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3575 } 3576 else{ 3577 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3578 coeff_unpred_data_count[e] ++; 3579 } 3580 } 3581 coeff_index ++; 3582 } 3583 { 3584 double * block_data_pos = data_pos; 3585 double pred; 3586 int type_; 3587 size_t index = 0; 3588 size_t unpredictable_count = 0; 3589 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3590 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3591 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3592 type_ = type[index]; 3593 if (type_ != 0){ 3594 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3595 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3596 } 3597 else{ 3598 *block_data_pos = unpred_data[unpredictable_count ++]; 3599 } 3600 index ++; 3601 block_data_pos ++; 3602 } 3603 block_data_pos += dim1_offset - current_blockcount_z; 3604 } 3605 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3606 } 3607 cur_unpred_count = unpredictable_count; 3608 } 3609 } 3610 indicator_pos ++; 3611 type += current_block_elements; 3612 unpred_data += cur_unpred_count; 3613 } 3614 } 3615 } 3616 } 3617 else{ 3618 type = result_type; 3619 // i == 0 3620 { 3621 // j == 0 3622 { 3623 // k == 0 3624 { 3625 data_pos = *data; 3626 3627 current_blockcount_x = early_blockcount_x; 3628 current_blockcount_y = 
early_blockcount_y; 3629 current_blockcount_z = early_blockcount_z; 3630 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3631 if(*indicator_pos){ 3632 // decompress by SZ 3633 double * block_data_pos = data_pos; 3634 double pred; 3635 size_t index = 0; 3636 int type_; 3637 size_t unpredictable_count = 0; 3638 // ii == 0 3639 { 3640 // jj == 0 3641 { 3642 { 3643 // kk == 0 3644 type_ = type[index]; 3645 if(type_ == 0){ 3646 *block_data_pos = unpred_data[unpredictable_count ++]; 3647 } 3648 else{ 3649 pred = 0; 3650 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3651 } 3652 index ++; 3653 block_data_pos ++; 3654 } 3655 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3656 type_ = type[index]; 3657 if(type_ == 0){ 3658 *block_data_pos = unpred_data[unpredictable_count ++]; 3659 } 3660 else{ 3661 pred = block_data_pos[- 1]; 3662 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3663 } 3664 index ++; 3665 block_data_pos ++; 3666 } 3667 block_data_pos += dim1_offset - current_blockcount_z; 3668 } 3669 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3670 { 3671 // kk == 0 3672 type_ = type[index]; 3673 if(type_ == 0){ 3674 *block_data_pos = unpred_data[unpredictable_count ++]; 3675 } 3676 else{ 3677 pred = block_data_pos[- dim1_offset]; 3678 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3679 } 3680 index ++; 3681 block_data_pos ++; 3682 } 3683 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3684 type_ = type[index]; 3685 if(type_ == 0){ 3686 *block_data_pos = unpred_data[unpredictable_count ++]; 3687 } 3688 else{ 3689 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3690 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3691 } 3692 index ++; 3693 block_data_pos ++; 3694 } 3695 block_data_pos += dim1_offset - current_blockcount_z; 3696 } 3697 block_data_pos += dim0_offset - current_blockcount_y * 
dim1_offset; 3698 } 3699 for(size_t ii=1; ii<current_blockcount_x; ii++){ 3700 // jj == 0 3701 { 3702 { 3703 // kk == 0 3704 type_ = type[index]; 3705 if(type_ == 0){ 3706 *block_data_pos = unpred_data[unpredictable_count ++]; 3707 } 3708 else{ 3709 pred = block_data_pos[- dim0_offset]; 3710 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3711 } 3712 index ++; 3713 block_data_pos ++; 3714 } 3715 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3716 type_ = type[index]; 3717 if(type_ == 0){ 3718 *block_data_pos = unpred_data[unpredictable_count ++]; 3719 } 3720 else{ 3721 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 3722 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3723 } 3724 index ++; 3725 block_data_pos ++; 3726 } 3727 block_data_pos += dim1_offset - current_blockcount_z; 3728 } 3729 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3730 { 3731 // kk == 0 3732 type_ = type[index]; 3733 if(type_ == 0){ 3734 *block_data_pos = unpred_data[unpredictable_count ++]; 3735 } 3736 else{ 3737 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 3738 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3739 } 3740 index ++; 3741 block_data_pos ++; 3742 } 3743 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3744 type_ = type[index]; 3745 if(type_ == 0){ 3746 *block_data_pos = unpred_data[unpredictable_count ++]; 3747 } 3748 else{ 3749 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3750 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3751 } 3752 index ++; 3753 block_data_pos ++; 3754 } 3755 block_data_pos += dim1_offset - current_blockcount_z; 3756 } 3757 block_data_pos += 
dim0_offset - current_blockcount_y * dim1_offset; 3758 } 3759 cur_unpred_count = unpredictable_count; 3760 } 3761 else{ 3762 // decompress by regression 3763 { 3764 //restore regression coefficients 3765 double pred; 3766 int type_; 3767 for(int e=0; e<4; e++){ 3768 type_ = coeff_type[e][coeff_index]; 3769 if (type_ != 0){ 3770 pred = last_coefficients[e]; 3771 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3772 } 3773 else{ 3774 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3775 coeff_unpred_data_count[e] ++; 3776 } 3777 } 3778 coeff_index ++; 3779 } 3780 { 3781 double * block_data_pos = data_pos; 3782 double pred; 3783 int type_; 3784 size_t index = 0; 3785 size_t unpredictable_count = 0; 3786 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3787 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3788 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3789 type_ = type[index]; 3790 if (type_ != 0){ 3791 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3792 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3793 } 3794 else{ 3795 *block_data_pos = unpred_data[unpredictable_count ++]; 3796 } 3797 index ++; 3798 block_data_pos ++; 3799 } 3800 block_data_pos += dim1_offset - current_blockcount_z; 3801 } 3802 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3803 } 3804 cur_unpred_count = unpredictable_count; 3805 } 3806 } 3807 indicator_pos ++; 3808 type += current_block_elements; 3809 unpred_data += cur_unpred_count; 3810 } // end k == 0 3811 // i == 0 j == 0 k != 0 3812 for(size_t k=1; k<num_z; k++){ 3813 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3814 data_pos = *data + offset_z; 3815 3816 current_blockcount_x = early_blockcount_x; 3817 current_blockcount_y = early_blockcount_y; 3818 current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; 3819 3820 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3821 if(*indicator_pos){ 3822 // decompress by SZ 3823 double * block_data_pos = data_pos; 3824 double pred; 3825 size_t index = 0; 3826 int type_; 3827 size_t unpredictable_count = 0; 3828 // ii == 0 3829 { 3830 // jj == 0 3831 { 3832 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3833 type_ = type[index]; 3834 if(type_ == 0){ 3835 *block_data_pos = unpred_data[unpredictable_count ++]; 3836 } 3837 else{ 3838 pred = block_data_pos[- 1]; 3839 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3840 } 3841 index ++; 3842 block_data_pos ++; 3843 } 3844 block_data_pos += dim1_offset - current_blockcount_z; 3845 } 3846 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3847 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3848 type_ = type[index]; 3849 if(type_ == 0){ 3850 *block_data_pos = unpred_data[unpredictable_count ++]; 3851 } 3852 else{ 3853 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3854 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3855 } 3856 index ++; 3857 block_data_pos ++; 3858 } 3859 block_data_pos += dim1_offset - current_blockcount_z; 3860 } 3861 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3862 } 3863 for(size_t ii=1; ii<current_blockcount_x; ii++){ 3864 // jj == 0 3865 { 3866 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3867 type_ = type[index]; 3868 if(type_ == 0){ 3869 *block_data_pos = unpred_data[unpredictable_count ++]; 3870 } 3871 else{ 3872 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 3873 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3874 } 3875 index ++; 3876 block_data_pos ++; 3877 } 3878 block_data_pos += dim1_offset - current_blockcount_z; 3879 } 3880 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3881 
for(size_t kk=0; kk<current_blockcount_z; kk++){ 3882 type_ = type[index]; 3883 if(type_ == 0){ 3884 *block_data_pos = unpred_data[unpredictable_count ++]; 3885 } 3886 else{ 3887 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3888 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3889 } 3890 index ++; 3891 block_data_pos ++; 3892 } 3893 block_data_pos += dim1_offset - current_blockcount_z; 3894 } 3895 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3896 } 3897 cur_unpred_count = unpredictable_count; 3898 } 3899 else{ 3900 // decompress by regression 3901 { 3902 //restore regression coefficients 3903 double pred; 3904 int type_; 3905 for(int e=0; e<4; e++){ 3906 type_ = coeff_type[e][coeff_index]; 3907 if (type_ != 0){ 3908 pred = last_coefficients[e]; 3909 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3910 } 3911 else{ 3912 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3913 coeff_unpred_data_count[e] ++; 3914 } 3915 } 3916 coeff_index ++; 3917 } 3918 { 3919 double * block_data_pos = data_pos; 3920 double pred; 3921 int type_; 3922 size_t index = 0; 3923 size_t unpredictable_count = 0; 3924 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3925 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3926 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3927 type_ = type[index]; 3928 if (type_ != 0){ 3929 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3930 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3931 } 3932 else{ 3933 *block_data_pos = unpred_data[unpredictable_count ++]; 3934 } 3935 index ++; 3936 block_data_pos ++; 3937 } 3938 block_data_pos += dim1_offset - 
current_blockcount_z; 3939 } 3940 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3941 } 3942 cur_unpred_count = unpredictable_count; 3943 } 3944 } 3945 indicator_pos ++; 3946 type += current_block_elements; 3947 unpred_data += cur_unpred_count; 3948 } 3949 }// end j==0 3950 for(size_t j=1; j<num_y; j++){ 3951 // k == 0 3952 { 3953 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3954 data_pos = *data + offset_y * dim1_offset; 3955 3956 current_blockcount_x = early_blockcount_x; 3957 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3958 current_blockcount_z = early_blockcount_z; 3959 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3960 if(*indicator_pos){ 3961 // decompress by SZ 3962 double * block_data_pos = data_pos; 3963 double pred; 3964 size_t index = 0; 3965 int type_; 3966 size_t unpredictable_count = 0; 3967 // ii == 0 3968 { 3969 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3970 { 3971 // kk == 0 3972 type_ = type[index]; 3973 if(type_ == 0){ 3974 *block_data_pos = unpred_data[unpredictable_count ++]; 3975 } 3976 else{ 3977 pred = block_data_pos[- dim1_offset]; 3978 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3979 } 3980 index ++; 3981 block_data_pos ++; 3982 } 3983 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3984 type_ = type[index]; 3985 if(type_ == 0){ 3986 *block_data_pos = unpred_data[unpredictable_count ++]; 3987 } 3988 else{ 3989 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3990 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3991 } 3992 index ++; 3993 block_data_pos ++; 3994 } 3995 block_data_pos += dim1_offset - current_blockcount_z; 3996 } 3997 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3998 } 3999 for(size_t ii=1; ii<current_blockcount_x; ii++){ 4000 for(size_t jj=0; 
jj<current_blockcount_y; jj++){ 4001 { 4002 // kk == 0 4003 type_ = type[index]; 4004 if(type_ == 0){ 4005 *block_data_pos = unpred_data[unpredictable_count ++]; 4006 } 4007 else{ 4008 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 4009 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4010 } 4011 index ++; 4012 block_data_pos ++; 4013 } 4014 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4015 type_ = type[index]; 4016 if(type_ == 0){ 4017 *block_data_pos = unpred_data[unpredictable_count ++]; 4018 } 4019 else{ 4020 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4021 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4022 } 4023 index ++; 4024 block_data_pos ++; 4025 } 4026 block_data_pos += dim1_offset - current_blockcount_z; 4027 } 4028 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4029 } 4030 cur_unpred_count = unpredictable_count; 4031 } 4032 else{ 4033 // decompress by regression 4034 { 4035 //restore regression coefficients 4036 double pred; 4037 int type_; 4038 for(int e=0; e<4; e++){ 4039 type_ = coeff_type[e][coeff_index]; 4040 if (type_ != 0){ 4041 pred = last_coefficients[e]; 4042 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4043 } 4044 else{ 4045 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4046 coeff_unpred_data_count[e] ++; 4047 } 4048 } 4049 coeff_index ++; 4050 } 4051 { 4052 double * block_data_pos = data_pos; 4053 double pred; 4054 int type_; 4055 size_t index = 0; 4056 size_t unpredictable_count = 0; 4057 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4058 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4059 for(size_t kk=0; kk<current_blockcount_z; 
kk++){ 4060 type_ = type[index]; 4061 if (type_ != 0){ 4062 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4063 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4064 } 4065 else{ 4066 *block_data_pos = unpred_data[unpredictable_count ++]; 4067 } 4068 index ++; 4069 block_data_pos ++; 4070 } 4071 block_data_pos += dim1_offset - current_blockcount_z; 4072 } 4073 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4074 } 4075 cur_unpred_count = unpredictable_count; 4076 } 4077 } 4078 indicator_pos ++; 4079 type += current_block_elements; 4080 unpred_data += cur_unpred_count; 4081 } // end k == 0 4082 for(size_t k=1; k<num_z; k++){ 4083 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4084 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4085 data_pos = *data + offset_y * dim1_offset + offset_z; 4086 4087 current_blockcount_x = early_blockcount_x; 4088 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4089 current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; 4090 4091 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4092 if(*indicator_pos){ 4093 // decompress by SZ 4094 double * block_data_pos = data_pos; 4095 double pred; 4096 size_t index = 0; 4097 int type_; 4098 size_t unpredictable_count = 0; 4099 // ii == 0 4100 { 4101 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4102 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4103 type_ = type[index]; 4104 if(type_ == 0){ 4105 *block_data_pos = unpred_data[unpredictable_count ++]; 4106 } 4107 else{ 4108 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 4109 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4110 } 4111 index ++; 4112 block_data_pos ++; 4113 } 4114 block_data_pos += dim1_offset - current_blockcount_z; 4115 } 4116 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4117 } 4118 for(size_t ii=1; ii<current_blockcount_x; ii++){ 4119 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4120 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4121 type_ = type[index]; 4122 if(type_ == 0){ 4123 *block_data_pos = unpred_data[unpredictable_count ++]; 4124 } 4125 else{ 4126 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4127 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4128 } 4129 index ++; 4130 block_data_pos ++; 4131 } 4132 block_data_pos += dim1_offset - current_blockcount_z; 4133 } 4134 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4135 } 4136 cur_unpred_count = unpredictable_count; 4137 } 4138 else{ 4139 // decompress by regression 4140 { 4141 //restore regression coefficients 4142 double pred; 4143 int type_; 4144 for(int e=0; e<4; e++){ 4145 
type_ = coeff_type[e][coeff_index]; 4146 if (type_ != 0){ 4147 pred = last_coefficients[e]; 4148 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4149 } 4150 else{ 4151 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4152 coeff_unpred_data_count[e] ++; 4153 } 4154 } 4155 coeff_index ++; 4156 } 4157 { 4158 double * block_data_pos = data_pos; 4159 double pred; 4160 int type_; 4161 size_t index = 0; 4162 size_t unpredictable_count = 0; 4163 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4164 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4165 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4166 type_ = type[index]; 4167 if (type_ != 0){ 4168 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4169 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4170 } 4171 else{ 4172 *block_data_pos = unpred_data[unpredictable_count ++]; 4173 } 4174 index ++; 4175 block_data_pos ++; 4176 } 4177 block_data_pos += dim1_offset - current_blockcount_z; 4178 } 4179 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4180 } 4181 cur_unpred_count = unpredictable_count; 4182 } 4183 } 4184 indicator_pos ++; 4185 type += current_block_elements; 4186 unpred_data += cur_unpred_count; 4187 } 4188 } 4189 } // end i==0 4190 for(size_t i=1; i<num_x; i++){ 4191 // j == 0 4192 { 4193 // k == 0 4194 { 4195 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4196 data_pos = *data + offset_x * dim0_offset; 4197 4198 current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; 4199 current_blockcount_y = early_blockcount_y; 4200 current_blockcount_z = early_blockcount_z; 4201 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4202 if(*indicator_pos){ 4203 // decompress by SZ 4204 double * block_data_pos = data_pos; 4205 double pred; 4206 size_t index = 0; 4207 int type_; 4208 size_t unpredictable_count = 0; 4209 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4210 // jj == 0 4211 { 4212 { 4213 // kk == 0 4214 type_ = type[index]; 4215 if(type_ == 0){ 4216 *block_data_pos = unpred_data[unpredictable_count ++]; 4217 } 4218 else{ 4219 pred = block_data_pos[- dim0_offset]; 4220 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4221 } 4222 index ++; 4223 block_data_pos ++; 4224 } 4225 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4226 type_ = type[index]; 4227 if(type_ == 0){ 4228 *block_data_pos = unpred_data[unpredictable_count ++]; 4229 } 4230 else{ 4231 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 4232 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4233 } 4234 index ++; 4235 block_data_pos ++; 4236 } 4237 block_data_pos += dim1_offset - current_blockcount_z; 4238 } 4239 for(size_t jj=1; jj<current_blockcount_y; jj++){ 4240 { 4241 // kk == 0 4242 type_ = type[index]; 4243 if(type_ == 0){ 4244 *block_data_pos = unpred_data[unpredictable_count ++]; 4245 } 4246 else{ 4247 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 4248 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4249 } 4250 index ++; 4251 block_data_pos ++; 4252 } 4253 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4254 type_ = type[index]; 4255 if(type_ == 0){ 4256 *block_data_pos = unpred_data[unpredictable_count ++]; 4257 } 4258 else{ 4259 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- 
dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4260 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4261 } 4262 index ++; 4263 block_data_pos ++; 4264 } 4265 block_data_pos += dim1_offset - current_blockcount_z; 4266 } 4267 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4268 } 4269 cur_unpred_count = unpredictable_count; 4270 } 4271 else{ 4272 // decompress by regression 4273 { 4274 //restore regression coefficients 4275 double pred; 4276 int type_; 4277 for(int e=0; e<4; e++){ 4278 type_ = coeff_type[e][coeff_index]; 4279 if (type_ != 0){ 4280 pred = last_coefficients[e]; 4281 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4282 } 4283 else{ 4284 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4285 coeff_unpred_data_count[e] ++; 4286 } 4287 } 4288 coeff_index ++; 4289 } 4290 { 4291 double * block_data_pos = data_pos; 4292 double pred; 4293 int type_; 4294 size_t index = 0; 4295 size_t unpredictable_count = 0; 4296 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4297 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4298 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4299 type_ = type[index]; 4300 if (type_ != 0){ 4301 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4302 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4303 } 4304 else{ 4305 *block_data_pos = unpred_data[unpredictable_count ++]; 4306 } 4307 index ++; 4308 block_data_pos ++; 4309 } 4310 block_data_pos += dim1_offset - current_blockcount_z; 4311 } 4312 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4313 } 4314 cur_unpred_count = unpredictable_count; 4315 } 4316 } 4317 indicator_pos ++; 4318 type += current_block_elements; 4319 unpred_data += cur_unpred_count; 4320 } // 
end k == 0 4321 for(size_t k=1; k<num_z; k++){ 4322 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4323 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4324 data_pos = *data + offset_x * dim0_offset + offset_z; 4325 4326 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4327 current_blockcount_y = early_blockcount_y; 4328 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4329 4330 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4331 if(*indicator_pos){ 4332 // decompress by SZ 4333 double * block_data_pos = data_pos; 4334 double pred; 4335 size_t index = 0; 4336 int type_; 4337 size_t unpredictable_count = 0; 4338 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4339 // jj == 0 4340 { 4341 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4342 type_ = type[index]; 4343 if(type_ == 0){ 4344 *block_data_pos = unpred_data[unpredictable_count ++]; 4345 } 4346 else{ 4347 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 4348 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4349 } 4350 index ++; 4351 block_data_pos ++; 4352 } 4353 block_data_pos += dim1_offset - current_blockcount_z; 4354 } 4355 for(size_t jj=1; jj<current_blockcount_y; jj++){ 4356 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4357 type_ = type[index]; 4358 if(type_ == 0){ 4359 *block_data_pos = unpred_data[unpredictable_count ++]; 4360 } 4361 else{ 4362 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4363 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4364 } 4365 index ++; 4366 block_data_pos 
++; 4367 } 4368 block_data_pos += dim1_offset - current_blockcount_z; 4369 } 4370 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4371 } 4372 cur_unpred_count = unpredictable_count; 4373 } 4374 else{ 4375 // decompress by regression 4376 { 4377 //restore regression coefficients 4378 double pred; 4379 int type_; 4380 for(int e=0; e<4; e++){ 4381 type_ = coeff_type[e][coeff_index]; 4382 if (type_ != 0){ 4383 pred = last_coefficients[e]; 4384 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4385 } 4386 else{ 4387 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4388 coeff_unpred_data_count[e] ++; 4389 } 4390 } 4391 coeff_index ++; 4392 } 4393 { 4394 double * block_data_pos = data_pos; 4395 double pred; 4396 int type_; 4397 size_t index = 0; 4398 size_t unpredictable_count = 0; 4399 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4400 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4401 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4402 type_ = type[index]; 4403 if (type_ != 0){ 4404 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4405 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4406 } 4407 else{ 4408 *block_data_pos = unpred_data[unpredictable_count ++]; 4409 } 4410 index ++; 4411 block_data_pos ++; 4412 } 4413 block_data_pos += dim1_offset - current_blockcount_z; 4414 } 4415 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4416 } 4417 cur_unpred_count = unpredictable_count; 4418 } 4419 } 4420 indicator_pos ++; 4421 type += current_block_elements; 4422 unpred_data += cur_unpred_count; 4423 } 4424 }// end j = 0 4425 for(size_t j=1; j<num_y; j++){ 4426 // k == 0 4427 { 4428 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4429 offset_y = (j < split_index_y) ? 
j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4430 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; 4431 4432 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4433 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4434 current_blockcount_z = early_blockcount_z; 4435 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4436 if(*indicator_pos){ 4437 // decompress by SZ 4438 double * block_data_pos = data_pos; 4439 double pred; 4440 size_t index = 0; 4441 int type_; 4442 size_t unpredictable_count = 0; 4443 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4444 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4445 { 4446 // kk == 0 4447 type_ = type[index]; 4448 if(type_ == 0){ 4449 *block_data_pos = unpred_data[unpredictable_count ++]; 4450 } 4451 else{ 4452 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 4453 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4454 } 4455 index ++; 4456 block_data_pos ++; 4457 } 4458 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4459 type_ = type[index]; 4460 if(type_ == 0){ 4461 *block_data_pos = unpred_data[unpredictable_count ++]; 4462 } 4463 else{ 4464 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4465 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4466 } 4467 index ++; 4468 block_data_pos ++; 4469 } 4470 block_data_pos += dim1_offset - current_blockcount_z; 4471 } 4472 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4473 } 4474 cur_unpred_count = unpredictable_count; 4475 } 4476 else{ 4477 // decompress by regression 4478 { 4479 //restore 
regression coefficients 4480 double pred; 4481 int type_; 4482 for(int e=0; e<4; e++){ 4483 type_ = coeff_type[e][coeff_index]; 4484 if (type_ != 0){ 4485 pred = last_coefficients[e]; 4486 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4487 } 4488 else{ 4489 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4490 coeff_unpred_data_count[e] ++; 4491 } 4492 } 4493 coeff_index ++; 4494 } 4495 { 4496 double * block_data_pos = data_pos; 4497 double pred; 4498 int type_; 4499 size_t index = 0; 4500 size_t unpredictable_count = 0; 4501 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4502 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4503 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4504 type_ = type[index]; 4505 if (type_ != 0){ 4506 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4507 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4508 } 4509 else{ 4510 *block_data_pos = unpred_data[unpredictable_count ++]; 4511 } 4512 index ++; 4513 block_data_pos ++; 4514 } 4515 block_data_pos += dim1_offset - current_blockcount_z; 4516 } 4517 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4518 } 4519 cur_unpred_count = unpredictable_count; 4520 } 4521 } 4522 indicator_pos ++; 4523 type += current_block_elements; 4524 unpred_data += cur_unpred_count; 4525 } // end k == 0 4526 for(size_t k=1; k<num_z; k++){ 4527 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4528 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4529 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4530 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 4531 4532 current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; 4533 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4534 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4535 4536 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4537 if(*indicator_pos){ 4538 // decompress by SZ 4539 double * block_data_pos = data_pos; 4540 double pred; 4541 size_t index = 0; 4542 int type_; 4543 size_t unpredictable_count = 0; 4544 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4545 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4546 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4547 type_ = type[index]; 4548 if(type_ == 0){ 4549 *block_data_pos = unpred_data[unpredictable_count ++]; 4550 } 4551 else{ 4552 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4553 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4554 } 4555 index ++; 4556 block_data_pos ++; 4557 } 4558 block_data_pos += dim1_offset - current_blockcount_z; 4559 } 4560 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4561 } 4562 cur_unpred_count = unpredictable_count; 4563 } 4564 else{ 4565 // decompress by regression 4566 { 4567 //restore regression coefficients 4568 double pred; 4569 int type_; 4570 for(int e=0; e<4; e++){ 4571 type_ = coeff_type[e][coeff_index]; 4572 if (type_ != 0){ 4573 pred = last_coefficients[e]; 4574 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4575 } 4576 else{ 4577 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4578 coeff_unpred_data_count[e] ++; 4579 } 4580 } 4581 coeff_index ++; 4582 } 4583 { 4584 double * block_data_pos = data_pos; 4585 double pred; 4586 int type_; 4587 size_t index 
= 0; 4588 size_t unpredictable_count = 0; 4589 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4590 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4591 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4592 type_ = type[index]; 4593 if (type_ != 0){ 4594 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4595 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4596 } 4597 else{ 4598 *block_data_pos = unpred_data[unpredictable_count ++]; 4599 } 4600 index ++; 4601 block_data_pos ++; 4602 } 4603 block_data_pos += dim1_offset - current_blockcount_z; 4604 } 4605 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4606 } 4607 cur_unpred_count = unpredictable_count; 4608 } 4609 } 4610 indicator_pos ++; 4611 type += current_block_elements; 4612 unpred_data += cur_unpred_count; 4613 } 4614 } 4615 } 4616 } 4617 4618 free(coeff_result_type); 4619 4620 free(indicator); 4621 free(result_type); 4622 } -
TabularUnified thirdparty/SZ/sz/src/szd_double_pwr.c ¶
r2c47b73 r9ee2ce3 14 14 #include "sz.h" 15 15 #include "Huffman.h" 16 #include "utility.h" 16 17 //#include "rw.h" 17 18 … … 1348 1349 free(groupID); 1349 1350 } 1351 1352 void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps) { 1353 1354 decompressDataSeries_double_1D(data, dataSeriesLength, tdps); 1355 double threshold = tdps->minLogValue; 1356 if(tdps->pwrErrBoundBytes_size > 0){ 1357 unsigned char * signs; 1358 sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); 1359 1360 for(size_t i=0; i<dataSeriesLength; i++){ 1361 if((*data)[i] < threshold) (*data)[i] = 0; 1362 else (*data)[i] = exp2((*data)[i]); 1363 if(signs[i]) (*data)[i] = -((*data)[i]); 1364 } 1365 free(signs); 1366 } 1367 else{ 1368 for(size_t i=0; i<dataSeriesLength; i++){ 1369 if((*data)[i] < threshold) (*data)[i] = 0; 1370 else (*data)[i] = exp2((*data)[i]); 1371 } 1372 } 1373 1374 } 1375 1376 void decompressDataSeries_double_2D_pwr_pre_log(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps) { 1377 1378 size_t dataSeriesLength = r1 * r2; 1379 decompressDataSeries_double_2D(data, r1, r2, tdps); 1380 double threshold = tdps->minLogValue; 1381 if(tdps->pwrErrBoundBytes_size > 0){ 1382 unsigned char * signs; 1383 sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); 1384 1385 for(size_t i=0; i<dataSeriesLength; i++){ 1386 if((*data)[i] < threshold) (*data)[i] = 0; 1387 else (*data)[i] = exp2((*data)[i]); 1388 if(signs[i]) (*data)[i] = -((*data)[i]); 1389 } 1390 free(signs); 1391 } 1392 else{ 1393 for(size_t i=0; i<dataSeriesLength; i++){ 1394 if((*data)[i] < threshold) (*data)[i] = 0; 1395 else (*data)[i] = exp2((*data)[i]); 1396 } 1397 } 1398 } 1399 1400 void decompressDataSeries_double_3D_pwr_pre_log(double** data, size_t r1, size_t r2, size_t r3, 
TightDataPointStorageD* tdps) { 1401 1402 size_t dataSeriesLength = r1 * r2 * r3; 1403 decompressDataSeries_double_3D(data, r1, r2, r3, tdps); 1404 double threshold = tdps->minLogValue; 1405 if(tdps->pwrErrBoundBytes_size > 0){ 1406 unsigned char * signs; 1407 sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); 1408 1409 for(size_t i=0; i<dataSeriesLength; i++){ 1410 if((*data)[i] < threshold) (*data)[i] = 0; 1411 else (*data)[i] = exp2((*data)[i]); 1412 if(signs[i]) (*data)[i] = -((*data)[i]); 1413 } 1414 free(signs); 1415 } 1416 else{ 1417 for(size_t i=0; i<dataSeriesLength; i++){ 1418 if((*data)[i] < threshold) (*data)[i] = 0; 1419 else (*data)[i] = exp2((*data)[i]); 1420 } 1421 } 1422 } 1423 1350 1424 #pragma GCC diagnostic pop -
TabularUnified thirdparty/SZ/sz/src/szd_float.c ¶
r2c47b73 r9ee2ce3 1 1 /** 2 2 * @file szd_float.c 3 * @author Sheng Di and Dingwen Tao4 * @date Aug, 201 63 * @author Sheng Di, Dingwen Tao, Xin Liang 4 * @date Aug, 2018 5 5 * @brief 6 6 * (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory. … … 17 17 #include "szd_float_pwr.h" 18 18 #include "szd_float_ts.h" 19 #include "utility.h" 19 20 20 21 /** … … 36 37 if(cmpSize!=8+4+MetaDataByteLength && cmpSize!=8+8+MetaDataByteLength) //4,8 means two posibilities of SZ_SIZE_TYPE 37 38 { 38 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);39 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 40 if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION) 40 41 { 41 if( isZlib)42 if(confparams_dec->losslessCompressor!=-1) 42 43 confparams_dec->szMode = SZ_BEST_COMPRESSION; 43 44 else … … 54 55 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 55 56 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 56 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize57 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 57 58 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 58 59 //memcpy(szTmpBytes, tmpBytes, tmpSize); … … 89 90 } 90 91 } 91 else if (dim == 1) 92 getSnapshotData_float_1D(newData,r1,tdps, errBoundMode); 93 else 94 if (dim == 2) 95 getSnapshotData_float_2D(newData,r2,r1,tdps, errBoundMode); 96 else 97 if (dim == 3) 98 
getSnapshotData_float_3D(newData,r3,r2,r1,tdps, errBoundMode); 99 else 100 if (dim == 4) 101 getSnapshotData_float_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); 102 else 92 else 103 93 { 104 printf("Error: currently support only at most 4 dimensions!\n"); 105 status = SZ_DERR; 94 if(tdps->raBytes_size > 0) //v2.0 95 { 96 if (dim == 1) 97 getSnapshotData_float_1D(newData,r1,tdps, errBoundMode); 98 else if(dim == 2) 99 decompressDataSeries_float_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes); 100 else if(dim == 3) 101 decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes); 102 else if(dim == 4) 103 decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes); 104 else 105 { 106 printf("Error: currently support only at most 4 dimensions!\n"); 107 status = SZ_DERR; 108 } 109 } 110 else //1.4.13 111 { 112 if (dim == 1) 113 getSnapshotData_float_1D(newData,r1,tdps, errBoundMode); 114 else if (dim == 2) 115 getSnapshotData_float_2D(newData,r2,r1,tdps, errBoundMode); 116 else if (dim == 3) 117 getSnapshotData_float_3D(newData,r3,r2,r1,tdps, errBoundMode); 118 else if (dim == 4) 119 getSnapshotData_float_4D(newData,r4,r3,r2,r1,tdps, errBoundMode); 120 else 121 { 122 printf("Error: currently support only at most 4 dimensions!\n"); 123 status = SZ_DERR; 124 } 125 } 106 126 } 107 127 free_TightDataPointStorageF2(tdps); … … 1652 1672 else 1653 1673 { 1654 //decompressDataSeries_float_1D_pwr(data, dataSeriesLength, tdps);1655 decompressDataSeries_float_1D_pwrgroup(data, dataSeriesLength, tdps);1674 decompressDataSeries_float_1D_pwr_pre_log(data, dataSeriesLength, tdps); 1675 //decompressDataSeries_float_1D_pwrgroup(data, dataSeriesLength, tdps); 1656 1676 } 1657 1677 return; … … 1676 1696 decompressDataSeries_float_1D(&decmpData, dataSeriesLength, tdps); 1677 1697 else 1678 decompressDataSeries_float_1D_pwr(&decmpData, dataSeriesLength, tdps); 1698 
//decompressDataSeries_float_1D_pwr(&decmpData, dataSeriesLength, tdps); 1699 decompressDataSeries_float_1D_pwr_pre_log(&decmpData, dataSeriesLength, tdps); 1679 1700 // insert the decompressed data 1680 1701 size_t k = 0; … … 1717 1738 else 1718 1739 { 1719 decompressDataSeries_float_2D_pwr(data, r1, r2, tdps); 1740 //decompressDataSeries_float_2D_pwr(data, r1, r2, tdps); 1741 decompressDataSeries_float_2D_pwr_pre_log(data, r1, r2, tdps); 1720 1742 } 1721 1743 … … 1741 1763 decompressDataSeries_float_2D(&decmpData, r1, r2, tdps); 1742 1764 else 1743 decompressDataSeries_float_2D_pwr(&decmpData, r1, r2, tdps); 1765 //decompressDataSeries_float_2D_pwr(&decmpData, r1, r2, tdps); 1766 decompressDataSeries_float_2D_pwr_pre_log(&decmpData, r1, r2, tdps); 1744 1767 // insert the decompressed data 1745 1768 size_t k = 0; … … 1774 1797 decompressDataSeries_float_3D(data, r1, r2, r3, tdps); 1775 1798 else 1776 decompressDataSeries_float_1D_ts(data, r1*r2*r3, multisteps, tdps);1799 decompressDataSeries_float_1D_ts(data, dataSeriesLength, multisteps, tdps); 1777 1800 } 1778 1801 else … … 1782 1805 else 1783 1806 { 1784 decompressDataSeries_float_3D_pwr(data, r1, r2, r3, tdps); 1807 //decompressDataSeries_float_3D_pwr(data, r1, r2, r3, tdps); 1808 decompressDataSeries_float_3D_pwr_pre_log(data, r1, r2, r3, tdps); 1785 1809 } 1786 1810 … … 1806 1830 decompressDataSeries_float_3D(&decmpData, r1, r2, r3, tdps); 1807 1831 else 1808 decompressDataSeries_float_3D_pwr(&decmpData, r1, r2, r3, tdps); 1832 //decompressDataSeries_float_3D_pwr(&decmpData, r1, r2, r3, tdps); 1833 decompressDataSeries_float_3D_pwr_pre_log(&decmpData, r1, r2, r3, tdps); 1809 1834 // insert the decompressed data 1810 1835 size_t k = 0; … … 1847 1872 else 1848 1873 { 1849 decompressDataSeries_float_3D_pwr(data, r1*r2, r3, r4, tdps); 1874 //decompressDataSeries_float_3D_pwr(data, r1*r2, r3, r4, tdps); 1875 decompressDataSeries_float_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps); 1850 1876 //ToDO 1851 1877 
//decompressDataSeries_float_4D_pwr(data, r1, r2, r3, r4, tdps); … … 1869 1895 decompressDataSeries_float_4D(&decmpData, r1, r2, r3, r4, tdps); 1870 1896 else 1871 decompressDataSeries_float_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); 1897 //decompressDataSeries_float_3D_pwr(&decmpData, r1*r2, r3, r4, tdps); 1898 decompressDataSeries_float_3D_pwr_pre_log(&decmpData, r1*r2, r3, r4, tdps); 1872 1899 //ToDO 1873 1900 //decompressDataSeries_float_4D_pwr(&decompData, r1, r2, r3, r4, tdps); … … 2178 2205 } 2179 2206 2207 void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data){ 2208 2209 size_t dim0_offset = r2; 2210 size_t num_elements = r1 * r2; 2211 2212 *data = (float*)malloc(sizeof(float)*num_elements); 2213 2214 unsigned char * comp_data_pos = comp_data; 2215 2216 size_t block_size = bytesToInt_bigEndian(comp_data_pos); 2217 comp_data_pos += sizeof(int); 2218 // calculate block dims 2219 size_t num_x, num_y; 2220 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 2221 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 2222 2223 size_t split_index_x, split_index_y; 2224 size_t early_blockcount_x, early_blockcount_y; 2225 size_t late_blockcount_x, late_blockcount_y; 2226 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 2227 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 2228 2229 size_t num_blocks = num_x * num_y; 2230 2231 double realPrecision = bytesToDouble(comp_data_pos); 2232 comp_data_pos += sizeof(double); 2233 unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); 2234 comp_data_pos += sizeof(int); 2235 2236 updateQuantizationInfo(intervals); 2237 2238 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 2239 comp_data_pos += sizeof(int); 2240 2241 int stateNum = 2*intervals; 2242 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 2243 2244 int nodeCount = 
bytesToInt_bigEndian(comp_data_pos); 2245 2246 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); 2247 comp_data_pos += sizeof(int) + tree_size; 2248 2249 float mean; 2250 unsigned char use_mean; 2251 memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); 2252 comp_data_pos += sizeof(unsigned char); 2253 memcpy(&mean, comp_data_pos, sizeof(float)); 2254 comp_data_pos += sizeof(float); 2255 size_t reg_count = 0; 2256 2257 unsigned char * indicator; 2258 size_t indicator_bitlength = (num_blocks - 1)/8 + 1; 2259 convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); 2260 comp_data_pos += indicator_bitlength; 2261 for(size_t i=0; i<num_blocks; i++){ 2262 if(!indicator[i]) reg_count ++; 2263 } 2264 //printf("reg_count: %ld\n", reg_count); 2265 2266 int coeff_intvRadius[3]; 2267 int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); 2268 int * coeff_type[3]; 2269 double precision[3]; 2270 float * coeff_unpred_data[3]; 2271 if(reg_count > 0){ 2272 for(int i=0; i<3; i++){ 2273 precision[i] = bytesToDouble(comp_data_pos); 2274 comp_data_pos += sizeof(double); 2275 coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); 2276 comp_data_pos += sizeof(int); 2277 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 2278 comp_data_pos += sizeof(int); 2279 int stateNum = 2*coeff_intvRadius[i]*2; 2280 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 2281 int nodeCount = bytesToInt_bigEndian(comp_data_pos); 2282 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); 2283 comp_data_pos += sizeof(int) + tree_size; 2284 2285 coeff_type[i] = coeff_result_type + i * num_blocks; 2286 size_t typeArray_size = bytesToSize(comp_data_pos); 2287 decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); 2288 comp_data_pos += sizeof(size_t) + typeArray_size; 2289 int coeff_unpred_count = 
bytesToInt_bigEndian(comp_data_pos); 2290 comp_data_pos += sizeof(int); 2291 coeff_unpred_data[i] = (float *) comp_data_pos; 2292 comp_data_pos += coeff_unpred_count * sizeof(float); 2293 SZ_ReleaseHuffman(huffmanTree); 2294 } 2295 } 2296 float last_coefficients[3] = {0.0}; 2297 int coeff_unpred_data_count[3] = {0}; 2298 int coeff_index = 0; 2299 updateQuantizationInfo(intervals); 2300 2301 size_t total_unpred; 2302 memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); 2303 comp_data_pos += sizeof(size_t); 2304 float * unpred_data = (float *) comp_data_pos; 2305 comp_data_pos += total_unpred * sizeof(float); 2306 2307 int * result_type = (int *) malloc(num_elements * sizeof(int)); 2308 decode(comp_data_pos, num_elements, root, result_type); 2309 SZ_ReleaseHuffman(huffmanTree); 2310 2311 int intvRadius = exe_params->intvRadius; 2312 2313 int * type; 2314 2315 float * data_pos = *data; 2316 size_t offset_x, offset_y; 2317 size_t current_blockcount_x, current_blockcount_y; 2318 size_t cur_unpred_count; 2319 2320 unsigned char * indicator_pos = indicator; 2321 if(use_mean){ 2322 type = result_type; 2323 for(size_t i=0; i<num_x; i++){ 2324 for(size_t j=0; j<num_y; j++){ 2325 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 2326 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 2327 data_pos = *data + offset_x * dim0_offset + offset_y; 2328 2329 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 2330 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 2331 2332 size_t current_block_elements = current_blockcount_x * current_blockcount_y; 2333 if(*indicator_pos){ 2334 // decompress by SZ 2335 2336 float * block_data_pos = data_pos; 2337 float pred; 2338 size_t index = 0; 2339 int type_; 2340 // d11 is current data 2341 size_t unpredictable_count = 0; 2342 float d00, d01, d10; 2343 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2344 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2345 type_ = type[index]; 2346 if(type_ == intvRadius){ 2347 *block_data_pos = mean; 2348 } 2349 else if(type_ == 0){ 2350 *block_data_pos = unpred_data[unpredictable_count ++]; 2351 } 2352 else{ 2353 d00 = d01 = d10 = 1; 2354 if(i == 0 && ii == 0){ 2355 d00 = d01 = 0; 2356 } 2357 if(j == 0 && jj == 0){ 2358 d00 = d10 = 0; 2359 } 2360 if(d00){ 2361 d00 = block_data_pos[- dim0_offset - 1]; 2362 } 2363 if(d01){ 2364 d01 = block_data_pos[- dim0_offset]; 2365 } 2366 if(d10){ 2367 d10 = block_data_pos[- 1]; 2368 } 2369 if(type_ < intvRadius) type_ += 1; 2370 pred = d10 + d01 - d00; 2371 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2372 } 2373 index ++; 2374 block_data_pos ++; 2375 } 2376 block_data_pos += dim0_offset - current_blockcount_y; 2377 } 2378 cur_unpred_count = unpredictable_count; 2379 } 2380 else{ 2381 // decompress by regression 2382 { 2383 //restore regression coefficients 2384 float pred; 2385 int type_; 2386 for(int e=0; e<3; e++){ 2387 type_ = coeff_type[e][coeff_index]; 2388 if (type_ != 0){ 2389 pred = last_coefficients[e]; 2390 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2391 } 2392 else{ 2393 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2394 coeff_unpred_data_count[e] ++; 2395 } 2396 } 2397 coeff_index ++; 2398 } 2399 { 2400 float * block_data_pos = data_pos; 2401 float pred; 2402 int type_; 2403 size_t index = 0; 2404 size_t unpredictable_count = 0; 2405 for(size_t ii=0; ii<current_blockcount_x; 
ii++){ 2406 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2407 type_ = type[index]; 2408 if (type_ != 0){ 2409 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; 2410 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2411 } 2412 else{ 2413 *block_data_pos = unpred_data[unpredictable_count ++]; 2414 } 2415 2416 index ++; 2417 block_data_pos ++; 2418 } 2419 block_data_pos += dim0_offset - current_blockcount_y; 2420 } 2421 cur_unpred_count = unpredictable_count; 2422 } 2423 } 2424 2425 type += current_block_elements; 2426 indicator_pos ++; 2427 unpred_data += cur_unpred_count; 2428 } 2429 } 2430 } 2431 else{ 2432 type = result_type; 2433 for(size_t i=0; i<num_x; i++){ 2434 for(size_t j=0; j<num_y; j++){ 2435 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 2436 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 2437 data_pos = *data + offset_x * dim0_offset + offset_y; 2438 2439 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 2440 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 2441 2442 size_t current_block_elements = current_blockcount_x * current_blockcount_y; 2443 if(*indicator_pos){ 2444 // decompress by SZ 2445 2446 float * block_data_pos = data_pos; 2447 float pred; 2448 size_t index = 0; 2449 int type_; 2450 // d11 is current data 2451 size_t unpredictable_count = 0; 2452 float d00, d01, d10; 2453 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2454 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2455 type_ = type[index]; 2456 if(type_ == 0){ 2457 *block_data_pos = unpred_data[unpredictable_count ++]; 2458 } 2459 else{ 2460 d00 = d01 = d10 = 1; 2461 if(i == 0 && ii == 0){ 2462 d00 = d01 = 0; 2463 } 2464 if(j == 0 && jj == 0){ 2465 d00 = d10 = 0; 2466 } 2467 if(d00){ 2468 d00 = block_data_pos[- dim0_offset - 1]; 2469 } 2470 if(d01){ 2471 d01 = block_data_pos[- dim0_offset]; 2472 } 2473 if(d10){ 2474 d10 = block_data_pos[- 1]; 2475 } 2476 pred = d10 + d01 - d00; 2477 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2478 } 2479 index ++; 2480 block_data_pos ++; 2481 } 2482 block_data_pos += dim0_offset - current_blockcount_y; 2483 } 2484 cur_unpred_count = unpredictable_count; 2485 } 2486 else{ 2487 // decompress by regression 2488 { 2489 //restore regression coefficients 2490 float pred; 2491 int type_; 2492 for(int e=0; e<3; e++){ 2493 type_ = coeff_type[e][coeff_index]; 2494 if (type_ != 0){ 2495 pred = last_coefficients[e]; 2496 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2497 } 2498 else{ 2499 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2500 coeff_unpred_data_count[e] ++; 2501 } 2502 } 2503 coeff_index ++; 2504 } 2505 { 2506 float * block_data_pos = data_pos; 2507 float pred; 2508 int type_; 2509 size_t index = 0; 2510 size_t unpredictable_count = 0; 2511 for(size_t ii=0; ii<current_blockcount_x; ii++){ 2512 for(size_t jj=0; jj<current_blockcount_y; jj++){ 2513 type_ = type[index]; 2514 if (type_ != 0){ 
2515 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; 2516 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2517 } 2518 else{ 2519 *block_data_pos = unpred_data[unpredictable_count ++]; 2520 } 2521 index ++; 2522 block_data_pos ++; 2523 } 2524 block_data_pos += dim0_offset - current_blockcount_y; 2525 } 2526 cur_unpred_count = unpredictable_count; 2527 } 2528 } 2529 2530 type += current_block_elements; 2531 indicator_pos ++; 2532 unpred_data += cur_unpred_count; 2533 } 2534 } 2535 } 2536 free(coeff_result_type); 2537 2538 free(indicator); 2539 free(result_type); 2540 } 2541 2542 2543 void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ 2544 2545 size_t dim0_offset = r2 * r3; 2546 size_t dim1_offset = r3; 2547 size_t num_elements = r1 * r2 * r3; 2548 2549 *data = (float*)malloc(sizeof(float)*num_elements); 2550 2551 unsigned char * comp_data_pos = comp_data; 2552 2553 size_t block_size = bytesToInt_bigEndian(comp_data_pos); 2554 comp_data_pos += sizeof(int); 2555 // calculate block dims 2556 size_t num_x, num_y, num_z; 2557 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); 2558 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); 2559 SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size); 2560 2561 size_t split_index_x, split_index_y, split_index_z; 2562 size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; 2563 size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; 2564 SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); 2565 SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); 2566 SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); 2567 2568 size_t num_blocks = num_x * num_y * num_z; 2569 2570 double realPrecision = bytesToDouble(comp_data_pos); 2571 comp_data_pos += sizeof(double); 2572 
unsigned int intervals = bytesToInt_bigEndian(comp_data_pos); 2573 comp_data_pos += sizeof(int); 2574 2575 updateQuantizationInfo(intervals); 2576 2577 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 2578 comp_data_pos += sizeof(int); 2579 2580 int stateNum = 2*intervals; 2581 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 2582 2583 int nodeCount = bytesToInt_bigEndian(comp_data_pos); 2584 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); 2585 comp_data_pos += sizeof(int) + tree_size; 2586 2587 float mean; 2588 unsigned char use_mean; 2589 memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); 2590 comp_data_pos += sizeof(unsigned char); 2591 memcpy(&mean, comp_data_pos, sizeof(float)); 2592 comp_data_pos += sizeof(float); 2593 size_t reg_count = 0; 2594 2595 unsigned char * indicator; 2596 size_t indicator_bitlength = (num_blocks - 1)/8 + 1; 2597 convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); 2598 comp_data_pos += indicator_bitlength; 2599 for(size_t i=0; i<num_blocks; i++){ 2600 if(!indicator[i]) reg_count ++; 2601 } 2602 2603 int coeff_intvRadius[4]; 2604 int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); 2605 int * coeff_type[4]; 2606 double precision[4]; 2607 float * coeff_unpred_data[4]; 2608 if(reg_count > 0){ 2609 for(int i=0; i<4; i++){ 2610 precision[i] = bytesToDouble(comp_data_pos); 2611 comp_data_pos += sizeof(double); 2612 coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos); 2613 comp_data_pos += sizeof(int); 2614 unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); 2615 comp_data_pos += sizeof(int); 2616 int stateNum = 2*coeff_intvRadius[i]*2; 2617 HuffmanTree* huffmanTree = createHuffmanTree(stateNum); 2618 int nodeCount = bytesToInt_bigEndian(comp_data_pos); 2619 node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount); 2620 comp_data_pos += 
sizeof(int) + tree_size; 2621 2622 coeff_type[i] = coeff_result_type + i * num_blocks; 2623 size_t typeArray_size = bytesToSize(comp_data_pos); 2624 decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); 2625 comp_data_pos += sizeof(size_t) + typeArray_size; 2626 int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos); 2627 comp_data_pos += sizeof(int); 2628 coeff_unpred_data[i] = (float *) comp_data_pos; 2629 comp_data_pos += coeff_unpred_count * sizeof(float); 2630 SZ_ReleaseHuffman(huffmanTree); 2631 } 2632 } 2633 float last_coefficients[4] = {0.0}; 2634 int coeff_unpred_data_count[4] = {0}; 2635 int coeff_index = 0; 2636 updateQuantizationInfo(intervals); 2637 2638 size_t total_unpred; 2639 memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); 2640 comp_data_pos += sizeof(size_t); 2641 float * unpred_data = (float *) comp_data_pos; 2642 comp_data_pos += total_unpred * sizeof(float); 2643 2644 int * result_type = (int *) malloc(num_elements * sizeof(int)); 2645 decode(comp_data_pos, num_elements, root, result_type); 2646 SZ_ReleaseHuffman(huffmanTree); 2647 2648 int intvRadius = exe_params->intvRadius; 2649 2650 int * type; 2651 float * data_pos = *data; 2652 size_t offset_x, offset_y, offset_z; 2653 size_t current_blockcount_x, current_blockcount_y, current_blockcount_z; 2654 size_t cur_unpred_count; 2655 unsigned char * indicator_pos = indicator; 2656 if(use_mean){ 2657 // type = result_type; 2658 2659 // for(size_t i=0; i<num_x; i++){ 2660 // for(size_t j=0; j<num_y; j++){ 2661 // for(size_t k=0; k<num_z; k++){ 2662 // offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 2663 // offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 2664 // offset_z = (k < split_index_z) ? 
k * early_blockcount_z : k * late_blockcount_z + split_index_z; 2665 // data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 2666 2667 // current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 2668 // current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 2669 // current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 2670 2671 // // type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset + offset_z * current_blockcount_x * current_blockcount_y; 2672 // // type = result_type + type_offset; 2673 // size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 2674 // // index = i * num_y * num_z + j * num_z + k; 2675 2676 // // printf("i j k: %ld %ld %ld\toffset: %ld %ld %ld\tindicator: %ld\n", i, j, k, offset_x, offset_y, offset_z, indicator[index]); 2677 // if(*indicator_pos){ 2678 // // decompress by SZ 2679 // // cur_unpred_count = decompressDataSeries_float_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data); 2680 // float * block_data_pos = data_pos; 2681 // float pred; 2682 // size_t index = 0; 2683 // int type_; 2684 // // d111 is current data 2685 // size_t unpredictable_count = 0; 2686 // float d000, d001, d010, d011, d100, d101, d110; 2687 // for(size_t ii=0; ii<current_blockcount_x; ii++){ 2688 // for(size_t jj=0; jj<current_blockcount_y; jj++){ 2689 // for(size_t kk=0; kk<current_blockcount_z; kk++){ 2690 // type_ = type[index]; 2691 // if(type_ == intvRadius){ 2692 // *block_data_pos = mean; 2693 // } 2694 // else if(type_ == 0){ 2695 // *block_data_pos = unpred_data[unpredictable_count ++]; 2696 // } 2697 // else{ 2698 // d000 = d001 = d010 = d011 = d100 = d101 = d110 = 1; 2699 // if(i == 0 && ii == 0){ 2700 // d000 = d001 = d010 = d011 = 0; 2701 // } 2702 // if(j == 0 
&& jj == 0){ 2703 // d000 = d001 = d100 = d101 = 0; 2704 // } 2705 // if(k == 0 && kk == 0){ 2706 // d000 = d010 = d100 = d110 = 0; 2707 // } 2708 // if(d000){ 2709 // d000 = block_data_pos[- dim0_offset - dim1_offset - 1]; 2710 // } 2711 // if(d001){ 2712 // d001 = block_data_pos[- dim0_offset - dim1_offset]; 2713 // } 2714 // if(d010){ 2715 // d010 = block_data_pos[- dim0_offset - 1]; 2716 // } 2717 // if(d011){ 2718 // d011 = block_data_pos[- dim0_offset]; 2719 // } 2720 // if(d100){ 2721 // d100 = block_data_pos[- dim1_offset - 1]; 2722 // } 2723 // if(d101){ 2724 // d101 = block_data_pos[- dim1_offset]; 2725 // } 2726 // if(d110){ 2727 // d110 = block_data_pos[- 1]; 2728 // } 2729 // if(type_ < intvRadius) type_ += 1; 2730 // pred = d110 + d101 + d011 - d100 - d010 - d001 + d000; 2731 // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2732 // } 2733 // index ++; 2734 // block_data_pos ++; 2735 // } 2736 // block_data_pos += dim1_offset - current_blockcount_z; 2737 // } 2738 // block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2739 // } 2740 // cur_unpred_count = unpredictable_count; 2741 // } 2742 // else{ 2743 // // decompress by regression 2744 // { 2745 // //restore regression coefficients 2746 // float pred; 2747 // int type_; 2748 // for(int e=0; e<4; e++){ 2749 // // if(i == 0 && j == 0 && k == 19){ 2750 // // printf("~\n"); 2751 // // } 2752 // type_ = coeff_type[e][coeff_index]; 2753 // if (type_ != 0){ 2754 // pred = last_coefficients[e]; 2755 // last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2756 // } 2757 // else{ 2758 // last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2759 // coeff_unpred_data_count[e] ++; 2760 // } 2761 // if(fabs(last_coefficients[e]) > 10000){ 2762 // printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]); 2763 // exit(0); 2764 // } 2765 // } 2766 // 
coeff_index ++; 2767 // } 2768 // { 2769 // float * block_data_pos = data_pos; 2770 // float pred; 2771 // int type_; 2772 // size_t index = 0; 2773 // size_t unpredictable_count = 0; 2774 // for(size_t ii=0; ii<current_blockcount_x; ii++){ 2775 // for(size_t jj=0; jj<current_blockcount_y; jj++){ 2776 // for(size_t kk=0; kk<current_blockcount_z; kk++){ 2777 // if(block_data_pos - (*data) == 19470788){ 2778 // printf("dec stop\n"); 2779 // } 2780 2781 // type_ = type[index]; 2782 // if (type_ != 0){ 2783 // pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 2784 // *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2785 // } 2786 // else{ 2787 // *block_data_pos = unpred_data[unpredictable_count ++]; 2788 // } 2789 // index ++; 2790 // block_data_pos ++; 2791 // } 2792 // block_data_pos += dim1_offset - current_blockcount_z; 2793 // } 2794 // block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2795 // } 2796 // cur_unpred_count = unpredictable_count; 2797 // } 2798 // } 2799 2800 // type += current_block_elements; 2801 // indicator_pos ++; 2802 // unpred_data += cur_unpred_count; 2803 // // decomp_unpred += cur_unpred_count; 2804 // // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data); 2805 // // fflush(stdout); 2806 // } 2807 // } 2808 // } 2809 2810 type = result_type; 2811 // i == 0 2812 { 2813 // j == 0 2814 { 2815 // k == 0 2816 { 2817 data_pos = *data; 2818 2819 current_blockcount_x = early_blockcount_x; 2820 current_blockcount_y = early_blockcount_y; 2821 current_blockcount_z = early_blockcount_z; 2822 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 2823 if(*indicator_pos){ 2824 // decompress by SZ 2825 float * block_data_pos = data_pos; 2826 float pred; 2827 size_t index = 0; 2828 int type_; 2829 size_t unpredictable_count = 0; 2830 // ii == 0 2831 { 2832 // jj == 
0 2833 { 2834 { 2835 // kk == 0 2836 type_ = type[index]; 2837 if(type_ == intvRadius){ 2838 *block_data_pos = mean; 2839 } 2840 else if(type_ == 0){ 2841 *block_data_pos = unpred_data[unpredictable_count ++]; 2842 } 2843 else{ 2844 if(type_ < intvRadius) type_ += 1; 2845 pred = 0; 2846 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2847 } 2848 index ++; 2849 block_data_pos ++; 2850 } 2851 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2852 type_ = type[index]; 2853 if(type_ == intvRadius){ 2854 *block_data_pos = mean; 2855 } 2856 else if(type_ == 0){ 2857 *block_data_pos = unpred_data[unpredictable_count ++]; 2858 } 2859 else{ 2860 if(type_ < intvRadius) type_ += 1; 2861 pred = block_data_pos[- 1]; 2862 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2863 } 2864 index ++; 2865 block_data_pos ++; 2866 } 2867 block_data_pos += dim1_offset - current_blockcount_z; 2868 } 2869 for(size_t jj=1; jj<current_blockcount_y; jj++){ 2870 { 2871 // kk == 0 2872 type_ = type[index]; 2873 if(type_ == intvRadius){ 2874 *block_data_pos = mean; 2875 } 2876 else if(type_ == 0){ 2877 *block_data_pos = unpred_data[unpredictable_count ++]; 2878 } 2879 else{ 2880 if(type_ < intvRadius) type_ += 1; 2881 pred = block_data_pos[- dim1_offset]; 2882 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2883 } 2884 index ++; 2885 block_data_pos ++; 2886 } 2887 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2888 type_ = type[index]; 2889 if(type_ == intvRadius){ 2890 *block_data_pos = mean; 2891 } 2892 else if(type_ == 0){ 2893 *block_data_pos = unpred_data[unpredictable_count ++]; 2894 } 2895 else{ 2896 if(type_ < intvRadius) type_ += 1; 2897 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 2898 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2899 } 2900 index ++; 2901 block_data_pos ++; 2902 } 2903 block_data_pos += dim1_offset - current_blockcount_z; 2904 } 2905 
block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2906 } 2907 for(size_t ii=1; ii<current_blockcount_x; ii++){ 2908 // jj == 0 2909 { 2910 { 2911 // kk == 0 2912 type_ = type[index]; 2913 if(type_ == intvRadius){ 2914 *block_data_pos = mean; 2915 } 2916 else if(type_ == 0){ 2917 *block_data_pos = unpred_data[unpredictable_count ++]; 2918 } 2919 else{ 2920 if(type_ < intvRadius) type_ += 1; 2921 pred = block_data_pos[- dim0_offset]; 2922 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2923 } 2924 index ++; 2925 block_data_pos ++; 2926 } 2927 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2928 type_ = type[index]; 2929 if(type_ == intvRadius){ 2930 *block_data_pos = mean; 2931 } 2932 else if(type_ == 0){ 2933 *block_data_pos = unpred_data[unpredictable_count ++]; 2934 } 2935 else{ 2936 if(type_ < intvRadius) type_ += 1; 2937 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 2938 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2939 } 2940 index ++; 2941 block_data_pos ++; 2942 } 2943 block_data_pos += dim1_offset - current_blockcount_z; 2944 } 2945 for(size_t jj=1; jj<current_blockcount_y; jj++){ 2946 { 2947 // kk == 0 2948 type_ = type[index]; 2949 if(type_ == intvRadius){ 2950 *block_data_pos = mean; 2951 } 2952 else if(type_ == 0){ 2953 *block_data_pos = unpred_data[unpredictable_count ++]; 2954 } 2955 else{ 2956 if(type_ < intvRadius) type_ += 1; 2957 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 2958 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2959 } 2960 index ++; 2961 block_data_pos ++; 2962 } 2963 for(size_t kk=1; kk<current_blockcount_z; kk++){ 2964 type_ = type[index]; 2965 if(type_ == intvRadius){ 2966 *block_data_pos = mean; 2967 } 2968 else if(type_ == 0){ 2969 *block_data_pos = unpred_data[unpredictable_count ++]; 2970 } 2971 else{ 2972 if(type_ < intvRadius) 
type_ += 1; 2973 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 2974 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 2975 } 2976 index ++; 2977 block_data_pos ++; 2978 } 2979 block_data_pos += dim1_offset - current_blockcount_z; 2980 } 2981 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 2982 } 2983 cur_unpred_count = unpredictable_count; 2984 } 2985 else{ 2986 // decompress by regression 2987 { 2988 //restore regression coefficients 2989 float pred; 2990 int type_; 2991 for(int e=0; e<4; e++){ 2992 type_ = coeff_type[e][coeff_index]; 2993 if (type_ != 0){ 2994 pred = last_coefficients[e]; 2995 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 2996 } 2997 else{ 2998 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 2999 coeff_unpred_data_count[e] ++; 3000 } 3001 } 3002 coeff_index ++; 3003 } 3004 { 3005 float * block_data_pos = data_pos; 3006 float pred; 3007 int type_; 3008 size_t index = 0; 3009 size_t unpredictable_count = 0; 3010 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3011 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3012 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3013 type_ = type[index]; 3014 if (type_ != 0){ 3015 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3016 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3017 } 3018 else{ 3019 *block_data_pos = unpred_data[unpredictable_count ++]; 3020 } 3021 index ++; 3022 block_data_pos ++; 3023 } 3024 block_data_pos += dim1_offset - current_blockcount_z; 3025 } 3026 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3027 } 3028 cur_unpred_count = unpredictable_count; 3029 } 3030 } 3031 
indicator_pos ++; 3032 type += current_block_elements; 3033 unpred_data += cur_unpred_count; 3034 } // end k == 0 3035 // i == 0 j == 0 k != 0 3036 for(size_t k=1; k<num_z; k++){ 3037 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3038 data_pos = *data + offset_z; 3039 3040 current_blockcount_x = early_blockcount_x; 3041 current_blockcount_y = early_blockcount_y; 3042 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 3043 3044 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3045 if(*indicator_pos){ 3046 // decompress by SZ 3047 float * block_data_pos = data_pos; 3048 float pred; 3049 size_t index = 0; 3050 int type_; 3051 size_t unpredictable_count = 0; 3052 // ii == 0 3053 { 3054 // jj == 0 3055 { 3056 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3057 type_ = type[index]; 3058 if(type_ == intvRadius){ 3059 *block_data_pos = mean; 3060 } 3061 else if(type_ == 0){ 3062 *block_data_pos = unpred_data[unpredictable_count ++]; 3063 } 3064 else{ 3065 if(type_ < intvRadius) type_ += 1; 3066 pred = block_data_pos[- 1]; 3067 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3068 } 3069 index ++; 3070 block_data_pos ++; 3071 } 3072 block_data_pos += dim1_offset - current_blockcount_z; 3073 } 3074 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3075 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3076 type_ = type[index]; 3077 if(type_ == intvRadius){ 3078 *block_data_pos = mean; 3079 } 3080 else if(type_ == 0){ 3081 *block_data_pos = unpred_data[unpredictable_count ++]; 3082 } 3083 else{ 3084 if(type_ < intvRadius) type_ += 1; 3085 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3086 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3087 } 3088 index ++; 3089 block_data_pos ++; 3090 } 3091 block_data_pos += dim1_offset - current_blockcount_z; 3092 } 3093 
block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3094 } 3095 for(size_t ii=1; ii<current_blockcount_x; ii++){ 3096 // jj == 0 3097 { 3098 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3099 type_ = type[index]; 3100 if(type_ == intvRadius){ 3101 *block_data_pos = mean; 3102 } 3103 else if(type_ == 0){ 3104 *block_data_pos = unpred_data[unpredictable_count ++]; 3105 } 3106 else{ 3107 if(type_ < intvRadius) type_ += 1; 3108 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 3109 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3110 } 3111 index ++; 3112 block_data_pos ++; 3113 } 3114 block_data_pos += dim1_offset - current_blockcount_z; 3115 } 3116 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3117 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3118 type_ = type[index]; 3119 if(type_ == intvRadius){ 3120 *block_data_pos = mean; 3121 } 3122 else if(type_ == 0){ 3123 *block_data_pos = unpred_data[unpredictable_count ++]; 3124 } 3125 else{ 3126 if(type_ < intvRadius) type_ += 1; 3127 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3128 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3129 } 3130 index ++; 3131 block_data_pos ++; 3132 } 3133 block_data_pos += dim1_offset - current_blockcount_z; 3134 } 3135 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3136 } 3137 cur_unpred_count = unpredictable_count; 3138 } 3139 else{ 3140 // decompress by regression 3141 { 3142 //restore regression coefficients 3143 float pred; 3144 int type_; 3145 for(int e=0; e<4; e++){ 3146 type_ = coeff_type[e][coeff_index]; 3147 if (type_ != 0){ 3148 pred = last_coefficients[e]; 3149 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 
3150 } 3151 else{ 3152 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3153 coeff_unpred_data_count[e] ++; 3154 } 3155 } 3156 coeff_index ++; 3157 } 3158 { 3159 float * block_data_pos = data_pos; 3160 float pred; 3161 int type_; 3162 size_t index = 0; 3163 size_t unpredictable_count = 0; 3164 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3165 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3166 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3167 type_ = type[index]; 3168 if (type_ != 0){ 3169 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3170 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3171 } 3172 else{ 3173 *block_data_pos = unpred_data[unpredictable_count ++]; 3174 } 3175 index ++; 3176 block_data_pos ++; 3177 } 3178 block_data_pos += dim1_offset - current_blockcount_z; 3179 } 3180 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3181 } 3182 cur_unpred_count = unpredictable_count; 3183 } 3184 } 3185 indicator_pos ++; 3186 type += current_block_elements; 3187 unpred_data += cur_unpred_count; 3188 } 3189 }// end j==0 3190 for(size_t j=1; j<num_y; j++){ 3191 // k == 0 3192 { 3193 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3194 data_pos = *data + offset_y * dim1_offset; 3195 3196 current_blockcount_x = early_blockcount_x; 3197 current_blockcount_y = (j < split_index_y) ? 
early_blockcount_y : late_blockcount_y; 3198 current_blockcount_z = early_blockcount_z; 3199 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3200 if(*indicator_pos){ 3201 // decompress by SZ 3202 float * block_data_pos = data_pos; 3203 float pred; 3204 size_t index = 0; 3205 int type_; 3206 size_t unpredictable_count = 0; 3207 // ii == 0 3208 { 3209 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3210 { 3211 // kk == 0 3212 type_ = type[index]; 3213 if(type_ == intvRadius){ 3214 *block_data_pos = mean; 3215 } 3216 else if(type_ == 0){ 3217 *block_data_pos = unpred_data[unpredictable_count ++]; 3218 } 3219 else{ 3220 if(type_ < intvRadius) type_ += 1; 3221 pred = block_data_pos[- dim1_offset]; 3222 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3223 } 3224 index ++; 3225 block_data_pos ++; 3226 } 3227 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3228 type_ = type[index]; 3229 if(type_ == intvRadius){ 3230 *block_data_pos = mean; 3231 } 3232 else if(type_ == 0){ 3233 *block_data_pos = unpred_data[unpredictable_count ++]; 3234 } 3235 else{ 3236 if(type_ < intvRadius) type_ += 1; 3237 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3238 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3239 } 3240 index ++; 3241 block_data_pos ++; 3242 } 3243 block_data_pos += dim1_offset - current_blockcount_z; 3244 } 3245 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3246 } 3247 for(size_t ii=1; ii<current_blockcount_x; ii++){ 3248 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3249 { 3250 // kk == 0 3251 type_ = type[index]; 3252 if(type_ == intvRadius){ 3253 *block_data_pos = mean; 3254 } 3255 else if(type_ == 0){ 3256 *block_data_pos = unpred_data[unpredictable_count ++]; 3257 } 3258 else{ 3259 if(type_ < intvRadius) type_ += 1; 3260 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - 
block_data_pos[- dim0_offset - dim1_offset]; 3261 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3262 } 3263 index ++; 3264 block_data_pos ++; 3265 } 3266 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3267 type_ = type[index]; 3268 if(type_ == intvRadius){ 3269 *block_data_pos = mean; 3270 } 3271 else if(type_ == 0){ 3272 *block_data_pos = unpred_data[unpredictable_count ++]; 3273 } 3274 else{ 3275 if(type_ < intvRadius) type_ += 1; 3276 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3277 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3278 } 3279 index ++; 3280 block_data_pos ++; 3281 } 3282 block_data_pos += dim1_offset - current_blockcount_z; 3283 } 3284 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3285 } 3286 cur_unpred_count = unpredictable_count; 3287 } 3288 else{ 3289 // decompress by regression 3290 { 3291 //restore regression coefficients 3292 float pred; 3293 int type_; 3294 for(int e=0; e<4; e++){ 3295 type_ = coeff_type[e][coeff_index]; 3296 if (type_ != 0){ 3297 pred = last_coefficients[e]; 3298 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3299 } 3300 else{ 3301 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3302 coeff_unpred_data_count[e] ++; 3303 } 3304 } 3305 coeff_index ++; 3306 } 3307 { 3308 float * block_data_pos = data_pos; 3309 float pred; 3310 int type_; 3311 size_t index = 0; 3312 size_t unpredictable_count = 0; 3313 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3314 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3315 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3316 type_ = type[index]; 3317 if (type_ != 0){ 3318 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk 
+ last_coefficients[3]; 3319 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3320 } 3321 else{ 3322 *block_data_pos = unpred_data[unpredictable_count ++]; 3323 } 3324 index ++; 3325 block_data_pos ++; 3326 } 3327 block_data_pos += dim1_offset - current_blockcount_z; 3328 } 3329 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3330 } 3331 cur_unpred_count = unpredictable_count; 3332 } 3333 } 3334 indicator_pos ++; 3335 type += current_block_elements; 3336 unpred_data += cur_unpred_count; 3337 } // end k == 0 3338 for(size_t k=1; k<num_z; k++){ 3339 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3340 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3341 data_pos = *data + offset_y * dim1_offset + offset_z; 3342 3343 current_blockcount_x = early_blockcount_x; 3344 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3345 current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; 3346 3347 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3348 if(*indicator_pos){ 3349 // decompress by SZ 3350 float * block_data_pos = data_pos; 3351 float pred; 3352 size_t index = 0; 3353 int type_; 3354 size_t unpredictable_count = 0; 3355 // ii == 0 3356 { 3357 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3358 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3359 type_ = type[index]; 3360 if(type_ == intvRadius){ 3361 *block_data_pos = mean; 3362 } 3363 else if(type_ == 0){ 3364 *block_data_pos = unpred_data[unpredictable_count ++]; 3365 } 3366 else{ 3367 if(type_ < intvRadius) type_ += 1; 3368 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3369 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3370 } 3371 index ++; 3372 block_data_pos ++; 3373 } 3374 block_data_pos += dim1_offset - current_blockcount_z; 3375 } 3376 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3377 } 3378 for(size_t ii=1; ii<current_blockcount_x; ii++){ 3379 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3380 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3381 type_ = type[index]; 3382 if(type_ == intvRadius){ 3383 *block_data_pos = mean; 3384 } 3385 else if(type_ == 0){ 3386 *block_data_pos = unpred_data[unpredictable_count ++]; 3387 } 3388 else{ 3389 if(type_ < intvRadius) type_ += 1; 3390 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3391 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3392 } 3393 index ++; 3394 block_data_pos ++; 3395 } 3396 block_data_pos += dim1_offset - current_blockcount_z; 3397 } 3398 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 
3399 } 3400 cur_unpred_count = unpredictable_count; 3401 } 3402 else{ 3403 // decompress by regression 3404 { 3405 //restore regression coefficients 3406 float pred; 3407 int type_; 3408 for(int e=0; e<4; e++){ 3409 type_ = coeff_type[e][coeff_index]; 3410 if (type_ != 0){ 3411 pred = last_coefficients[e]; 3412 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3413 } 3414 else{ 3415 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3416 coeff_unpred_data_count[e] ++; 3417 } 3418 } 3419 coeff_index ++; 3420 } 3421 { 3422 float * block_data_pos = data_pos; 3423 float pred; 3424 int type_; 3425 size_t index = 0; 3426 size_t unpredictable_count = 0; 3427 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3428 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3429 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3430 type_ = type[index]; 3431 if (type_ != 0){ 3432 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3433 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3434 } 3435 else{ 3436 *block_data_pos = unpred_data[unpredictable_count ++]; 3437 } 3438 index ++; 3439 block_data_pos ++; 3440 } 3441 block_data_pos += dim1_offset - current_blockcount_z; 3442 } 3443 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3444 } 3445 cur_unpred_count = unpredictable_count; 3446 } 3447 } 3448 indicator_pos ++; 3449 type += current_block_elements; 3450 unpred_data += cur_unpred_count; 3451 } 3452 } 3453 } // end i==0 3454 for(size_t i=1; i<num_x; i++){ 3455 // j == 0 3456 { 3457 // k == 0 3458 { 3459 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3460 data_pos = *data + offset_x * dim0_offset; 3461 3462 current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; 3463 current_blockcount_y = early_blockcount_y; 3464 current_blockcount_z = early_blockcount_z; 3465 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3466 if(*indicator_pos){ 3467 // decompress by SZ 3468 float * block_data_pos = data_pos; 3469 float pred; 3470 size_t index = 0; 3471 int type_; 3472 size_t unpredictable_count = 0; 3473 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3474 // jj == 0 3475 { 3476 { 3477 // kk == 0 3478 type_ = type[index]; 3479 if(type_ == intvRadius){ 3480 *block_data_pos = mean; 3481 } 3482 else if(type_ == 0){ 3483 *block_data_pos = unpred_data[unpredictable_count ++]; 3484 } 3485 else{ 3486 if(type_ < intvRadius) type_ += 1; 3487 pred = block_data_pos[- dim0_offset]; 3488 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3489 } 3490 index ++; 3491 block_data_pos ++; 3492 } 3493 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3494 type_ = type[index]; 3495 if(type_ == intvRadius){ 3496 *block_data_pos = mean; 3497 } 3498 else if(type_ == 0){ 3499 *block_data_pos = unpred_data[unpredictable_count ++]; 3500 } 3501 else{ 3502 if(type_ < intvRadius) type_ += 1; 3503 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 3504 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3505 } 3506 index ++; 3507 block_data_pos ++; 3508 } 3509 block_data_pos += dim1_offset - current_blockcount_z; 3510 } 3511 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3512 { 3513 // kk == 0 3514 type_ = type[index]; 3515 if(type_ == intvRadius){ 3516 *block_data_pos = mean; 3517 } 3518 else if(type_ == 0){ 3519 *block_data_pos = unpred_data[unpredictable_count ++]; 3520 } 3521 else{ 3522 if(type_ < intvRadius) type_ += 1; 3523 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 3524 *block_data_pos = pred + 2 * (type_ - intvRadius) 
* realPrecision; 3525 } 3526 index ++; 3527 block_data_pos ++; 3528 } 3529 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3530 type_ = type[index]; 3531 if(type_ == intvRadius){ 3532 *block_data_pos = mean; 3533 } 3534 else if(type_ == 0){ 3535 *block_data_pos = unpred_data[unpredictable_count ++]; 3536 } 3537 else{ 3538 if(type_ < intvRadius) type_ += 1; 3539 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3540 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3541 } 3542 index ++; 3543 block_data_pos ++; 3544 } 3545 block_data_pos += dim1_offset - current_blockcount_z; 3546 } 3547 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3548 } 3549 cur_unpred_count = unpredictable_count; 3550 } 3551 else{ 3552 // decompress by regression 3553 { 3554 //restore regression coefficients 3555 float pred; 3556 int type_; 3557 for(int e=0; e<4; e++){ 3558 type_ = coeff_type[e][coeff_index]; 3559 if (type_ != 0){ 3560 pred = last_coefficients[e]; 3561 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3562 } 3563 else{ 3564 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3565 coeff_unpred_data_count[e] ++; 3566 } 3567 } 3568 coeff_index ++; 3569 } 3570 { 3571 float * block_data_pos = data_pos; 3572 float pred; 3573 int type_; 3574 size_t index = 0; 3575 size_t unpredictable_count = 0; 3576 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3577 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3578 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3579 type_ = type[index]; 3580 if (type_ != 0){ 3581 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3582 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 
3583 } 3584 else{ 3585 *block_data_pos = unpred_data[unpredictable_count ++]; 3586 } 3587 index ++; 3588 block_data_pos ++; 3589 } 3590 block_data_pos += dim1_offset - current_blockcount_z; 3591 } 3592 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3593 } 3594 cur_unpred_count = unpredictable_count; 3595 } 3596 } 3597 indicator_pos ++; 3598 type += current_block_elements; 3599 unpred_data += cur_unpred_count; 3600 } // end k == 0 3601 for(size_t k=1; k<num_z; k++){ 3602 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3603 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3604 data_pos = *data + offset_x * dim0_offset + offset_z; 3605 3606 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3607 current_blockcount_y = early_blockcount_y; 3608 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 3609 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3610 if(*indicator_pos){ 3611 // decompress by SZ 3612 float * block_data_pos = data_pos; 3613 float pred; 3614 size_t index = 0; 3615 int type_; 3616 size_t unpredictable_count = 0; 3617 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3618 // jj == 0 3619 { 3620 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3621 type_ = type[index]; 3622 if(type_ == intvRadius){ 3623 *block_data_pos = mean; 3624 } 3625 else if(type_ == 0){ 3626 *block_data_pos = unpred_data[unpredictable_count ++]; 3627 } 3628 else{ 3629 if(type_ < intvRadius) type_ += 1; 3630 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 3631 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3632 } 3633 index ++; 3634 block_data_pos ++; 3635 } 3636 block_data_pos += dim1_offset - current_blockcount_z; 3637 } 3638 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3639 
for(size_t kk=0; kk<current_blockcount_z; kk++){ 3640 type_ = type[index]; 3641 if(type_ == intvRadius){ 3642 *block_data_pos = mean; 3643 } 3644 else if(type_ == 0){ 3645 *block_data_pos = unpred_data[unpredictable_count ++]; 3646 } 3647 else{ 3648 if(type_ < intvRadius) type_ += 1; 3649 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3650 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3651 } 3652 index ++; 3653 block_data_pos ++; 3654 } 3655 block_data_pos += dim1_offset - current_blockcount_z; 3656 } 3657 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3658 } 3659 cur_unpred_count = unpredictable_count; 3660 } 3661 else{ 3662 // decompress by regression 3663 { 3664 //restore regression coefficients 3665 float pred; 3666 int type_; 3667 for(int e=0; e<4; e++){ 3668 type_ = coeff_type[e][coeff_index]; 3669 if (type_ != 0){ 3670 pred = last_coefficients[e]; 3671 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3672 } 3673 else{ 3674 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3675 coeff_unpred_data_count[e] ++; 3676 } 3677 } 3678 coeff_index ++; 3679 } 3680 { 3681 float * block_data_pos = data_pos; 3682 float pred; 3683 int type_; 3684 size_t index = 0; 3685 size_t unpredictable_count = 0; 3686 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3687 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3688 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3689 type_ = type[index]; 3690 if (type_ != 0){ 3691 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3692 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3693 } 3694 else{ 3695 *block_data_pos = unpred_data[unpredictable_count ++]; 
3696 } 3697 index ++; 3698 block_data_pos ++; 3699 } 3700 block_data_pos += dim1_offset - current_blockcount_z; 3701 } 3702 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3703 } 3704 cur_unpred_count = unpredictable_count; 3705 } 3706 } 3707 indicator_pos ++; 3708 type += current_block_elements; 3709 unpred_data += cur_unpred_count; 3710 } 3711 }// end j = 0 3712 for(size_t j=1; j<num_y; j++){ 3713 // k == 0 3714 { 3715 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3716 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3717 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; 3718 3719 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3720 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3721 current_blockcount_z = early_blockcount_z; 3722 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3723 if(*indicator_pos){ 3724 // decompress by SZ 3725 float * block_data_pos = data_pos; 3726 float pred; 3727 size_t index = 0; 3728 int type_; 3729 size_t unpredictable_count = 0; 3730 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3731 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3732 { 3733 // kk == 0 3734 type_ = type[index]; 3735 if(type_ == intvRadius){ 3736 *block_data_pos = mean; 3737 } 3738 else if(type_ == 0){ 3739 *block_data_pos = unpred_data[unpredictable_count ++]; 3740 } 3741 else{ 3742 if(type_ < intvRadius) type_ += 1; 3743 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 3744 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3745 } 3746 index ++; 3747 block_data_pos ++; 3748 } 3749 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3750 type_ = type[index]; 3751 if(type_ == intvRadius){ 3752 *block_data_pos = mean; 3753 } 
3754 else if(type_ == 0){ 3755 *block_data_pos = unpred_data[unpredictable_count ++]; 3756 } 3757 else{ 3758 if(type_ < intvRadius) type_ += 1; 3759 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3760 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3761 } 3762 index ++; 3763 block_data_pos ++; 3764 } 3765 block_data_pos += dim1_offset - current_blockcount_z; 3766 } 3767 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3768 } 3769 cur_unpred_count = unpredictable_count; 3770 } 3771 else{ 3772 // decompress by regression 3773 { 3774 //restore regression coefficients 3775 float pred; 3776 int type_; 3777 for(int e=0; e<4; e++){ 3778 type_ = coeff_type[e][coeff_index]; 3779 if (type_ != 0){ 3780 pred = last_coefficients[e]; 3781 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3782 } 3783 else{ 3784 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3785 coeff_unpred_data_count[e] ++; 3786 } 3787 } 3788 coeff_index ++; 3789 } 3790 { 3791 float * block_data_pos = data_pos; 3792 float pred; 3793 int type_; 3794 size_t index = 0; 3795 size_t unpredictable_count = 0; 3796 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3797 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3798 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3799 type_ = type[index]; 3800 if (type_ != 0){ 3801 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3802 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3803 } 3804 else{ 3805 *block_data_pos = unpred_data[unpredictable_count ++]; 3806 } 3807 index ++; 3808 block_data_pos ++; 3809 } 3810 block_data_pos += dim1_offset - current_blockcount_z; 3811 } 3812 block_data_pos += 
dim0_offset - current_blockcount_y * dim1_offset; 3813 } 3814 cur_unpred_count = unpredictable_count; 3815 } 3816 } 3817 indicator_pos ++; 3818 type += current_block_elements; 3819 unpred_data += cur_unpred_count; 3820 } // end k == 0 3821 for(size_t k=1; k<num_z; k++){ 3822 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 3823 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 3824 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 3825 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 3826 3827 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 3828 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 3829 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 3830 3831 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3832 if(*indicator_pos){ 3833 // decompress by SZ 3834 float * block_data_pos = data_pos; 3835 float pred; 3836 size_t index = 0; 3837 int type_; 3838 size_t unpredictable_count = 0; 3839 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3840 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3841 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3842 type_ = type[index]; 3843 if(type_ == intvRadius){ 3844 *block_data_pos = mean; 3845 } 3846 else if(type_ == 0){ 3847 *block_data_pos = unpred_data[unpredictable_count ++]; 3848 } 3849 else{ 3850 if(type_ < intvRadius) type_ += 1; 3851 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 3852 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3853 } 3854 index ++; 3855 
block_data_pos ++; 3856 } 3857 block_data_pos += dim1_offset - current_blockcount_z; 3858 } 3859 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3860 } 3861 cur_unpred_count = unpredictable_count; 3862 } 3863 else{ 3864 // decompress by regression 3865 { 3866 //restore regression coefficients 3867 float pred; 3868 int type_; 3869 for(int e=0; e<4; e++){ 3870 type_ = coeff_type[e][coeff_index]; 3871 if (type_ != 0){ 3872 pred = last_coefficients[e]; 3873 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 3874 } 3875 else{ 3876 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 3877 coeff_unpred_data_count[e] ++; 3878 } 3879 } 3880 coeff_index ++; 3881 } 3882 { 3883 float * block_data_pos = data_pos; 3884 float pred; 3885 int type_; 3886 size_t index = 0; 3887 size_t unpredictable_count = 0; 3888 for(size_t ii=0; ii<current_blockcount_x; ii++){ 3889 for(size_t jj=0; jj<current_blockcount_y; jj++){ 3890 for(size_t kk=0; kk<current_blockcount_z; kk++){ 3891 type_ = type[index]; 3892 if (type_ != 0){ 3893 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 3894 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3895 } 3896 else{ 3897 *block_data_pos = unpred_data[unpredictable_count ++]; 3898 } 3899 index ++; 3900 block_data_pos ++; 3901 } 3902 block_data_pos += dim1_offset - current_blockcount_z; 3903 } 3904 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3905 } 3906 cur_unpred_count = unpredictable_count; 3907 } 3908 } 3909 indicator_pos ++; 3910 type += current_block_elements; 3911 unpred_data += cur_unpred_count; 3912 } 3913 } 3914 } 3915 } 3916 else{ 3917 type = result_type; 3918 // i == 0 3919 { 3920 // j == 0 3921 { 3922 // k == 0 3923 { 3924 data_pos = *data; 3925 3926 current_blockcount_x = early_blockcount_x; 3927 current_blockcount_y = early_blockcount_y; 3928 current_blockcount_z = 
early_blockcount_z; 3929 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 3930 if(*indicator_pos){ 3931 // decompress by SZ 3932 float * block_data_pos = data_pos; 3933 float pred; 3934 size_t index = 0; 3935 int type_; 3936 size_t unpredictable_count = 0; 3937 // ii == 0 3938 { 3939 // jj == 0 3940 { 3941 { 3942 // kk == 0 3943 type_ = type[index]; 3944 if(type_ == 0){ 3945 *block_data_pos = unpred_data[unpredictable_count ++]; 3946 } 3947 else{ 3948 pred = 0; 3949 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3950 } 3951 index ++; 3952 block_data_pos ++; 3953 } 3954 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3955 type_ = type[index]; 3956 if(type_ == 0){ 3957 *block_data_pos = unpred_data[unpredictable_count ++]; 3958 } 3959 else{ 3960 pred = block_data_pos[- 1]; 3961 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3962 } 3963 index ++; 3964 block_data_pos ++; 3965 } 3966 block_data_pos += dim1_offset - current_blockcount_z; 3967 } 3968 for(size_t jj=1; jj<current_blockcount_y; jj++){ 3969 { 3970 // kk == 0 3971 type_ = type[index]; 3972 if(type_ == 0){ 3973 *block_data_pos = unpred_data[unpredictable_count ++]; 3974 } 3975 else{ 3976 pred = block_data_pos[- dim1_offset]; 3977 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3978 } 3979 index ++; 3980 block_data_pos ++; 3981 } 3982 for(size_t kk=1; kk<current_blockcount_z; kk++){ 3983 type_ = type[index]; 3984 if(type_ == 0){ 3985 *block_data_pos = unpred_data[unpredictable_count ++]; 3986 } 3987 else{ 3988 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 3989 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 3990 } 3991 index ++; 3992 block_data_pos ++; 3993 } 3994 block_data_pos += dim1_offset - current_blockcount_z; 3995 } 3996 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 3997 } 3998 for(size_t ii=1; 
ii<current_blockcount_x; ii++){ 3999 // jj == 0 4000 { 4001 { 4002 // kk == 0 4003 type_ = type[index]; 4004 if(type_ == 0){ 4005 *block_data_pos = unpred_data[unpredictable_count ++]; 4006 } 4007 else{ 4008 pred = block_data_pos[- dim0_offset]; 4009 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4010 } 4011 index ++; 4012 block_data_pos ++; 4013 } 4014 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4015 type_ = type[index]; 4016 if(type_ == 0){ 4017 *block_data_pos = unpred_data[unpredictable_count ++]; 4018 } 4019 else{ 4020 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 4021 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4022 } 4023 index ++; 4024 block_data_pos ++; 4025 } 4026 block_data_pos += dim1_offset - current_blockcount_z; 4027 } 4028 for(size_t jj=1; jj<current_blockcount_y; jj++){ 4029 { 4030 // kk == 0 4031 type_ = type[index]; 4032 if(type_ == 0){ 4033 *block_data_pos = unpred_data[unpredictable_count ++]; 4034 } 4035 else{ 4036 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 4037 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4038 } 4039 index ++; 4040 block_data_pos ++; 4041 } 4042 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4043 type_ = type[index]; 4044 if(type_ == 0){ 4045 *block_data_pos = unpred_data[unpredictable_count ++]; 4046 } 4047 else{ 4048 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4049 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4050 } 4051 index ++; 4052 block_data_pos ++; 4053 } 4054 block_data_pos += dim1_offset - current_blockcount_z; 4055 } 4056 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 
4057 } 4058 cur_unpred_count = unpredictable_count; 4059 } 4060 else{ 4061 // decompress by regression 4062 { 4063 //restore regression coefficients 4064 float pred; 4065 int type_; 4066 for(int e=0; e<4; e++){ 4067 type_ = coeff_type[e][coeff_index]; 4068 if (type_ != 0){ 4069 pred = last_coefficients[e]; 4070 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4071 } 4072 else{ 4073 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4074 coeff_unpred_data_count[e] ++; 4075 } 4076 } 4077 coeff_index ++; 4078 } 4079 { 4080 float * block_data_pos = data_pos; 4081 float pred; 4082 int type_; 4083 size_t index = 0; 4084 size_t unpredictable_count = 0; 4085 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4086 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4087 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4088 type_ = type[index]; 4089 if (type_ != 0){ 4090 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4091 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4092 } 4093 else{ 4094 *block_data_pos = unpred_data[unpredictable_count ++]; 4095 } 4096 index ++; 4097 block_data_pos ++; 4098 } 4099 block_data_pos += dim1_offset - current_blockcount_z; 4100 } 4101 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4102 } 4103 cur_unpred_count = unpredictable_count; 4104 } 4105 } 4106 indicator_pos ++; 4107 type += current_block_elements; 4108 unpred_data += cur_unpred_count; 4109 } // end k == 0 4110 // i == 0 j == 0 k != 0 4111 for(size_t k=1; k<num_z; k++){ 4112 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4113 data_pos = *data + offset_z; 4114 4115 current_blockcount_x = early_blockcount_x; 4116 current_blockcount_y = early_blockcount_y; 4117 current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; 4118 4119 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4120 if(*indicator_pos){ 4121 // decompress by SZ 4122 float * block_data_pos = data_pos; 4123 float pred; 4124 size_t index = 0; 4125 int type_; 4126 size_t unpredictable_count = 0; 4127 // ii == 0 4128 { 4129 // jj == 0 4130 { 4131 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4132 type_ = type[index]; 4133 if(type_ == 0){ 4134 *block_data_pos = unpred_data[unpredictable_count ++]; 4135 } 4136 else{ 4137 pred = block_data_pos[- 1]; 4138 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4139 } 4140 index ++; 4141 block_data_pos ++; 4142 } 4143 block_data_pos += dim1_offset - current_blockcount_z; 4144 } 4145 for(size_t jj=1; jj<current_blockcount_y; jj++){ 4146 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4147 type_ = type[index]; 4148 if(type_ == 0){ 4149 *block_data_pos = unpred_data[unpredictable_count ++]; 4150 } 4151 else{ 4152 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 4153 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4154 } 4155 index ++; 4156 block_data_pos ++; 4157 } 4158 block_data_pos += dim1_offset - current_blockcount_z; 4159 } 4160 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4161 } 4162 for(size_t ii=1; ii<current_blockcount_x; ii++){ 4163 // jj == 0 4164 { 4165 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4166 type_ = type[index]; 4167 if(type_ == 0){ 4168 *block_data_pos = unpred_data[unpredictable_count ++]; 4169 } 4170 else{ 4171 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 4172 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4173 } 4174 index ++; 4175 block_data_pos ++; 4176 } 4177 block_data_pos += dim1_offset - current_blockcount_z; 4178 } 4179 for(size_t jj=1; jj<current_blockcount_y; jj++){ 4180 
for(size_t kk=0; kk<current_blockcount_z; kk++){ 4181 type_ = type[index]; 4182 if(type_ == 0){ 4183 *block_data_pos = unpred_data[unpredictable_count ++]; 4184 } 4185 else{ 4186 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4187 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4188 } 4189 index ++; 4190 block_data_pos ++; 4191 } 4192 block_data_pos += dim1_offset - current_blockcount_z; 4193 } 4194 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4195 } 4196 cur_unpred_count = unpredictable_count; 4197 } 4198 else{ 4199 // decompress by regression 4200 { 4201 //restore regression coefficients 4202 float pred; 4203 int type_; 4204 for(int e=0; e<4; e++){ 4205 type_ = coeff_type[e][coeff_index]; 4206 if (type_ != 0){ 4207 pred = last_coefficients[e]; 4208 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4209 } 4210 else{ 4211 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4212 coeff_unpred_data_count[e] ++; 4213 } 4214 } 4215 coeff_index ++; 4216 } 4217 { 4218 float * block_data_pos = data_pos; 4219 float pred; 4220 int type_; 4221 size_t index = 0; 4222 size_t unpredictable_count = 0; 4223 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4224 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4225 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4226 type_ = type[index]; 4227 if (type_ != 0){ 4228 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4229 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4230 } 4231 else{ 4232 *block_data_pos = unpred_data[unpredictable_count ++]; 4233 } 4234 index ++; 4235 block_data_pos ++; 4236 } 4237 block_data_pos += dim1_offset - current_blockcount_z; 
4238 } 4239 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4240 } 4241 cur_unpred_count = unpredictable_count; 4242 } 4243 } 4244 indicator_pos ++; 4245 type += current_block_elements; 4246 unpred_data += cur_unpred_count; 4247 } 4248 }// end j==0 4249 for(size_t j=1; j<num_y; j++){ 4250 // k == 0 4251 { 4252 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4253 data_pos = *data + offset_y * dim1_offset; 4254 4255 current_blockcount_x = early_blockcount_x; 4256 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4257 current_blockcount_z = early_blockcount_z; 4258 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4259 if(*indicator_pos){ 4260 // decompress by SZ 4261 float * block_data_pos = data_pos; 4262 float pred; 4263 size_t index = 0; 4264 int type_; 4265 size_t unpredictable_count = 0; 4266 // ii == 0 4267 { 4268 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4269 { 4270 // kk == 0 4271 type_ = type[index]; 4272 if(type_ == 0){ 4273 *block_data_pos = unpred_data[unpredictable_count ++]; 4274 } 4275 else{ 4276 pred = block_data_pos[- dim1_offset]; 4277 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4278 } 4279 index ++; 4280 block_data_pos ++; 4281 } 4282 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4283 type_ = type[index]; 4284 if(type_ == 0){ 4285 *block_data_pos = unpred_data[unpredictable_count ++]; 4286 } 4287 else{ 4288 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 4289 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4290 } 4291 index ++; 4292 block_data_pos ++; 4293 } 4294 block_data_pos += dim1_offset - current_blockcount_z; 4295 } 4296 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4297 } 4298 for(size_t ii=1; ii<current_blockcount_x; ii++){ 4299 for(size_t jj=0; 
jj<current_blockcount_y; jj++){ 4300 { 4301 // kk == 0 4302 type_ = type[index]; 4303 if(type_ == 0){ 4304 *block_data_pos = unpred_data[unpredictable_count ++]; 4305 } 4306 else{ 4307 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 4308 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4309 } 4310 index ++; 4311 block_data_pos ++; 4312 } 4313 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4314 type_ = type[index]; 4315 if(type_ == 0){ 4316 *block_data_pos = unpred_data[unpredictable_count ++]; 4317 } 4318 else{ 4319 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4320 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4321 } 4322 index ++; 4323 block_data_pos ++; 4324 } 4325 block_data_pos += dim1_offset - current_blockcount_z; 4326 } 4327 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4328 } 4329 cur_unpred_count = unpredictable_count; 4330 } 4331 else{ 4332 // decompress by regression 4333 { 4334 //restore regression coefficients 4335 float pred; 4336 int type_; 4337 for(int e=0; e<4; e++){ 4338 type_ = coeff_type[e][coeff_index]; 4339 if (type_ != 0){ 4340 pred = last_coefficients[e]; 4341 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4342 } 4343 else{ 4344 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4345 coeff_unpred_data_count[e] ++; 4346 } 4347 } 4348 coeff_index ++; 4349 } 4350 { 4351 float * block_data_pos = data_pos; 4352 float pred; 4353 int type_; 4354 size_t index = 0; 4355 size_t unpredictable_count = 0; 4356 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4357 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4358 for(size_t kk=0; kk<current_blockcount_z; 
kk++){ 4359 type_ = type[index]; 4360 if (type_ != 0){ 4361 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4362 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4363 } 4364 else{ 4365 *block_data_pos = unpred_data[unpredictable_count ++]; 4366 } 4367 index ++; 4368 block_data_pos ++; 4369 } 4370 block_data_pos += dim1_offset - current_blockcount_z; 4371 } 4372 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4373 } 4374 cur_unpred_count = unpredictable_count; 4375 } 4376 } 4377 indicator_pos ++; 4378 type += current_block_elements; 4379 unpred_data += cur_unpred_count; 4380 } // end k == 0 4381 for(size_t k=1; k<num_z; k++){ 4382 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4383 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4384 data_pos = *data + offset_y * dim1_offset + offset_z; 4385 4386 current_blockcount_x = early_blockcount_x; 4387 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4388 current_blockcount_z = (k < split_index_z) ? 
early_blockcount_z : late_blockcount_z; 4389 4390 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4391 if(*indicator_pos){ 4392 // decompress by SZ 4393 float * block_data_pos = data_pos; 4394 float pred; 4395 size_t index = 0; 4396 int type_; 4397 size_t unpredictable_count = 0; 4398 // ii == 0 4399 { 4400 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4401 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4402 type_ = type[index]; 4403 if(type_ == 0){ 4404 *block_data_pos = unpred_data[unpredictable_count ++]; 4405 } 4406 else{ 4407 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1]; 4408 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4409 } 4410 index ++; 4411 block_data_pos ++; 4412 } 4413 block_data_pos += dim1_offset - current_blockcount_z; 4414 } 4415 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4416 } 4417 for(size_t ii=1; ii<current_blockcount_x; ii++){ 4418 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4419 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4420 type_ = type[index]; 4421 if(type_ == 0){ 4422 *block_data_pos = unpred_data[unpredictable_count ++]; 4423 } 4424 else{ 4425 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4426 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4427 } 4428 index ++; 4429 block_data_pos ++; 4430 } 4431 block_data_pos += dim1_offset - current_blockcount_z; 4432 } 4433 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4434 } 4435 cur_unpred_count = unpredictable_count; 4436 } 4437 else{ 4438 // decompress by regression 4439 { 4440 //restore regression coefficients 4441 float pred; 4442 int type_; 4443 for(int e=0; e<4; e++){ 4444 type_ 
= coeff_type[e][coeff_index]; 4445 if (type_ != 0){ 4446 pred = last_coefficients[e]; 4447 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4448 } 4449 else{ 4450 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4451 coeff_unpred_data_count[e] ++; 4452 } 4453 } 4454 coeff_index ++; 4455 } 4456 { 4457 float * block_data_pos = data_pos; 4458 float pred; 4459 int type_; 4460 size_t index = 0; 4461 size_t unpredictable_count = 0; 4462 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4463 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4464 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4465 type_ = type[index]; 4466 if (type_ != 0){ 4467 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4468 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4469 } 4470 else{ 4471 *block_data_pos = unpred_data[unpredictable_count ++]; 4472 } 4473 index ++; 4474 block_data_pos ++; 4475 } 4476 block_data_pos += dim1_offset - current_blockcount_z; 4477 } 4478 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4479 } 4480 cur_unpred_count = unpredictable_count; 4481 } 4482 } 4483 indicator_pos ++; 4484 type += current_block_elements; 4485 unpred_data += cur_unpred_count; 4486 } 4487 } 4488 } // end i==0 4489 for(size_t i=1; i<num_x; i++){ 4490 // j == 0 4491 { 4492 // k == 0 4493 { 4494 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4495 data_pos = *data + offset_x * dim0_offset; 4496 4497 current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; 4498 current_blockcount_y = early_blockcount_y; 4499 current_blockcount_z = early_blockcount_z; 4500 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4501 if(*indicator_pos){ 4502 // decompress by SZ 4503 float * block_data_pos = data_pos; 4504 float pred; 4505 size_t index = 0; 4506 int type_; 4507 size_t unpredictable_count = 0; 4508 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4509 // jj == 0 4510 { 4511 { 4512 // kk == 0 4513 type_ = type[index]; 4514 if(type_ == 0){ 4515 *block_data_pos = unpred_data[unpredictable_count ++]; 4516 } 4517 else{ 4518 pred = block_data_pos[- dim0_offset]; 4519 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4520 } 4521 index ++; 4522 block_data_pos ++; 4523 } 4524 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4525 type_ = type[index]; 4526 if(type_ == 0){ 4527 *block_data_pos = unpred_data[unpredictable_count ++]; 4528 } 4529 else{ 4530 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 4531 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4532 } 4533 index ++; 4534 block_data_pos ++; 4535 } 4536 block_data_pos += dim1_offset - current_blockcount_z; 4537 } 4538 for(size_t jj=1; jj<current_blockcount_y; jj++){ 4539 { 4540 // kk == 0 4541 type_ = type[index]; 4542 if(type_ == 0){ 4543 *block_data_pos = unpred_data[unpredictable_count ++]; 4544 } 4545 else{ 4546 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 4547 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4548 } 4549 index ++; 4550 block_data_pos ++; 4551 } 4552 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4553 type_ = type[index]; 4554 if(type_ == 0){ 4555 *block_data_pos = unpred_data[unpredictable_count ++]; 4556 } 4557 else{ 4558 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- 
dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4559 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4560 } 4561 index ++; 4562 block_data_pos ++; 4563 } 4564 block_data_pos += dim1_offset - current_blockcount_z; 4565 } 4566 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4567 } 4568 cur_unpred_count = unpredictable_count; 4569 } 4570 else{ 4571 // decompress by regression 4572 { 4573 //restore regression coefficients 4574 float pred; 4575 int type_; 4576 for(int e=0; e<4; e++){ 4577 type_ = coeff_type[e][coeff_index]; 4578 if (type_ != 0){ 4579 pred = last_coefficients[e]; 4580 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4581 } 4582 else{ 4583 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4584 coeff_unpred_data_count[e] ++; 4585 } 4586 } 4587 coeff_index ++; 4588 } 4589 { 4590 float * block_data_pos = data_pos; 4591 float pred; 4592 int type_; 4593 size_t index = 0; 4594 size_t unpredictable_count = 0; 4595 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4596 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4597 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4598 type_ = type[index]; 4599 if (type_ != 0){ 4600 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4601 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4602 } 4603 else{ 4604 *block_data_pos = unpred_data[unpredictable_count ++]; 4605 } 4606 index ++; 4607 block_data_pos ++; 4608 } 4609 block_data_pos += dim1_offset - current_blockcount_z; 4610 } 4611 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4612 } 4613 cur_unpred_count = unpredictable_count; 4614 } 4615 } 4616 indicator_pos ++; 4617 type += current_block_elements; 4618 unpred_data += cur_unpred_count; 4619 } // 
end k == 0 4620 for(size_t k=1; k<num_z; k++){ 4621 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4622 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4623 data_pos = *data + offset_x * dim0_offset + offset_z; 4624 4625 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4626 current_blockcount_y = early_blockcount_y; 4627 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4628 4629 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4630 if(*indicator_pos){ 4631 // decompress by SZ 4632 float * block_data_pos = data_pos; 4633 float pred; 4634 size_t index = 0; 4635 int type_; 4636 size_t unpredictable_count = 0; 4637 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4638 // jj == 0 4639 { 4640 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4641 type_ = type[index]; 4642 if(type_ == 0){ 4643 *block_data_pos = unpred_data[unpredictable_count ++]; 4644 } 4645 else{ 4646 pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1]; 4647 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4648 } 4649 index ++; 4650 block_data_pos ++; 4651 } 4652 block_data_pos += dim1_offset - current_blockcount_z; 4653 } 4654 for(size_t jj=1; jj<current_blockcount_y; jj++){ 4655 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4656 type_ = type[index]; 4657 if(type_ == 0){ 4658 *block_data_pos = unpred_data[unpredictable_count ++]; 4659 } 4660 else{ 4661 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4662 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4663 } 4664 index ++; 4665 block_data_pos ++; 
4666 } 4667 block_data_pos += dim1_offset - current_blockcount_z; 4668 } 4669 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4670 } 4671 cur_unpred_count = unpredictable_count; 4672 } 4673 else{ 4674 // decompress by regression 4675 { 4676 //restore regression coefficients 4677 float pred; 4678 int type_; 4679 for(int e=0; e<4; e++){ 4680 type_ = coeff_type[e][coeff_index]; 4681 if (type_ != 0){ 4682 pred = last_coefficients[e]; 4683 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4684 } 4685 else{ 4686 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4687 coeff_unpred_data_count[e] ++; 4688 } 4689 } 4690 coeff_index ++; 4691 } 4692 { 4693 float * block_data_pos = data_pos; 4694 float pred; 4695 int type_; 4696 size_t index = 0; 4697 size_t unpredictable_count = 0; 4698 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4699 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4700 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4701 type_ = type[index]; 4702 if (type_ != 0){ 4703 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4704 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4705 } 4706 else{ 4707 *block_data_pos = unpred_data[unpredictable_count ++]; 4708 } 4709 index ++; 4710 block_data_pos ++; 4711 } 4712 block_data_pos += dim1_offset - current_blockcount_z; 4713 } 4714 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4715 } 4716 cur_unpred_count = unpredictable_count; 4717 } 4718 } 4719 indicator_pos ++; 4720 type += current_block_elements; 4721 unpred_data += cur_unpred_count; 4722 } 4723 }// end j = 0 4724 for(size_t j=1; j<num_y; j++){ 4725 // k == 0 4726 { 4727 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4728 offset_y = (j < split_index_y) ? 
j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4729 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset; 4730 4731 current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x; 4732 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4733 current_blockcount_z = early_blockcount_z; 4734 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4735 if(*indicator_pos){ 4736 // decompress by SZ 4737 float * block_data_pos = data_pos; 4738 float pred; 4739 size_t index = 0; 4740 int type_; 4741 size_t unpredictable_count = 0; 4742 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4743 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4744 { 4745 // kk == 0 4746 type_ = type[index]; 4747 if(type_ == 0){ 4748 *block_data_pos = unpred_data[unpredictable_count ++]; 4749 } 4750 else{ 4751 pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset]; 4752 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4753 } 4754 index ++; 4755 block_data_pos ++; 4756 } 4757 for(size_t kk=1; kk<current_blockcount_z; kk++){ 4758 type_ = type[index]; 4759 if(type_ == 0){ 4760 *block_data_pos = unpred_data[unpredictable_count ++]; 4761 } 4762 else{ 4763 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4764 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4765 } 4766 index ++; 4767 block_data_pos ++; 4768 } 4769 block_data_pos += dim1_offset - current_blockcount_z; 4770 } 4771 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4772 } 4773 cur_unpred_count = unpredictable_count; 4774 } 4775 else{ 4776 // decompress by regression 4777 { 4778 //restore 
regression coefficients 4779 float pred; 4780 int type_; 4781 for(int e=0; e<4; e++){ 4782 type_ = coeff_type[e][coeff_index]; 4783 if (type_ != 0){ 4784 pred = last_coefficients[e]; 4785 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4786 } 4787 else{ 4788 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4789 coeff_unpred_data_count[e] ++; 4790 } 4791 } 4792 coeff_index ++; 4793 } 4794 { 4795 float * block_data_pos = data_pos; 4796 float pred; 4797 int type_; 4798 size_t index = 0; 4799 size_t unpredictable_count = 0; 4800 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4801 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4802 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4803 type_ = type[index]; 4804 if (type_ != 0){ 4805 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4806 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4807 } 4808 else{ 4809 *block_data_pos = unpred_data[unpredictable_count ++]; 4810 } 4811 index ++; 4812 block_data_pos ++; 4813 } 4814 block_data_pos += dim1_offset - current_blockcount_z; 4815 } 4816 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4817 } 4818 cur_unpred_count = unpredictable_count; 4819 } 4820 } 4821 indicator_pos ++; 4822 type += current_block_elements; 4823 unpred_data += cur_unpred_count; 4824 } // end k == 0 4825 for(size_t k=1; k<num_z; k++){ 4826 offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; 4827 offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; 4828 offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z; 4829 data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z; 4830 4831 current_blockcount_x = (i < split_index_x) ? 
early_blockcount_x : late_blockcount_x; 4832 current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y; 4833 current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z; 4834 4835 size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z; 4836 if(*indicator_pos){ 4837 // decompress by SZ 4838 float * block_data_pos = data_pos; 4839 float pred; 4840 size_t index = 0; 4841 int type_; 4842 size_t unpredictable_count = 0; 4843 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4844 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4845 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4846 type_ = type[index]; 4847 if(type_ == 0){ 4848 *block_data_pos = unpred_data[unpredictable_count ++]; 4849 } 4850 else{ 4851 pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1]; 4852 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4853 } 4854 index ++; 4855 block_data_pos ++; 4856 } 4857 block_data_pos += dim1_offset - current_blockcount_z; 4858 } 4859 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4860 } 4861 cur_unpred_count = unpredictable_count; 4862 } 4863 else{ 4864 // decompress by regression 4865 { 4866 //restore regression coefficients 4867 float pred; 4868 int type_; 4869 for(int e=0; e<4; e++){ 4870 type_ = coeff_type[e][coeff_index]; 4871 if (type_ != 0){ 4872 pred = last_coefficients[e]; 4873 last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e]; 4874 } 4875 else{ 4876 last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]]; 4877 coeff_unpred_data_count[e] ++; 4878 } 4879 } 4880 coeff_index ++; 4881 } 4882 { 4883 float * block_data_pos = data_pos; 4884 float pred; 4885 int type_; 4886 size_t index = 0; 
4887 size_t unpredictable_count = 0; 4888 for(size_t ii=0; ii<current_blockcount_x; ii++){ 4889 for(size_t jj=0; jj<current_blockcount_y; jj++){ 4890 for(size_t kk=0; kk<current_blockcount_z; kk++){ 4891 type_ = type[index]; 4892 if (type_ != 0){ 4893 pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; 4894 *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision; 4895 } 4896 else{ 4897 *block_data_pos = unpred_data[unpredictable_count ++]; 4898 } 4899 index ++; 4900 block_data_pos ++; 4901 } 4902 block_data_pos += dim1_offset - current_blockcount_z; 4903 } 4904 block_data_pos += dim0_offset - current_blockcount_y * dim1_offset; 4905 } 4906 cur_unpred_count = unpredictable_count; 4907 } 4908 } 4909 indicator_pos ++; 4910 type += current_block_elements; 4911 unpred_data += cur_unpred_count; 4912 } 4913 } 4914 } 4915 } 4916 4917 #ifdef HAVE_TIMECMPR 4918 if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION) 4919 memcpy(multisteps->hist_data, (*data), num_elements*sizeof(float)); 4920 #endif 4921 4922 free(coeff_result_type); 4923 4924 free(indicator); 4925 free(result_type); 4926 }

/**
 * Decompress a 3D float array whose blocks were encoded independently of
 * each other ("random access" layout).
 *
 * The stream is consumed in this order: block size (int), real precision
 * (double), interval count (int), Huffman tree for the element codes,
 * use_mean flag (1 byte), mean (float), one indicator bit per block, and --
 * only when at least one block uses regression -- four coefficient sections
 * (precision, interval radius, Huffman tree, encoded codes, unpredictable
 * coefficients). Then come the unpredictable-value count (size_t), the
 * unpredictable floats and finally the Huffman-coded element codes.
 *
 * Every block is decoded as a full block_size^3 cube into a scratch buffer
 * that carries one extra, always-zero layer in front of each dimension, so
 * the Lorenzo predictor never reads outside the current block. Only the part
 * of the cube that lies inside r1 x r2 x r3 is copied to the output.
 *
 * @param data       receives a malloc'ed array of r1*r2*r3 floats; this
 *                   function does not free it
 * @param r1         extent of the slowest-varying dimension
 * @param r2         extent of the middle dimension
 * @param r3         extent of the fastest-varying dimension
 * @param comp_data  start of the compressed stream described above
 *
 * NOTE(review): allocation results are not checked (the function has no way
 * to report failure), and the float sections of the stream are read through
 * a cast pointer, which presumes they are suitably aligned -- confirm against
 * the compressor.
 */
void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){

	size_t dim0_offset = r2 * r3;
	size_t dim1_offset = r3;
	size_t num_elements = r1 * r2 * r3;

	*data = (float*)malloc(sizeof(float)*num_elements);

	unsigned char * comp_data_pos = comp_data;

	size_t block_size = bytesToInt_bigEndian(comp_data_pos);
	comp_data_pos += sizeof(int);
	// number of blocks along each dimension (ceiling division)
	size_t num_x = (r1 - 1) / block_size + 1;
	size_t num_y = (r2 - 1) / block_size + 1;
	size_t num_z = (r3 - 1) / block_size + 1;

	size_t max_num_block_elements = block_size * block_size * block_size;
	size_t num_blocks = num_x * num_y * num_z;

	double realPrecision = bytesToDouble(comp_data_pos);
	comp_data_pos += sizeof(double);
	unsigned int intervals = bytesToInt_bigEndian(comp_data_pos);
	comp_data_pos += sizeof(int);

	updateQuantizationInfo(intervals);

	// Huffman tree used for the per-element quantization codes
	unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
	comp_data_pos += sizeof(int);

	int stateNum = 2*intervals;
	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);

	int nodeCount = bytesToInt_bigEndian(comp_data_pos);
	node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount);
	comp_data_pos += sizeof(int) + tree_size;

	float mean;
	unsigned char use_mean;
	memcpy(&use_mean, comp_data_pos, sizeof(unsigned char));
	comp_data_pos += sizeof(unsigned char);
	memcpy(&mean, comp_data_pos, sizeof(float));
	comp_data_pos += sizeof(float);

	// one flag per block: non-zero = Lorenzo ("SZ") prediction, zero = regression
	unsigned char * indicator;
	size_t indicator_bitlength = (num_blocks - 1)/8 + 1;
	convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator);
	comp_data_pos += indicator_bitlength;
	size_t reg_count = 0;
	for(size_t b=0; b<num_blocks; b++){
		if(!indicator[b]) reg_count ++;
	}

	// Per-coefficient decoding state. It is only filled in (and only read)
	// when at least one block uses regression.
	int coeff_intvRadius[4] = {0};
	int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int));
	int * coeff_type[4] = {NULL};
	double precision[4] = {0.0};
	float * coeff_unpred_data[4] = {NULL};
	if(reg_count > 0){
		for(int c=0; c<4; c++){
			precision[c] = bytesToDouble(comp_data_pos);
			comp_data_pos += sizeof(double);
			coeff_intvRadius[c] = bytesToInt_bigEndian(comp_data_pos);
			comp_data_pos += sizeof(int);
			unsigned int coeff_tree_size = bytesToInt_bigEndian(comp_data_pos);
			comp_data_pos += sizeof(int);
			int coeff_stateNum = 2*coeff_intvRadius[c]*2;
			HuffmanTree* coeff_huffmanTree = createHuffmanTree(coeff_stateNum);
			int coeff_nodeCount = bytesToInt_bigEndian(comp_data_pos);
			node coeff_root = reconstruct_HuffTree_from_bytes_anyStates(coeff_huffmanTree, comp_data_pos+sizeof(int), coeff_nodeCount);
			comp_data_pos += sizeof(int) + coeff_tree_size;

			coeff_type[c] = coeff_result_type + c * num_blocks;
			size_t typeArray_size = bytesToSize(comp_data_pos);
			decode(comp_data_pos + sizeof(size_t), reg_count, coeff_root, coeff_type[c]);
			comp_data_pos += sizeof(size_t) + typeArray_size;
			int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos);
			comp_data_pos += sizeof(int);
			coeff_unpred_data[c] = (float *) comp_data_pos;
			comp_data_pos += coeff_unpred_count * sizeof(float);
			SZ_ReleaseHuffman(coeff_huffmanTree);
		}
	}
	float last_coefficients[4] = {0.0};
	int coeff_unpred_data_count[4] = {0};
	int coeff_index = 0;
	updateQuantizationInfo(intervals);

	size_t total_unpred;
	memcpy(&total_unpred, comp_data_pos, sizeof(size_t));
	comp_data_pos += sizeof(size_t);
	float * unpred_data = (float *) comp_data_pos;
	comp_data_pos += total_unpred * sizeof(float);

	// every block, including edge blocks, carries block_size^3 codes
	int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int));
	decode(comp_data_pos, num_blocks*max_num_block_elements, root, result_type);
	SZ_ReleaseHuffman(huffmanTree);

	int intvRadius = exe_params->intvRadius;

	// Scratch cube with one extra layer in front of each dimension. That layer
	// is zeroed here and never written afterwards, so neighbours that fall
	// outside the current block read as 0.
	int dec_buffer_size = block_size + 1;
	int block_dim0_offset = dec_buffer_size*dec_buffer_size;
	int block_dim1_offset = dec_buffer_size;
	// size computed in size_t so three int factors cannot overflow
	size_t dec_buffer_elements = (size_t)dec_buffer_size * (size_t)dec_buffer_size * (size_t)dec_buffer_size;
	float * dec_buffer = (float *) calloc(dec_buffer_elements, sizeof(float));
	// first interior element of the scratch cube (skips the zero layers)
	float * data_pos = dec_buffer + block_dim0_offset + block_dim1_offset + 1;

	int * type = result_type;
	unsigned char * indicator_pos = indicator;
	for(size_t i=0; i<num_x; i++){
		for(size_t j=0; j<num_y; j++){
			for(size_t k=0; k<num_z; k++){
				size_t index = 0;
				size_t unpredictable_count = 0;
				if(*indicator_pos){
					// decompress by SZ (3D Lorenzo predictor inside the block)
					for(size_t ii=0; ii<block_size; ii++){
						for(size_t jj=0; jj<block_size; jj++){
							for(size_t kk=0; kk<block_size; kk++){
								float * block_data_pos = data_pos + ii*block_dim0_offset + jj*block_dim1_offset + kk;
								int type_ = type[index];
								if(use_mean && type_ == 1){
									// code 1 is reserved for "equals the mean" when use_mean is set
									*block_data_pos = mean;
								}
								else if(type_ == 0){
									*block_data_pos = unpred_data[unpredictable_count ++];
								}
								else{
									float pred = block_data_pos[-1] + block_data_pos[-block_dim1_offset]+ block_data_pos[-block_dim0_offset] - block_data_pos[-block_dim1_offset - 1]
											- block_data_pos[-block_dim0_offset - 1] - block_data_pos[-block_dim0_offset - block_dim1_offset] + block_data_pos[-block_dim0_offset - block_dim1_offset - 1];
									*block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
								}
								index ++;
							}
						}
					}
				}
				else{
					// decompress by regression
					// restore the four coefficients, each predicted from the
					// previous regression block's value
					for(int e=0; e<4; e++){
						int coeff_code = coeff_type[e][coeff_index];
						if (coeff_code != 0){
							float pred = last_coefficients[e];
							last_coefficients[e] = pred + 2 * (coeff_code - coeff_intvRadius[e]) * precision[e];
						}
						else{
							last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
							coeff_unpred_data_count[e] ++;
						}
					}
					coeff_index ++;

					for(size_t ii=0; ii<block_size; ii++){
						for(size_t jj=0; jj<block_size; jj++){
							for(size_t kk=0; kk<block_size; kk++){
								int type_ = type[index];
								if (type_ != 0){
									float pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
									data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = pred + 2 * (type_ - intvRadius) * realPrecision;
								}
								else{
									data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = unpred_data[unpredictable_count ++];
								}
								index ++;
							}
						}
					}
				}
				indicator_pos ++;
				unpred_data += unpredictable_count;
				type += max_num_block_elements;

				// mv data back: copy the part of the cube that lies inside the array
				float * block_data_pos_x = *data + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size;
				for(size_t ii=0; ii<block_size && i*block_size + ii < r1; ii++){
					float * block_data_pos_y = block_data_pos_x;
					for(size_t jj=0; jj<block_size && j*block_size + jj < r2; jj++){
						float * block_data_pos_z = block_data_pos_y;
						for(size_t kk=0; kk<block_size && k*block_size + kk < r3; kk++){
							*block_data_pos_z = data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk];
							block_data_pos_z ++;
						}
						block_data_pos_y += dim1_offset;
					}
					block_data_pos_x += dim0_offset;
				}
			}
		}
	}

	free(dec_buffer);
	free(coeff_result_type);

	free(indicator);
	free(result_type);
}
-
TabularUnified thirdparty/SZ/sz/src/szd_float_pwr.c ¶
r2c47b73 r9ee2ce3 17 17 #include "Huffman.h" 18 18 #include "sz_float_pwr.h" 19 #include "utility.h" 19 20 //#include "rw.h" 20 21 // … … 1350 1351 free(groupID); 1351 1352 } 1353 1354 void decompressDataSeries_float_1D_pwr_pre_log(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps) { 1355 1356 decompressDataSeries_float_1D(data, dataSeriesLength, tdps); 1357 float threshold = tdps->minLogValue; 1358 if(tdps->pwrErrBoundBytes_size > 0){ 1359 unsigned char * signs; 1360 sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); 1361 for(size_t i=0; i<dataSeriesLength; i++){ 1362 if((*data)[i] < threshold) (*data)[i] = 0; 1363 else (*data)[i] = exp2((*data)[i]); 1364 if(signs[i]) (*data)[i] = -((*data)[i]); 1365 } 1366 free(signs); 1367 } 1368 else{ 1369 for(size_t i=0; i<dataSeriesLength; i++){ 1370 if((*data)[i] < threshold) (*data)[i] = 0; 1371 else (*data)[i] = exp2((*data)[i]); 1372 } 1373 } 1374 1375 } 1376 1377 void decompressDataSeries_float_2D_pwr_pre_log(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps) { 1378 1379 size_t dataSeriesLength = r1 * r2; 1380 decompressDataSeries_float_2D(data, r1, r2, tdps); 1381 float threshold = tdps->minLogValue; 1382 if(tdps->pwrErrBoundBytes_size > 0){ 1383 unsigned char * signs; 1384 sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); 1385 for(size_t i=0; i<dataSeriesLength; i++){ 1386 if((*data)[i] < threshold) (*data)[i] = 0; 1387 else (*data)[i] = exp2((*data)[i]); 1388 if(signs[i]) (*data)[i] = -((*data)[i]); 1389 } 1390 free(signs); 1391 } 1392 else{ 1393 for(size_t i=0; i<dataSeriesLength; i++){ 1394 if((*data)[i] < threshold) (*data)[i] = 0; 1395 else (*data)[i] = exp2((*data)[i]); 1396 } 1397 } 1398 1399 } 1400 1401 void decompressDataSeries_float_3D_pwr_pre_log(float** data, size_t r1, size_t r2, size_t r3, 
TightDataPointStorageF* tdps) { 1402 1403 size_t dataSeriesLength = r1 * r2 * r3; 1404 decompressDataSeries_float_3D(data, r1, r2, r3, tdps); 1405 float threshold = tdps->minLogValue; 1406 if(tdps->pwrErrBoundBytes_size > 0){ 1407 unsigned char * signs; 1408 sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); 1409 for(size_t i=0; i<dataSeriesLength; i++){ 1410 if((*data)[i] < threshold) (*data)[i] = 0; 1411 else (*data)[i] = exp2((*data)[i]); 1412 if(signs[i]) (*data)[i] = -((*data)[i]); 1413 } 1414 free(signs); 1415 } 1416 else{ 1417 for(size_t i=0; i<dataSeriesLength; i++){ 1418 if((*data)[i] < threshold) (*data)[i] = 0; 1419 else (*data)[i] = exp2((*data)[i]); 1420 } 1421 } 1422 } 1352 1423 #pragma GCC diagnostic pop -
TabularUnified thirdparty/SZ/sz/src/szd_int16.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_int16.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 33 34 unsigned char* szTmpBytes; 34 35 35 36 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)36 if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength) 37 { 38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szd_int32.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_int32.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 35 36 if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength) 36 37 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szd_int64.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_int64.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 35 36 if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength) 36 37 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szd_int8.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_int8.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 35 36 if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength) 36 37 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szd_uint16.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_uint16.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 35 36 if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength) 36 37 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szd_uint32.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_uint32.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 35 36 if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength) 36 37 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szd_uint64.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_uint64.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 35 36 if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength) 36 37 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szd_uint8.c ¶
r2c47b73 r9ee2ce3 16 16 #include "szd_uint8.h" 17 17 #include "Huffman.h" 18 #include "utility.h" 18 19 19 20 /** … … 35 36 if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength) 36 37 { 37 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);38 if( isZlib)38 confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize); 39 if(confparams_dec->losslessCompressor!=-1) 39 40 confparams_dec->szMode = SZ_BEST_COMPRESSION; 40 41 else … … 49 50 if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size 50 51 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES; 51 tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize52 tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);// (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize 52 53 //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize); 53 54 //memcpy(szTmpBytes, tmpBytes, tmpSize); -
TabularUnified thirdparty/SZ/sz/src/szf.c ¶
r2c47b73 r9ee2ce3 177 177 void sz_compress_d1_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1) 178 178 { 179 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, 0, 0, 0, *r1);179 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1); 180 180 memcpy(bytes, tmp_bytes, *outSize); 181 181 free(tmp_bytes); … … 184 184 void sz_compress_d2_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2) 185 185 { 186 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, 0, 0, *r2, *r1);186 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1); 187 187 memcpy(bytes, tmp_bytes, *outSize); 188 188 free(tmp_bytes); … … 191 191 void sz_compress_d3_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) 192 192 { 193 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, 0, *r3, *r2, *r1);193 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1); 194 194 memcpy(bytes, tmp_bytes, *outSize); 195 195 free(tmp_bytes); … … 198 198 void sz_compress_d4_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) 199 199 { 200 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, 
*r4, *r3, *r2, *r1);200 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1); 201 201 memcpy(bytes, tmp_bytes, *outSize); 202 202 free(tmp_bytes); … … 205 205 void sz_compress_d5_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) 206 206 { 207 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,*r5, *r4, *r3, *r2, *r1);207 unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1); 208 208 memcpy(bytes, tmp_bytes, *outSize); 209 209 free(tmp_bytes); … … 212 212 void sz_compress_d1_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1) 213 213 { 214 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, 0, 0, 0, *r1);214 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1); 215 215 memcpy(bytes, tmp_bytes, *outSize); 216 216 free(tmp_bytes); … … 219 219 void sz_compress_d2_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2) 220 220 { 221 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, 0, 0, *r2, *r1);221 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1); 222 222 memcpy(bytes, tmp_bytes, *outSize); 223 223 free(tmp_bytes); … … 226 226 void sz_compress_d3_double_args_(double* data, unsigned char 
*bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) 227 227 { 228 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, 0, *r3, *r2, *r1);228 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1); 229 229 memcpy(bytes, tmp_bytes, *outSize); 230 230 free(tmp_bytes); … … 233 233 void sz_compress_d4_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) 234 234 { 235 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,0, *r4, *r3, *r2, *r1);235 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1); 236 236 memcpy(bytes, tmp_bytes, *outSize); 237 237 free(tmp_bytes); … … 240 240 void sz_compress_d5_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) 241 241 { 242 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1,*r5, *r4, *r3, *r2, *r1);242 unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1); 243 243 memcpy(bytes, tmp_bytes, *outSize); 244 244 free(tmp_bytes); … … 412 412 s2[i]=varName[i]; 413 413 s2[*len]='\0'; 414 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , 0, 0, 0, *r1);414 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1); 415 415 } 416 416 void sz_batchaddvar_d2_float_(char* 
varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2) … … 421 421 s2[i]=varName[i]; 422 422 s2[*len]='\0'; 423 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , 0, 0, *r2, *r1);423 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1); 424 424 } 425 425 void sz_batchaddvar_d3_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) … … 430 430 s2[i]=varName[i]; 431 431 s2[*len]='\0'; 432 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , 0, *r3, *r2, *r1);432 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1); 433 433 } 434 434 void sz_batchaddvar_d4_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) … … 439 439 s2[i]=varName[i]; 440 440 s2[*len]='\0'; 441 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , *r4, *r3, *r2, *r1);441 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1); 442 442 } 443 443 void sz_batchaddvar_d5_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) … … 448 448 s2[i]=varName[i]; 449 449 s2[*len]='\0'; 450 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1);450 SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1); 451 451 } 452 452 void sz_batchaddvar_d1_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1) … … 457 457 s2[i]=varName[i]; 458 458 
s2[*len]='\0'; 459 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , 0, 0, 0, *r1);459 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1); 460 460 } 461 461 void sz_batchaddvar_d2_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2) … … 466 466 s2[i]=varName[i]; 467 467 s2[*len]='\0'; 468 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , 0, 0, *r2, *r1);468 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1); 469 469 } 470 470 void sz_batchaddvar_d3_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3) … … 475 475 s2[i]=varName[i]; 476 476 s2[*len]='\0'; 477 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , 0, *r3, *r2, *r1);477 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1); 478 478 } 479 479 void sz_batchaddvar_d4_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4) … … 484 484 s2[i]=varName[i]; 485 485 s2[*len]='\0'; 486 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0 , *r4, *r3, *r2, *r1);486 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1); 487 487 } 488 488 void sz_batchaddvar_d5_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5) … … 493 493 s2[i]=varName[i]; 494 494 s2[*len]='\0'; 495 SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1);495 SZ_batchAddVar(s2, 
SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1); 496 496 } 497 497 void sz_batchdelvar_c_(char* varName, int *len, int *errState) … … 504 504 *errState = SZ_batchDelVar(s2); 505 505 } 506 507 /*@deprecated*/ 506 508 void sz_batch_compress_c_(unsigned char* bytes, size_t *outSize) 507 509 { 508 unsigned char* tmp_bytes = SZ_batch_compress(outSize); 509 memcpy(bytes, tmp_bytes, *outSize); 510 free(tmp_bytes); 511 } 510 //unsigned char* tmp_bytes = SZ_batch_compress(outSize); 511 //memcpy(bytes, tmp_bytes, *outSize); 512 //free(tmp_bytes); 513 } 514 /*@deprecated*/ 512 515 void sz_batch_decompress_c_(unsigned char* bytes, size_t *byteLength, int *ierr) 513 516 { 514 SZ_batch_decompress(bytes, *byteLength, ierr);517 //SZ_batch_decompress(bytes, *byteLength, ierr); 515 518 } 516 519
Note: See TracChangeset
for help on using the changeset viewer.