Context Navigation

← Previous Changeset
Next Changeset →

Changeset 9ee2ce3

Timestamp:

09/28/18 16:32:55 (6 years ago)

Author:

Hal Finkel <hfinkel@…>

Branches:

master, pympi

Children:

e6aa0eb

Parents:

abca157

git-author:

Hal Finkel <hfinkel@…> (09/28/18 16:32:55)

git-committer:

Hal Finkel <hfinkel@…> (09/28/18 16:32:55)

Message:

importing new SZ files

Location:

thirdparty/SZ

Files:

1 added
55 edited

COPYRIGHT.txt (modified) (1 diff)
sz/include/TightDataPointStorageD.h (modified) (2 diffs)
sz/include/TightDataPointStorageF.h (modified) (2 diffs)
sz/include/TypeManager.h (modified) (1 diff)
sz/include/callZlib.h (modified) (1 diff)
sz/include/dataCompression.h (modified) (1 diff)
sz/include/pastriD.h (modified) (56 diffs)
sz/include/pastriF.h (modified) (56 diffs)
sz/include/sz.h (modified) (8 diffs)
sz/include/sz_double.h (modified) (1 diff)
sz/include/sz_double_pwr.h (modified) (1 diff)
sz/include/sz_float.h (modified) (1 diff)
sz/include/sz_float_pwr.h (modified) (1 diff)
sz/include/szd_double.h (modified) (1 diff)
sz/include/szd_double_pwr.h (modified) (1 diff)
sz/include/szd_float.h (modified) (1 diff)
sz/include/szd_float_pwr.h (modified) (1 diff)
sz/include/utility.h (added)
sz/src/ByteToolkit.c (modified) (8 diffs)
sz/src/DynamicDoubleArray.c (modified) (1 diff)
sz/src/DynamicFloatArray.c (modified) (1 diff)
sz/src/DynamicIntArray.c (modified) (1 diff)
sz/src/Huffman.c (modified) (4 diffs)
sz/src/TightDataPointStorageD.c (modified) (13 diffs)
sz/src/TightDataPointStorageF.c (modified) (12 diffs)
sz/src/TypeManager.c (modified) (3 diffs)
sz/src/callZlib.c (modified) (2 diffs)
sz/src/conf.c (modified) (4 diffs)
sz/src/dataCompression.c (modified) (3 diffs)
sz/src/sz.c (modified) (8 diffs)
sz/src/sz_double.c (modified) (18 diffs)
sz/src/sz_double_pwr.c (modified) (2 diffs)
sz/src/sz_double_ts.c (modified) (1 diff)
sz/src/sz_float.c (modified) (24 diffs)
sz/src/sz_float_pwr.c (modified) (2 diffs)
sz/src/sz_int16.c (modified) (3 diffs)
sz/src/sz_int32.c (modified) (3 diffs)
sz/src/sz_int64.c (modified) (3 diffs)
sz/src/sz_int8.c (modified) (3 diffs)
sz/src/sz_uint16.c (modified) (3 diffs)
sz/src/sz_uint32.c (modified) (3 diffs)
sz/src/sz_uint64.c (modified) (3 diffs)
sz/src/sz_uint8.c (modified) (3 diffs)
sz/src/szd_double.c (modified) (13 diffs)
sz/src/szd_double_pwr.c (modified) (2 diffs)
sz/src/szd_float.c (modified) (15 diffs)
sz/src/szd_float_pwr.c (modified) (2 diffs)
sz/src/szd_int16.c (modified) (3 diffs)
sz/src/szd_int32.c (modified) (3 diffs)
sz/src/szd_int64.c (modified) (3 diffs)
sz/src/szd_int8.c (modified) (3 diffs)
sz/src/szd_uint16.c (modified) (3 diffs)
sz/src/szd_uint32.c (modified) (3 diffs)
sz/src/szd_uint64.c (modified) (3 diffs)
sz/src/szd_uint8.c (modified) (3 diffs)
sz/src/szf.c (modified) (21 diffs)

Legend:

Unmodified
Added
Removed

TabularUnified thirdparty/SZ/COPYRIGHT.txt ¶

-                      r2c47b73
+                      r9ee2ce3
 Copyright © 2016 , UChicago Argonne, LLC
 All Rights Reserved
 [SZ, Version 1.3]
+[SZ, Version 1.4]
 Sheng Di
 Dingwen Tao
+Xin Liang
 Franck Cappello
 Argonne National Laboratory

TabularUnified thirdparty/SZ/sz/include/TightDataPointStorageD.h ¶

-                      r2c47b73
+                      r9ee2ce3
         char reqLength;
         char radExpo; //used to compute reqLength based on segmented precisions in "pw_rel_compression"
+        double minLogValue;
         int stateNum;
 …
         unsigned char* pwrErrBoundBytes;
         int pwrErrBoundBytes_size;
+        unsigned char* raBytes;
+        size_t raBytes_size;
 } TightDataPointStorageD;

TabularUnified thirdparty/SZ/sz/include/TightDataPointStorageF.h ¶

-                      r2c47b73
+                      r9ee2ce3
         size_t rtypeArray_size;
+        float minLogValue;
         unsigned char* typeArray; //its size is dataSeriesLength/4 (or xxx/4+1)
         size_t typeArray_size;
 …
         unsigned char* pwrErrBoundBytes;
         int pwrErrBoundBytes_size;
+        unsigned char* raBytes;
+        size_t raBytes_size;
 } TightDataPointStorageF;

TabularUnified thirdparty/SZ/sz/include/TypeManager.h ¶

-                      r2c47b73
+                      r9ee2ce3
 //TypeManager.c
 size_t convertIntArray2ByteArray_fast_1b(unsigned char* intArray, size_t intArrayLength, unsigned char **result);
+size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result);
 void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
 size_t convertIntArray2ByteArray_fast_2b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);
+size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result);
 void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray);
 size_t convertIntArray2ByteArray_fast_3b(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char **result);

TabularUnified thirdparty/SZ/sz/include/callZlib.h ¶

r2c47b73	r9ee2ce3
19	19
20	20	#include <stdio.h>
	21
	22	int isZlibFormat(unsigned char magic1, unsigned char magic2);
21	23
22	24	//callZlib.c

TabularUnified thirdparty/SZ/sz/include/dataCompression.h ¶

-                      r2c47b73
+                      r9ee2ce3
 int initRandomAccessBytes(unsigned char* raBytes);
+int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData);
+int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
+int reqLength, int reqBytesLength, int resiBitsLength, float medianValue);
+void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData);
+int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData);
+int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
+int reqLength, int reqBytesLength, int resiBitsLength, double medianValue);
+void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData);
 #ifdef __cplusplus
+}

TabularUnified thirdparty/SZ/sz/include/pastriD.h ¶

-                      r2c47b73
+                      r9ee2ce3
   half.d=0.5;
   //printf("pastri_double_quantize:\nx=%lf  x=0x%lx\n",x,(*((uint64_t *)(&x))));
   //printf("sign(x):0x%lx\n", x);
   //printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
+//  //printf("pastri_double_quantize:\nx=%lf  x=0x%lx\n",x,(*((uint64_t *)(&x))));
+//  //printf("sign(x):0x%lx\n", x);
+//  //printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
   half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000);
   //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
+//  //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
   return (int64_t)(x + half.d);
+}
 …
   int i,sb;
   for(i=0;i<p->bSize;i++){
     //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
+//    //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
     if(abs_FastD(data[i])>p->usedEb){
       bp->nonZeros++;
       //if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
+      ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
+    }
     if(abs_FastD(data[i])>absExt){
 …
   bp->binSize=2*p->usedEb;
   //if(DEBUG){printf("Extremum  : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
   //if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
   //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize)  );}   }//DEBUG
+  ////if(DEBUG){printf("Extremum  : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
+  ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
+  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_double_quantize(data[patternIdx+i]/binSize)  );}   }//DEBUG
   //int64_t *patternQ=(int64_t*)(outBuf+15);  //Possible Improvement!
 …
   for(i=0;i<p->sbSize;i++){
     patternQ[i]=pastri_double_quantize(data[patternIdx+i],bp->binSize);
     if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
+    //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
+  }
 …
   bp->scaleBits=bp->patternBits;
   bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->scaleBits-1))-1);
   //if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
   //if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
   if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
+  ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
+  ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
+  //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
   //Calculate Scales.
 …
   //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8);  //Possible Improvement!
   int patternExtZero=(patternExt==0);
   //if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
+  ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
   for(sb=0;sb<p->sbNum;sb++){
     //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt;
 …
     //assert(scales[sb]<=1);
     scalesQ[sb]=pastri_double_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize);
     if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
+    //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
+  }
   //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
+  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
   //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it.
 …
       if(absECQ > bp->ECQExt)
         bp->ECQExt=absECQ;
       //if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
+      ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
       switch (ECQ[_1DIdx]){
         case 0:
 …
       double decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize;
       if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){
         printf("p->usedEb=%.6e\n",p->usedEb);
         printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
+        //printf("p->usedEb=%.6e\n",p->usedEb);
+        //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
         assert(0);
+      }
 …
   //*(uint16_t*)(&outBuf[7])=p->idxOffset[3];
   if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
   if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
+  //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
+  //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
   //****************************************************************************************
 …
     //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data
     *numOutBytes=UCSparseBytes;
     if(D_G){printf("UCSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    //if(D_G){printf("UCSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     outBuf[0]=0; //mode
 …
+          }
     if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
+    //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
   //****************************************************************************************
 …
     //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data
     *numOutBytes=UCNonSparseBytes;
     if(D_G){printf("UCNonSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    //if(D_G){printf("UCNonSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     outBuf[0]=1; //mode
 …
     memcpy(&outBuf[1], data, p->bSize*p->dataSize);
     if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
+    //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
     /*
     for(i=0;i<UCNonSparseBytes-17;i++){
       printf("%d ",inBuf[p->bSize*8+i]);
+    }
     printf("\n");
+      //printf("%d ",inBuf[p->bSize*8+i]);
+    }
+    //printf("\n");
     for(i=0;i<UCNonSparseBytes-17;i++){
       printf("%d ",outBuf[17+i]);
+    }
     printf("\n");
+      //printf("%d ",outBuf[17+i]);
+    }
+    //printf("\n");
     */
   //****************************************************************************************
 …
     //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
     *numOutBytes=CSparseBytes;
     if(D_G){printf("CSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
+    //if(D_G){printf("CSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
     outBuf[0]=2; //mode
 …
     bitPos=9*8;
     //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbSize;i++){
       writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
+    }
     //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbNum;i++){
       writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
+    }
     //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
     //if(DEBUG)printf("ECQBits:%d\n",ECQBits);
+    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
     switch(bp->ECQBits){
       case 2:
 …
               break;
             case 1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
               writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
               //writeBits_Fast(outBuf,&bitPos,2,0x10);
 …
               break;
             case -1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
               writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
               //writeBits_Fast(outBuf,&bitPos,2,0x11);
 …
             break;
           case 1:
             //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
+            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
             writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
             //writeBits_Fast(outBuf,&bitPos,3,0);//0x000
 …
             break;
           case -1:
             //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
+            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
             writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
             //writeBits_Fast(outBuf,&bitPos,3,1);//0x001
 …
             break;
           default:
             //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
+            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
             writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
             //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]);
 …
+    }
     //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
     if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
+    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
+    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
 …
     *(uint32_t*)(&outBuf[1])=bytePos;
     if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
+    //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
     if(D_G){assert(bitPos==CSparseBits);}
 …
     //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
     *numOutBytes=CNonSparseBytes;
     if(D_G){printf("CNonSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
+    //if(D_G){printf("CNonSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
     outBuf[0]=3; //mode
 …
     bitPos=7*8; //Currently, we are at the end of 7th byte.
     //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbSize;i++){
       writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
+    }
     //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbNum;i++){
       writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
+    }
     //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
     //if(DEBUG)printf("ECQBits:%d\n",ECQBits);
+    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
     switch(bp->ECQBits){
       case 2:
 …
           switch(ECQ[i]){
             case 0:
               //if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
               writeBits_Fast(outBuf,&bitPos,1,1);//0x1
               break;
             case 1:
               //if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
               //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
               writeBits_Fast(outBuf,&bitPos,1,0);
 …
               break;
             case -1:
               //if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
               //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
               writeBits_Fast(outBuf,&bitPos,1,0);
 …
         break;
       default: //ECQBits>2
         //if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
+        ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
         for(i=0;i<p->bSize;i++){
           //if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
           //if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
           //if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
+          ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+          ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
+          ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
           switch(ECQ[i]){
             case 0:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               writeBits_Fast(outBuf,&bitPos,1,1);  //0x1
               //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
             case 1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               //writeBits_Fast(outBuf,&bitPos,3,0); //0x000
 …
               writeBits_Fast(outBuf,&bitPos,1,0);
               //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
             case -1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               //writeBits_Fast(outBuf,&bitPos,3,8); //0x001
 …
               writeBits_Fast(outBuf,&bitPos,1,1);
               //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
             default:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
 …
               //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01
               writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
+          }
 …
+    }
     //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
     if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
+    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
+    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
 …
     *(uint32_t*)(&outBuf[1])=bytePos;
     if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
+    //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
     if(D_G){assert(bitPos==CNonSparseBits);}
+  }
   //for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
+  ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
+}
 …
   pastri_blockParams bp;
   if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);}  //DEBUG
   if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);}  //DEBUG
   if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
   if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
+  //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);}  //DEBUG
+  //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);}  //DEBUG
+  //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
+  //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
   int64_t patternQ[MAX_PS_SIZE];
 …
     //R:UCSparse
     case 0:
       if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
       //bp->nonZeros=*(uint16_t*)(&inBuf[9]);
       //bytePos=11;
 …
         bytePos+=8;
+      }
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       break;
     //R:UCNonSparse
     case 1:
       if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
       //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8);
       memcpy(data, &inBuf[1], p->bSize*8);
       bytePos=p->bSize*8;
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       break;
     //R:CSparse
     case 2:
       if(D_G){printf("\nDC:CSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:CSparse\n");} //DEBUG
       //for(j=0;j<p->bSize;j++){
       //  data[j]=0;
 …
       bp->ECQBits=inBuf[6];
       if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
+      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
       //bp->numOutliers=*(uint16_t*)(&inBuf[15]);
 …
       bp->numOutliers=*(uint16_t*)(&inBuf[7]);
       bitPos=9*8;
       if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
+      //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
       bp->scalesBinSize=1/(double)(((uint64_t)1<<(bp->patternBits-1))-1);
 …
       bp->binSize=p->usedEb*2;
       if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
+      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
       for(j=0;j<p->sbSize;j++){
         patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
         if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+      }
       for(j=0;j<p->sbNum;j++){
         scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
         if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+      }
 …
         case 2:
           for(j=0;j<bp->numOutliers;j++){
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
             _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
             ECQTemp=readBits_I64(inBuf,&bitPos,1);
             ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
             //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
+            ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
             //continue;
             //sb=_1DIdx/p->sbSize;
 …
             ECQ[_1DIdx]=ECQTemp;
             //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+          }
           break;
         default: //bp->ECQBits>2
           if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
+          //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
           for(j=0;j<bp->numOutliers;j++){
 …
             //localIdx=_1DIdx%p->sbSize;
             temp=readBits_UI64(inBuf,&bitPos,1);
             //if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
+            ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
             switch(temp){
               case 0:  //+-1
                 ECQTemp=readBits_I64(inBuf,&bitPos,1);
                 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
                 //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
                 //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
+                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
                 break;
               case 1: //Others
                 ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
                 //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
                 //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
+                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
                 break;
               //default:
               //  printf("ERROR: Bad 2-bit value: 0x%lx",temp);
+              ////  printf("ERROR: Bad 2-bit value: 0x%lx",temp);
               // assert(0); //AMG
               //  break;
 …
             ECQ[_1DIdx]=ECQTemp;
             //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+          }
           break;
 …
       bytePos=(bitPos+7)/8;
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
 …
     //R:CNonSparse
     case 3:
       if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
       //for(j=0;j<p->bSize;j++){
 …
       bp->ECQBits=inBuf[6];
       if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
+      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
       //bitPos=15*8;
 …
       bp->binSize=p->usedEb*2;
       if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
+      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
       for(j=0;j<p->sbSize;j++){
         patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
         if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+      }
       for(j=0;j<p->sbNum;j++){
         scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
         if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+      }
       /* //Splitting
       for(j=0;j<p->bSize;j++){
         data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
         //if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
+        ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
+      }
       */
 …
         case 2:
           for(j=0;j<p->bSize;j++){
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
             //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
             temp=readBits_UI64(inBuf,&bitPos,1);
 …
+            }
             //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
             //continue;
             //sb=_1DIdx/p->sbSize;
 …
             ECQ[j]=ECQTemp;
             //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+          }
           break;
         default: //bp->ECQBits>2
           //if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
+          ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
           for(j=0;j<p->bSize;j++){
             //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
             //if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
+            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+            ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
             //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
             temp=readBits_UI64(inBuf,&bitPos,1);
             //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
             switch(temp){
               case 0:
                 //if(DEBUG)printf("Read:0");
+                ////if(DEBUG)printf("Read:0");
                 temp2=readBits_UI64(inBuf,&bitPos,1);
                 switch(temp2){
                   case 0:
                     //if(DEBUG)printf("0");
+                    ////if(DEBUG)printf("0");
                     ECQTemp=readBits_I64(inBuf,&bitPos,1);
                     //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
                     //if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
+                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+                    ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
                     ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
                     //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
+                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
                     break;
                   case 1:
                     //if(DEBUG)printf("1\n");
+                    ////if(DEBUG)printf("1\n");
                     ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
                     //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
                     //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
+                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
                     break;
                   default:
 …
                 break;
               case 1:
                 //if(DEBUG)printf("Read:1\n");
+                ////if(DEBUG)printf("Read:1\n");
                 ECQTemp=0;
                 //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
+                ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
                 break;
               default:
 …
+            }
             //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
             //continue;
             //sb=_1DIdx/p->sbSize;
 …
             ECQ[j]=ECQTemp;
             //if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
+            ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
+          }
           break;
 …
       //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
       bytePos=(bitPos+7)/8;
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
 …
   for(i=0;i<p->bSize;i++){
     if(idx0[i]!=idx0_dc[i]){
       printf("idx0[%d]=%d  !=  %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
+      //printf("idx0[%d]=%d  !=  %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
       assert(0);
+    }
     if(idx1[i]!=idx1_dc[i]){
       printf("idx1[%d]=%d  !=  %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
+      //printf("idx1[%d]=%d  !=  %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
       assert(0);
+    }
     if(idx2[i]!=idx2_dc[i]){
       printf("idx2[%d]=%d  !=  %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
+      //printf("idx2[%d]=%d  !=  %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
       assert(0);
+    }
     if(idx3[i]!=idx3_dc[i]){
       printf("idx3[%d]=%d  !=  %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
+      //printf("idx3[%d]=%d  !=  %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
       assert(0);
+    }
 …
   for(i=0;i<p->bSize;i++){
     if(abs_FastD(data[i]-data_dc[i])>p->usedEb){
       printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
+      //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
       assert(0);
+    }

TabularUnified thirdparty/SZ/sz/include/pastriF.h ¶

-                      r2c47b73
+                      r9ee2ce3
   half.d=0.5;
   //printf("pastri_float_quantize:\nx=%lf  x=0x%lx\n",x,(*((uint64_t *)(&x))));
   //printf("sign(x):0x%lx\n", x);
   //printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
+  ////printf("pastri_float_quantize:\nx=%lf  x=0x%lx\n",x,(*((uint64_t *)(&x))));
+  ////printf("sign(x):0x%lx\n", x);
+  ////printf("0.5:0x%lx\n", (*((uint64_t *)(&half))));
   half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000);
   //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
+  ////printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half))));
   return (int64_t)(x + half.d);
+}
 …
   int i,sb;
   for(i=0;i<p->bSize;i++){
     //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
+    ////printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG
     if(abs_FastD(data[i])>p->usedEb){
       bp->nonZeros++;
       //if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
+      ////if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG
+    }
     if(abs_FastD(data[i])>absExt){
 …
   bp->binSize=2*p->usedEb;
   //if(DEBUG){printf("Extremum  : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
   //if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
   //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize)  );}   }//DEBUG
+  ////if(DEBUG){printf("Extremum  : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG
+  ////if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG
+  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize)  );}   }//DEBUG
   //int64_t *patternQ=(int64_t*)(outBuf+15);  //Possible Improvement!
 …
   for(i=0;i<p->sbSize;i++){
     patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize);
     if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
+    //if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);}
+  }
 …
   bp->scaleBits=bp->patternBits;
   bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1);
   //if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
   //if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
   if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
+  ////if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG
+  ////if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG
+  //if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG
   //Calculate Scales.
 …
   //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8);  //Possible Improvement!
   int patternExtZero=(patternExt==0);
   //if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
+  ////if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG
   for(sb=0;sb<p->sbNum;sb++){
     //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt;
 …
     //assert(scales[sb]<=1);
     scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize);
     if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
+    //if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);}
+  }
   //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
+  ////if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG
   //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it.
 …
       if(absECQ > bp->ECQExt)
         bp->ECQExt=absECQ;
       //if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
+      ////if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG
       switch (ECQ[_1DIdx]){
         case 0:
 …
       float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize;
       if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){
         printf("p->usedEb=%.6e\n",p->usedEb);
         printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
+        //printf("p->usedEb=%.6e\n",p->usedEb);
+        //printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed));
         assert(0);
+      }
 …
   //*(uint16_t*)(&outBuf[7])=p->idxOffset[3];
   if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
   if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
+  //if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG
+  //if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG
   //****************************************************************************************
 …
     //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data
     *numOutBytes=UCSparseBytes;
     if(D_G){printf("UCSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    //if(D_G){printf("UCSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     outBuf[0]=0; //mode
 …
+          }
     if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
+    //if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG
   //****************************************************************************************
 …
     //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data
     *numOutBytes=UCNonSparseBytes;
     if(D_G){printf("UCNonSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    //if(D_G){printf("UCNonSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     outBuf[0]=1; //mode
 …
     memcpy(&outBuf[1], data, p->bSize*p->dataSize);
     if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
+    //if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG
     /*
     for(i=0;i<UCNonSparseBytes-17;i++){
       printf("%d ",inBuf[p->bSize*8+i]);
+    }
     printf("\n");
+      //printf("%d ",inBuf[p->bSize*8+i]);
+    }
+    //printf("\n");
     for(i=0;i<UCNonSparseBytes-17;i++){
       printf("%d ",outBuf[17+i]);
+    }
     printf("\n");
+      //printf("%d ",outBuf[17+i]);
+    }
+    //printf("\n");
     */
   //****************************************************************************************
 …
     //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ}
     *numOutBytes=CSparseBytes;
     if(D_G){printf("CSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
+    //if(D_G){printf("CSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
     outBuf[0]=2; //mode
 …
     bitPos=9*8;
     //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbSize;i++){
       writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
+    }
     //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbNum;i++){
       writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
+    }
     //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
     //if(DEBUG)printf("ECQBits:%d\n",ECQBits);
+    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
     switch(bp->ECQBits){
       case 2:
 …
               break;
             case 1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG
               writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
               //writeBits_Fast(outBuf,&bitPos,2,0x10);
 …
               break;
             case -1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
               writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
               //writeBits_Fast(outBuf,&bitPos,2,0x11);
 …
             break;
           case 1:
             //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
+            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG
             writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
             //writeBits_Fast(outBuf,&bitPos,3,0);//0x000
 …
             break;
           case -1:
             //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
+            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG
             writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
             //writeBits_Fast(outBuf,&bitPos,3,1);//0x001
 …
             break;
           default:
             //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
+            ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG
             writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i);
             //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]);
 …
+    }
     //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
     if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
+    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
+    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
 …
     *(uint32_t*)(&outBuf[1])=bytePos;
     if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
+    //if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG
     if(D_G){assert(bitPos==CSparseBits);}
 …
     //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ}
     *numOutBytes=CNonSparseBytes;
     if(D_G){printf("CNonSparse\n");} //DEBUG
     if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
     //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
+    //if(D_G){printf("CNonSparse\n");} //DEBUG
+    //if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG
+    ////if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG
     outBuf[0]=3; //mode
 …
     bitPos=7*8; //Currently, we are at the end of 7th byte.
     //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbSize;i++){
       writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point
+    }
     //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG
     for(i=0;i<p->sbNum;i++){
       writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale
+    }
     //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
     //if(DEBUG)printf("ECQBits:%d\n",ECQBits);
+    ////if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG
+    ////if(DEBUG)printf("ECQBits:%d\n",ECQBits);
     switch(bp->ECQBits){
       case 2:
 …
           switch(ECQ[i]){
             case 0:
               //if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG
               writeBits_Fast(outBuf,&bitPos,1,1);//0x1
               break;
             case 1:
               //if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG
               //writeBits_Fast(outBuf,&bitPos,2,0);//0x00
               writeBits_Fast(outBuf,&bitPos,1,0);
 …
               break;
             case -1:
               //if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG
               //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
               writeBits_Fast(outBuf,&bitPos,1,0);
 …
         break;
       default: //ECQBits>2
         //if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
+        ////if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG
         for(i=0;i<p->bSize;i++){
           //if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
           //if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
           //if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
+          ////if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+          ////if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG
+          ////if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG
           switch(ECQ[i]){
             case 0:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               writeBits_Fast(outBuf,&bitPos,1,1);  //0x1
               //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
             case 1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               //writeBits_Fast(outBuf,&bitPos,3,0); //0x000
 …
               writeBits_Fast(outBuf,&bitPos,1,0);
               //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
             case -1:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               //writeBits_Fast(outBuf,&bitPos,3,8); //0x001
 …
               writeBits_Fast(outBuf,&bitPos,1,1);
               //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
             default:
               //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
               //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
+              ////if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG
+              ////if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG
               //temp1=bitPos;
               //writeBits_Fast(outBuf,&bitPos,2,2); //0x01
 …
               //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01
               writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]);
               //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
+              ////if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG
               break;
+          }
 …
+    }
     //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
     if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
+    ////if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG
+    //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
 …
     *(uint32_t*)(&outBuf[1])=bytePos;
     if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
+    //if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG
     if(D_G){assert(bitPos==CNonSparseBits);}
+  }
   //for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
+  ////for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG
+}
 …
   pastri_blockParams bp;
   if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);}  //DEBUG
   if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);}  //DEBUG
   if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
   if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
+  //if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);}  //DEBUG
+  //if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);}  //DEBUG
+  //if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG
+  //if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG
   int64_t patternQ[MAX_PS_SIZE];
 …
     //R:UCSparse
     case 0:
       if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:UCSparse\n");} //DEBUG
       //bp->nonZeros=*(uint16_t*)(&inBuf[9]);
       //bytePos=11;
 …
         bytePos+=8;
+      }
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       break;
     //R:UCNonSparse
     case 1:
       if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG
       //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8);
       memcpy(data, &inBuf[1], p->bSize*8);
       bytePos=p->bSize*8;
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       break;
     //R:CSparse
     case 2:
       if(D_G){printf("\nDC:CSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:CSparse\n");} //DEBUG
       //for(j=0;j<p->bSize;j++){
       //  data[j]=0;
 …
       bp->ECQBits=inBuf[6];
       if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
+      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
       //bp->numOutliers=*(uint16_t*)(&inBuf[15]);
 …
       bp->numOutliers=*(uint16_t*)(&inBuf[7]);
       bitPos=9*8;
       if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
+      //if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG
       bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1);
 …
       bp->binSize=p->usedEb*2;
       if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
+      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
       for(j=0;j<p->sbSize;j++){
         patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
         if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+      }
       for(j=0;j<p->sbNum;j++){
         scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
         if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+      }
 …
         case 2:
           for(j=0;j<bp->numOutliers;j++){
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
             _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
             ECQTemp=readBits_I64(inBuf,&bitPos,1);
             ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
             //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
+            ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
             //continue;
             //sb=_1DIdx/p->sbSize;
 …
             ECQ[_1DIdx]=ECQTemp;
             //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+          }
           break;
         default: //bp->ECQBits>2
           if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
+          //if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG
           for(j=0;j<bp->numOutliers;j++){
 …
             //localIdx=_1DIdx%p->sbSize;
             temp=readBits_UI64(inBuf,&bitPos,1);
             //if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
+            ////if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG
             switch(temp){
               case 0:  //+-1
                 ECQTemp=readBits_I64(inBuf,&bitPos,1);
                 ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
                 //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
                 //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
+                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
                 break;
               case 1: //Others
                 ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
                 //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
                 //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
+                ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+                ////if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp);
                 break;
               //default:
               //  printf("ERROR: Bad 2-bit value: 0x%lx",temp);
+              ////  printf("ERROR: Bad 2-bit value: 0x%lx",temp);
               // assert(0); //AMG
               //  break;
 …
             ECQ[_1DIdx]=ECQTemp;
             //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+          }
           break;
 …
       bytePos=(bitPos+7)/8;
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
 …
     //R:CNonSparse
     case 3:
       if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
+      //if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG
       //for(j=0;j<p->bSize;j++){
 …
       bp->ECQBits=inBuf[6];
       if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
+      //if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG
       //bitPos=15*8;
 …
       bp->binSize=p->usedEb*2;
       if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
+      //if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG
       for(j=0;j<p->sbSize;j++){
         patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point
         if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+        //if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);}
+      }
       for(j=0;j<p->sbNum;j++){
         scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale
         if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+        //if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);}
+      }
       /* //Splitting
       for(j=0;j<p->bSize;j++){
         data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize;
         //if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
+        ////if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);}
+      }
       */
 …
         case 2:
           for(j=0;j<p->bSize;j++){
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
             //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
             temp=readBits_UI64(inBuf,&bitPos,1);
 …
+            }
             //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
             //continue;
             //sb=_1DIdx/p->sbSize;
 …
             ECQ[j]=ECQTemp;
             //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+            ////if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG
+          }
           break;
         default: //bp->ECQBits>2
           //if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
+          ////if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos);
           for(j=0;j<p->bSize;j++){
             //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
             //if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
             //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
+            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+            ////if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos);
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG
+            ////if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG
             //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits);
             temp=readBits_UI64(inBuf,&bitPos,1);
             //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+            ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
             switch(temp){
               case 0:
                 //if(DEBUG)printf("Read:0");
+                ////if(DEBUG)printf("Read:0");
                 temp2=readBits_UI64(inBuf,&bitPos,1);
                 switch(temp2){
                   case 0:
                     //if(DEBUG)printf("0");
+                    ////if(DEBUG)printf("0");
                     ECQTemp=readBits_I64(inBuf,&bitPos,1);
                     //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
                     //if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
+                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+                    ////if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp);
                     ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1;
                     //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
+                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
                     break;
                   case 1:
                     //if(DEBUG)printf("1\n");
+                    ////if(DEBUG)printf("1\n");
                     ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits);
                     //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
                     //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
+                    ////if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG
+                    ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
                     break;
                   default:
 …
                 break;
               case 1:
                 //if(DEBUG)printf("Read:1\n");
+                ////if(DEBUG)printf("Read:1\n");
                 ECQTemp=0;
                 //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
+                ////if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp);
                 break;
               default:
 …
+            }
             //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
+            ////if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG
             //continue;
             //sb=_1DIdx/p->sbSize;
 …
             ECQ[j]=ECQTemp;
             //if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
+            ////if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG
+          }
           break;
 …
       //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8);
       bytePos=(bitPos+7)/8;
       if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
+      //if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG
       //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION)
 …
   for(i=0;i<p->bSize;i++){
     if(idx0[i]!=idx0_dc[i]){
       printf("idx0[%d]=%d  !=  %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
+      //printf("idx0[%d]=%d  !=  %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i);
       assert(0);
+    }
     if(idx1[i]!=idx1_dc[i]){
       printf("idx1[%d]=%d  !=  %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
+      //printf("idx1[%d]=%d  !=  %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i);
       assert(0);
+    }
     if(idx2[i]!=idx2_dc[i]){
       printf("idx2[%d]=%d  !=  %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
+      //printf("idx2[%d]=%d  !=  %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i);
       assert(0);
+    }
     if(idx3[i]!=idx3_dc[i]){
       printf("idx3[%d]=%d  !=  %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
+      //printf("idx3[%d]=%d  !=  %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i);
       assert(0);
+    }
 …
   for(i=0;i<p->bSize;i++){
     if(abs_FastD(data[i]-data_dc[i])>p->usedEb){
       printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
+      //printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb);
       assert(0);
+    }

TabularUnified thirdparty/SZ/sz/include/sz.h ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "sz_float_ts.h"
 #include "szd_float_ts.h"
+#include "utility.h"
 #ifdef _WIN32
 …
 //typedef unsigned long uint64_t;
 #define SZ_VERNUM 0x0140
 #define SZ_VER_MAJOR 1
 #define SZ_VER_MINOR 4
 #define SZ_VER_BUILD 13
 #define SZ_VER_REVISION 5
+#define SZ_VERNUM 0x0200
+#define SZ_VER_MAJOR 2
+#define SZ_VER_MINOR 0
+#define SZ_VER_BUILD 2
+#define SZ_VER_REVISION 0
 #define PASTRI 103
 …
 #define SZ_TEMPORAL_COMPRESSION 3
+#define SZ_NO_REGRESSION 0
+#define SZ_WITH_LINEAR_REGRESSION 1
 #define SZ_PWR_MIN_TYPE 0
 #define SZ_PWR_AVG_TYPE 1
 …
 #define numOfBufferedSteps 1 //the number of time steps in the buffer
+#define GZIP_COMPRESSOR 0 //i.e., ZLIB_COMPRESSOR
+#define ZSTD_COMPRESSOR 1
 //Note: the following setting should be consistent with stateNum in Huffman.h
 …
         unsigned int maxRangeRadius;
         int sol_ID;// it's always SZ, unless the setting is PASTRI compression mode (./configure --enable-pastri)
+        int losslessCompressor;
         int sampleDistance; //2 bytes
         float predThreshold;  // 2 bytes
 …
         char metadata_filename[256];
         FILE *metadata_file;
+        unsigned char* bit_array; //sihuan added
+        size_t intersect_size; //sihuan added
+        int64_t* hist_index; //sihuan added: prestep index
 } sz_tsc_metadata;
 …
 extern sz_params *confparams_dec;
 extern sz_exedata *exe_params;
+extern int sz_with_regression;
 //------------------------------------------------
 extern SZ_VarSet* sz_varset;
 …
 size_t compute_total_batch_size();
-int isZlibFormat(unsigned char magic1, unsigned char magic2);
 void SZ_registerVar(char* varName, int dataType, void* data,
                         int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio,

TabularUnified thirdparty/SZ/sz/include/sz_double.h ¶

-                      r2c47b73
+                      r9ee2ce3
 size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4);
+unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq);
+unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq);
+unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size);
+unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size);
 #ifdef __cplusplus
+}

TabularUnified thirdparty/SZ/sz/include/sz_double_pwr.h ¶

-                      r2c47b73
+                      r9ee2ce3
 size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f, size_t *outSize);
+void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t dataLength, size_t *outSize, double min, double max);
+void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t *outSize, double min, double max);
+void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max);
 #ifdef __cplusplus
+}

TabularUnified thirdparty/SZ/sz/include/sz_float.h ¶

-                      r2c47b73
+                      r9ee2ce3
 size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4);
+unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq);
+unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq);
+unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size);
+unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size);
+unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size);
 #ifdef __cplusplus
+}

TabularUnified thirdparty/SZ/sz/include/sz_float_pwr.h ¶

-                      r2c47b73
+                      r9ee2ce3
 size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f, size_t *outSize);
+void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max);
+void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max);
+void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max);
 #ifdef __cplusplus
+}

TabularUnified thirdparty/SZ/sz/include/szd_double.h ¶

r2c47b73	r9ee2ce3
25	25	void getSnapshotData_double_3D(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps, int errBoundMode);
26	26	void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, size_t r4, TightDataPointStorageD* tdps, int errBoundMode);
	27	void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data);
	28	void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data);
27	29
28	30	int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize);

TabularUnified thirdparty/SZ/sz/include/szd_double_pwr.h ¶

-                      r2c47b73
+                      r9ee2ce3
 void decompressDataSeries_double_1D_pwrgroup(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps);
+void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps);
+void decompressDataSeries_double_2D_pwr_pre_log(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps);
+void decompressDataSeries_double_3D_pwr_pre_log(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps);
 #ifdef __cplusplus
+}

TabularUnified thirdparty/SZ/sz/include/szd_float.h ¶

-                      r2c47b73
+                      r9ee2ce3
 size_t decompressDataSeries_float_3D_RA_block(float * data, float mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, float * unpredictable_data);
+void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data);
+void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data);
+void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data);
 #ifdef __cplusplus
+}

TabularUnified thirdparty/SZ/sz/include/szd_float_pwr.h ¶

-                      r2c47b73
+                      r9ee2ce3
 char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength);
 void decompressDataSeries_float_1D_pwrgroup(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps);
+void decompressDataSeries_float_1D_pwr_pre_log(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps);
+void decompressDataSeries_float_2D_pwr_pre_log(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps);
+void decompressDataSeries_float_3D_pwr_pre_log(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps);
 #ifdef __cplusplus

TabularUnified thirdparty/SZ/sz/src/ByteToolkit.c ¶

r2c47b73	r9ee2ce3
431	431
432	432	//the byte to input is in the big-endian format
433		float bytesToFloat(unsigned char* bytes)
	433	inline float bytesToFloat(unsigned char* bytes)
434	434	{
435	435	lfloat buf;
…	…
440	440	}
441	441
442		void floatToBytes(unsigned char *b, float num)
	442	inline void floatToBytes(unsigned char *b, float num)
443	443	{
444	444	lfloat buf;
…	…
450	450
451	451	//the byte to input is in the big-endian format
452		double bytesToDouble(unsigned char* bytes)
	452	inline double bytesToDouble(unsigned char* bytes)
453	453	{
454	454	ldouble buf;
…	…
459	459	}
460	460
461		void doubleToBytes(unsigned char *b, double num)
	461	inline void doubleToBytes(unsigned char *b, double num)
462	462	{
463	463	ldouble buf;
…	…
508	508	}
509	509
510		int getMaskRightCode(int m) {
	510	inline int getMaskRightCode(int m) {
511	511	switch (m) {
512	512	case 1:
…	…
531	531	}
532	532
533		int getLeftMovingCode(int kMod8)
	533	inline int getLeftMovingCode(int kMod8)
534	534	{
535	535	return getMaskRightCode(8 - kMod8);
536	536	}
537	537
538		int getRightMovingSteps(int kMod8, int resiBitLength) {
	538	inline int getRightMovingSteps(int kMod8, int resiBitLength) {
539	539	return 8 - kMod8 - resiBitLength;
540	540	}
541	541
542		int getRightMovingCode(int kMod8, int resiBitLength)
	542	inline int getRightMovingCode(int kMod8, int resiBitLength)
543	543	{
544	544	int rightMovingSteps = 8 - kMod8 - resiBitLength;
…	…
815	815
816	816
817		size_t bytesToSize(unsigned char* bytes)
	817	inline size_t bytesToSize(unsigned char* bytes)
818	818	{
819	819	size_t result = 0;
…	…
825	825	}
826	826
827		void sizeToBytes(unsigned char* outBytes, size_t size)
	827	inline void sizeToBytes(unsigned char* outBytes, size_t size)
828	828	{
829	829	if(exe_params->SZ_SIZE_TYPE==4)

TabularUnified thirdparty/SZ/sz/src/DynamicDoubleArray.c ¶

r2c47b73	r9ee2ce3
22	22	void convertDDAtoDoubles(DynamicDoubleArray dba, double *data)
23	23	{
24		int size = dba->size;
	24	size_t size = dba->size;
25	25	if(size>0)
26	26	data = (double)malloc(size * sizeof(double));

TabularUnified thirdparty/SZ/sz/src/DynamicFloatArray.c ¶

r2c47b73	r9ee2ce3
22	22	void convertDFAtoFloats(DynamicFloatArray dfa, float *data)
23	23	{
24		int size = dfa->size;
	24	size_t size = dfa->size;
25	25	if(size>0)
26	26	data = (float)malloc(size * sizeof(float));

TabularUnified thirdparty/SZ/sz/src/DynamicIntArray.c ¶

r2c47b73	r9ee2ce3
22	22	void convertDIAtoInts(DynamicIntArray dia, unsigned char *data)
23	23	{
24		int size = dia->size;
	24	size_t size = dia->size;
25	25	if(size>0)
26	26	data = (unsigned char)malloc(size * sizeof(char));

TabularUnified thirdparty/SZ/sz/src/Huffman.c ¶

-                      r2c47b73
+                      r9ee2ce3
 void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize)
+{
+        size_t i, nodeCount = 0;
+        size_t i;
+        int nodeCount = 0;
         unsigned char *treeBytes, buffer[4];
         init(huffmanTree, s, length);
         for (i = 0; i < huffmanTree->stateNum; i++)
                 if (huffmanTree->code[i]) nodeCount++;
+                if (huffmanTree->code[i]) nodeCount++;
         nodeCount = nodeCount*2-1;
         unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree,nodeCount, &treeBytes);
 …
         intToBytes_bigEndian(buffer, nodeCount);
         memcpy(*out, buffer, 4);
+        memcpy(*out+4, treeBytes, treeByteSize);
+        intToBytes_bigEndian(buffer, huffmanTree->stateNum/2); //real number of intervals
+        memcpy(*out+4, buffer, 4);
+        memcpy(*out+8, treeBytes, treeByteSize);
         free(treeBytes);
         size_t enCodeSize = 0;
         encode(huffmanTree, s, length, *out+4+treeByteSize, &enCodeSize);
         *outSize = 4+treeByteSize+enCodeSize;
+        encode(huffmanTree, s, length, *out+8+treeByteSize, &enCodeSize);
+        *outSize = 8+treeByteSize+enCodeSize;
         //unsigned short state[length];
 …
         size_t encodeStartIndex;
         size_t nodeCount = bytesToInt_bigEndian(s);
         node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+4, nodeCount);
+        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+8, nodeCount);
         //sdi: Debug
 …
         else
                 encodeStartIndex = 1+3*nodeCount*sizeof(unsigned int)+nodeCount*sizeof(unsigned char);
         decode(s+4+encodeStartIndex, targetLength, root, out);
+        decode(s+8+encodeStartIndex, targetLength, root, out);
+}

TabularUnified thirdparty/SZ/sz/src/TightDataPointStorageD.c ¶

-                      r2c47b73
+                      r9ee2ce3
         (*this)->pwrErrBoundBytes = NULL;
         (*this)->pwrErrBoundBytes_size = 0;
+        (*this)->raBytes = NULL;
+        (*this)->raBytes_size = 0;
+}
 …
         int mode = confparams_dec->szMode;
         int predictionMode = confparams_dec->predictionMode;
+        int losslessCompressor = confparams_dec->losslessCompressor;
         if(confparams_dec!=NULL)
                 free(confparams_dec);
         confparams_dec = params;
         confparams_dec->szMode = mode;
+        confparams_dec->losslessCompressor = losslessCompressor;
         if(mode==SZ_TEMPORAL_COMPRESSION)
+        {
 …
+        }
         index += MetaDataByteLength;
+        int isRandomAccess = (sameRByte >> 7) & 0x01;
         unsigned char dsLengthBytes[8];
 …
                 (*this)->allSameData = 0;
+        if(isRandomAccess == 1)
+        {
+                (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE;
+                (*this)->raBytes = &(flatBytes[index]);
+                return errorBoundMode;
+        }
         int rtype_ = sameRByte & 0x08; //1000
 …
                 (*this)->leadNumArray_size = (logicLeadNumBitsNum >> 3) + 1;
+        }
+        int minLogValueSize = 0;
+        if(errorBoundMode>=PW_REL)
+                minLogValueSize = 8;
         if ((*this)->rtypeArray != NULL)
 …
                 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 8 - 1 - 8
                                 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - 8 - (*this)->rtypeArray_size
                                 - (*this)->typeArray_size - (*this)->leadNumArray_size
+                                - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size
                                 - (*this)->exactMidBytes_size - pwrErrBoundBytes_size;
                 for (i = 0; i < (*this)->rtypeArray_size; i++)
 …
+        {
                 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 8 - 1 - 8
                                 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - (*this)->typeArray_size
+                                - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size
                                 - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size;
+        }
+        if(errorBoundMode >= PW_REL){
+                (*this)->minLogValue = bytesToDouble(&flatBytes[index]);
+                index+=8;
+        }
         (*this)->typeArray = &flatBytes[index];
 …
                 bytes[k++] = exactMidBytesLength[i];
+        if(confparams_cpr->errorBoundMode>=PW_REL)
+        {
+                doubleToBytes(exactMidBytesLength, tdps->minLogValue);
+                for(i = 0;i < 8; i++)
+                        bytes[k++] = exactMidBytesLength[i];
+        }
         memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size);
         k += tdps->typeArray_size;
 …
         memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size);
         k += tdps->rtypeArray_size;
+        if(confparams_cpr->errorBoundMode>=PW_REL)
+        {
+                doubleToBytes(exactMidBytesLength, tdps->minLogValue);
+                for(i = 0;i < 8; i++)
+                        bytes[k++] = exactMidBytesLength[i];
+        }
         memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size);
         k += tdps->typeArray_size;
 …
                 size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;
                 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;
+                int minLogValueSize = 0;
                 if(confparams_cpr->errorBoundMode>=PW_REL)
+                {
 …
                         radExpoL = 1;
                         pwrBoundArrayL = 4;
+                        minLogValueSize = 8;
+                }
                 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8
                                 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE
+                                + minLogValueSize /*max absolute log value*/
                                 + tdps->typeArray_size + tdps->leadNumArray_size
                                 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;
 …
                 size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;
                 size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;
+                int minLogValueSize = 0;
                 if(confparams_cpr->errorBoundMode>=PW_REL)
+                {
 …
                         radExpoL = 1;
                         pwrBoundArrayL = 4;
+                        minLogValueSize = 8;
+                }
                 size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 8 + 1 + 8
                                 + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 8 + tdps->rtypeArray_size
                                 + tdps->typeArray_size + tdps->leadNumArray_size
+                                + minLogValueSize + tdps->typeArray_size + tdps->leadNumArray_size
                                 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;

TabularUnified thirdparty/SZ/sz/src/TightDataPointStorageF.c ¶

-                      r2c47b73
+                      r9ee2ce3
         (*this)->pwrErrBoundBytes = NULL;
         (*this)->pwrErrBoundBytes_size = 0;
+        (*this)->raBytes = NULL;
+        (*this)->raBytes_size = 0;
+}
 …
         int mode = confparams_dec->szMode;
         int predictionMode = confparams_dec->predictionMode;
+        int losslessCompressor = confparams_dec->losslessCompressor;
         if(confparams_dec!=NULL)
                 free(confparams_dec);
         confparams_dec = params;
         confparams_dec->szMode = mode;
+        confparams_dec->losslessCompressor = losslessCompressor;
         if(mode==SZ_TEMPORAL_COMPRESSION)
+        {
 …
         index += MetaDataByteLength;
+        int isRandomAccess = (sameRByte >> 7) & 0x01;
         unsigned char dsLengthBytes[8];
 …
         else
                 (*this)->allSameData = 0;
+        if(isRandomAccess == 1)
+        {
+                (*this)->raBytes_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE;
+                (*this)->raBytes = &(flatBytes[index]);
+                return errorBoundMode;
+        }
         int rtype_ = sameRByte & 0x08;          //=00001000
 …
                 for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)
                         byteBuf[i] = flatBytes[index++];
                 (*this)->rtypeArray_size = bytesToSize(byteBuf);//(ST)
+                (*this)->rtypeArray_size = bytesToSize(byteBuf);//(ST)
+        }
         else
 …
+        }
+        int minLogValueSize = 0;
+        if(errorBoundMode>=PW_REL)
+                minLogValueSize = 4;
         if ((*this)->rtypeArray != NULL)
+        {
                 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8
                                 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - 4 - (*this)->rtypeArray_size
                                 - (*this)->typeArray_size - (*this)->leadNumArray_size
+                                - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - exe_params->SZ_SIZE_TYPE - 4 - (*this)->rtypeArray_size
+                                - minLogValueSize - (*this)->typeArray_size - (*this)->leadNumArray_size
                                 - (*this)->exactMidBytes_size - pwrErrBoundBytes_size;
                 for (i = 0; i < (*this)->rtypeArray_size; i++)
 …
+        {
                 (*this)->residualMidBits_size = flatBytesLength - 3 - 1 - MetaDataByteLength - exe_params->SZ_SIZE_TYPE - 4 - radExpoL - segmentL - pwrErrBoundBytesL - 4 - 4 - 1 - 8
                                 - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - (*this)->typeArray_size
+                                - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - exe_params->SZ_SIZE_TYPE - minLogValueSize - (*this)->typeArray_size
                                 - (*this)->leadNumArray_size - (*this)->exactMidBytes_size - pwrErrBoundBytes_size;
+        }
+        }
+        if(errorBoundMode>=PW_REL)
+        {
+                (*this)->minLogValue = bytesToFloat(&flatBytes[index]);
+                index+=4;
+        }
         (*this)->typeArray = &flatBytes[index];
 …
                 bytes[k++] = exactMidBytesLength[i];
+        if(confparams_cpr->errorBoundMode>=PW_REL)
+        {
+                floatToBytes(exactMidBytesLength, tdps->minLogValue);
+                for(i=0;i<4;i++)
+                        bytes[k++] = exactMidBytesLength[i];
+        }
         memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size);
         k += tdps->typeArray_size;
 …
         memcpy(&(bytes[k]), tdps->rtypeArray, tdps->rtypeArray_size);
         k += tdps->rtypeArray_size;
+        if(confparams_cpr->errorBoundMode>=PW_REL)
+        {
+                floatToBytes(exactMidBytesLength, tdps->minLogValue);
+                for(i=0;i<4;i++)
+                        bytes[k++] = exactMidBytesLength[i];
+        }
         memcpy(&(bytes[k]), tdps->typeArray, tdps->typeArray_size);
         k += tdps->typeArray_size;
 …
                 for (i = 0; i < tdps->exactMidBytes_size; i++)
                         (*bytes)[k++] = tdps->exactMidBytes[i];
+                *size = totalByteLength;
+        }
+        else if (tdps->rtypeArray == NULL)
+        {
+                size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;
+                size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;
+                int minLogValueSize = 0;
+                if(confparams_cpr->errorBoundMode>=PW_REL)
+                {
+                        segmentL = exe_params->SZ_SIZE_TYPE;
+                        radExpoL = 1;
+                        pwrBoundArrayL = 4;
+                        minLogValueSize = 4;
+                }
+                size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8
+                                + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + minLogValueSize
+                                + tdps->typeArray_size + tdps->leadNumArray_size
+                                + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;
+                *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength);
+                convertTDPStoBytes_float(tdps, *bytes, dsLengthBytes, sameByte);
+                *size = totalByteLength;
+        }
+        else //the case with reserved value
+        {
+                size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;
+                size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;
+                int minLogValueSize = 0;
+                if(confparams_cpr->errorBoundMode>=PW_REL)
+                {
+                        segmentL = exe_params->SZ_SIZE_TYPE;
+                        radExpoL = 1;
+                        pwrBoundArrayL = 4;
+                        minLogValueSize = 4;
+                }
+                size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8
+                                + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4 + tdps->rtypeArray_size
+                                + minLogValueSize + tdps->typeArray_size + tdps->leadNumArray_size
+                                + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;
+                sameByte = (unsigned char) (sameByte | 0x08); // 00001000, the 4th bit
+                // denotes whether it is
+                // with "reserved value"
+                if(confparams_cpr->errorBoundMode>=PW_REL)
+                        sameByte = (unsigned char) (sameByte | 0x10); // 00001000, the 5th bit
+                *bytes = (unsigned char*)malloc(sizeof(unsigned char)*totalByteLength);
+                convertTDPStoBytes_float_reserve(tdps, *bytes, dsLengthBytes, sameByte);
+                *size = totalByteLength;
+        }
+}
+void convertTDPStoFlatBytes_float_args(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size)
+{
+        size_t i, k = 0;
+        unsigned char dsLengthBytes[8];
+        if(exe_params->SZ_SIZE_TYPE==4)
+                intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4
+        else
+                longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8
+        unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0;
+        sameByte = sameByte | (confparams_cpr->szMode << 1);
+        if(tdps->isLossless)
+                sameByte = (unsigned char) (sameByte | 0x10);
+        if(confparams_cpr->errorBoundMode>=PW_REL)
+                sameByte = (unsigned char) (sameByte | 0x20); // 00100000, the 5th bit
+        if(exe_params->SZ_SIZE_TYPE==8)
+                sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit
+        if(tdps->allSameData==1)
+        {
+                size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size;
+                //*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength);
+                for (i = 0; i < 3; i++)//3
+                        bytes[k++] = versionNumber[i];
+                bytes[k++] = sameByte;
+                convertSZParamsToBytes(confparams_cpr, &(bytes[k]));
+                k = k + MetaDataByteLength;
+                for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
+                        bytes[k++] = dsLengthBytes[i];
+                for (i = 0; i < tdps->exactMidBytes_size; i++)
+                        bytes[k++] = tdps->exactMidBytes[i];
                 *size = totalByteLength;
 …
                                 + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;
-                *bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength);
-                convertTDPStoBytes_float(tdps, *bytes, dsLengthBytes, sameByte);
-                *size = totalByteLength;
+        }
-        else //the case with reserved value
+        {
-                size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;
-                size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;
-                if(confparams_cpr->errorBoundMode>=PW_REL)
+                {
-                        segmentL = exe_params->SZ_SIZE_TYPE;
-                        radExpoL = 1;
-                        pwrBoundArrayL = 4;
+                }
-                size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8
-                                + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4 + tdps->rtypeArray_size
-                                + tdps->typeArray_size + tdps->leadNumArray_size
-                                + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;
-                sameByte = (unsigned char) (sameByte | 0x08); // 00001000, the 4th bit
-                // denotes whether it is
-                // with "reserved value"
-                if(confparams_cpr->errorBoundMode>=PW_REL)
-                        sameByte = (unsigned char) (sameByte | 0x10); // 00001000, the 5th bit
-                *bytes = (unsigned char*)malloc(sizeof(unsigned char)*totalByteLength);
-                convertTDPStoBytes_float_reserve(tdps, *bytes, dsLengthBytes, sameByte);
-                *size = totalByteLength;
+        }
+}
-void convertTDPStoFlatBytes_float_args(TightDataPointStorageF *tdps, unsigned char* bytes, size_t *size)
+{
-        size_t i, k = 0;
-        unsigned char dsLengthBytes[8];
-        if(exe_params->SZ_SIZE_TYPE==4)
-                intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4
-        else
-                longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8
-        unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0;
-        sameByte = sameByte | (confparams_cpr->szMode << 1);
-        if(tdps->isLossless)
-                sameByte = (unsigned char) (sameByte | 0x10);
-        if(confparams_cpr->errorBoundMode>=PW_REL)
-                sameByte = (unsigned char) (sameByte | 0x20); // 00100000, the 5th bit
-        if(exe_params->SZ_SIZE_TYPE==8)
-                sameByte = (unsigned char) (sameByte | 0x40); // 01000000, the 6th bit
-        if(tdps->allSameData==1)
+        {
-                size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + tdps->exactMidBytes_size;
-                //*bytes = (unsigned char *)malloc(sizeof(unsigned char)*totalByteLength);
-                for (i = 0; i < 3; i++)//3
-                        bytes[k++] = versionNumber[i];
-                bytes[k++] = sameByte;
-                convertSZParamsToBytes(confparams_cpr, &(bytes[k]));
-                k = k + MetaDataByteLength;
-                for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
-                        bytes[k++] = dsLengthBytes[i];
-                for (i = 0; i < tdps->exactMidBytes_size; i++)
-                        bytes[k++] = tdps->exactMidBytes[i];
-                *size = totalByteLength;
+        }
-        else if (tdps->rtypeArray == NULL)
+        {
-                size_t residualMidBitsLength = tdps->residualMidBits == NULL ? 0 : tdps->residualMidBits_size;
-                size_t segmentL = 0, radExpoL = 0, pwrBoundArrayL = 0;
-                if(confparams_cpr->errorBoundMode>=PW_REL)
+                {
-                        segmentL = exe_params->SZ_SIZE_TYPE;
-                        radExpoL = 1;
-                        pwrBoundArrayL = 4;
+                }
-                size_t totalByteLength = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrBoundArrayL + 4 + 4 + 1 + 8
-                                + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE
-                                + tdps->typeArray_size + tdps->leadNumArray_size
-                                + tdps->exactMidBytes_size + residualMidBitsLength + tdps->pwrErrBoundBytes_size;
                 convertTDPStoBytes_float(tdps, bytes, dsLengthBytes, sameByte);
 …
  * */
 void free_TightDataPointStorageF(TightDataPointStorageF *tdps)
+{
+{
         if(tdps->rtypeArray!=NULL)
                 free(tdps->rtypeArray);

TabularUnified thirdparty/SZ/sz/src/TypeManager.c ¶

-                      r2c47b73
+                      r9ee2ce3
         return byteLength;
+}
+/**
+ * Pack an array of one-byte flags into a bit stream written to a caller-supplied buffer.
+ *
+ * Every input element equal to 1 sets one output bit; any other value leaves the bit clear.
+ * Bits are filled from the most significant bit of each output byte downwards, eight
+ * input elements per output byte. Unused low bits of the last byte are left at zero.
+ *
+ * @param intArray        input flags, intArrayLength elements
+ * @param intArrayLength  number of input elements
+ * @param result          output buffer; ceil(intArrayLength/8) bytes are written
+ * @return number of bytes written to result
+ * */
+size_t convertIntArray2ByteArray_fast_1b_to_result(unsigned char* intArray, size_t intArrayLength, unsigned char *result)
+{
+        size_t fullBytes = intArrayLength/8;
+        size_t packedLength = (intArrayLength%8==0) ? fullBytes : fullBytes+1;
+        size_t src = 0;
+        size_t outIdx;
+        for(outIdx = 0;outIdx<packedLength;outIdx++)
+        {
+                int packed = 0;
+                int bit;
+                //walk the bit positions 7..0, stopping early once the input is exhausted
+                for(bit = 7;bit>=0&&src<intArrayLength;bit--,src++)
+                {
+                        if(intArray[src] == 1)
+                                packed |= (1 << bit);
+                }
+                result[outIdx] = (unsigned char)packed;
+        }
+        return packedLength;
+}
 void convertByteArray2IntArray_fast_1b(size_t intArrayLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
+{
 …
+}
+/**
+ * Pack an array of 2-bit type codes into bytes, four codes per output byte.
+ *
+ * Each input element must be 0, 1, 2 or 3. The first code of every group of four is
+ * stored in the two most significant bits of the output byte, the next one in the
+ * following two bits, and so on. Unused low bits of the last byte stay zero.
+ * Output byte i is written only after the (up to four) inputs that feed it have been read.
+ *
+ * An element outside 0..3 is a fatal error: a message is written to stderr and the
+ * process terminates with EXIT_FAILURE.
+ *
+ * @param timeStepType        input codes, timeStepTypeLength elements
+ * @param timeStepTypeLength  number of input codes
+ * @param result              output buffer; ceil(timeStepTypeLength/4) bytes are written
+ * @return number of bytes written to result
+ * */
+size_t convertIntArray2ByteArray_fast_2b_inplace(unsigned char* timeStepType, size_t timeStepTypeLength, unsigned char *result)
+{
+        size_t i, j;
+        //ceil(timeStepTypeLength/4), computed without multiplying so it cannot wrap size_t
+        size_t byteLength = timeStepTypeLength/4 + (timeStepTypeLength%4==0 ? 0 : 1);
+        size_t n = 0;
+        for(i = 0;i<byteLength;i++)
+        {
+                int tmp = 0;
+                for(j = 0;j<4&&n<timeStepTypeLength;j++)
+                {
+                        int type = timeStepType[n];
+                        if(type > 3)
+                        {
+                                //invalid code: report on stderr and signal failure to the parent process
+                                fprintf(stderr, "Error: wrong timestep type...: type[%zu]=%d\n", n, type);
+                                exit(EXIT_FAILURE);
+                        }
+                        //slot j occupies bits (7-2j)..(6-2j) of the output byte
+                        tmp = (tmp | (type << (6-j*2)));
+                        n++;
+                }
+                result[i] = (unsigned char)tmp;
+        }
+        return byteLength;
+}
 void convertByteArray2IntArray_fast_2b(size_t stepLength, unsigned char* byteArray, size_t byteArrayLength, unsigned char **intArray)
+{
 …
+}
 int getLeftMovingSteps(size_t k, unsigned char resiBitLength)
+inline int getLeftMovingSteps(size_t k, unsigned char resiBitLength)
+{
         return 8 - k%8 - resiBitLength;

TabularUnified thirdparty/SZ/sz/src/callZlib.c ¶

-                      r2c47b73
+                      r9ee2ce3
         return SZ_NSCS; \
     } \
+}
+/**
+ * Tell whether a pair of leading bytes is one of the header combinations this
+ * function accepts as zlib format.
+ *
+ * Accepted pairs (decimal): 104 followed by 5, 129 or 222; 120 followed by 1, 94, 156 or 218.
+ *
+ * @param magic1  first byte of the stream
+ * @param magic2  second byte of the stream
+ * @return 1 if the pair is accepted, 0 otherwise
+ * */
+int isZlibFormat(unsigned char magic1, unsigned char magic2)
+{
+        switch(magic1)
+        {
+        case 104:
+                //second bytes tagged DC+BS, DC+DC, DC+BC in the original annotations
+                return (magic2==5 || magic2==129 || magic2==222) ? 1 : 0;
+        case 120:
+                //second bytes tagged BC+BS, BC+?, BC+DC, BC+BS in the original annotations
+                return (magic2==1 || magic2==94 || magic2==156 || magic2==218) ? 1 : 0;
+        default:
+                return 0;
+        }
+}
 …
         strm.opaque = Z_NULL;
         ret = deflateInit(&strm, level);
+        //int windowBits = 15;
+    //ret = deflateInit2(&strm, level, Z_DEFLATED, windowBits, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);//Z_FIXED); //Z_DEFAULT_STRATEGY
         if (ret != Z_OK)
                 return ret;

TabularUnified thirdparty/SZ/sz/src/conf.c ¶

-                      r2c47b73
+                      r9ee2ce3
                 confparams_cpr->szMode = SZ_BEST_COMPRESSION;
+                confparams_cpr->gzipMode = 1; //fast mode
+                confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; //other option: GZIP_COMPRESSOR;
+                if(confparams_cpr->losslessCompressor==ZSTD_COMPRESSOR)
+                        confparams_cpr->gzipMode = 3; //fast mode
+                else
+                        confparams_cpr->gzipMode = 1; //high speed mode
                 confparams_cpr->errorBoundMode = PSNR;
                 confparams_cpr->psnr = 90;
+                confparams_cpr->absErrBound = 1E-4;
+                confparams_cpr->relBoundRatio = 1E-4;
                 confparams_cpr->pw_relBoundRatio = 1E-3;
 …
                 confparams_cpr->snapshotCmprStep = 5;
+                sz_with_regression = SZ_WITH_LINEAR_REGRESSION;
                 return SZ_SCES;
 …
+                }
+                modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", NULL);
+                modeBuf = iniparser_getstring(ini, "PARAMETER:losslessCompressor", "ZSTD_COMPRESSOR");
+                if(strcmp(modeBuf, "GZIP_COMPRESSOR")==0)
+                        confparams_cpr->losslessCompressor = GZIP_COMPRESSOR;
+                else if(strcmp(modeBuf, "ZSTD_COMPRESSOR")==0)
+                        confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR;
+                else
+                {
+                        printf("[SZ] Error: Wrong losslessCompressor setting (please check sz.config file)\n");\
+                        printf("No Such a lossless compressor: %s\n", modeBuf);
+                        iniparser_freedict(ini);
+                        return SZ_NSCS;
+                }
+                modeBuf = iniparser_getstring(ini, "PARAMETER:withLinearRegression", "YES");
+                if(strcmp(modeBuf, "YES")==0 || strcmp(modeBuf, "yes")==0)
+                        sz_with_regression = SZ_WITH_LINEAR_REGRESSION;
+                else
+                        sz_with_regression = SZ_NO_REGRESSION;
+                modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", "Gzip_BEST_SPEED");
                 if(modeBuf==NULL)
+                {
 …
                         return SZ_NSCS;
+                }
+                modeBuf = iniparser_getstring(ini, "PARAMETER:zstdMode", "Zstd_HIGH_SPEED");
+                if(modeBuf==NULL)
+                {
+                        printf("[SZ] Error: Null Zstd mode setting (please check sz.config file)\n");
+                        iniparser_freedict(ini);
+                        return SZ_NSCS;
+                }
+                else if(strcmp(modeBuf, "Zstd_BEST_SPEED")==0)
+                        confparams_cpr->gzipMode = 1;
+                else if(strcmp(modeBuf, "Zstd_HIGH_SPEED")==0)
+                        confparams_cpr->gzipMode = 3;
+                else if(strcmp(modeBuf, "Zstd_HIGH_COMPRESSION")==0)
+                        confparams_cpr->gzipMode = 19;
+                else if(strcmp(modeBuf, "Zstd_BEST_COMPRESSION")==0)
+                        confparams_cpr->gzipMode = 22;
+                else if(strcmp(modeBuf, "Zstd_DEFAULT_COMPRESSION")==0)
+                        confparams_cpr->gzipMode = 3;
+                else
+                {
+                        printf("[SZ] Error: Wrong zstd Mode (please check sz.config file)\n");
+                        return SZ_NSCS;
+                }
                 //TODO

TabularUnified thirdparty/SZ/sz/src/dataCompression.c ¶

-                      r2c47b73
+                      r9ee2ce3
+        {
                 unsigned int* data = (unsigned int*)oriData;
                 int data_;
+                unsigned int data_;
                 min = data[0], max = min;
                 computeMinMax(data);
 …
+        {
                 int* data = (int*)oriData;
                 unsigned int data_;
+                int data_;
                 min = data[0], max = min;
                 computeMinMax(data);
 …
         return k;
+}
+//The following functions are float-precision version of dealing with the unpredictable data points
+/**
+ * Derive the required bit length for a float array and produce its truncated values.
+ *
+ * The value range and median come from computeRangeSize_float, the exponent of half the
+ * range from getExponent_float, and the required bit length from computeReqLength_float.
+ * Each element is then offset by the median, has its lowest (32 - reqLength) bits cleared,
+ * and is written to decData with the median added back.
+ *
+ * @param oriData         input values, nbEle elements
+ * @param precision       forwarded to computeReqLength_float
+ * @param nbEle           number of elements
+ * @param reqBytesLength  out: reqLength/8
+ * @param resiBitsLength  out: reqLength%8
+ * @param medianValue     in/out: passed to computeRangeSize_float and computeReqLength_float, then used as the offset
+ * @param decData         out: truncated values, nbEle elements
+ * @return the required bit length reported by computeReqLength_float
+ * */
+int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData)
+{
+        float rangeSize;
+        computeRangeSize_float(oriData, nbEle, &rangeSize, medianValue);
+        short exponent = getExponent_float(rangeSize/2);
+        int reqLength;
+        computeReqLength_float(precision, exponent, &reqLength, medianValue);
+        *reqBytesLength = reqLength/8;
+        *resiBitsLength = reqLength%8;
+        //number of low-order bits cleared in every value; never negative
+        int droppedBits = 32 - reqLength;
+        if(droppedBits<0)
+                droppedBits = 0;
+        size_t idx;
+        for(idx = 0;idx < nbEle;idx++)
+        {
+                lfloat bits;
+                bits.value = oriData[idx] - *medianValue;
+                bits.ivalue = (bits.ivalue >> droppedBits) << droppedBits;
+                decData[idx] = bits.value + *medianValue;
+        }
+        return reqLength;
+}
+/**
+ * Encode a float array element by element into three streams: leading-byte counts,
+ * middle bytes and residual bits.
+ *
+ * @param float* oriData: inplace argument (input / output); every element is overwritten
+ *        with the value compressSingleFloatValue stores in vce.data
+ * @param precision, medianValue, reqLength, reqBytesLength, resiBitsLength: forwarded
+ *        unchanged to compressSingleFloatValue / updateLossyCompElement_Float
+ * @param nbEle: number of elements in oriData
+ * @param leadArray, midArray, resiArray: outputs filled by convertDIAtoInts / convertDBAtoBytes
+ *        (presumably freshly allocated buffers the caller must free -- confirm against those helpers)
+ * @return exactMidByteArray->size (size of the middle-byte stream), narrowed to int
+ *
+ * */
+int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
+int reqLength, int reqBytesLength, int resiBitsLength, float medianValue)
+{
+        //allocate memory for coefficient compression arrays
+        DynamicIntArray *exactLeadNumArray;
+        new_DIA(&exactLeadNumArray, DynArrayInitLen);
+        DynamicByteArray *exactMidByteArray;
+        new_DBA(&exactMidByteArray, DynArrayInitLen);
+        DynamicIntArray *resiBitArray;
+        new_DIA(&resiBitArray, DynArrayInitLen);
+        //bytes of the previously encoded value; starts as all zeros
+        unsigned char preDataBytes[4] = {0,0,0,0};
+        //scratch elements use automatic storage: they never outlive this call, and this
+        //removes the unchecked malloc results that were previously dereferenced
+        FloatValueCompressElement vce;
+        LossyCompressionElement lce;
+        size_t i = 0;
+        for(i = 0;i < nbEle;i++)
+        {
+                compressSingleFloatValue(&vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength);
+                updateLossyCompElement_Float(vce.curBytes, preDataBytes, reqBytesLength, resiBitsLength, &lce);
+                memcpy(preDataBytes,vce.curBytes,4);
+                addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, &lce);
+                oriData[i] = vce.data;
+        }
+        convertDIAtoInts(exactLeadNumArray, leadArray);
+        convertDBAtoBytes(exactMidByteArray,midArray);
+        convertDIAtoInts(resiBitArray, resiArray);
+        //read the size before the dynamic array is released
+        size_t midArraySize = exactMidByteArray->size;
+        free_DIA(exactLeadNumArray);
+        free_DBA(exactMidByteArray);
+        free_DIA(resiBitArray);
+        return (int)midArraySize;
+}
+/**
+ * Rebuild nbEle floats from the three streams produced for exactly-stored values.
+ *
+ * @param leadNum          one entry per value: how many leading bytes are copied from the previous value
+ * @param exactMidBytes    stream of the remaining whole bytes of each value
+ * @param residualMidBits  packed stream holding reqLength%8 extra bits per value
+ * @param nbEle            number of values to rebuild
+ * @param reqLength        bits kept per value (split below into whole bytes and residual bits)
+ * @param medianValue      offset added to every rebuilt value
+ * @param decData          out: *decData is set to a malloc'd array of nbEle floats
+ *                         (the malloc result is not checked; presumably the caller frees it)
+ * */
+void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData)
+{
+        *decData = (float*)malloc(nbEle*sizeof(float));
+        // k: residual bits consumed so far; p: current byte in residualMidBits;
+        // l: index into leadNum; curByteIndex: index into exactMidBytes
+        size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0;
+        float exactData = 0;
+        unsigned char preBytes[4] = {0,0,0,0};
+        unsigned char curBytes[4];
+        int resiBits;
+        unsigned char leadingNum;
+        int reqBytesLength = reqLength/8;
+        int resiBitsLength = reqLength%8;
+        for(i = 0; i<nbEle;i++)
+        {
+                // compute resiBits
+                resiBits = 0;
+                if (resiBitsLength != 0) {
+                        int kMod8 = k % 8;
+                        int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength);
+                        if (rightMovSteps > 0) {
+                                // bits are taken from byte p only; p is not advanced
+                                int code = getRightMovingCode(kMod8, resiBitsLength);
+                                resiBits = (residualMidBits[p] & code) >> rightMovSteps;
+                        } else if (rightMovSteps < 0) {
+                                // bits are assembled from two consecutive bytes, p and p+1
+                                int code1 = getLeftMovingCode(kMod8);
+                                int code2 = getRightMovingCode(kMod8, resiBitsLength);
+                                int leftMovSteps = -rightMovSteps;
+                                rightMovSteps = 8 - leftMovSteps;
+                                resiBits = (residualMidBits[p] & code1) << leftMovSteps;
+                                p++;
+                                resiBits = resiBits
+                                                | ((residualMidBits[p] & code2) >> rightMovSteps);
+                        } else // rightMovSteps == 0
+                        {
+                                // bits are taken unshifted from byte p, then p moves to the next byte
+                                int code = getRightMovingCode(kMod8, resiBitsLength);
+                                resiBits = (residualMidBits[p] & code);
+                                p++;
+                        }
+                        k += resiBitsLength;
+                }
+                // recover the exact data
+                memset(curBytes, 0, 4);
+                leadingNum = leadNum[l++];
+                // the first leadingNum bytes repeat the previous value's bytes
+                memcpy(curBytes, preBytes, leadingNum);
+                for (j = leadingNum; j < reqBytesLength; j++)
+                        curBytes[j] = exactMidBytes[curByteIndex++];
+                if (resiBitsLength != 0) {
+                        // residual bits go into the high bits of the byte after the whole bytes
+                        unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength));
+                        curBytes[reqBytesLength] = resiByte;
+                }
+                exactData = bytesToFloat(curBytes);
+                (*decData)[i] = exactData + medianValue;
+                memcpy(preBytes,curBytes,4);
+        }
+}
+//double-precision version of dealing with unpredictable data points in sz 2.0
+/**
+ * Derive the required bit length for a double array and produce its truncated values.
+ *
+ * The value range and median come from computeRangeSize_double, the exponent of half the
+ * range from getExponent_double, and the required bit length from computeReqLength_double.
+ * Each element is then offset by the median, has its lowest (64 - reqLength) bits cleared,
+ * and is written to decData with the median added back.
+ *
+ * @param oriData         input values, nbEle elements
+ * @param precision       forwarded to computeReqLength_double
+ * @param nbEle           number of elements
+ * @param reqBytesLength  out: reqLength/8
+ * @param resiBitsLength  out: reqLength%8
+ * @param medianValue     in/out: passed to computeRangeSize_double and computeReqLength_double, then used as the offset
+ * @param decData         out: truncated values, nbEle elements
+ * @return the required bit length reported by computeReqLength_double
+ * */
+int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData)
+{
+        double rangeSize;
+        computeRangeSize_double(oriData, nbEle, &rangeSize, medianValue);
+        short exponent = getExponent_double(rangeSize/2);
+        int reqLength;
+        computeReqLength_double(precision, exponent, &reqLength, medianValue);
+        *reqBytesLength = reqLength/8;
+        *resiBitsLength = reqLength%8;
+        //number of low-order bits cleared in every value; never negative
+        int droppedBits = 64 - reqLength;
+        if(droppedBits<0)
+                droppedBits = 0;
+        size_t idx;
+        for(idx = 0;idx < nbEle;idx++)
+        {
+                ldouble bits;
+                bits.value = oriData[idx] - *medianValue;
+                bits.lvalue = (bits.lvalue >> droppedBits) << droppedBits;
+                decData[idx] = bits.value + *medianValue;
+        }
+        return reqLength;
+}
+/**
+ * Encode a double array element by element into three streams: leading-byte counts,
+ * middle bytes and residual bits.
+ *
+ * @param double* oriData: inplace argument (input / output); every element is overwritten
+ *        with the value compressSingleDoubleValue stores in vce.data
+ * @param precision, medianValue, reqLength, reqBytesLength, resiBitsLength: forwarded
+ *        unchanged to compressSingleDoubleValue / updateLossyCompElement_Double
+ * @param nbEle: number of elements in oriData
+ * @param leadArray, midArray, resiArray: outputs filled by convertDIAtoInts / convertDBAtoBytes
+ *        (presumably freshly allocated buffers the caller must free -- confirm against those helpers)
+ * @return exactMidByteArray->size (size of the middle-byte stream), narrowed to int
+ *
+ * */
+int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
+int reqLength, int reqBytesLength, int resiBitsLength, double medianValue)
+{
+        //allocate memory for coefficient compression arrays
+        DynamicIntArray *exactLeadNumArray;
+        new_DIA(&exactLeadNumArray, DynArrayInitLen);
+        DynamicByteArray *exactMidByteArray;
+        new_DBA(&exactMidByteArray, DynArrayInitLen);
+        DynamicIntArray *resiBitArray;
+        new_DIA(&resiBitArray, DynArrayInitLen);
+        //bytes of the previously encoded value; starts as all zeros
+        unsigned char preDataBytes[8] = {0,0,0,0,0,0,0,0};
+        //scratch elements use automatic storage: they never outlive this call, and this
+        //removes the unchecked malloc results that were previously dereferenced
+        DoubleValueCompressElement vce;
+        LossyCompressionElement lce;
+        size_t i = 0;
+        for(i = 0;i < nbEle;i++)
+        {
+                compressSingleDoubleValue(&vce, oriData[i], precision, medianValue, reqLength, reqBytesLength, resiBitsLength);
+                updateLossyCompElement_Double(vce.curBytes, preDataBytes, reqBytesLength, resiBitsLength, &lce);
+                memcpy(preDataBytes,vce.curBytes,8);
+                addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, &lce);
+                oriData[i] = vce.data;
+        }
+        convertDIAtoInts(exactLeadNumArray, leadArray);
+        convertDBAtoBytes(exactMidByteArray,midArray);
+        convertDIAtoInts(resiBitArray, resiArray);
+        //read the size before the dynamic array is released
+        size_t midArraySize = exactMidByteArray->size;
+        free_DIA(exactLeadNumArray);
+        free_DBA(exactMidByteArray);
+        free_DIA(resiBitArray);
+        return (int)midArraySize;
+}
+/**
+ * Rebuild nbEle doubles from the three streams produced for exactly-stored values.
+ *
+ * @param leadNum          one entry per value: how many leading bytes are copied from the previous value
+ * @param exactMidBytes    stream of the remaining whole bytes of each value
+ * @param residualMidBits  packed stream holding reqLength%8 extra bits per value
+ * @param nbEle            number of values to rebuild
+ * @param reqLength        bits kept per value (split below into whole bytes and residual bits)
+ * @param medianValue      offset added to every rebuilt value
+ * @param decData          out: *decData is set to a malloc'd array of nbEle doubles
+ *                         (the malloc result is not checked; presumably the caller frees it)
+ * */
+void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData)
+{
+        *decData = (double*)malloc(nbEle*sizeof(double));
+        // k: residual bits consumed so far; p: current byte in residualMidBits;
+        // l: index into leadNum; curByteIndex: index into exactMidBytes
+        size_t i = 0, j = 0, k = 0, l = 0, p = 0, curByteIndex = 0;
+        double exactData = 0;
+        unsigned char preBytes[8] = {0,0,0,0,0,0,0,0};
+        unsigned char curBytes[8];
+        int resiBits;
+        unsigned char leadingNum;
+        int reqBytesLength = reqLength/8;
+        int resiBitsLength = reqLength%8;
+        for(i = 0; i<nbEle;i++)
+        {
+                // compute resiBits
+                resiBits = 0;
+                if (resiBitsLength != 0) {
+                        int kMod8 = k % 8;
+                        int rightMovSteps = getRightMovingSteps(kMod8, resiBitsLength);
+                        if (rightMovSteps > 0) {
+                                // bits are taken from byte p only; p is not advanced
+                                int code = getRightMovingCode(kMod8, resiBitsLength);
+                                resiBits = (residualMidBits[p] & code) >> rightMovSteps;
+                        } else if (rightMovSteps < 0) {
+                                // bits are assembled from two consecutive bytes, p and p+1
+                                int code1 = getLeftMovingCode(kMod8);
+                                int code2 = getRightMovingCode(kMod8, resiBitsLength);
+                                int leftMovSteps = -rightMovSteps;
+                                rightMovSteps = 8 - leftMovSteps;
+                                resiBits = (residualMidBits[p] & code1) << leftMovSteps;
+                                p++;
+                                resiBits = resiBits
+                                                | ((residualMidBits[p] & code2) >> rightMovSteps);
+                        } else // rightMovSteps == 0
+                        {
+                                // bits are taken unshifted from byte p, then p moves to the next byte
+                                int code = getRightMovingCode(kMod8, resiBitsLength);
+                                resiBits = (residualMidBits[p] & code);
+                                p++;
+                        }
+                        k += resiBitsLength;
+                }
+                // recover the exact data
+                memset(curBytes, 0, 8);
+                leadingNum = leadNum[l++];
+                // the first leadingNum bytes repeat the previous value's bytes
+                memcpy(curBytes, preBytes, leadingNum);
+                for (j = leadingNum; j < reqBytesLength; j++)
+                        curBytes[j] = exactMidBytes[curByteIndex++];
+                if (resiBitsLength != 0) {
+                        // residual bits go into the high bits of the byte after the whole bytes
+                        unsigned char resiByte = (unsigned char) (resiBits << (8 - resiBitsLength));
+                        curBytes[reqBytesLength] = resiByte;
+                }
+                exactData = bytesToDouble(curBytes);
+                (*decData)[i] = exactData + medianValue;
+                memcpy(preBytes,curBytes,8);
+        }
+}

TabularUnified thirdparty/SZ/sz/src/sz.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "Huffman.h"
 #include "conf.h"
+#include "utility.h"
 //#include "CurveFillingCompressStorage.h"
 …
 sz_exedata *exe_params = NULL;
+int sz_with_regression = SZ_WITH_LINEAR_REGRESSION; //SZ_NO_REGRESSION
 /*following global variables are desgined for time-series based compression*/
 …
 int SZ_Init_Params(sz_params *params)
+{
+        int x = 1;
+        char *y = (char*)&x;
+        int endianType = BIG_ENDIAN_SYSTEM;
+        if(*y==1) endianType = LITTLE_ENDIAN_SYSTEM;
+        sysEndianType = endianType;
+        exe_params->SZ_SIZE_TYPE = sizeof(size_t);
+        // set default values
+        if(params->max_quant_intervals > 0)
+        SZ_Init(NULL);
+        if(params->losslessCompressor!=GZIP_COMPRESSOR && params->losslessCompressor!=ZSTD_COMPRESSOR)
+                params->losslessCompressor = ZSTD_COMPRESSOR;
+        if(params->max_quant_intervals > 0)
                 params->maxRangeRadius = params->max_quant_intervals/2;
+        else
+                params->max_quant_intervals = params->maxRangeRadius*2;
+        exe_params->intvCapacity = params->maxRangeRadius*2;
+        exe_params->intvRadius = params->maxRangeRadius;
+        if(params->quantization_intervals>0)
+        {
+                updateQuantizationInfo(params->quantization_intervals);
+                exe_params->optQuantMode = 0;
+        }
+        else
+                exe_params->optQuantMode = 1;
+        memcpy(confparams_cpr, params, sizeof(sz_params));
         if(params->quantization_intervals%2!=0)
 …
                 return SZ_NSCS;
+        }
-        confparams_cpr = (sz_params*)malloc(sizeof(sz_params));
-        memcpy(confparams_cpr, params, sizeof(sz_params));
         return SZ_SCES;
 …
         //confparams_dec->szMode = (sameRByte & 0x06)>>1;
         isLossless = (sameRByte & 0x10)>>4;
+        int isRandomAccess = (sameRByte >> 7) & 0x01;
+        if(exe_params==NULL)
+        {
+                exe_params = (sz_exedata *)malloc(sizeof(struct sz_exedata));
+                memset(exe_params, 0, sizeof(struct sz_exedata));
+        }
         exe_params->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4;
 …
         dataSeriesLength = bytesToSize(&(bytes[index]));// 4 or 8
         index += exe_params->SZ_SIZE_TYPE;
         index += 4; //max_quant_intervals
+        //index += 4; //max_quant_intervals
         sz_metadata* metadata = (sz_metadata*)malloc(sizeof(struct sz_metadata));
 …
         if(isConstant==0 && isLossless==0)
+        {
+                int radExpoL = 0, segmentL = 0, pwrErrBoundBytesL = 0;
+                if(metadata->conf_params->errorBoundMode >= PW_REL)
+                if(isRandomAccess==1)
+                {
+                        radExpoL = 1;
+                        segmentL = exe_params->SZ_SIZE_TYPE;
+                        pwrErrBoundBytesL = 4;
+                        unsigned char* raBytes = &(bytes[index]);
+                        defactoNBBins = bytesToInt_bigEndian(raBytes + sizeof(int) + sizeof(double));
+                }
+                int offset_typearray = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + 4 + 1 + 8
+                                + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE;
+                size_t nodeCount = bytesToInt_bigEndian(bytes+offset_typearray);
+                defactoNBBins = (nodeCount+1)/2;
+        }
+                else
+                {
+                        int radExpoL = 0, segmentL = 0, pwrErrBoundBytesL = 0;
+                        if(metadata->conf_params->errorBoundMode >= PW_REL)
+                        {
+                                radExpoL = 1;
+                                segmentL = exe_params->SZ_SIZE_TYPE;
+                                pwrErrBoundBytesL = 4;
+                        }
+                        int offset_typearray = 3 + 1 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + (4 + params->dataType*4) + 1 + 8
+                                        + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4;
+                        defactoNBBins = bytesToInt_bigEndian(bytes+offset_typearray);
+                }
+        }
         metadata->defactoNBBins = defactoNBBins;
 …
+        }
         return totalSize;
+}
-int isZlibFormat(unsigned char magic1, unsigned char magic2)
+{
-        if(magic1==104&&magic2==5) //DC+BS
-                return 1;
-        if(magic1==104&&magic2==129) //DC+DC
-                return 1;
-        if(magic1==104&&magic2==222) //DC+BC
-                return 1;
-        if(magic1==120&&magic2==1) //BC+BS
-                return 1;
-        if(magic1==120&&magic2==156) //BC+DC
-                return 1;
-        if(magic1==120&&magic2==218) //BC+BS
-                return 1;
-        return 0;
+}

TabularUnified thirdparty/SZ/sz/src/sz_double.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "rw.h"
 #include "sz_double_ts.h"
+#include "utility.h"
 unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = fabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                         if(errBoundMode>=PW_REL)
+                        {
                                 //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, min, max);
                                 SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
+                                SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
+                                //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
+                        }
                         else
 …
                         return SZ_NSCS;
+                }
+        }
+        }
         int status = SZ_SCES;
 …
                         if(confparams_cpr->errorBoundMode>=PW_REL)
+                        {
+                                //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio,
+                                valueRangeSize, medianValue, &tmpOutSize);
+                                SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
+                                //SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
+                        }
                         else
 #ifdef HAVE_TIMECMPR
                                 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
                                         multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
                                 else
 …
+                {
                         if(confparams_cpr->errorBoundMode>=PW_REL)
                                 SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
                         else
 #ifdef HAVE_TIMECMPR
 …
                                 else
 #endif
+                                        SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                {
+                                        if(sz_with_regression == SZ_NO_REGRESSION)
+                                                SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                        else
+                                                tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
+                                }
+                }
                 else
 …
+                {
                         if(confparams_cpr->errorBoundMode>=PW_REL)
                                 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
                         else
 #ifdef HAVE_TIMECMPR
 …
                                 else
 #endif
+                                        SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                {
+                                        if(sz_with_regression == SZ_NO_REGRESSION)
+                                                SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                        else
+                                                tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
+                                }
+                }
                 else
 …
+                {
                         if(confparams_cpr->errorBoundMode>=PW_REL)
                                 SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
                         else
 #ifdef HAVE_TIMECMPR
 …
                                         multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
                                 else
+#endif
+                                        SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+#endif
+                                {
+                                        if(sz_with_regression == SZ_NO_REGRESSION)
+                                                SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                        else
+                                                tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
+                                }
+                }
                 else
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }
 …
+                {
                         radiusIndex = confparams_cpr->maxRangeRadius - 1;
-                        //printf("radiusIndex=%d\n", radiusIndex);
+                }
                 intervals[radiusIndex]++;
-                // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData);
-                // fflush(stdout);
                 offset_count += confparams_cpr->sampleDistance;
                 if(offset_count >= r3){
 …
                 else data_pos += confparams_cpr->sampleDistance;
+        }
-        // printf("sample_count: %ld\n", sample_count);
-        // fflush(stdout);
-        // if(*max_freq < 0.15) *max_freq *= 2;
         //compute the appropriate number
         size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
 …
                 powerOf2 = 32;
         free(intervals);
-        //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
         return powerOf2;
+}
 …
         size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
         memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
         size_t totalSampleSize = 0;//(r1-1)*(r2-1)/confparams_cpr->sampleDistance;
+        size_t totalSampleSize = 0;
         size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
 …
         size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
         memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
         size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance;
+        size_t totalSampleSize = 0;
         double * data_pos = oriData + 2;
         while(data_pos - oriData < dataLength){
                 totalSampleSize++;
-                //pred_value = 2*data_pos[-1] - data_pos[-2];
                 pred_value = data_pos[-1];
                 pred_err = fabs(pred_value - *data_pos);
 …
         free(intervals);
-        //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
         return powerOf2;
+}
+/*The above code is for sz 1.4.13; the following code is for sz 2.0*/
+unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq)
+{ /*
+   * Samples the r1 x r2 array oriData to pick a quantization interval count
+   * and to locate the most densely populated value range.
+   *
+   * Inputs:
+   *   oriData        - array of r1 * r2 doubles, indexed with row stride r2
+   *   r1, r2         - array extents
+   *   realPrecision  - bin width used for both histograms below
+   * Outputs:
+   *   *max_freq   - fraction of samples whose prediction error is below realPrecision
+   *   *dense_pos  - mean + realPrecision * (max_index + 1 - radius), where max_index
+   *                 is the start of the busiest pair of adjacent value bins
+   *   *mean_freq  - fraction of samples falling in that pair of bins
+   * Returns:
+   *   roundUpToPowerOf2(2 * (i + 1)), raised to at least 32, where i is the first
+   *   error-radius bin at which the cumulative count exceeds
+   *   sample_count * predThreshold.
+   *
+   * Reads maxRangeRadius, sampleDistance and predThreshold from confparams_cpr.
+   *
+   * NOTE(review): neither malloc result is checked before use.
+   * NOTE(review): if the first sample position is already >= len the sampling
+   * loop never runs, sample_count stays 0 and the divisions that produce
+   * *max_freq and *mean_freq divide by zero - confirm callers guarantee a
+   * large enough array.
+   */
+        double mean = 0.0;
+        size_t len = r1 * r2;
+        size_t mean_distance = (int) (sqrt(len)); // stride for the coarse mean estimate
+        double * data_pos = oriData;
+        size_t mean_count = 0;
+        while(data_pos - oriData < len){ // visit every mean_distance-th element
+                mean += *data_pos;
+                mean_count ++;
+                data_pos += mean_distance;
+        }
+        if(mean_count > 0) mean /= mean_count;
+        size_t range = 8192; // number of value-histogram bins
+        size_t radius = 4096; // bin index that corresponds to the sampled mean
+        size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t));
+        memset(freq_intervals, 0, range*sizeof(size_t));
+        unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
+        int sampleDistance = confparams_cpr->sampleDistance;
+        double predThreshold = confparams_cpr->predThreshold;
+        size_t i;
+        size_t radiusIndex;
+        double pred_value = 0, pred_err;
+        size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); // prediction-error histogram
+        memset(intervals, 0, maxRangeRadius*sizeof(size_t));
+        double mean_diff;
+        ptrdiff_t freq_index;
+        size_t freq_count = 0;
+        size_t n1_count = 1;
+        size_t offset_count = sampleDistance - 1;
+        size_t offset_count_2 = 0;
+        size_t sample_count = 0;
+        data_pos = oriData + r2 + offset_count; // start in the second row so the [-r2] neighbours exist
+        while(data_pos - oriData < len){
+                pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1]; // left + up - up-left; NOTE(review): r2 is size_t, so -r2 is evaluated in unsigned arithmetic
+                pred_err = fabs(pred_value - *data_pos);
+                if(pred_err < realPrecision) freq_count ++;
+                radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
+                if(radiusIndex>=maxRangeRadius)
+                        radiusIndex = maxRangeRadius - 1; // clamp into the last bin
+                intervals[radiusIndex]++;
+                mean_diff = *data_pos - mean;
+                if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
+                else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; // the cast truncates toward zero, so shift non-positive differences down one bin
+                if(freq_index <= 0){
+                        freq_intervals[0] ++;
+                }
+                else if(freq_index >= range){
+                        freq_intervals[range - 1] ++;
+                }
+                else{
+                        freq_intervals[freq_index] ++;
+                }
+                offset_count += sampleDistance;
+                if(offset_count >= r2){ // ran past the end of the row: move to the next row with a shifted column offset
+                        n1_count ++;
+                        offset_count_2 = n1_count % sampleDistance;
+                        data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
+                        offset_count = (sampleDistance - offset_count_2);
+                        if(offset_count == 0) offset_count ++;
+                }
+                else data_pos += sampleDistance;
+                sample_count ++;
+        }
+        *max_freq = freq_count * 1.0/ sample_count;
+        //compute the appropriate number
+        size_t targetCount = sample_count*predThreshold;
+        size_t sum = 0;
+        for(i=0;i<maxRangeRadius;i++)
+        {
+                sum += intervals[i];
+                if(sum>targetCount)
+                        break;
+        }
+        if(i>=maxRangeRadius)
+                i = maxRangeRadius-1;
+        unsigned int accIntervals = 2*(i+1);
+        unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
+        if(powerOf2<32)
+                powerOf2 = 32;
+        // collect frequency
+        size_t max_sum = 0;
+        size_t max_index = 0;
+        size_t tmp_sum;
+        size_t * freq_pos = freq_intervals + 1;
+        for(size_t i=1; i<range-2; i++){ // NOTE(review): this i shadows the function-scope i declared above
+                tmp_sum = freq_pos[0] + freq_pos[1]; // combined count of bins i and i+1
+                if(tmp_sum > max_sum){
+                        max_sum = tmp_sum;
+                        max_index = i;
+                }
+                freq_pos ++;
+        }
+        *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius);
+        *mean_freq = max_sum * 1.0 / sample_count;
+        free(freq_intervals);
+        free(intervals);
+        return powerOf2;
+}
+unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq)
+{ /*
+   * Samples the r1 x r2 x r3 array oriData to pick a quantization interval
+   * count and to locate the most densely populated value range.
+   *
+   * Inputs:
+   *   oriData        - array of r1 * r2 * r3 doubles; r3 is the row stride and
+   *                    r2 * r3 the plane stride used for neighbour access
+   *   r1, r2, r3     - array extents
+   *   realPrecision  - bin width used for both histograms below
+   * Outputs:
+   *   *max_freq   - fraction of samples whose prediction error is below realPrecision
+   *   *dense_pos  - mean + realPrecision * (max_index + 1 - radius), where max_index
+   *                 is the start of the busiest pair of adjacent value bins
+   *   *mean_freq  - fraction of samples falling in that pair of bins
+   * Returns:
+   *   roundUpToPowerOf2(2 * (i + 1)), raised to at least 32, where i is the first
+   *   error-radius bin at which the cumulative count exceeds
+   *   sample_count * predThreshold.
+   *
+   * Reads maxRangeRadius, sampleDistance and predThreshold from confparams_cpr.
+   *
+   * NOTE(review): neither malloc result is checked before use.
+   * NOTE(review): if the first sample position is already >= len the sampling
+   * loop never runs, sample_count stays 0 and the divisions that produce
+   * *max_freq and *mean_freq divide by zero - confirm callers guarantee a
+   * large enough array.
+   */
+        double mean = 0.0;
+        size_t len = r1 * r2 * r3;
+        size_t mean_distance = (int) (sqrt(len)); // stride for the coarse mean estimate
+        double * data_pos = oriData;
+        size_t offset_count = 0;
+        size_t offset_count_2 = 0;
+        size_t mean_count = 0;
+        while(data_pos - oriData < len){ // NOTE(review): when both back-steps below fire in one iteration and mean_distance is 2 (e.g. r1=4, r2=r3=1) the net advance is zero - confirm such shapes never reach here
+                mean += *data_pos;
+                mean_count ++;
+                data_pos += mean_distance;
+                offset_count += mean_distance;
+                offset_count_2 += mean_distance;
+                if(offset_count >= r3){ // advanced by at least one row: step back one element
+                        offset_count = 0;
+                        data_pos -= 1;
+                }
+                if(offset_count_2 >= r2 * r3){ // advanced by at least one plane: step back one element
+                        offset_count_2 = 0;
+                        data_pos -= 1;
+                }
+        }
+        if(mean_count > 0) mean /= mean_count;
+        size_t range = 8192; // number of value-histogram bins
+        size_t radius = 4096; // bin index that corresponds to the sampled mean
+        size_t * freq_intervals = (size_t *) malloc(range*sizeof(size_t));
+        memset(freq_intervals, 0, range*sizeof(size_t));
+        unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
+        int sampleDistance = confparams_cpr->sampleDistance;
+        double predThreshold = confparams_cpr->predThreshold;
+        size_t i;
+        size_t radiusIndex;
+        size_t r23=r2*r3; // plane stride
+        double pred_value = 0, pred_err;
+        size_t *intervals = (size_t*)malloc(maxRangeRadius*sizeof(size_t)); // prediction-error histogram
+        memset(intervals, 0, maxRangeRadius*sizeof(size_t));
+        double mean_diff;
+        ptrdiff_t freq_index;
+        size_t freq_count = 0;
+        size_t sample_count = 0;
+        offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
+        data_pos = oriData + r23 + r3 + offset_count; // start one plane and one row in so the backward neighbours exist
+        size_t n1_count = 1, n2_count = 1; // count i,j sum
+        while(data_pos - oriData < len){
+                pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1]; // predict from the seven preceding neighbours; NOTE(review): r3/r23 are size_t, so the negations are evaluated in unsigned arithmetic
+                pred_err = fabs(pred_value - *data_pos);
+                if(pred_err < realPrecision) freq_count ++;
+                radiusIndex = (pred_err/realPrecision+1)/2;
+                if(radiusIndex>=maxRangeRadius)
+                {
+                        radiusIndex = maxRangeRadius - 1; // clamp into the last bin
+                }
+                intervals[radiusIndex]++;
+                mean_diff = *data_pos - mean;
+                if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
+                else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius; // the cast truncates toward zero, so shift non-positive differences down one bin
+                if(freq_index <= 0){
+                        freq_intervals[0] ++;
+                }
+                else if(freq_index >= range){
+                        freq_intervals[range - 1] ++;
+                }
+                else{
+                        freq_intervals[freq_index] ++;
+                }
+                offset_count += sampleDistance;
+                if(offset_count >= r3){ // ran past the end of the row: move to the next row with a shifted column offset
+                        n2_count ++;
+                        if(n2_count == r2){ // row counter reached r2: bump the plane counter and skip one extra row
+                                n1_count ++;
+                                n2_count = 1;
+                                data_pos += r3;
+                        }
+                        offset_count_2 = (n1_count + n2_count) % sampleDistance;
+                        data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
+                        offset_count = (sampleDistance - offset_count_2);
+                        if(offset_count == 0) offset_count ++;
+                }
+                else data_pos += sampleDistance;
+                sample_count ++;
+        }
+        *max_freq = freq_count * 1.0/ sample_count;
+        //compute the appropriate number
+        size_t targetCount = sample_count*predThreshold;
+        size_t sum = 0;
+        for(i=0;i<maxRangeRadius;i++)
+        {
+                sum += intervals[i];
+                if(sum>targetCount)
+                        break;
+        }
+        if(i>=maxRangeRadius)
+                i = maxRangeRadius-1;
+        unsigned int accIntervals = 2*(i+1);
+        unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
+        if(powerOf2<32)
+                powerOf2 = 32;
+        // collect frequency
+        size_t max_sum = 0;
+        size_t max_index = 0;
+        size_t tmp_sum;
+        size_t * freq_pos = freq_intervals + 1;
+        for(size_t i=1; i<range-2; i++){ // NOTE(review): this i shadows the function-scope i declared above
+                tmp_sum = freq_pos[0] + freq_pos[1]; // combined count of bins i and i+1
+                if(tmp_sum > max_sum){
+                        max_sum = tmp_sum;
+                        max_index = i;
+                }
+                freq_pos ++;
+        }
+        *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius);
+        *mean_freq = max_sum * 1.0 / sample_count;
+        free(freq_intervals);
+        free(intervals);
+        return powerOf2;
+}
+#define MIN(a, b) a<b? a : b /* NOTE(review): neither the arguments nor the expansion is parenthesized, so e.g. 2 * MIN(3, 4) expands to 2 * 3<4? 3 : 4; each argument is also evaluated twice. Safe only when used as a whole right-hand side with side-effect-free arguments. */
+unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){
+        unsigned int quantization_intervals;
+        double sz_sample_correct_freq = -1;//0.5; //-1
+        double dense_pos;
+        double mean_flush_freq;
+        unsigned char use_mean = 0;
+        if(exe_params->optQuantMode==1)
+        {
+                quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
+                if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
+                updateQuantizationInfo(quantization_intervals);
+        }
+        else{
+                quantization_intervals = exe_params->intvCapacity;
+        }
+        // calculate block dims
+        size_t num_x, num_y;
+        size_t block_size = 16;
+        SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
+        SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
+        size_t split_index_x, split_index_y;
+        size_t early_blockcount_x, early_blockcount_y;
+        size_t late_blockcount_x, late_blockcount_y;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
+        size_t num_blocks = num_x * num_y;
+        size_t num_elements = r1 * r2;
+        size_t dim0_offset = r2;
+        int * result_type = (int *) malloc(num_elements * sizeof(int));
+        size_t unpred_data_max_size = max_num_block_elements;
+        double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks);
+        size_t total_unpred = 0;
+        size_t unpredictable_count;
+        double * data_pos = oriData;
+        int * type = result_type;
+        size_t offset_x, offset_y;
+        size_t current_blockcount_x, current_blockcount_y;
+        double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double));
+        double * reg_params_pos = reg_params;
+        // move regression part out
+        size_t params_offset_b = num_blocks;
+        size_t params_offset_c = 2*num_blocks;
+        for(size_t i=0; i<num_x; i++){
+                for(size_t j=0; j<num_y; j++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                        data_pos = oriData + offset_x * dim0_offset + offset_y;
+                        {
+                                double * cur_data_pos = data_pos;
+                                double fx = 0.0;
+                                double fy = 0.0;
+                                double f = 0;
+                                double sum_x;
+                                double curData;
+                                for(size_t i=0; i<current_blockcount_x; i++){
+                                        sum_x = 0;
+                                        for(size_t j=0; j<current_blockcount_y; j++){
+                                                curData = *cur_data_pos;
+                                                sum_x += curData;
+                                                fy += curData * j;
+                                                cur_data_pos ++;
+                                        }
+                                        fx += sum_x * i;
+                                        f += sum_x;
+                                        cur_data_pos += dim0_offset - current_blockcount_y;
+                                }
+                                double coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
+                                reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
+                                reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
+                                reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
+                        }
+                        reg_params_pos ++;
+                }
+        }
+        //Compress coefficient arrays
+        double precision_a, precision_b, precision_c;
+        double rel_param_err = 0.15/3;
+        precision_a = rel_param_err * realPrecision / late_blockcount_x;
+        precision_b = rel_param_err * realPrecision / late_blockcount_y;
+        precision_c = rel_param_err * realPrecision;
+        double mean = 0;
+        use_mean = 0;
+        if(use_mean){
+                // compute mean
+                double sum = 0.0;
+                size_t mean_count = 0;
+                for(size_t i=0; i<num_elements; i++){
+                        if(fabs(oriData[i] - dense_pos) < realPrecision){
+                                sum += oriData[i];
+                                mean_count ++;
+                        }
+                }
+                if(mean_count > 0) mean = sum / mean_count;
+        }
+        double tmp_realPrecision = realPrecision;
+        // use two prediction buffers for higher performance
+        double * unpredictable_data = result_unpredictable_data;
+        unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char));
+        memset(indicator, 0, num_blocks * sizeof(unsigned char));
+        size_t reg_count = 0;
+        size_t strip_dim_0 = early_blockcount_x + 1;
+        size_t strip_dim_1 = r2 + 1;
+        size_t strip_dim0_offset = strip_dim_1;
+        unsigned char * indicator_pos = indicator;
+        size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
+        double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_1, 0, prediction_buffer_size);
+        double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_2, 0, prediction_buffer_size);
+        double * cur_pb_buf = prediction_buffer_1;
+        double * next_pb_buf = prediction_buffer_2;
+        double * cur_pb_buf_pos;
+        double * next_pb_buf_pos;
+        int intvCapacity = exe_params->intvCapacity;
+        int intvRadius = exe_params->intvRadius;
+        int use_reg = 0;
+        reg_params_pos = reg_params;
+        // compress the regression coefficients on the fly
+        double last_coeffcients[3] = {0.0};
+        int coeff_intvCapacity_sz = 65536;
+        int coeff_intvRadius = coeff_intvCapacity_sz / 2;
+        int * coeff_type[3];
+        int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int));
+        double * coeff_unpred_data[3];
+        double * coeff_unpredictable_data = (double *) malloc(num_blocks*3*sizeof(double));
+        double precision[3];
+        precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
+        for(int i=0; i<3; i++){
+                coeff_type[i] = coeff_result_type + i * num_blocks;
+                coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
+        }
+        int coeff_index = 0;
+        unsigned int coeff_unpredictable_count[3] = {0};
+        if(use_mean){
+                type = result_type;
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        data_pos = oriData + offset_x * dim0_offset;
+                        cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
+                        next_pb_buf_pos = next_pb_buf + 1;
+                        double * pb_pos = cur_pb_buf_pos;
+                        double * next_pb_pos = next_pb_buf_pos;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                /*sampling: decide which predictor to use (regression or lorenzo)*/
+                                {
+                                        double * cur_data_pos;
+                                        double curData;
+                                        double pred_reg, pred_sz;
+                                        double err_sz = 0.0, err_reg = 0.0;
+                                        // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
+                                        // [1, 9] [3, 7]                [7, 3] [9, 1]
+                                        int count = 0;
+                                        for(int i=1; i<current_blockcount_x; i+=2){
+                                                cur_data_pos = data_pos + i * dim0_offset + i;
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
+                                                err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
+                                                err_reg += fabs(pred_reg - curData);
+                                                cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
+                                                err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
+                                                err_reg += fabs(pred_reg - curData);
+                                                count += 2;
+                                        }
+                                        use_reg = (err_reg < err_sz);
+                                }
+                                if(use_reg)
+                                {
+                                        {
+                                                /*predict coefficients in current block via previous reg_block*/
+                                                double cur_coeff;
+                                                double diff, itvNum;
+                                                for(int e=0; e<3; e++){
+                                                        cur_coeff = reg_params_pos[e*num_blocks];
+                                                        diff = cur_coeff - last_coeffcients[e];
+                                                        itvNum = fabs(diff)/precision[e] + 1;
+                                                        if (itvNum < coeff_intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                //guarantee compression error against the case of machine-epsilon
+                                                                if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        else{
+                                                                coeff_type[e][coeff_index] = 0;
+                                                                last_coeffcients[e] = cur_coeff;
+                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        double curData;
+                                        double pred;
+                                        double itvNum;
+                                        double diff;
+                                        size_t index = 0;
+                                        size_t block_unpredictable_count = 0;
+                                        double * cur_data_pos = data_pos;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee compression error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee compression error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                size_t ii = current_blockcount_x - 1;
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee compression error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee compression error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        } // end ii == current_blockcount_x - 1
+                                        unpredictable_count = block_unpredictable_count;
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        reg_count ++;
+                                }// end use_reg
+                                else{
+                                        // use SZ
+                                        // SZ prediction
+                                        unpredictable_count = 0;
+                                        double * cur_pb_pos = pb_pos;
+                                        double * cur_data_pos = data_pos;
+                                        double curData;
+                                        double pred2D;
+                                        double itvNum, diff;
+                                        size_t index = 0;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                type[index] = intvRadius;
+                                                                *cur_pb_pos = mean;
+                                                        }
+                                                        else
+                                                        {
+                                                                pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                                diff = curData - pred2D;
+                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                if (itvNum < intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                        *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_pb_pos += strip_dim0_offset - current_blockcount_y;
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                // ii == current_blockcount_x - 1
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                type[index] = intvRadius;
+                                                                *cur_pb_pos = mean;
+                                                        }
+                                                        else
+                                                        {
+                                                                pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                                diff = curData - pred2D;
+                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                if (itvNum < intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                        *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        next_pb_pos[jj] = *cur_pb_pos;
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        }
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        // change indicator
+                                        indicator_pos[j] = 1;
+                                }// end SZ
+                                reg_params_pos ++;
+                                data_pos += current_blockcount_y;
+                                pb_pos += current_blockcount_y;
+                                next_pb_pos += current_blockcount_y;
+                                type += current_blockcount_x * current_blockcount_y;
+                        }// end j
+                        indicator_pos += num_y;
+                        double * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }// end i
+        }// end use mean
+        else{
+                type = result_type;
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        data_pos = oriData + offset_x * dim0_offset;
+                        cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
+                        next_pb_buf_pos = next_pb_buf + 1;
+                        double * pb_pos = cur_pb_buf_pos;
+                        double * next_pb_pos = next_pb_buf_pos;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                /*sampling*/
+                                {
+                                        // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
+                                        double * cur_data_pos;
+                                        double curData;
+                                        double pred_reg, pred_sz;
+                                        double err_sz = 0.0, err_reg = 0.0;
+                                        // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
+                                        // [1, 9] [3, 7]                [7, 3] [9, 1]
+                                        int count = 0;
+                                        for(int i=1; i<current_blockcount_x; i+=2){
+                                                cur_data_pos = data_pos + i * dim0_offset + i;
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
+                                                err_sz += fabs(pred_sz - curData);
+                                                err_reg += fabs(pred_reg - curData);
+                                                cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
+                                                err_sz += fabs(pred_sz - curData);
+                                                err_reg += fabs(pred_reg - curData);
+                                                count += 2;
+                                        }
+                                        err_sz += realPrecision * count * 0.81;
+                                        use_reg = (err_reg < err_sz);
+                                }
+                                if(use_reg)
+                                {
+                                        {
+                                                /*predict coefficients in current block via previous reg_block*/
+                                                double cur_coeff;
+                                                double diff, itvNum;
+                                                for(int e=0; e<3; e++){
+                                                        cur_coeff = reg_params_pos[e*num_blocks];
+                                                        diff = cur_coeff - last_coeffcients[e];
+                                                        itvNum = fabs(diff)/precision[e] + 1;
+                                                        if (itvNum < coeff_intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                //guarantee compression error against the case of machine-epsilon
+                                                                if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        else{
+                                                                coeff_type[e][coeff_index] = 0;
+                                                                last_coeffcients[e] = cur_coeff;
+                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        double curData;
+                                        double pred;
+                                        double itvNum;
+                                        double diff;
+                                        size_t index = 0;
+                                        size_t block_unpredictable_count = 0;
+                                        double * cur_data_pos = data_pos;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        // jj == current_blockcount_y - 1
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                size_t ii = current_blockcount_x - 1;
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        // jj == current_blockcount_y - 1
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        } // end ii == current_blockcount_x - 1
+                                        unpredictable_count = block_unpredictable_count;
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        reg_count ++;
+                                }// end use_reg
+                                else{
+                                        // use SZ
+                                        // SZ prediction
+                                        unpredictable_count = 0;
+                                        double * cur_pb_pos = pb_pos;
+                                        double * cur_data_pos = data_pos;
+                                        double curData;
+                                        double pred2D;
+                                        double itvNum, diff;
+                                        size_t index = 0;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                        diff = curData - pred2D;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                *cur_pb_pos = curData;
+                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                        }
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_pb_pos += strip_dim0_offset - current_blockcount_y;
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                // ii == current_blockcount_x - 1
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                        diff = curData - pred2D;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                *cur_pb_pos = curData;
+                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                        }
+                                                        next_pb_pos[jj] = *cur_pb_pos;
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        }
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        // change indicator
+                                        indicator_pos[j] = 1;
+                                }// end SZ
+                                reg_params_pos ++;
+                                data_pos += current_blockcount_y;
+                                pb_pos += current_blockcount_y;
+                                next_pb_pos += current_blockcount_y;
+                                type += current_blockcount_x * current_blockcount_y;
+                        }// end j
+                        indicator_pos += num_y;
+                        double * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }// end i
+        }
+        free(prediction_buffer_1);
+        free(prediction_buffer_2);
+        int stateNum = 2*quantization_intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        size_t nodeCount = 0;
+        size_t i = 0;
+        init(huffmanTree, result_type, num_elements);
+        for (i = 0; i < stateNum; i++)
+                if (huffmanTree->code[i]) nodeCount++;
+        nodeCount = nodeCount*2-1;
+        unsigned char *treeBytes;
+        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+        unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
+        // total size                                                                           metadata                  # elements   real precision           intervals       nodeCount               huffman                 block index                                             unpredicatable count                                            mean                                            unpred size                             elements
+        unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
+        unsigned char * result_pos = result;
+        initRandomAccessBytes(result_pos);
+        result_pos += meta_data_offset;
+        sizeToBytes(result_pos, num_elements);
+        result_pos += exe_params->SZ_SIZE_TYPE;
+        intToBytes_bigEndian(result_pos, block_size);
+        result_pos += sizeof(int);
+        doubleToBytes(result_pos, realPrecision);
+        result_pos += sizeof(double);
+        intToBytes_bigEndian(result_pos, quantization_intervals);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, treeByteSize);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, nodeCount);
+        result_pos += sizeof(int);
+        memcpy(result_pos, treeBytes, treeByteSize);
+        result_pos += treeByteSize;
+        free(treeBytes);
+        memcpy(result_pos, &use_mean, sizeof(unsigned char));
+        result_pos += sizeof(unsigned char);
+        memcpy(result_pos, &mean, sizeof(double));
+        result_pos += sizeof(double);
+        size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
+        result_pos += indicator_size;
+        //convert the lead/mid/resi to byte stream
+        if(reg_count>0){
+                for(int e=0; e<3; e++){
+                        int stateNum = 2*coeff_intvCapacity_sz;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        size_t nodeCount = 0;
+                        init(huffmanTree, coeff_type[e], reg_count);
+                        size_t i = 0;
+                        for (i = 0; i < huffmanTree->stateNum; i++)
+                                if (huffmanTree->code[i]) nodeCount++;
+                        nodeCount = nodeCount*2-1;
+                        unsigned char *treeBytes;
+                        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+                        doubleToBytes(result_pos, precision[e]);
+                        result_pos += sizeof(double);
+                        intToBytes_bigEndian(result_pos, coeff_intvRadius);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, treeByteSize);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, nodeCount);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, treeBytes, treeByteSize);
+                        result_pos += treeByteSize;
+                        free(treeBytes);
+                        size_t typeArray_size = 0;
+                        encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
+                        sizeToBytes(result_pos, typeArray_size);
+                        result_pos += sizeof(size_t) + typeArray_size;
+                        intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
+                        result_pos += coeff_unpredictable_count[e]*sizeof(double);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        free(coeff_result_type);
+        free(coeff_unpredictable_data);
+        //record the number of unpredictable data and also store them
+        memcpy(result_pos, &total_unpred, sizeof(size_t));
+        result_pos += sizeof(size_t);
+        memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
+        result_pos += total_unpred * sizeof(double);
+        size_t typeArray_size = 0;
+        encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
+        result_pos += typeArray_size;
+        size_t totalEncodeSize = result_pos - result;
+        free(indicator);
+        free(result_unpredictable_data);
+        free(result_type);
+        free(reg_params);
+        SZ_ReleaseHuffman(huffmanTree);
+        *comp_size = totalEncodeSize;
+        return result;
+}
+unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){
+        unsigned int quantization_intervals;
+        double sz_sample_correct_freq = -1;//0.5; //-1
+        double dense_pos;
+        double mean_flush_freq;
+        unsigned char use_mean = 0;
+        // calculate block dims
+        size_t num_x, num_y, num_z;
+        size_t block_size = 6;
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
+        size_t split_index_x, split_index_y, split_index_z;
+        size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
+        size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
+        size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
+        size_t num_blocks = num_x * num_y * num_z;
+        size_t num_elements = r1 * r2 * r3;
+        size_t dim0_offset = r2 * r3;
+        size_t dim1_offset = r3;
+        int * result_type = (int *) malloc(num_elements * sizeof(int));
+        size_t unpred_data_max_size = max_num_block_elements;
+        double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks);
+        size_t total_unpred = 0;
+        size_t unpredictable_count;
+        size_t max_unpred_count = 0;
+        double * data_pos = oriData;
+        int * type = result_type;
+        size_t type_offset;
+        size_t offset_x, offset_y, offset_z;
+        size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
+        double * reg_params = (double *) malloc(num_blocks * 4 * sizeof(double));
+        double * reg_params_pos = reg_params;
+        // move regression part out
+        size_t params_offset_b = num_blocks;
+        size_t params_offset_c = 2*num_blocks;
+        size_t params_offset_d = 3*num_blocks;
+        for(size_t i=0; i<num_x; i++){
+                for(size_t j=0; j<num_y; j++){
+                        for(size_t k=0; k<num_z; k++){
+                                current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                                /*Calculate regression coefficients*/
+                                {
+                                        double * cur_data_pos = data_pos;
+                                        double fx = 0.0;
+                                        double fy = 0.0;
+                                        double fz = 0.0;
+                                        double f = 0;
+                                        double sum_x, sum_y;
+                                        double curData;
+                                        for(size_t i=0; i<current_blockcount_x; i++){
+                                                sum_x = 0;
+                                                for(size_t j=0; j<current_blockcount_y; j++){
+                                                        sum_y = 0;
+                                                        for(size_t k=0; k<current_blockcount_z; k++){
+                                                                curData = *cur_data_pos;
+                                                                // f += curData;
+                                                                // fx += curData * i;
+                                                                // fy += curData * j;
+                                                                // fz += curData * k;
+                                                                sum_y += curData;
+                                                                fz += curData * k;
+                                                                cur_data_pos ++;
+                                                        }
+                                                        fy += sum_y * j;
+                                                        sum_x += sum_y;
+                                                        cur_data_pos += dim1_offset - current_blockcount_z;
+                                                }
+                                                fx += sum_x * i;
+                                                f += sum_x;
+                                                cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                        }
+                                        double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
+                                        reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
+                                        reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
+                                        reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
+                                        reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
+                                }
+                                reg_params_pos ++;
+                        }
+                }
+        }
+        //Compress coefficient arrays
+        double precision_a, precision_b, precision_c, precision_d;
+        double rel_param_err = 0.025;
+        precision_a = rel_param_err * realPrecision / late_blockcount_x;
+        precision_b = rel_param_err * realPrecision / late_blockcount_y;
+        precision_c = rel_param_err * realPrecision / late_blockcount_z;
+        precision_d = rel_param_err * realPrecision;
+        if(exe_params->optQuantMode==1)
+        {
+                quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
+                if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
+                updateQuantizationInfo(quantization_intervals);
+        }
+        else{
+                quantization_intervals = exe_params->intvCapacity;
+        }
+        double mean = 0;
+        if(use_mean){
+                // compute mean
+                double sum = 0.0;
+                size_t mean_count = 0;
+                for(size_t i=0; i<num_elements; i++){
+                        if(fabs(oriData[i] - dense_pos) < realPrecision){
+                                sum += oriData[i];
+                                mean_count ++;
+                        }
+                }
+                if(mean_count > 0) mean = sum / mean_count;
+        }
+        double tmp_realPrecision = realPrecision;
+        // use two prediction buffers for higher performance
+        double * unpredictable_data = result_unpredictable_data;
+        unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char));
+        memset(indicator, 0, num_blocks * sizeof(unsigned char));
+        size_t reg_count = 0;
+        size_t strip_dim_0 = early_blockcount_x + 1;
+        size_t strip_dim_1 = r2 + 1;
+        size_t strip_dim_2 = r3 + 1;
+        size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
+        size_t strip_dim1_offset = strip_dim_2;
+        unsigned char * indicator_pos = indicator;
+        size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
+        double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_1, 0, prediction_buffer_size);
+        double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_2, 0, prediction_buffer_size);
+        double * cur_pb_buf = prediction_buffer_1;
+        double * next_pb_buf = prediction_buffer_2;
+        double * cur_pb_buf_pos;
+        double * next_pb_buf_pos;
+        int intvCapacity = exe_params->intvCapacity;
+        int intvRadius = exe_params->intvRadius;
+        int use_reg = 0;
+        double noise = realPrecision * 1.22;
+        reg_params_pos = reg_params;
+        // compress the regression coefficients on the fly
+        double last_coeffcients[4] = {0.0};
+        int coeff_intvCapacity_sz = 65536;
+        int coeff_intvRadius = coeff_intvCapacity_sz / 2;
+        int * coeff_type[4];
+        int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int));
+        double * coeff_unpred_data[4];
+        double * coeff_unpredictable_data = (double *) malloc(num_blocks*4*sizeof(double));
+        double precision[4];
+        precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
+        for(int i=0; i<4; i++){
+                coeff_type[i] = coeff_result_type + i * num_blocks;
+                coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
+        }
+        int coeff_index = 0;
+        unsigned int coeff_unpredictable_count[4] = {0};
+        if(use_mean){
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
+                                type_offset = offset_x * dim0_offset +  offset_y * current_blockcount_x * dim1_offset;
+                                type = result_type + type_offset;
+                                // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
+                                cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
+                                next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
+                                size_t current_blockcount_z;
+                                double * pb_pos = cur_pb_buf_pos;
+                                double * next_pb_pos = next_pb_buf_pos;
+                                size_t strip_unpredictable_count = 0;
+                                for(size_t k=0; k<num_z; k++){
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        /*sampling and decide which predictor*/
+                                        {
+                                                // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
+                                                double * cur_data_pos;
+                                                double curData;
+                                                double pred_reg, pred_sz;
+                                                double err_sz = 0.0, err_reg = 0.0;
+                                                int bmi = 0;
+                                                if(i>0 && j>0 && k>0){
+                                                        for(int i=0; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                else{
+                                                        for(int i=1; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                use_reg = (err_reg < err_sz);
+                                        }
+                                        if(use_reg){
+                                                {
+                                                        /*predict coefficients in current block via previous reg_block*/
+                                                        double cur_coeff;
+                                                        double diff, itvNum;
+                                                        for(int e=0; e<4; e++){
+                                                                cur_coeff = reg_params_pos[e*num_blocks];
+                                                                diff = cur_coeff - last_coeffcients[e];
+                                                                itvNum = fabs(diff)/precision[e] + 1;
+                                                                if (itvNum < coeff_intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                        last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                        if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                                coeff_type[e][coeff_index] = 0;
+                                                                                last_coeffcients[e] = cur_coeff;
+                                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                double curData;
+                                                double pred;
+                                                double itvNum;
+                                                double diff;
+                                                size_t index = 0;
+                                                size_t block_unpredictable_count = 0;
+                                                double * cur_data_pos = data_pos;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        size_t ii = current_blockcount_x - 1;
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        // assign value to next prediction buffer
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = pred;
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                unpredictable_count = block_unpredictable_count;
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                reg_count ++;
+                                        }
+                                        else{
+                                                // use SZ
+                                                // SZ prediction
+                                                unpredictable_count = 0;
+                                                double * cur_pb_pos = pb_pos;
+                                                double * cur_data_pos = data_pos;
+                                                double curData;
+                                                double pred3D;
+                                                double itvNum, diff;
+                                                size_t index = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                                type[index] = intvRadius;
+                                                                                *cur_pb_pos = mean;
+                                                                        }
+                                                                        else
+                                                                        {
+                                                                                pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                                 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                                diff = curData - pred3D;
+                                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                                if (itvNum < intvCapacity_sz){
+                                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                        *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                                type[index] = 0;
+                                                                                                *cur_pb_pos = curData;
+                                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                                        }
+                                                                                }
+                                                                                else{
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                                type[index] = intvRadius;
+                                                                                *cur_pb_pos = mean;
+                                                                        }
+                                                                        else
+                                                                        {
+                                                                                pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                                 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                                diff = curData - pred3D;
+                                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                                if (itvNum < intvCapacity_sz){
+                                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                        *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                                type[index] = 0;
+                                                                                                *cur_pb_pos = curData;
+                                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                                        }
+                                                                                }
+                                                                                else{
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                // change indicator
+                                                indicator_pos[k] = 1;
+                                        }// end SZ
+                                        reg_params_pos ++;
+                                        data_pos += current_blockcount_z;
+                                        pb_pos += current_blockcount_z;
+                                        next_pb_pos += current_blockcount_z;
+                                        type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                } // end k
+                                if(strip_unpredictable_count > max_unpred_count){
+                                        max_unpred_count = strip_unpredictable_count;
+                                }
+                                total_unpred += strip_unpredictable_count;
+                                indicator_pos += num_z;
+                        }// end j
+                        double * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }// end i
+        }
+        else{
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
+                                // copy bottom plane from plane buffer
+                                // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double));
+                                type_offset = offset_x * dim0_offset +  offset_y * current_blockcount_x * dim1_offset;
+                                type = result_type + type_offset;
+                                // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
+                                cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
+                                next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
+                                size_t current_blockcount_z;
+                                double * pb_pos = cur_pb_buf_pos;
+                                double * next_pb_pos = next_pb_buf_pos;
+                                size_t strip_unpredictable_count = 0;
+                                for(size_t k=0; k<num_z; k++){
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        /*sampling*/
+                                        {
+                                                // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
+                                                double * cur_data_pos;
+                                                double curData;
+                                                double pred_reg, pred_sz;
+                                                double err_sz = 0.0, err_reg = 0.0;
+                                                int bmi;
+                                                if(i>0 && j>0 && k>0){
+                                                        for(int i=0; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                else{
+                                                        for(int i=1; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                use_reg = (err_reg < err_sz);
+                                        }
+                                        if(use_reg)
+                                        {
+                                                {
+                                                        /*predict coefficients in current block via previous reg_block*/
+                                                        double cur_coeff;
+                                                        double diff, itvNum;
+                                                        for(int e=0; e<4; e++){
+                                                                cur_coeff = reg_params_pos[e*num_blocks];
+                                                                diff = cur_coeff - last_coeffcients[e];
+                                                                itvNum = fabs(diff)/precision[e] + 1;
+                                                                if (itvNum < coeff_intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                        last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                        //ganrantee comporession error against the case of machine-epsilon
+                                                                        if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                                coeff_type[e][coeff_index] = 0;
+                                                                                last_coeffcients[e] = cur_coeff;
+                                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                double curData;
+                                                double pred;
+                                                double itvNum;
+                                                double diff;
+                                                size_t index = 0;
+                                                size_t block_unpredictable_count = 0;
+                                                double * cur_data_pos = data_pos;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        size_t ii = current_blockcount_x - 1;
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        // assign value to next prediction buffer
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = pred;
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                unpredictable_count = block_unpredictable_count;
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                reg_count ++;
+                                        }
+                                        else{
+                                                // use SZ
+                                                // SZ predication
+                                                unpredictable_count = 0;
+                                                double * cur_pb_pos = pb_pos;
+                                                double * cur_data_pos = data_pos;
+                                                double curData;
+                                                double pred3D;
+                                                double itvNum, diff;
+                                                size_t index = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                         - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                        diff = curData - pred3D;
+                                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                                        if (itvNum < intvCapacity_sz){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                         - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                        diff = curData - pred3D;
+                                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                                        if (itvNum < intvCapacity_sz){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+                                                                        // assign value to next prediction buffer
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                // change indicator
+                                                indicator_pos[k] = 1;
+                                        }// end SZ
+                                        reg_params_pos ++;
+                                        data_pos += current_blockcount_z;
+                                        pb_pos += current_blockcount_z;
+                                        next_pb_pos += current_blockcount_z;
+                                        type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                }
+                                if(strip_unpredictable_count > max_unpred_count){
+                                        max_unpred_count = strip_unpredictable_count;
+                                }
+                                total_unpred += strip_unpredictable_count;
+                                indicator_pos += num_z;
+                        }
+                        double * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }
+        }
+        free(prediction_buffer_1);
+        free(prediction_buffer_2);
+        int stateNum = 2*quantization_intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        size_t nodeCount = 0;
+        init(huffmanTree, result_type, num_elements);
+        size_t i = 0;
+        for (i = 0; i < huffmanTree->stateNum; i++)
+                if (huffmanTree->code[i]) nodeCount++;
+        nodeCount = nodeCount*2-1;
+        unsigned char *treeBytes;
+        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+        unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
+        // total size                                                                           metadata                  # elements     real precision         intervals       nodeCount               huffman                 block index                                             unpredicatable count                                            mean                                            unpred size                             elements
+        unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
+        unsigned char * result_pos = result;
+        initRandomAccessBytes(result_pos);
+        result_pos += meta_data_offset;
+        sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
+        result_pos += exe_params->SZ_SIZE_TYPE;
+        intToBytes_bigEndian(result_pos, block_size);
+        result_pos += sizeof(int);
+        doubleToBytes(result_pos, realPrecision);
+        result_pos += sizeof(double);
+        intToBytes_bigEndian(result_pos, quantization_intervals);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, treeByteSize);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, nodeCount);
+        result_pos += sizeof(int);
+        memcpy(result_pos, treeBytes, treeByteSize);
+        result_pos += treeByteSize;
+        free(treeBytes);
+        memcpy(result_pos, &use_mean, sizeof(unsigned char));
+        result_pos += sizeof(unsigned char);
+        memcpy(result_pos, &mean, sizeof(double));
+        result_pos += sizeof(double);
+        size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
+        result_pos += indicator_size;
+        //convert the lead/mid/resi to byte stream
+        if(reg_count > 0){
+                for(int e=0; e<4; e++){
+                        int stateNum = 2*coeff_intvCapacity_sz;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        size_t nodeCount = 0;
+                        init(huffmanTree, coeff_type[e], reg_count);
+                        size_t i = 0;
+                        for (i = 0; i < huffmanTree->stateNum; i++)
+                                if (huffmanTree->code[i]) nodeCount++;
+                        nodeCount = nodeCount*2-1;
+                        unsigned char *treeBytes;
+                        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+                        doubleToBytes(result_pos, precision[e]);
+                        result_pos += sizeof(double);
+                        intToBytes_bigEndian(result_pos, coeff_intvRadius);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, treeByteSize);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, nodeCount);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, treeBytes, treeByteSize);
+                        result_pos += treeByteSize;
+                        free(treeBytes);
+                        size_t typeArray_size = 0;
+                        encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
+                        sizeToBytes(result_pos, typeArray_size);
+                        result_pos += sizeof(size_t) + typeArray_size;
+                        intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
+                        result_pos += coeff_unpredictable_count[e]*sizeof(double);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        free(coeff_result_type);
+        free(coeff_unpredictable_data);
+        //record the number of unpredictable data and also store them
+        memcpy(result_pos, &total_unpred, sizeof(size_t));
+        result_pos += sizeof(size_t);
+        memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
+        result_pos += total_unpred * sizeof(double);
+        size_t typeArray_size = 0;
+        encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
+        result_pos += typeArray_size;
+        size_t totalEncodeSize = result_pos - result;
+        free(indicator);
+        free(result_unpredictable_data);
+        free(result_type);
+        free(reg_params);
+        SZ_ReleaseHuffman(huffmanTree);
+        *comp_size = totalEncodeSize;
+        return result;
+}

TabularUnified thirdparty/SZ/sz/src/sz_double_pwr.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "zlib.h"
 #include "rw.h"
+#include "utility.h"
 void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, double* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision)
 …
         free_TightDataPointStorageD(tdps);
+}
+#include <stdbool.h>
+/**
+ * Compress a 1D double array under a point-wise relative error bound by
+ * working on log2 of the magnitudes.
+ *
+ * Signs are split off into a byte map, each non-zero magnitude is replaced by
+ * its log2, and the transformed array is compressed by
+ * SZ_compress_double_1D_MDQ with an absolute bound derived from
+ * log2(1 + pwrErrRatio). Zero inputs are encoded as a sentinel placed below
+ * the smallest log value seen.
+ *
+ * @param newByteData  out: receives the byte stream written by
+ *                     convertTDPStoFlatBytes_double or
+ *                     SZ_compress_args_double_StoreOriData; set to NULL when
+ *                     the working buffers cannot be allocated.
+ * @param oriData      input values, dataLength elements; this function only
+ *                     reads them.
+ * @param pwrErrRatio  point-wise relative error ratio.
+ * @param dataLength   number of elements in oriData.
+ * @param outSize      out: size in bytes of *newByteData (0 when allocation
+ *                     of the working buffers fails).
+ * @param min          caller-supplied bound used to seed the log-magnitude
+ *                     range (presumably the data minimum -- not verified here).
+ * @param max          caller-supplied bound used like min (presumably the
+ *                     data maximum -- not verified here).
+ */
+void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, double min, double max){
+        double * log_data = (double *) malloc(dataLength * sizeof(double));
+        // calloc returns the sign map already zeroed (0 = non-negative value).
+        unsigned char * signs = (unsigned char *) calloc(dataLength, 1);
+        if(log_data == NULL || signs == NULL){
+                // Out of memory: report an empty result instead of dereferencing NULL.
+                free(log_data);
+                free(signs);
+                *newByteData = NULL;
+                *outSize = 0;
+                return;
+        }
+        // Seed the largest |log2| magnitude from the caller-supplied bounds.
+        // A zero bound is skipped because log2(0) is -infinity.
+        double max_abs_log_data;
+        if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+        else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+        else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+        double min_log_data = max_abs_log_data;
+        bool positive = true;
+        // Split sign from magnitude and move strictly positive magnitudes to
+        // the log2 domain; zeros are left as 0.0 for now.
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] < 0){
+                        signs[i] = 1;
+                        log_data[i] = -oriData[i];
+                        positive = false;
+                }
+                else
+                        log_data[i] = oriData[i];
+                if(log_data[i] > 0){
+                        log_data[i] = log2(log_data[i]);
+                        if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+                        if(log_data[i] < min_log_data) min_log_data = log_data[i];
+                }
+        }
+        // Range statistics are taken before the zero sentinel is written, so
+        // zero inputs still contribute 0.0 here.
+        double valueRangeSize, medianValue_f;
+        computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);
+        if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+        // 2.23e-16 is roughly double machine epsilon; scaling it by the largest
+        // log magnitude tightens the bound slightly (presumably to absorb
+        // rounding in the log domain).
+        double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16;
+        // Encode zeros as a sentinel more than two error bounds below the
+        // smallest real log value.
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] == 0){
+                        log_data[i] = min_log_data - 2.0001*realPrecision;
+                }
+        }
+        TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ(log_data, dataLength, realPrecision, valueRangeSize, medianValue_f);
+        // Threshold stored between the zero sentinel and the smallest real log
+        // value (presumably used on decompression to recognise zeros).
+        tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+        free(log_data);
+        if(!positive){
+                // At least one negative input: keep the losslessly compressed sign map.
+                unsigned char * comp_signs;
+                unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+                tdps->pwrErrBoundBytes = comp_signs;
+                tdps->pwrErrBoundBytes_size = signSize;
+        }
+        else{
+                tdps->pwrErrBoundBytes = NULL;
+                tdps->pwrErrBoundBytes_size = 0;
+        }
+        free(signs);
+        convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
+        // Fall back to storing the raw data when compression did not help.
+        // Pass exactly dataLength: oriData holds dataLength elements, so the
+        // previous dataLength+2 asked the helper to read past its end.
+        if(*outSize>dataLength*sizeof(double))
+                SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
+        free_TightDataPointStorageD(tdps);
+}
+/**
+ * Compress an r1 x r2 double array under a point-wise relative error bound
+ * by working on log2 of the magnitudes.
+ *
+ * Signs are recorded in a separate byte map, every non-zero magnitude is
+ * replaced by its log2, and the result is compressed by
+ * SZ_compress_double_2D_MDQ with an absolute bound derived from
+ * log2(1 + pwrErrRatio). Zero inputs become a sentinel below the smallest
+ * log value seen.
+ *
+ * @param newByteData  out: receives the byte stream written by
+ *                     convertTDPStoFlatBytes_double or
+ *                     SZ_compress_args_double_StoreOriData; set to NULL when
+ *                     the working buffers cannot be allocated.
+ * @param oriData      input values, r1*r2 elements; this function only reads
+ *                     them.
+ * @param pwrErrRatio  point-wise relative error ratio.
+ * @param r1           first dimension, forwarded to SZ_compress_double_2D_MDQ.
+ * @param r2           second dimension, forwarded to SZ_compress_double_2D_MDQ.
+ * @param outSize      out: size in bytes of *newByteData (0 when allocation
+ *                     of the working buffers fails).
+ * @param min          caller-supplied bound used to seed the log-magnitude
+ *                     range (presumably the data minimum -- not verified here).
+ * @param max          caller-supplied bound used like min (presumably the
+ *                     data maximum -- not verified here).
+ */
+void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, double min, double max){
+        size_t dataLength = r1 * r2;
+        double * log_data = (double *) malloc(dataLength * sizeof(double));
+        // calloc returns the sign map already zeroed (0 = non-negative value).
+        unsigned char * signs = (unsigned char *) calloc(dataLength, 1);
+        if(log_data == NULL || signs == NULL){
+                // Out of memory: report an empty result instead of dereferencing NULL.
+                free(log_data);
+                free(signs);
+                *newByteData = NULL;
+                *outSize = 0;
+                return;
+        }
+        // Seed the largest |log2| magnitude from the caller-supplied bounds.
+        // A zero bound is skipped because log2(0) is -infinity.
+        double max_abs_log_data;
+        if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+        else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+        else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+        double min_log_data = max_abs_log_data;
+        bool positive = true;
+        // Split sign from magnitude and move strictly positive magnitudes to
+        // the log2 domain; zeros are left as 0.0 for now.
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] < 0){
+                        signs[i] = 1;
+                        log_data[i] = -oriData[i];
+                        positive = false;
+                }
+                else
+                        log_data[i] = oriData[i];
+                if(log_data[i] > 0){
+                        log_data[i] = log2(log_data[i]);
+                        if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+                        if(log_data[i] < min_log_data) min_log_data = log_data[i];
+                }
+        }
+        // Range statistics are taken before the zero sentinel is written, so
+        // zero inputs still contribute 0.0 here.
+        double valueRangeSize, medianValue_f;
+        computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);
+        if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+        // 2.23e-16 is roughly double machine epsilon; scaling it by the largest
+        // log magnitude tightens the bound slightly (presumably to absorb
+        // rounding in the log domain).
+        double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16;
+        // Encode zeros as a sentinel more than two error bounds below the
+        // smallest real log value.
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] == 0){
+                        log_data[i] = min_log_data - 2.0001*realPrecision;
+                }
+        }
+        TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ(log_data, r1, r2, realPrecision, valueRangeSize, medianValue_f);
+        // Threshold stored between the zero sentinel and the smallest real log
+        // value (presumably used on decompression to recognise zeros).
+        tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+        free(log_data);
+        if(!positive){
+                // At least one negative input: keep the losslessly compressed sign map.
+                unsigned char * comp_signs;
+                unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+                tdps->pwrErrBoundBytes = comp_signs;
+                tdps->pwrErrBoundBytes_size = signSize;
+        }
+        else{
+                tdps->pwrErrBoundBytes = NULL;
+                tdps->pwrErrBoundBytes_size = 0;
+        }
+        free(signs);
+        convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
+        // Fall back to storing the raw data when compression did not help.
+        // Pass exactly dataLength: oriData holds r1*r2 elements, so the
+        // previous dataLength+2 asked the helper to read past its end.
+        if(*outSize>dataLength*sizeof(double))
+                SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
+        free_TightDataPointStorageD(tdps);
+}
+/**
+ * Compress an r1 x r2 x r3 double array under a point-wise relative error
+ * bound by working on log2 of the magnitudes.
+ *
+ * Signs are recorded in a separate byte map, every non-zero magnitude is
+ * replaced by its log2, and the result is compressed by
+ * SZ_compress_double_3D_MDQ with an absolute bound derived from
+ * log2(1 + pwrErrRatio). Zero inputs become a sentinel below the smallest
+ * log value seen.
+ *
+ * @param newByteData  out: receives the byte stream written by
+ *                     convertTDPStoFlatBytes_double or
+ *                     SZ_compress_args_double_StoreOriData; set to NULL when
+ *                     the working buffers cannot be allocated.
+ * @param oriData      input values, r1*r2*r3 elements; this function only
+ *                     reads them.
+ * @param pwrErrRatio  point-wise relative error ratio.
+ * @param r1           first dimension, forwarded to SZ_compress_double_3D_MDQ.
+ * @param r2           second dimension, forwarded to SZ_compress_double_3D_MDQ.
+ * @param r3           third dimension, forwarded to SZ_compress_double_3D_MDQ.
+ * @param outSize      out: size in bytes of *newByteData (0 when allocation
+ *                     of the working buffers fails).
+ * @param min          caller-supplied bound used to seed the log-magnitude
+ *                     range (presumably the data minimum -- not verified here).
+ * @param max          caller-supplied bound used like min (presumably the
+ *                     data maximum -- not verified here).
+ */
+void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, double *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max){
+        size_t dataLength = r1 * r2 * r3;
+        double * log_data = (double *) malloc(dataLength * sizeof(double));
+        // calloc returns the sign map already zeroed (0 = non-negative value).
+        unsigned char * signs = (unsigned char *) calloc(dataLength, 1);
+        if(log_data == NULL || signs == NULL){
+                // Out of memory: report an empty result instead of dereferencing NULL.
+                free(log_data);
+                free(signs);
+                *newByteData = NULL;
+                *outSize = 0;
+                return;
+        }
+        // Seed the largest |log2| magnitude from the caller-supplied bounds.
+        // A zero bound is skipped because log2(0) is -infinity.
+        double max_abs_log_data;
+        if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+        else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+        else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+        double min_log_data = max_abs_log_data;
+        bool positive = true;
+        // Split sign from magnitude and move strictly positive magnitudes to
+        // the log2 domain; zeros are left as 0.0 for now.
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] < 0){
+                        signs[i] = 1;
+                        log_data[i] = -oriData[i];
+                        positive = false;
+                }
+                else
+                        log_data[i] = oriData[i];
+                if(log_data[i] > 0){
+                        log_data[i] = log2(log_data[i]);
+                        if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+                        if(log_data[i] < min_log_data) min_log_data = log_data[i];
+                }
+        }
+        // Range statistics are taken before the zero sentinel is written, so
+        // zero inputs still contribute 0.0 here.
+        double valueRangeSize, medianValue_f;
+        computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);
+        if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+        // 2.23e-16 is roughly double machine epsilon; scaling it by the largest
+        // log magnitude tightens the bound slightly (presumably to absorb
+        // rounding in the log domain).
+        double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16;
+        // Encode zeros as a sentinel more than two error bounds below the
+        // smallest real log value.
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] == 0){
+                        log_data[i] = min_log_data - 2.0001*realPrecision;
+                }
+        }
+        TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ(log_data, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
+        // Threshold stored between the zero sentinel and the smallest real log
+        // value (presumably used on decompression to recognise zeros).
+        tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+        free(log_data);
+        if(!positive){
+                // At least one negative input: keep the losslessly compressed sign map.
+                unsigned char * comp_signs;
+                unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+                tdps->pwrErrBoundBytes = comp_signs;
+                tdps->pwrErrBoundBytes_size = signSize;
+        }
+        else{
+                tdps->pwrErrBoundBytes = NULL;
+                tdps->pwrErrBoundBytes_size = 0;
+        }
+        free(signs);
+        convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
+        // Fall back to storing the raw data when compression did not help.
+        // Pass exactly dataLength: oriData holds r1*r2*r3 elements, so the
+        // previous dataLength+2 asked the helper to read past its end.
+        if(*outSize>dataLength*sizeof(double))
+                SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
+        free_TightDataPointStorageD(tdps);
+}

TabularUnified thirdparty/SZ/sz/src/sz_double_ts.c ¶

r2c47b73	r9ee2ce3
67	67	double realPrecision, double valueRangeSize, double medianValue_d)
68	68	{
69		double* preStepData = (double*)(multisteps->hist_data);
	69	double* preStepData = (double*)(multisteps->hist_data);
70	70	//store the decompressed data
71	71	double* decData = (double*)malloc(sizeof(double)*dataLength);

TabularUnified thirdparty/SZ/sz/src/sz_float.c ¶

-                      r2c47b73
+                      r9ee2ce3
 /**
  *  @file sz_float.c
  *  @author Sheng Di and Dingwen Tao
+ *  @author Sheng Di, Dingwen Tao, Xin Liang
  *  @date Aug, 2016
  *  @brief SZ_Init, Compression and Decompression functions
 …
 #include "rw.h"
 #include "sz_float_ts.h"
+#include "utility.h"
 unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = fabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
+                }
                 else
+                {
+                        tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
+                {
+                        if(sz_with_regression == SZ_NO_REGRESSION)
+                                tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
+                        else
+                                *newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
                         compressionType = 0; //snapshot-based compression
                         multisteps->lastSnapshotStep = timestep;
 …
                 tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
         convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
         if(*outSize>dataLength*sizeof(float))
                 SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
         free_TightDataPointStorageF(tdps);
+        if(tdps!=NULL)
+        {
+                convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
+                if(*outSize>dataLength*sizeof(float))
+                        SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
+                free_TightDataPointStorageF(tdps);
+        }
         return compressionType;
+}
 …
                         if(errBoundMode>=PW_REL)
+                        {
                                 //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, realPrecision, r1, outSize, min, max);
                                 SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
+                                SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
+                                //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
+                        }
                         else
 …
+                {
                         if(errBoundMode>=PW_REL)
                                 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr(newByteData, oriData, realPrecision, r2, r1, outSize, min, max);
+                                SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max);
                         else
                                 SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
 …
+                {
                         if(errBoundMode>=PW_REL)
                                 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max);
+                                SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max);
                         else
                                 SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
 …
+                {
                         if(errBoundMode>=PW_REL)
                                 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max);
+                                SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max);
                         else
                                 SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
 …
                         if(confparams_cpr->errorBoundMode>=PW_REL)
+                        {
+                                //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(&tmpByteData, oriData, realPrecision, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio,
+                                valueRangeSize, medianValue, &tmpOutSize);
+                                SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
+                                //SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
+                        }
                         else
 …
+                {
                         if(confparams_cpr->errorBoundMode>=PW_REL)
                                 SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr(&tmpByteData, oriData, realPrecision, r2, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
                         else
 #ifdef HAVE_TIMECMPR
 …
                                 else
 #endif
+                                        SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                {
+                                        if(sz_with_regression == SZ_NO_REGRESSION)
+                                                SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                        else
+                                                tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
+                                }
+                }
                 else
 …
+                {
                         if(confparams_cpr->errorBoundMode>=PW_REL)
                                 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r3, r2, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
                         else
 #ifdef HAVE_TIMECMPR
                                 if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
                                         multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                                multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
                                 else
 #endif
+                                        SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                {
+                                        if(sz_with_regression == SZ_NO_REGRESSION)
+                                                SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                        else
+                                                tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
+                                }
+                }
                 else
 …
+                {
                         if(confparams_cpr->errorBoundMode>=PW_REL)
                                 SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(&tmpByteData, oriData, realPrecision, r4*r3, r2, r1, &tmpOutSize, min, max);
+                                SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
                                 //ToDO
                                 //SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr(&tmpByteData, oriData, r4, r3, r2, r1, &tmpOutSize, min, max);
 …
                                 else
 #endif
+                                        SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                {
+                                        if(sz_with_regression == SZ_NO_REGRESSION)
+                                                SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
+                                        else
+                                                tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
+                                }
+                }
                 else
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION || confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }
 …
         size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
         memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
         size_t totalSampleSize = 0;//(r1-1)*(r2-1)*(r3-1)/confparams_cpr->sampleDistance;
+        size_t totalSampleSize = 0;
         size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
 …
+                {
                         radiusIndex = confparams_cpr->maxRangeRadius - 1;
-                        //printf("radiusIndex=%d\n", radiusIndex);
+                }
                 intervals[radiusIndex]++;
-                // printf("TEST: %ld, i: %ld\tj: %ld\tk: %ld\n", data_pos - oriData);
-                // fflush(stdout);
                 offset_count += confparams_cpr->sampleDistance;
                 if(offset_count >= r3){
 …
                 else data_pos += confparams_cpr->sampleDistance;
+        }
-        // printf("sample_count: %ld\n", sample_count);
-        // fflush(stdout);
-        // if(*max_freq < 0.15) *max_freq *= 2;
         //compute the appropriate number
         size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
 …
                 powerOf2 = 32;
         free(intervals);
-        //printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
         return powerOf2;
+}
 …
         size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
         memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
+        size_t totalSampleSize = 0;//(r1-1)*(r2-1)/confparams_cpr->sampleDistance;
+        //float max = oriData[0];
+        //float min = oriData[0];
+        size_t totalSampleSize = 0;
         size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
 …
         while(data_pos - oriData < dataLength){
                 totalSampleSize++;
-                //pred_value = 2*data_pos[-1] - data_pos[-2];
                 pred_value = data_pos[-1];
                 pred_err = fabs(pred_value - *data_pos);
 …
         free(intervals);
-        //printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
         return powerOf2;
+}
 …
+}
+/*The above code is for sz 1.4.13; the following code is for sz 2.0*/
+/*
+ * Sample a 2D float field (r1 rows of r2 values, row-major) to pick the number
+ * of quantization intervals and to locate the most densely populated value band.
+ *
+ * oriData        input values; indices 0 .. r1*r2-1 are read
+ * r1, r2         number of rows / number of values per row
+ * realPrecision  error bound; also used as the width of one histogram bin
+ * dense_pos      out: value on the boundary between the two adjacent value bins
+ *                (bins are taken relative to a coarse mean) with the highest
+ *                combined sample count
+ * max_freq       out: fraction of samples whose prediction error
+ *                |left + upper - upper_left - value| is below realPrecision
+ * mean_freq      out: fraction of samples that fall into that densest bin pair
+ *
+ * Returns 2*(k+1), where k is the first prediction-error radius at which the
+ * cumulative sample count exceeds predThreshold of all samples, passed through
+ * roundUpToPowerOf2() and raised to at least 32.
+ * If the field is too small for any sample to be taken, *max_freq and
+ * *mean_freq are set to 0 instead of the result of a division by zero.
+ * NOTE(review): allocation results are not checked here.
+ */
+unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq)
+{
+        size_t len = r1 * r2;
+        // coarse mean: a stride of sqrt(len) visits about sqrt(len) elements
+        float mean = 0.0;
+        size_t mean_distance = (int) (sqrt(len));
+        size_t mean_count = 0;
+        for(size_t pos = 0; pos < len; pos += mean_distance){
+                mean += oriData[pos];
+                mean_count ++;
+        }
+        if(mean_count > 0) mean /= mean_count;
+        // histogram of (value - mean) in bins of width realPrecision, centred on radius
+        size_t range = 8192;
+        size_t radius = 4096;
+        size_t * freq_intervals = (size_t *) calloc(range, sizeof(size_t));
+        unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
+        int sampleDistance = confparams_cpr->sampleDistance;
+        float predThreshold = confparams_cpr->predThreshold;
+        // histogram of prediction-error radii
+        size_t *intervals = (size_t*) calloc(maxRangeRadius, sizeof(size_t));
+        size_t i;
+        size_t radiusIndex;
+        float pred_value = 0, pred_err;
+        float mean_diff;
+        ptrdiff_t freq_index;
+        size_t freq_count = 0;
+        size_t n1_count = 1;            // row of the current sample
+        size_t offset_count = sampleDistance - 1;   // column of the current sample
+        // Column 0 has no left / upper-left neighbour: with sampleDistance == 1 the
+        // first sample would read oriData[-1]. Start at column 1 in that case.
+        if(offset_count == 0) offset_count ++;
+        size_t offset_count_2 = 0;
+        size_t sample_count = 0;
+        // Invariant: pos == n1_count * r2 + offset_count with n1_count >= 1 and
+        // offset_count >= 1, so pos - r2 - 1 never underflows.
+        size_t pos = r2 + offset_count;
+        while(pos < len){
+                pred_value = oriData[pos - 1] + oriData[pos - r2] - oriData[pos - r2 - 1];
+                pred_err = fabs(pred_value - oriData[pos]);
+                if(pred_err < realPrecision) freq_count ++;
+                radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
+                if(radiusIndex>=maxRangeRadius)
+                        radiusIndex = maxRangeRadius - 1;
+                intervals[radiusIndex]++;
+                mean_diff = oriData[pos] - mean;
+                if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
+                else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
+                // out-of-range values are accumulated in the first / last bin
+                if(freq_index <= 0){
+                        freq_intervals[0] ++;
+                }
+                else if((size_t)freq_index >= range){
+                        freq_intervals[range - 1] ++;
+                }
+                else{
+                        freq_intervals[freq_index] ++;
+                }
+                offset_count += sampleDistance;
+                if(offset_count >= r2){
+                        // move to the next row; the start column is shifted per row and
+                        // is always in [1, sampleDistance]
+                        n1_count ++;
+                        offset_count_2 = n1_count % sampleDistance;
+                        pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
+                        offset_count = (sampleDistance - offset_count_2);
+                }
+                else pos += sampleDistance;
+                sample_count ++;
+        }
+        *max_freq = (sample_count > 0) ? freq_count * 1.0 / sample_count : 0;
+        //compute the appropriate number
+        size_t targetCount = sample_count*predThreshold;
+        size_t sum = 0;
+        for(i=0;i<maxRangeRadius;i++)
+        {
+                sum += intervals[i];
+                if(sum>targetCount)
+                        break;
+        }
+        if(i>=maxRangeRadius)
+                i = maxRangeRadius-1;
+        unsigned int accIntervals = 2*(i+1);
+        unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
+        if(powerOf2<32)
+                powerOf2 = 32;
+        // collect frequency: find the pair of adjacent bins with the largest combined
+        // count (the two catch-all end bins are never the first bin of a pair)
+        size_t max_sum = 0;
+        size_t max_index = 0;
+        size_t tmp_sum;
+        for(size_t k=1; k<range-2; k++){
+                tmp_sum = freq_intervals[k] + freq_intervals[k + 1];
+                if(tmp_sum > max_sum){
+                        max_sum = tmp_sum;
+                        max_index = k;
+                }
+        }
+        *dense_pos = mean + realPrecision * (ptrdiff_t)(max_index + 1 - radius);
+        *mean_freq = (sample_count > 0) ? max_sum * 1.0 / sample_count : 0;
+        free(freq_intervals);
+        free(intervals);
+        return powerOf2;
+}
+// 2D:  modified for higher performance
+/* Smaller of a and b. Each argument may be evaluated twice, so pass only
+ * expressions without side effects. Fully parenthesized so the result can be
+ * used inside a larger expression. */
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){
+        unsigned int quantization_intervals;
+        float sz_sample_correct_freq = -1;//0.5; //-1
+        float dense_pos;
+        float mean_flush_freq;
+        unsigned char use_mean = 0;
+        if(exe_params->optQuantMode==1)
+        {
+                quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
+                if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
+                updateQuantizationInfo(quantization_intervals);
+        }
+        else{
+                quantization_intervals = exe_params->intvCapacity;
+        }
+        // calculate block dims
+        size_t num_x, num_y;
+        size_t block_size = 16;
+        SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
+        SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
+        size_t split_index_x, split_index_y;
+        size_t early_blockcount_x, early_blockcount_y;
+        size_t late_blockcount_x, late_blockcount_y;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
+        size_t num_blocks = num_x * num_y;
+        size_t num_elements = r1 * r2;
+        size_t dim0_offset = r2;
+        int * result_type = (int *) malloc(num_elements * sizeof(int));
+        size_t unpred_data_max_size = max_num_block_elements;
+        float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks);
+        size_t total_unpred = 0;
+        size_t unpredictable_count;
+        float * data_pos = oriData;
+        int * type = result_type;
+        size_t offset_x, offset_y;
+        size_t current_blockcount_x, current_blockcount_y;
+        float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float));
+        float * reg_params_pos = reg_params;
+        // move regression part out
+        size_t params_offset_b = num_blocks;
+        size_t params_offset_c = 2*num_blocks;
+        for(size_t i=0; i<num_x; i++){
+                for(size_t j=0; j<num_y; j++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                        data_pos = oriData + offset_x * dim0_offset + offset_y;
+                        {
+                                float * cur_data_pos = data_pos;
+                                float fx = 0.0;
+                                float fy = 0.0;
+                                float f = 0;
+                                double sum_x;
+                                float curData;
+                                for(size_t i=0; i<current_blockcount_x; i++){
+                                        sum_x = 0;
+                                        for(size_t j=0; j<current_blockcount_y; j++){
+                                                curData = *cur_data_pos;
+                                                sum_x += curData;
+                                                fy += curData * j;
+                                                cur_data_pos ++;
+                                        }
+                                        fx += sum_x * i;
+                                        f += sum_x;
+                                        cur_data_pos += dim0_offset - current_blockcount_y;
+                                }
+                                float coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
+                                reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
+                                reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
+                                reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
+                        }
+                        reg_params_pos ++;
+                }
+        }
+        //Compress coefficient arrays
+        double precision_a, precision_b, precision_c;
+        float rel_param_err = 0.15/3;
+        precision_a = rel_param_err * realPrecision / late_blockcount_x;
+        precision_b = rel_param_err * realPrecision / late_blockcount_y;
+        precision_c = rel_param_err * realPrecision;
+        float mean = 0;
+        use_mean = 0;
+        if(use_mean){
+                // compute mean
+                double sum = 0.0;
+                size_t mean_count = 0;
+                for(size_t i=0; i<num_elements; i++){
+                        if(fabs(oriData[i] - dense_pos) < realPrecision){
+                                sum += oriData[i];
+                                mean_count ++;
+                        }
+                }
+                if(mean_count > 0) mean = sum / mean_count;
+        }
+        double tmp_realPrecision = realPrecision;
+        // use two prediction buffers for higher performance
+        float * unpredictable_data = result_unpredictable_data;
+        unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char));
+        memset(indicator, 0, num_blocks * sizeof(unsigned char));
+        size_t reg_count = 0;
+        size_t strip_dim_0 = early_blockcount_x + 1;
+        size_t strip_dim_1 = r2 + 1;
+        size_t strip_dim0_offset = strip_dim_1;
+        unsigned char * indicator_pos = indicator;
+        size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
+        float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_1, 0, prediction_buffer_size);
+        float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_2, 0, prediction_buffer_size);
+        float * cur_pb_buf = prediction_buffer_1;
+        float * next_pb_buf = prediction_buffer_2;
+        float * cur_pb_buf_pos;
+        float * next_pb_buf_pos;
+        int intvCapacity = exe_params->intvCapacity;
+        int intvRadius = exe_params->intvRadius;
+        int use_reg = 0;
+        reg_params_pos = reg_params;
+        // compress the regression coefficients on the fly
+        float last_coeffcients[3] = {0.0};
+        int coeff_intvCapacity_sz = 65536;
+        int coeff_intvRadius = coeff_intvCapacity_sz / 2;
+        int * coeff_type[3];
+        int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int));
+        float * coeff_unpred_data[3];
+        float * coeff_unpredictable_data = (float *) malloc(num_blocks*3*sizeof(float));
+        double precision[3];
+        precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
+        for(int i=0; i<3; i++){
+                coeff_type[i] = coeff_result_type + i * num_blocks;
+                coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
+        }
+        int coeff_index = 0;
+        unsigned int coeff_unpredictable_count[3] = {0};
+        if(use_mean){
+                type = result_type;
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        data_pos = oriData + offset_x * dim0_offset;
+                        cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
+                        next_pb_buf_pos = next_pb_buf + 1;
+                        float * pb_pos = cur_pb_buf_pos;
+                        float * next_pb_pos = next_pb_buf_pos;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                /*sampling: decide which predictor to use (regression or lorenzo)*/
+                                {
+                                        float * cur_data_pos;
+                                        float curData;
+                                        float pred_reg, pred_sz;
+                                        float err_sz = 0.0, err_reg = 0.0;
+                                        // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
+                                        // [1, 9] [3, 7]                [7, 3] [9, 1]
+                                        int count = 0;
+                                        for(int i=1; i<current_blockcount_x; i+=2){
+                                                cur_data_pos = data_pos + i * dim0_offset + i;
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
+                                                err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
+                                                err_reg += fabs(pred_reg - curData);
+                                                cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
+                                                err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
+                                                err_reg += fabs(pred_reg - curData);
+                                                count += 2;
+                                        }
+                                        use_reg = (err_reg < err_sz);
+                                }
+                                if(use_reg)
+                                {
+                                        {
+                                                /*predict coefficients in current block via previous reg_block*/
+                                                float cur_coeff;
+                                                double diff, itvNum;
+                                                for(int e=0; e<3; e++){
+                                                        cur_coeff = reg_params_pos[e*num_blocks];
+                                                        diff = cur_coeff - last_coeffcients[e];
+                                                        itvNum = fabs(diff)/precision[e] + 1;
+                                                        if (itvNum < coeff_intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        else{
+                                                                coeff_type[e][coeff_index] = 0;
+                                                                last_coeffcients[e] = cur_coeff;
+                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        float curData;
+                                        float pred;
+                                        double itvNum;
+                                        double diff;
+                                        size_t index = 0;
+                                        size_t block_unpredictable_count = 0;
+                                        float * cur_data_pos = data_pos;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                size_t ii = current_blockcount_x - 1;
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        } // end ii == -1
+                                        unpredictable_count = block_unpredictable_count;
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        reg_count ++;
+                                }// end use_reg
+                                else{
+                                        // use SZ
+                                        // SZ predication
+                                        unpredictable_count = 0;
+                                        float * cur_pb_pos = pb_pos;
+                                        float * cur_data_pos = data_pos;
+                                        float curData;
+                                        float pred2D;
+                                        double itvNum, diff;
+                                        size_t index = 0;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                type[index] = intvRadius;
+                                                                *cur_pb_pos = mean;
+                                                        }
+                                                        else
+                                                        {
+                                                                pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                                diff = curData - pred2D;
+                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                if (itvNum < intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                        *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                        //ganrantee comporession error against the case of machine-epsilon
+                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_pb_pos += strip_dim0_offset - current_blockcount_y;
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                // ii == current_blockcount_x - 1
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                type[index] = intvRadius;
+                                                                *cur_pb_pos = mean;
+                                                        }
+                                                        else
+                                                        {
+                                                                pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                                diff = curData - pred2D;
+                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                if (itvNum < intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                        *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                        //guarantee the compression error bound against the case of machine-epsilon
+                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        next_pb_pos[jj] = *cur_pb_pos;
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        }
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        // change indicator
+                                        indicator_pos[j] = 1;
+                                }// end SZ
+                                reg_params_pos ++;
+                                data_pos += current_blockcount_y;
+                                pb_pos += current_blockcount_y;
+                                next_pb_pos += current_blockcount_y;
+                                type += current_blockcount_x * current_blockcount_y;
+                        }// end j
+                        indicator_pos += num_y;
+                        float * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }// end i
+        }// end use mean
+        else{
+                type = result_type;
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        data_pos = oriData + offset_x * dim0_offset;
+                        cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
+                        next_pb_buf_pos = next_pb_buf + 1;
+                        float * pb_pos = cur_pb_buf_pos;
+                        float * next_pb_pos = next_pb_buf_pos;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                /*sampling*/
+                                {
+                                        // sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
+                                        float * cur_data_pos;
+                                        float curData;
+                                        float pred_reg, pred_sz;
+                                        float err_sz = 0.0, err_reg = 0.0;
+                                        // [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
+                                        // [1, 9] [3, 7]                [7, 3] [9, 1]
+                                        int count = 0;
+                                        for(int i=1; i<current_blockcount_x; i+=2){
+                                                cur_data_pos = data_pos + i * dim0_offset + i;
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
+                                                err_sz += fabs(pred_sz - curData);
+                                                err_reg += fabs(pred_reg - curData);
+                                                cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
+                                                curData = *cur_data_pos;
+                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
+                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
+                                                err_sz += fabs(pred_sz - curData);
+                                                err_reg += fabs(pred_reg - curData);
+                                                count += 2;
+                                        }
+                                        err_sz += realPrecision * count * 0.81;
+                                        use_reg = (err_reg < err_sz);
+                                }
+                                if(use_reg)
+                                {
+                                        {
+                                                /*predict coefficients in current block via previous reg_block*/
+                                                float cur_coeff;
+                                                double diff, itvNum;
+                                                for(int e=0; e<3; e++){
+                                                        cur_coeff = reg_params_pos[e*num_blocks];
+                                                        diff = cur_coeff - last_coeffcients[e];
+                                                        itvNum = fabs(diff)/precision[e] + 1;
+                                                        if (itvNum < coeff_intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        else{
+                                                                coeff_type[e][coeff_index] = 0;
+                                                                last_coeffcients[e] = cur_coeff;
+                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        float curData;
+                                        float pred;
+                                        double itvNum;
+                                        double diff;
+                                        size_t index = 0;
+                                        size_t block_unpredictable_count = 0;
+                                        float * cur_data_pos = data_pos;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        // jj == current_blockcount_y - 1
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                size_t ii = current_blockcount_x - 1;
+                                                for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                /*dealing with the last jj (boundary)*/
+                                                {
+                                                        // jj == current_blockcount_y - 1
+                                                        size_t jj = current_blockcount_y - 1;
+                                                        curData = *cur_data_pos;
+                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
+                                                        diff = curData - pred;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - pred)>realPrecision){
+                                                                        type[index] = 0;
+                                                                        pred = curData;
+                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                pred = curData;
+                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                        }
+                                                        // assign value to block surfaces
+                                                        pb_pos[ii * strip_dim0_offset + jj] = pred;
+                                                        // assign value to next prediction buffer
+                                                        next_pb_pos[jj] = pred;
+                                                        index ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        } // end last ii (ii == current_blockcount_x - 1)
+                                        unpredictable_count = block_unpredictable_count;
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        reg_count ++;
+                                }// end use_reg
+                                else{
+                                        // use SZ
+                                        // SZ prediction
+                                        unpredictable_count = 0;
+                                        float * cur_pb_pos = pb_pos;
+                                        float * cur_data_pos = data_pos;
+                                        float curData;
+                                        float pred2D;
+                                        double itvNum, diff;
+                                        size_t index = 0;
+                                        for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                        diff = curData - pred2D;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                *cur_pb_pos = curData;
+                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                        }
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                                cur_pb_pos += strip_dim0_offset - current_blockcount_y;
+                                                cur_data_pos += dim0_offset - current_blockcount_y;
+                                        }
+                                        /*dealing with the last ii (boundary)*/
+                                        {
+                                                // ii == current_blockcount_x - 1
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        curData = *cur_data_pos;
+                                                        pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
+                                                        diff = curData - pred2D;
+                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                        if (itvNum < intvCapacity_sz){
+                                                                if (diff < 0) itvNum = -itvNum;
+                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                //guarantee the compression error bound against the case of machine-epsilon
+                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                        type[index] = 0;
+                                                                        *cur_pb_pos = curData;
+                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                }
+                                                        }
+                                                        else{
+                                                                type[index] = 0;
+                                                                *cur_pb_pos = curData;
+                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                        }
+                                                        next_pb_pos[jj] = *cur_pb_pos;
+                                                        index ++;
+                                                        cur_pb_pos ++;
+                                                        cur_data_pos ++;
+                                                }
+                                        }
+                                        total_unpred += unpredictable_count;
+                                        unpredictable_data += unpredictable_count;
+                                        // change indicator
+                                        indicator_pos[j] = 1;
+                                }// end SZ
+                                reg_params_pos ++;
+                                data_pos += current_blockcount_y;
+                                pb_pos += current_blockcount_y;
+                                next_pb_pos += current_blockcount_y;
+                                type += current_blockcount_x * current_blockcount_y;
+                        }// end j
+                        indicator_pos += num_y;
+                        float * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }// end i
+        }
+        free(prediction_buffer_1);
+        free(prediction_buffer_2);
+        int stateNum = 2*quantization_intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        size_t nodeCount = 0;
+        size_t i = 0;
+        init(huffmanTree, result_type, num_elements);
+        for (i = 0; i < stateNum; i++)
+                if (huffmanTree->code[i]) nodeCount++;
+        nodeCount = nodeCount*2-1;
+        unsigned char *treeBytes;
+        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+        unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
+        // total size                                                                           metadata                  # elements   real precision           intervals       nodeCount               huffman                 block index                                             unpredicatable count                                            mean                                            unpred size                             elements
+        unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
+        unsigned char * result_pos = result;
+        initRandomAccessBytes(result_pos);
+        result_pos += meta_data_offset;
+        sizeToBytes(result_pos, num_elements);
+        result_pos += exe_params->SZ_SIZE_TYPE;
+        intToBytes_bigEndian(result_pos, block_size);
+        result_pos += sizeof(int);
+        doubleToBytes(result_pos, realPrecision);
+        result_pos += sizeof(double);
+        intToBytes_bigEndian(result_pos, quantization_intervals);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, treeByteSize);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, nodeCount);
+        result_pos += sizeof(int);
+        memcpy(result_pos, treeBytes, treeByteSize);
+        result_pos += treeByteSize;
+        free(treeBytes);
+        memcpy(result_pos, &use_mean, sizeof(unsigned char));
+        result_pos += sizeof(unsigned char);
+        memcpy(result_pos, &mean, sizeof(float));
+        result_pos += sizeof(float);
+        size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
+        result_pos += indicator_size;
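+        //serialize the three quantized regression-coefficient streams (precision, interval radius, Huffman tree, encoded types, unpredictable values)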
+        if(reg_count>0){
+                for(int e=0; e<3; e++){
+                        int stateNum = 2*coeff_intvCapacity_sz;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        size_t nodeCount = 0;
+                        init(huffmanTree, coeff_type[e], reg_count);
+                        size_t i = 0;
+                        for (i = 0; i < huffmanTree->stateNum; i++)
+                                if (huffmanTree->code[i]) nodeCount++;
+                        nodeCount = nodeCount*2-1;
+                        unsigned char *treeBytes;
+                        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+                        doubleToBytes(result_pos, precision[e]);
+                        result_pos += sizeof(double);
+                        intToBytes_bigEndian(result_pos, coeff_intvRadius);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, treeByteSize);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, nodeCount);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, treeBytes, treeByteSize);
+                        result_pos += treeByteSize;
+                        free(treeBytes);
+                        size_t typeArray_size = 0;
+                        encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
+                        sizeToBytes(result_pos, typeArray_size);
+                        result_pos += sizeof(size_t) + typeArray_size;
+                        intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
+                        result_pos += coeff_unpredictable_count[e]*sizeof(float);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        free(coeff_result_type);
+        free(coeff_unpredictable_data);
+        //record the number of unpredictable data and also store them
+        memcpy(result_pos, &total_unpred, sizeof(size_t));
+        result_pos += sizeof(size_t);
+        memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
+        result_pos += total_unpred * sizeof(float);
+        size_t typeArray_size = 0;
+        encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
+        result_pos += typeArray_size;
+        size_t totalEncodeSize = result_pos - result;
+        free(indicator);
+        free(result_unpredictable_data);
+        free(result_type);
+        free(reg_params);
+        SZ_ReleaseHuffman(huffmanTree);
+        *comp_size = totalEncodeSize;
+        return result;
+}
+/*
+ * Samples a 3D float array to choose the number of quantization intervals and
+ * to measure how concentrated the values are around their sampled mean.
+ *
+ * oriData        input array of r1*r2*r3 floats (indexed with r3 as the
+ *                unit-stride dimension and r2*r3 as the plane stride)
+ * realPrecision  error bound; also the bin width of both histograms built here
+ * dense_pos      out: mean + realPrecision * (offset of the densest pair of
+ *                adjacent mean-centred bins)
+ * max_freq       out: fraction of samples whose 7-neighbour prediction error is
+ *                below realPrecision (0 when no sample could be taken)
+ * mean_freq      out: fraction of samples that fall into that densest pair of
+ *                bins (0 when no sample could be taken)
+ *
+ * Returns the interval count rounded up to a power of two, never less than 32.
+ * If either histogram cannot be allocated, returns 32 with *dense_pos set to
+ * the sampled mean and both frequencies set to 0.
+ */
+unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq)
+{
+        float mean = 0.0;
+        size_t len = r1 * r2 * r3;
+        size_t mean_distance = (int) (sqrt(len));
+        float * data_pos = oriData;
+        size_t offset_count = 0;
+        size_t offset_count_2 = 0;
+        size_t mean_count = 0;
+        // Rough mean: visit every mean_distance-th element, stepping back by one
+        // whenever a row (r3) or a plane (r2*r3) worth of stride has accumulated.
+        // NOTE(review): for very small shapes (e.g. r1 = r2 = 1, r3 = 2) the
+        // back-steps cancel the stride and this loop never advances -- confirm
+        // callers never pass such shapes.
+        while((size_t)(data_pos - oriData) < len){
+                mean += *data_pos;
+                mean_count ++;
+                data_pos += mean_distance;
+                offset_count += mean_distance;
+                offset_count_2 += mean_distance;
+                if(offset_count >= r3){
+                        offset_count = 0;
+                        data_pos -= 1;
+                }
+                if(offset_count_2 >= r2 * r3){
+                        offset_count_2 = 0;
+                        data_pos -= 1;
+                }
+        }
+        if(mean_count > 0) mean /= mean_count;
+        const size_t range = 8192;
+        const size_t radius = 4096;
+        unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
+        int sampleDistance = confparams_cpr->sampleDistance;
+        float predThreshold = confparams_cpr->predThreshold;
+        // calloc zero-fills both histograms; bail out instead of writing through NULL
+        size_t * freq_intervals = (size_t *) calloc(range, sizeof(size_t));
+        size_t * intervals = (size_t *) calloc(maxRangeRadius, sizeof(size_t));
+        if(freq_intervals == NULL || intervals == NULL){
+                free(freq_intervals);
+                free(intervals);
+                *dense_pos = mean;
+                *max_freq = 0;
+                *mean_freq = 0;
+                return 32;
+        }
+        size_t i;
+        size_t radiusIndex;
+        size_t r23=r2*r3;
+        float pred_value = 0, pred_err;
+        float mean_diff;
+        double scaled;
+        ptrdiff_t freq_index;
+        size_t freq_count = 0;
+        size_t sample_count = 0;
+        // NOTE(review): with sampleDistance == 2 the first sample sits at
+        // oriData + r23 + r3, so its diagonal neighbour below is oriData[-1];
+        // smaller values wrap offset_count -- confirm the configuration
+        // guarantees sampleDistance >= 3.
+        offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
+        data_pos = oriData + r23 + r3 + offset_count;
+        size_t n1_count = 1, n2_count = 1; // count i,j sum
+        while((size_t)(data_pos - oriData) < len){
+                // predict the sample from its seven already-visited neighbours
+                pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
+                pred_err = fabs(pred_value - *data_pos);
+                if(pred_err < realPrecision) freq_count ++;
+                // clamp while still in floating point: converting an out-of-range
+                // (or NaN) value to an integer is undefined; NaN goes to the last bin
+                scaled = (pred_err/realPrecision+1)/2;
+                if(!(scaled < maxRangeRadius))
+                {
+                        radiusIndex = maxRangeRadius - 1;
+                }
+                else
+                {
+                        radiusIndex = (size_t) scaled;
+                }
+                intervals[radiusIndex]++;
+                // histogram of (value - mean) in realPrecision-wide bins, centred on `radius`
+                mean_diff = *data_pos - mean;
+                scaled = mean_diff/realPrecision;
+                if(!(scaled > -(double)range)){
+                        // far below the histogram (or NaN): first bin
+                        freq_intervals[0] ++;
+                }
+                else if(scaled >= (double)range){
+                        // far above the histogram: last bin
+                        freq_intervals[range - 1] ++;
+                }
+                else{
+                        freq_index = (ptrdiff_t)scaled + (ptrdiff_t)radius;
+                        if(!(mean_diff > 0)) freq_index -= 1;
+                        if(freq_index <= 0){
+                                freq_intervals[0] ++;
+                        }
+                        else if(freq_index >= (ptrdiff_t)range){
+                                freq_intervals[range - 1] ++;
+                        }
+                        else{
+                                freq_intervals[freq_index] ++;
+                        }
+                }
+                // advance to the next sample; once the in-row counter passes r3,
+                // jump ahead and re-derive the in-row offset from
+                // (n1_count + n2_count) % sampleDistance
+                offset_count += sampleDistance;
+                if(offset_count >= r3){
+                        n2_count ++;
+                        if(n2_count == r2){
+                                n1_count ++;
+                                n2_count = 1;
+                                data_pos += r3;
+                        }
+                        offset_count_2 = (n1_count + n2_count) % sampleDistance;
+                        data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
+                        offset_count = (sampleDistance - offset_count_2);
+                        if(offset_count == 0) offset_count ++;
+                }
+                else data_pos += sampleDistance;
+                sample_count ++;
+        }
+        // avoid 0/0 (NaN) when the array was too small for any sample
+        if(sample_count > 0) *max_freq = freq_count * 1.0/ sample_count;
+        else *max_freq = 0;
+        //compute the appropriate number
+        size_t targetCount = sample_count*predThreshold;
+        size_t sum = 0;
+        for(i=0;i<maxRangeRadius;i++)
+        {
+                sum += intervals[i];
+                if(sum>targetCount)
+                        break;
+        }
+        if(i>=maxRangeRadius)
+                i = maxRangeRadius-1;
+        unsigned int accIntervals = 2*(i+1);
+        unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
+        if(powerOf2<32)
+                powerOf2 = 32;
+        // collect frequency: find the pair of adjacent bins holding the most samples
+        size_t max_sum = 0;
+        size_t max_index = 0;
+        size_t tmp_sum;
+        for(size_t bin=1; bin<range-2; bin++){
+                tmp_sum = freq_intervals[bin] + freq_intervals[bin + 1];
+                if(tmp_sum > max_sum){
+                        max_sum = tmp_sum;
+                        max_index = bin;
+                }
+        }
+        // signed arithmetic: the offset is negative for bins below the mean
+        *dense_pos = mean + realPrecision * ((ptrdiff_t)max_index + 1 - (ptrdiff_t)radius);
+        if(sample_count > 0) *mean_freq = max_sum * 1.0 / sample_count;
+        else *mean_freq = 0;
+        free(freq_intervals);
+        free(intervals);
+        return powerOf2;
+}
+// 3D:  modified for higher performance
+unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){
+#ifdef HAVE_TIMECMPR
+        float* decData = NULL;
+        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                decData = (float*)(multisteps->hist_data);
+#endif
+        unsigned int quantization_intervals;
+        float sz_sample_correct_freq = -1;//0.5; //-1
+        float dense_pos;
+        float mean_flush_freq;
+        unsigned char use_mean = 0;
+        // calculate block dims
+        size_t num_x, num_y, num_z;
+        size_t block_size = 6;
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
+        size_t split_index_x, split_index_y, split_index_z;
+        size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
+        size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
+        size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
+        size_t num_blocks = num_x * num_y * num_z;
+        size_t num_elements = r1 * r2 * r3;
+        size_t dim0_offset = r2 * r3;
+        size_t dim1_offset = r3;
+        int * result_type = (int *) malloc(num_elements * sizeof(int));
+        size_t unpred_data_max_size = max_num_block_elements;
+        float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks);
+        size_t total_unpred = 0;
+        size_t unpredictable_count;
+        size_t max_unpred_count = 0;
+        float * data_pos = oriData;
+        int * type = result_type;
+        size_t type_offset;
+        size_t offset_x, offset_y, offset_z;
+        size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
+        float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float));
+        float * reg_params_pos = reg_params;
+        // move regression part out
+        size_t params_offset_b = num_blocks;
+        size_t params_offset_c = 2*num_blocks;
+        size_t params_offset_d = 3*num_blocks;
+        for(size_t i=0; i<num_x; i++){
+                for(size_t j=0; j<num_y; j++){
+                        for(size_t k=0; k<num_z; k++){
+                                current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                                /*Calculate regression coefficients*/
+                                {
+                                        float * cur_data_pos = data_pos;
+                                        float fx = 0.0;
+                                        float fy = 0.0;
+                                        float fz = 0.0;
+                                        float f = 0;
+                                        float sum_x, sum_y;
+                                        float curData;
+                                        for(size_t i=0; i<current_blockcount_x; i++){
+                                                sum_x = 0;
+                                                for(size_t j=0; j<current_blockcount_y; j++){
+                                                        sum_y = 0;
+                                                        for(size_t k=0; k<current_blockcount_z; k++){
+                                                                curData = *cur_data_pos;
+                                                                // f += curData;
+                                                                // fx += curData * i;
+                                                                // fy += curData * j;
+                                                                // fz += curData * k;
+                                                                sum_y += curData;
+                                                                fz += curData * k;
+                                                                cur_data_pos ++;
+                                                        }
+                                                        fy += sum_y * j;
+                                                        sum_x += sum_y;
+                                                        cur_data_pos += dim1_offset - current_blockcount_z;
+                                                }
+                                                fx += sum_x * i;
+                                                f += sum_x;
+                                                cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                        }
+                                        float coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
+                                        reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
+                                        reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
+                                        reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
+                                        reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
+                                }
+                                reg_params_pos ++;
+                        }
+                }
+        }
+        //Compress coefficient arrays
+        double precision_a, precision_b, precision_c, precision_d;
+        float rel_param_err = 0.025;
+        precision_a = rel_param_err * realPrecision / late_blockcount_x;
+        precision_b = rel_param_err * realPrecision / late_blockcount_y;
+        precision_c = rel_param_err * realPrecision / late_blockcount_z;
+        precision_d = rel_param_err * realPrecision;
+        if(exe_params->optQuantMode==1)
+        {
+                quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
+                if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
+                updateQuantizationInfo(quantization_intervals);
+        }
+        else{
+                quantization_intervals = exe_params->intvCapacity;
+        }
+        float mean = 0;
+        if(use_mean){
+                // compute mean
+                double sum = 0.0;
+                size_t mean_count = 0;
+                for(size_t i=0; i<num_elements; i++){
+                        if(fabs(oriData[i] - dense_pos) < realPrecision){
+                                sum += oriData[i];
+                                mean_count ++;
+                        }
+                }
+                if(mean_count > 0) mean = sum / mean_count;
+        }
+        double tmp_realPrecision = realPrecision;
+        // use two prediction buffers for higher performance
+        float * unpredictable_data = result_unpredictable_data;
+        unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char));
+        memset(indicator, 0, num_blocks * sizeof(unsigned char));
+        size_t reg_count = 0;
+        size_t strip_dim_0 = early_blockcount_x + 1;
+        size_t strip_dim_1 = r2 + 1;
+        size_t strip_dim_2 = r3 + 1;
+        size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
+        size_t strip_dim1_offset = strip_dim_2;
+        unsigned char * indicator_pos = indicator;
+        size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
+        float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_1, 0, prediction_buffer_size);
+        float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
+        memset(prediction_buffer_2, 0, prediction_buffer_size);
+        float * cur_pb_buf = prediction_buffer_1;
+        float * next_pb_buf = prediction_buffer_2;
+        float * cur_pb_buf_pos;
+        float * next_pb_buf_pos;
+        int intvCapacity = exe_params->intvCapacity;
+        int intvRadius = exe_params->intvRadius;
+        int use_reg = 0;
+        float noise = realPrecision * 1.22;
+        reg_params_pos = reg_params;
+        // compress the regression coefficients on the fly
+        float last_coeffcients[4] = {0.0};
+        int coeff_intvCapacity_sz = 65536;
+        int coeff_intvRadius = coeff_intvCapacity_sz / 2;
+        int * coeff_type[4];
+        int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int));
+        float * coeff_unpred_data[4];
+        float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float));
+        double precision[4];
+        precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
+        for(int i=0; i<4; i++){
+                coeff_type[i] = coeff_result_type + i * num_blocks;
+                coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
+        }
+        int coeff_index = 0;
+        unsigned int coeff_unpredictable_count[4] = {0};
+        if(use_mean){
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
+                                type_offset = offset_x * dim0_offset +  offset_y * current_blockcount_x * dim1_offset;
+                                type = result_type + type_offset;
+                                // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
+                                cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
+                                next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
+                                size_t current_blockcount_z;
+                                float * pb_pos = cur_pb_buf_pos;
+                                float * next_pb_pos = next_pb_buf_pos;
+                                size_t strip_unpredictable_count = 0;
+                                for(size_t k=0; k<num_z; k++){
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+#ifdef HAVE_TIMECMPR
+                                        size_t offset_z = 0;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+#endif
+                                        /*sampling and decide which predictor*/
+                                        {
+                                                // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
+                                                float * cur_data_pos;
+                                                float curData;
+                                                float pred_reg, pred_sz;
+                                                float err_sz = 0.0, err_reg = 0.0;
+                                                int bmi = 0;
+                                                if(i>0 && j>0 && k>0){
+                                                        for(int i=0; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                else{
+                                                        for(int i=1; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                use_reg = (err_reg < err_sz);
+                                        }
+                                        if(use_reg){
+                                                {
+                                                        /*predict coefficients in current block via previous reg_block*/
+                                                        float cur_coeff;
+                                                        double diff, itvNum;
+                                                        for(int e=0; e<4; e++){
+                                                                cur_coeff = reg_params_pos[e*num_blocks];
+                                                                diff = cur_coeff - last_coeffcients[e];
+                                                                itvNum = fabs(diff)/precision[e] + 1;
+                                                                if (itvNum < coeff_intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                        last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                        // guarantee the compression error bound against the case of machine-epsilon
+                                                                        if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                                coeff_type[e][coeff_index] = 0;
+                                                                                last_coeffcients[e] = cur_coeff;
+                                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                float curData;
+                                                float pred;
+                                                double itvNum;
+                                                double diff;
+                                                size_t index = 0;
+                                                size_t block_unpredictable_count = 0;
+                                                float * cur_data_pos = data_pos;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = pred;
+#endif
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        size_t ii = current_blockcount_x - 1;
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = pred;
+#endif
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        // assign value to next prediction buffer
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = pred;
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                unpredictable_count = block_unpredictable_count;
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                reg_count ++;
+                                        }
+                                        else{
+                                                // use SZ
+                                                // SZ prediction
+                                                unpredictable_count = 0;
+                                                float * cur_pb_pos = pb_pos;
+                                                float * cur_data_pos = data_pos;
+                                                float curData;
+                                                float pred3D;
+                                                double itvNum, diff;
+                                                size_t index = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                                type[index] = intvRadius;
+                                                                                *cur_pb_pos = mean;
+                                                                        }
+                                                                        else
+                                                                        {
+                                                                                pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                                 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                                diff = curData - pred3D;
+                                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                                if (itvNum < intvCapacity_sz){
+                                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                        *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                                type[index] = 0;
+                                                                                                *cur_pb_pos = curData;
+                                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                                        }
+                                                                                }
+                                                                                else{
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = *cur_pb_pos;
+#endif
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                                // adjust type[index] to intvRadius for coherence with freq in reg
+                                                                                type[index] = intvRadius;
+                                                                                *cur_pb_pos = mean;
+                                                                        }
+                                                                        else
+                                                                        {
+                                                                                pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                                 - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                                diff = curData - pred3D;
+                                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                                if (itvNum < intvCapacity_sz){
+                                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                        *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                        if(type[index] <= intvRadius) type[index] -= 1;
+                                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                                        if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                                type[index] = 0;
+                                                                                                *cur_pb_pos = curData;
+                                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                                        }
+                                                                                }
+                                                                                else{
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t ii = current_blockcount_x - 1;
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = *cur_pb_pos;
+#endif
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                // change indicator
+                                                indicator_pos[k] = 1;
+                                        }// end SZ
+                                        reg_params_pos ++;
+                                        data_pos += current_blockcount_z;
+                                        pb_pos += current_blockcount_z;
+                                        next_pb_pos += current_blockcount_z;
+                                        type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                } // end k
+                                if(strip_unpredictable_count > max_unpred_count){
+                                        max_unpred_count = strip_unpredictable_count;
+                                }
+                                total_unpred += strip_unpredictable_count;
+                                indicator_pos += num_z;
+                        }// end j
+                        float * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }// end i
+        }
+        else{
+                int intvCapacity_sz = intvCapacity - 2;
+                for(size_t i=0; i<num_x; i++){
+                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                        for(size_t j=0; j<num_y; j++){
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
+                                // copy bottom plane from plane buffer
+                                // memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(float));
+                                type_offset = offset_x * dim0_offset +  offset_y * current_blockcount_x * dim1_offset;
+                                type = result_type + type_offset;
+                                // prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
+                                cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
+                                next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
+                                size_t current_blockcount_z;
+                                float * pb_pos = cur_pb_buf_pos;
+                                float * next_pb_pos = next_pb_buf_pos;
+                                size_t strip_unpredictable_count = 0;
+                                for(size_t k=0; k<num_z; k++){
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+#ifdef HAVE_TIMECMPR
+                                size_t offset_z = 0;
+                                offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+#endif
+                                        /*sampling*/
+                                        {
+                                                // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
+                                                float * cur_data_pos;
+                                                float curData;
+                                                float pred_reg, pred_sz;
+                                                float err_sz = 0.0, err_reg = 0.0;
+                                                int bmi;
+                                                if(i>0 && j>0 && k>0){
+                                                        for(int i=0; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                else{
+                                                        for(int i=1; i<block_size; i++){
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                bmi = block_size - i;
+                                                                cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                                cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
+                                                                curData = *cur_data_pos;
+                                                                pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
+                                                                pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                                err_sz += fabs(pred_sz - curData) + noise;
+                                                                err_reg += fabs(pred_reg - curData);
+                                                        }
+                                                }
+                                                use_reg = (err_reg < err_sz);
+                                        }
+                                        if(use_reg)
+                                        {
+                                                {
+                                                        /*predict coefficients in current block via previous reg_block*/
+                                                        float cur_coeff;
+                                                        double diff, itvNum;
+                                                        for(int e=0; e<4; e++){
+                                                                cur_coeff = reg_params_pos[e*num_blocks];
+                                                                diff = cur_coeff - last_coeffcients[e];
+                                                                itvNum = fabs(diff)/precision[e] + 1;
+                                                                if (itvNum < coeff_intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                        last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                        if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                                coeff_type[e][coeff_index] = 0;
+                                                                                last_coeffcients[e] = cur_coeff;
+                                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                float curData;
+                                                float pred;
+                                                double itvNum;
+                                                double diff;
+                                                size_t index = 0;
+                                                size_t block_unpredictable_count = 0;
+                                                float * cur_data_pos = data_pos;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = pred;
+#endif
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        size_t ii = current_blockcount_x - 1;
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = pred;
+#endif
+                                                                        if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
+                                                                                // assign value to block surfaces
+                                                                                pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
+                                                                        }
+                                                                        // assign value to next prediction buffer
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = pred;
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                unpredictable_count = block_unpredictable_count;
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                reg_count ++;
+                                        }
+                                        else{
+                                                // use SZ
+                                                // SZ prediction
+                                                unpredictable_count = 0;
+                                                float * cur_pb_pos = pb_pos;
+                                                float * cur_data_pos = data_pos;
+                                                float curData;
+                                                float pred3D;
+                                                double itvNum, diff;
+                                                size_t index = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                         - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                        diff = curData - pred3D;
+                                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                                        if (itvNum < intvCapacity_sz){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = *cur_pb_pos;
+#endif
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
+                                                        cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                /*dealing with the last ii (boundary)*/
+                                                {
+                                                        // ii == current_blockcount_x - 1
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
+                                                                                         - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                        diff = curData - pred3D;
+                                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                                        if (itvNum < intvCapacity_sz){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        *cur_pb_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                *cur_pb_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+#ifdef HAVE_TIMECMPR
+                                                                        size_t ii = current_blockcount_x - 1;
+                                                                        size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
+                                                                        if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
+                                                                                decData[block_offset + point_offset] = *cur_pb_pos;
+#endif
+                                                                        // assign value to next prediction buffer
+                                                                        next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
+                                                                        index ++;
+                                                                        cur_pb_pos ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_pb_pos += strip_dim1_offset - current_blockcount_z;
+                                                                cur_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                }
+                                                strip_unpredictable_count += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                // change indicator
+                                                indicator_pos[k] = 1;
+                                        }// end SZ
+                                        reg_params_pos ++;
+                                        data_pos += current_blockcount_z;
+                                        pb_pos += current_blockcount_z;
+                                        next_pb_pos += current_blockcount_z;
+                                        type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                }
+                                if(strip_unpredictable_count > max_unpred_count){
+                                        max_unpred_count = strip_unpredictable_count;
+                                }
+                                total_unpred += strip_unpredictable_count;
+                                indicator_pos += num_z;
+                        }
+                        float * tmp;
+                        tmp = cur_pb_buf;
+                        cur_pb_buf = next_pb_buf;
+                        next_pb_buf = tmp;
+                }
+        }
+        free(prediction_buffer_1);
+        free(prediction_buffer_2);
+        int stateNum = 2*quantization_intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        size_t nodeCount = 0;
+        init(huffmanTree, result_type, num_elements);
+        size_t i = 0;
+        for (i = 0; i < huffmanTree->stateNum; i++)
+                if (huffmanTree->code[i]) nodeCount++;
+        nodeCount = nodeCount*2-1;
+        unsigned char *treeBytes;
+        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+        unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
+        // total size                                                                           metadata                  # elements     real precision         intervals       nodeCount               huffman                 block index                                             unpredicatable count                                            mean                                            unpred size                             elements
+        unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
+        unsigned char * result_pos = result;
+        initRandomAccessBytes(result_pos);
+        result_pos += meta_data_offset;
+        sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
+        result_pos += exe_params->SZ_SIZE_TYPE;
+        intToBytes_bigEndian(result_pos, block_size);
+        result_pos += sizeof(int);
+        doubleToBytes(result_pos, realPrecision);
+        result_pos += sizeof(double);
+        intToBytes_bigEndian(result_pos, quantization_intervals);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, treeByteSize);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, nodeCount);
+        result_pos += sizeof(int);
+        memcpy(result_pos, treeBytes, treeByteSize);
+        result_pos += treeByteSize;
+        free(treeBytes);
+        memcpy(result_pos, &use_mean, sizeof(unsigned char));
+        result_pos += sizeof(unsigned char);
+        memcpy(result_pos, &mean, sizeof(float));
+        result_pos += sizeof(float);
+        size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
+        result_pos += indicator_size;
+        //convert the lead/mid/resi to byte stream
+        if(reg_count > 0){
+                for(int e=0; e<4; e++){
+                        int stateNum = 2*coeff_intvCapacity_sz;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        size_t nodeCount = 0;
+                        init(huffmanTree, coeff_type[e], reg_count);
+                        size_t i = 0;
+                        for (i = 0; i < huffmanTree->stateNum; i++)
+                                if (huffmanTree->code[i]) nodeCount++;
+                        nodeCount = nodeCount*2-1;
+                        unsigned char *treeBytes;
+                        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+                        doubleToBytes(result_pos, precision[e]);
+                        result_pos += sizeof(double);
+                        intToBytes_bigEndian(result_pos, coeff_intvRadius);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, treeByteSize);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, nodeCount);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, treeBytes, treeByteSize);
+                        result_pos += treeByteSize;
+                        free(treeBytes);
+                        size_t typeArray_size = 0;
+                        encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
+                        sizeToBytes(result_pos, typeArray_size);
+                        result_pos += sizeof(size_t) + typeArray_size;
+                        intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
+                        result_pos += coeff_unpredictable_count[e]*sizeof(float);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        free(coeff_result_type);
+        free(coeff_unpredictable_data);
+        //record the number of unpredictable data and also store them
+        memcpy(result_pos, &total_unpred, sizeof(size_t));
+        result_pos += sizeof(size_t);
+        memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
+        result_pos += total_unpred * sizeof(float);
+        size_t typeArray_size = 0;
+        encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
+        result_pos += typeArray_size;
+        size_t totalEncodeSize = result_pos - result;
+        free(indicator);
+        free(result_unpredictable_data);
+        free(result_type);
+        free(reg_params);
+        SZ_ReleaseHuffman(huffmanTree);
+        *comp_size = totalEncodeSize;
+        return result;
+}
+unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){
+        unsigned int quantization_intervals;
+        float sz_sample_correct_freq = -1;//0.5; //-1
+        float dense_pos;
+        float mean_flush_freq;
+        unsigned char use_mean = 0;
+        // calculate block dims
+        size_t num_x, num_y, num_z;
+        size_t block_size = 6;
+        num_x = (r1 - 1) / block_size + 1;
+        num_y = (r2 - 1) / block_size + 1;
+        num_z = (r3 - 1) / block_size + 1;
+        size_t max_num_block_elements = block_size * block_size * block_size;
+        size_t num_blocks = num_x * num_y * num_z;
+        size_t num_elements = r1 * r2 * r3;
+        size_t dim0_offset = r2 * r3;
+        size_t dim1_offset = r3;
+        int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int));
+        size_t unpred_data_max_size = max_num_block_elements;
+        float * result_unpredictable_data = (float *) malloc(unpred_data_max_size * sizeof(float) * num_blocks);
+        size_t total_unpred = 0;
+        size_t unpredictable_count;
+        float * data_pos = oriData;
+        int * type = result_type;
+        float * reg_params = (float *) malloc(num_blocks * 4 * sizeof(float));
+        float * reg_params_pos = reg_params;
+        // move regression part out
+        size_t params_offset_b = num_blocks;
+        size_t params_offset_c = 2*num_blocks;
+        size_t params_offset_d = 3*num_blocks;
+        float * pred_buffer = (float *) malloc((block_size+1)*(block_size+1)*(block_size+1)*sizeof(float));
+        float * pred_buffer_pos = NULL;
+        float * block_data_pos_x = NULL;
+        float * block_data_pos_y = NULL;
+        float * block_data_pos_z = NULL;
+        for(size_t i=0; i<num_x; i++){
+                for(size_t j=0; j<num_y; j++){
+                        for(size_t k=0; k<num_z; k++){
+                                data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size;
+                                pred_buffer_pos = pred_buffer;
+                                block_data_pos_x = data_pos;
+                                // use the buffer as block_size*block_size*block_size
+                                for(int ii=0; ii<block_size; ii++){
+                                        block_data_pos_y = block_data_pos_x;
+                                        for(int jj=0; jj<block_size; jj++){
+                                                block_data_pos_z = block_data_pos_y;
+                                                for(int kk=0; kk<block_size; kk++){
+                                                        *pred_buffer_pos = *block_data_pos_z;
+                                                        if(k*block_size + kk + 1 < r3) block_data_pos_z ++;
+                                                        pred_buffer_pos ++;
+                                                }
+                                                if(j*block_size + jj + 1 < r2) block_data_pos_y += dim1_offset;
+                                        }
+                                        if(i*block_size + ii + 1 < r1) block_data_pos_x += dim0_offset;
+                                }
+                                /*Calculate regression coefficients*/
+                                {
+                                        float * cur_data_pos = pred_buffer;
+                                        float fx = 0.0;
+                                        float fy = 0.0;
+                                        float fz = 0.0;
+                                        float f = 0;
+                                        float sum_x, sum_y;
+                                        float curData;
+                                        for(size_t i=0; i<block_size; i++){
+                                                sum_x = 0;
+                                                for(size_t j=0; j<block_size; j++){
+                                                        sum_y = 0;
+                                                        for(size_t k=0; k<block_size; k++){
+                                                                curData = *cur_data_pos;
+                                                                sum_y += curData;
+                                                                fz += curData * k;
+                                                                cur_data_pos ++;
+                                                        }
+                                                        fy += sum_y * j;
+                                                        sum_x += sum_y;
+                                                }
+                                                fx += sum_x * i;
+                                                f += sum_x;
+                                        }
+                                        float coeff = 1.0 / (block_size * block_size * block_size);
+                                        reg_params_pos[0] = (2 * fx / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
+                                        reg_params_pos[params_offset_b] = (2 * fy / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
+                                        reg_params_pos[params_offset_c] = (2 * fz / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
+                                        reg_params_pos[params_offset_d] = f * coeff - ((block_size - 1) * reg_params_pos[0] / 2 + (block_size - 1) * reg_params_pos[params_offset_b] / 2 + (block_size - 1) * reg_params_pos[params_offset_c] / 2);
+                                }
+                                reg_params_pos ++;
+                        }
+                }
+        }
+        //Compress coefficient arrays
+        double precision_a, precision_b, precision_c, precision_d;
+        float rel_param_err = 0.025;
+        precision_a = rel_param_err * realPrecision / block_size;
+        precision_b = rel_param_err * realPrecision / block_size;
+        precision_c = rel_param_err * realPrecision / block_size;
+        precision_d = rel_param_err * realPrecision;
+        if(exe_params->optQuantMode==1)
+        {
+                quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
+                if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
+                updateQuantizationInfo(quantization_intervals);
+        }
+        else{
+                quantization_intervals = exe_params->intvCapacity;
+        }
+        float mean = 0;
+        if(use_mean){
+                // compute mean
+                double sum = 0.0;
+                size_t mean_count = 0;
+                for(size_t i=0; i<num_elements; i++){
+                        if(fabs(oriData[i] - dense_pos) < realPrecision){
+                                sum += oriData[i];
+                                mean_count ++;
+                        }
+                }
+                if(mean_count > 0) mean = sum / mean_count;
+        }
+        double tmp_realPrecision = realPrecision;
+        // use two prediction buffers for higher performance
+        float * unpredictable_data = result_unpredictable_data;
+        unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char));
+        memset(indicator, 0, num_blocks * sizeof(unsigned char));
+        size_t reg_count = 0;
+        unsigned char * indicator_pos = indicator;
+        int intvCapacity = exe_params->intvCapacity;
+        int intvRadius = exe_params->intvRadius;
+        int use_reg = 0;
+        float noise = realPrecision * 1.22;
+        reg_params_pos = reg_params;
+        // compress the regression coefficients on the fly
+        float last_coeffcients[4] = {0.0};
+        int coeff_intvCapacity_sz = 65536;
+        int coeff_intvRadius = coeff_intvCapacity_sz / 2;
+        int * coeff_type[4];
+        int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int));
+        float * coeff_unpred_data[4];
+        float * coeff_unpredictable_data = (float *) malloc(num_blocks*4*sizeof(float));
+        double precision[4];
+        precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
+        for(int i=0; i<4; i++){
+                coeff_type[i] = coeff_result_type + i * num_blocks;
+                coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
+        }
+        int coeff_index = 0;
+        unsigned int coeff_unpredictable_count[4] = {0};
+        memset(pred_buffer, 0, (block_size+1)*(block_size+1)*(block_size+1)*sizeof(float));
+        int pred_buffer_block_size = block_size + 1;
+        int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size;
+        int strip_dim1_offset = pred_buffer_block_size;
+        if(use_mean){
+                int intvCapacity_sz = intvCapacity - 2;
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                for(size_t k=0; k<num_z; k++){
+                                        data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size;
+                                        // add 1 in x, y, z offset
+                                        pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
+                                        block_data_pos_x = data_pos;
+                                        for(int ii=0; ii<block_size; ii++){
+                                                block_data_pos_y = block_data_pos_x;
+                                                for(int jj=0; jj<block_size; jj++){
+                                                        block_data_pos_z = block_data_pos_y;
+                                                        for(int kk=0; kk<block_size; kk++){
+                                                                *pred_buffer_pos = *block_data_pos_z;
+                                                                if(k*block_size + kk + 1< r3) block_data_pos_z ++;
+                                                                pred_buffer_pos ++;
+                                                        }
+                                                        // add 1 in z offset
+                                                        pred_buffer_pos ++;
+                                                        if(j*block_size + jj + 1< r2) block_data_pos_y += dim1_offset;
+                                                }
+                                                // add 1 in y offset
+                                                pred_buffer_pos += pred_buffer_block_size;
+                                                if(i*block_size + ii + 1< r1) block_data_pos_x += dim0_offset;
+                                        }
+                                        /*sampling and decide which predictor*/
+                                        {
+                                                // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
+                                                float * cur_data_pos;
+                                                float curData;
+                                                float pred_reg, pred_sz;
+                                                float err_sz = 0.0, err_reg = 0.0;
+                                                int bmi = 0;
+                                                for(int i=2; i<=block_size; i++){
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                        err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                        err_reg += fabs(pred_reg - curData);
+                                                        bmi = block_size - i;
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                        err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                        err_reg += fabs(pred_reg - curData);
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                        err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                        err_reg += fabs(pred_reg - curData);
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                        err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
+                                                        err_reg += fabs(pred_reg - curData);
+                                                }
+                                                use_reg = (err_reg < err_sz);
+                                        }
+                                        if(use_reg){
+                                                {
+                                                        /*predict coefficients in current block via previous reg_block*/
+                                                        float cur_coeff;
+                                                        double diff, itvNum;
+                                                        for(int e=0; e<4; e++){
+                                                                cur_coeff = reg_params_pos[e*num_blocks];
+                                                                diff = cur_coeff - last_coeffcients[e];
+                                                                itvNum = fabs(diff)/precision[e] + 1;
+                                                                if (itvNum < coeff_intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                        last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                        if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                                coeff_type[e][coeff_index] = 0;
+                                                                                last_coeffcients[e] = cur_coeff;
+                                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                float curData;
+                                                float pred;
+                                                double itvNum;
+                                                double diff;
+                                                size_t index = 0;
+                                                size_t block_unpredictable_count = 0;
+                                                float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
+                                                for(size_t ii=0; ii<block_size; ii++){
+                                                        for(size_t jj=0; jj<block_size; jj++){
+                                                                for(size_t kk=0; kk<block_size; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //guarantee compression error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos ++;
+                                                        }
+                                                        cur_data_pos += pred_buffer_block_size;
+                                                }
+                                                total_unpred += block_unpredictable_count;
+                                                unpredictable_data += block_unpredictable_count;
+                                                reg_count ++;
+                                        }
+                                        else{
+                                                // use SZ
+                                                // SZ prediction
+                                                unpredictable_count = 0;
+                                                float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
+                                                float curData;
+                                                float pred3D;
+                                                double itvNum, diff;
+                                                size_t index = 0;
+                                                for(size_t ii=0; ii<block_size; ii++){
+                                                        for(size_t jj=0; jj<block_size; jj++){
+                                                                for(size_t kk=0; kk<block_size; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        if(fabs(curData - mean) <= realPrecision){
+                                                                                type[index] = 1;
+                                                                                *cur_data_pos = mean;
+                                                                        }
+                                                                        else
+                                                                        {
+                                                                                pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
+                                                                                                 - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                                diff = curData - pred3D;
+                                                                                itvNum = fabs(diff)/realPrecision + 1;
+                                                                                if (itvNum < intvCapacity_sz){
+                                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                                        type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                        *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                        //guarantee compression error against the case of machine-epsilon
+                                                                                        if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
+                                                                                                type[index] = 0;
+                                                                                                *cur_data_pos = curData;
+                                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                                        }
+                                                                                }
+                                                                                else{
+                                                                                        type[index] = 0;
+                                                                                        *cur_data_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos ++;
+                                                        }
+                                                        cur_data_pos += pred_buffer_block_size;
+                                                }
+                                                total_unpred += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                // change indicator
+                                                indicator_pos[k] = 1;
+                                        }// end SZ
+                                        reg_params_pos ++;
+                                        type += block_size * block_size * block_size;
+                                } // end k
+                                indicator_pos += num_z;
+                        }// end j
+                }// end i
+        }
+        else{
+                int intvCapacity_sz = intvCapacity - 2;
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                for(size_t k=0; k<num_z; k++){
+                                        data_pos = oriData + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size;
+                                        // add 1 in x, y, z offset
+                                        pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
+                                        block_data_pos_x = data_pos;
+                                        for(int ii=0; ii<block_size; ii++){
+                                                block_data_pos_y = block_data_pos_x;
+                                                for(int jj=0; jj<block_size; jj++){
+                                                        block_data_pos_z = block_data_pos_y;
+                                                        for(int kk=0; kk<block_size; kk++){
+                                                                *pred_buffer_pos = *block_data_pos_z;
+                                                                if(k*block_size + kk < r3) block_data_pos_z ++;
+                                                                pred_buffer_pos ++;
+                                                        }
+                                                        // add 1 in z offset
+                                                        pred_buffer_pos ++;
+                                                        if(j*block_size + jj < r2) block_data_pos_y += dim1_offset;
+                                                }
+                                                // add 1 in y offset
+                                                pred_buffer_pos += pred_buffer_block_size;
+                                                if(i*block_size + ii < r1) block_data_pos_x += dim0_offset;
+                                        }
+                                        /*sampling*/
+                                        {
+                                                // sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
+                                                float * cur_data_pos;
+                                                float curData;
+                                                float pred_reg, pred_sz;
+                                                float err_sz = 0.0, err_reg = 0.0;
+                                                int bmi;
+                                                for(int i=2; i<=block_size; i++){
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                        err_sz += fabs(pred_sz - curData) + noise;
+                                                        err_reg += fabs(pred_reg - curData);
+                                                        bmi = block_size - i;
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                        err_sz += fabs(pred_sz - curData) + noise;
+                                                        err_reg += fabs(pred_reg - curData);
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
+                                                        err_sz += fabs(pred_sz - curData) + noise;
+                                                        err_reg += fabs(pred_reg - curData);
+                                                        cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
+                                                        curData = *cur_data_pos;
+                                                        pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                        pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
+                                                        err_sz += fabs(pred_sz - curData) + noise;
+                                                        err_reg += fabs(pred_reg - curData);
+                                                }
+                                                use_reg = (err_reg < err_sz);
+                                        }
+                                        if(use_reg)
+                                        {
+                                                {
+                                                        /*predict coefficients in current block via previous reg_block*/
+                                                        float cur_coeff;
+                                                        double diff, itvNum;
+                                                        for(int e=0; e<4; e++){
+                                                                cur_coeff = reg_params_pos[e*num_blocks];
+                                                                diff = cur_coeff - last_coeffcients[e];
+                                                                itvNum = fabs(diff)/precision[e] + 1;
+                                                                if (itvNum < coeff_intvCapacity_sz){
+                                                                        if (diff < 0) itvNum = -itvNum;
+                                                                        coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
+                                                                        last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
+                                                                        //ganrantee comporession error against the case of machine-epsilon
+                                                                        if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
+                                                                                coeff_type[e][coeff_index] = 0;
+                                                                                last_coeffcients[e] = cur_coeff;
+                                                                                coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                        }
+                                                                }
+                                                                else{
+                                                                        coeff_type[e][coeff_index] = 0;
+                                                                        last_coeffcients[e] = cur_coeff;
+                                                                        coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                float curData;
+                                                float pred;
+                                                double itvNum;
+                                                double diff;
+                                                size_t index = 0;
+                                                size_t block_unpredictable_count = 0;
+                                                float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
+                                                for(size_t ii=0; ii<block_size; ii++){
+                                                        for(size_t jj=0; jj<block_size; jj++){
+                                                                for(size_t kk=0; kk<block_size; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
+                                                                        diff = curData - pred;
+                                                                        itvNum = fabs(diff)/tmp_realPrecision + 1;
+                                                                        if (itvNum < intvCapacity){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                                if(fabs(curData - pred)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        pred = curData;
+                                                                                        unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                pred = curData;
+                                                                                unpredictable_data[block_unpredictable_count ++] = curData;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos ++;
+                                                        }
+                                                        cur_data_pos += pred_buffer_block_size;
+                                                }
+                                                total_unpred += block_unpredictable_count;
+                                                unpredictable_data += block_unpredictable_count;
+                                                reg_count ++;
+                                        }
+                                        else{
+                                                // use SZ
+                                                // SZ predication
+                                                unpredictable_count = 0;
+                                                float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
+                                                float curData;
+                                                float pred3D;
+                                                double itvNum, diff;
+                                                size_t index = 0;
+                                                for(size_t ii=0; ii<block_size; ii++){
+                                                        for(size_t jj=0; jj<block_size; jj++){
+                                                                for(size_t kk=0; kk<block_size; kk++){
+                                                                        curData = *cur_data_pos;
+                                                                        pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
+                                                                                         - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
+                                                                        diff = curData - pred3D;
+                                                                        itvNum = fabs(diff)/realPrecision + 1;
+                                                                        if (itvNum < intvCapacity_sz){
+                                                                                if (diff < 0) itvNum = -itvNum;
+                                                                                type[index] = (int) (itvNum/2) + intvRadius;
+                                                                                *cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
+                                                                                //ganrantee comporession error against the case of machine-epsilon
+                                                                                if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
+                                                                                        type[index] = 0;
+                                                                                        *cur_data_pos = curData;
+                                                                                        unpredictable_data[unpredictable_count ++] = curData;
+                                                                                }
+                                                                        }
+                                                                        else{
+                                                                                type[index] = 0;
+                                                                                *cur_data_pos = curData;
+                                                                                unpredictable_data[unpredictable_count ++] = curData;
+                                                                        }
+                                                                        index ++;
+                                                                        cur_data_pos ++;
+                                                                }
+                                                                cur_data_pos ++;
+                                                        }
+                                                        cur_data_pos += pred_buffer_block_size;
+                                                }
+                                                total_unpred += unpredictable_count;
+                                                unpredictable_data += unpredictable_count;
+                                                // change indicator
+                                                indicator_pos[k] = 1;
+                                        }// end SZ
+                                        reg_params_pos ++;
+                                        type += block_size * block_size * block_size;
+                                }
+                                indicator_pos += num_z;
+                        }
+                }
+        }
+        free(pred_buffer);
+        int stateNum = 2*quantization_intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        size_t nodeCount = 0;
+        init(huffmanTree, result_type, num_blocks*max_num_block_elements);
+        size_t i = 0;
+        for (i = 0; i < huffmanTree->stateNum; i++)
+                if (huffmanTree->code[i]) nodeCount++;
+        nodeCount = nodeCount*2-1;
+        unsigned char *treeBytes;
+        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+        unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
+        // total size                                                                           metadata                  # elements     real precision         intervals       nodeCount               huffman                 block index                                             unpredicatable count                                            mean                                            unpred size                             elements
+        unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
+        unsigned char * result_pos = result;
+        initRandomAccessBytes(result_pos);
+        result_pos += meta_data_offset;
+        sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
+        result_pos += exe_params->SZ_SIZE_TYPE;
+        intToBytes_bigEndian(result_pos, block_size);
+        result_pos += sizeof(int);
+        doubleToBytes(result_pos, realPrecision);
+        result_pos += sizeof(double);
+        intToBytes_bigEndian(result_pos, quantization_intervals);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, treeByteSize);
+        result_pos += sizeof(int);
+        intToBytes_bigEndian(result_pos, nodeCount);
+        result_pos += sizeof(int);
+        memcpy(result_pos, treeBytes, treeByteSize);
+        result_pos += treeByteSize;
+        free(treeBytes);
+        memcpy(result_pos, &use_mean, sizeof(unsigned char));
+        result_pos += sizeof(unsigned char);
+        memcpy(result_pos, &mean, sizeof(float));
+        result_pos += sizeof(float);
+        size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
+        result_pos += indicator_size;
+        //convert the lead/mid/resi to byte stream
+        if(reg_count > 0){
+                for(int e=0; e<4; e++){
+                        int stateNum = 2*coeff_intvCapacity_sz;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        size_t nodeCount = 0;
+                        init(huffmanTree, coeff_type[e], reg_count);
+                        size_t i = 0;
+                        for (i = 0; i < huffmanTree->stateNum; i++)
+                                if (huffmanTree->code[i]) nodeCount++;
+                        nodeCount = nodeCount*2-1;
+                        unsigned char *treeBytes;
+                        unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
+                        doubleToBytes(result_pos, precision[e]);
+                        result_pos += sizeof(double);
+                        intToBytes_bigEndian(result_pos, coeff_intvRadius);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, treeByteSize);
+                        result_pos += sizeof(int);
+                        intToBytes_bigEndian(result_pos, nodeCount);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, treeBytes, treeByteSize);
+                        result_pos += treeByteSize;
+                        free(treeBytes);
+                        size_t typeArray_size = 0;
+                        encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
+                        sizeToBytes(result_pos, typeArray_size);
+                        result_pos += sizeof(size_t) + typeArray_size;
+                        intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
+                        result_pos += sizeof(int);
+                        memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
+                        result_pos += coeff_unpredictable_count[e]*sizeof(float);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        free(coeff_result_type);
+        free(coeff_unpredictable_data);
+        //record the number of unpredictable data and also store them
+        memcpy(result_pos, &total_unpred, sizeof(size_t));
+        result_pos += sizeof(size_t);
+        memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
+        result_pos += total_unpred * sizeof(float);
+        size_t typeArray_size = 0;
+        encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size);
+        result_pos += typeArray_size;
+        size_t totalEncodeSize = result_pos - result;
+        free(indicator);
+        free(result_unpredictable_data);
+        free(result_type);
+        free(reg_params);
+        SZ_ReleaseHuffman(huffmanTree);
+        *comp_size = totalEncodeSize;
+        return result;
+}

TabularUnified thirdparty/SZ/sz/src/sz_float_pwr.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "zlib.h"
 #include "rw.h"
+#include "utility.h"
 void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, float* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision)
 …
         free_TightDataPointStorageF(tdps);
+}
+#include <stdbool.h>
+/**
+ * Compress a 1D float array by moving it to the log2 domain: the magnitudes
+ * are replaced by log2(|x|), compressed with SZ_compress_float_1D_MDQ under an
+ * absolute bound derived from log2(1 + pwrErrRatio), and the signs are kept in
+ * a separate, losslessly compressed byte map.
+ *
+ * @param newByteData  receives the output byte stream
+ * @param oriData      input values; exactly dataLength elements are read
+ * @param pwrErrRatio  error ratio; the log-domain precision is
+ *                     log2(1 + pwrErrRatio) minus a small margin
+ * @param dataLength   number of elements in oriData
+ * @param outSize      receives the size in bytes of *newByteData
+ * @param min          seeds the largest |log2| magnitude; presumably the
+ *                     smallest value in oriData (confirm with callers)
+ * @param max          seeds the largest |log2| magnitude; presumably the
+ *                     largest value in oriData (confirm with callers)
+ *
+ * If a work buffer cannot be allocated, *newByteData is set to NULL and
+ * *outSize to 0.
+ */
+void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t dataLength, size_t *outSize, float min, float max){
+        float * log_data = (float *) malloc(dataLength * sizeof(float));
+        /* one byte per element, 1 = negative; calloc supplies the all-zero start */
+        unsigned char * signs = (unsigned char *) calloc(dataLength, 1);
+        /* a zero-length request may legitimately return NULL, so only treat NULL as failure for real sizes */
+        if(dataLength > 0 && (log_data == NULL || signs == NULL)){
+                free(log_data);
+                free(signs);
+                *newByteData = NULL;
+                *outSize = 0;
+                return;
+        }
+        // preprocess
+        /* Seed the largest |log2| magnitude from the supplied bounds; a bound equal to 0 is skipped in favour of the other one. */
+        /* NOTE(review): if min and max are both 0 the seed is log2(0), which is not finite. */
+        float max_abs_log_data;
+        if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+        else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+        else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+        float min_log_data = max_abs_log_data;
+        bool positive = true;
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] < 0){
+                        signs[i] = 1;
+                        log_data[i] = -oriData[i];
+                        positive = false;
+                }
+                else
+                        log_data[i] = oriData[i];
+                /* zeros are left untouched here and patched after the precision is known */
+                if(log_data[i] > 0){
+                        log_data[i] = log2(log_data[i]);
+                        if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+                        if(log_data[i] < min_log_data) min_log_data = log_data[i];
+                }
+        }
+        float valueRangeSize, medianValue_f;
+        computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
+        if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+        /* 1.2e-7 is close to FLT_EPSILON; presumably a margin for float rounding of the log values */
+        double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
+        /* Park every zero input below the smallest log value, more than two precisions away. */
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] == 0){
+                        log_data[i] = min_log_data - 2.0001*realPrecision;
+                }
+        }
+        TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ(log_data, dataLength, realPrecision, valueRangeSize, medianValue_f);
+        /* threshold between real log values and the parked zeros; presumably used by the decompressor to restore zeros */
+        tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+        free(log_data);
+        if(!positive){
+                unsigned char * comp_signs;
+                // compress signs
+                unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+                tdps->pwrErrBoundBytes = comp_signs;
+                tdps->pwrErrBoundBytes_size = signSize;
+        }
+        else{
+                /* no negative input: no sign map is stored */
+                tdps->pwrErrBoundBytes = NULL;
+                tdps->pwrErrBoundBytes_size = 0;
+        }
+        free(signs);
+        convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
+        /* Fall back to storing the raw input when compression did not pay off.
+         * oriData holds dataLength elements (that is all this function reads), so
+         * that is the count handed over; the previous dataLength+2 named two
+         * elements past the end of the array.
+         * NOTE(review): if convertTDPStoFlatBytes_float allocated *newByteData, that
+         * buffer is replaced here without being released - confirm. */
+        if(*outSize>dataLength*sizeof(float))
+                SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
+        free_TightDataPointStorageF(tdps);
+}
+/**
+ * Compress an r1 x r2 float array by moving it to the log2 domain: the
+ * magnitudes are replaced by log2(|x|), compressed with
+ * SZ_compress_float_2D_MDQ under an absolute bound derived from
+ * log2(1 + pwrErrRatio), and the signs are kept in a separate, losslessly
+ * compressed byte map.
+ *
+ * @param newByteData  receives the output byte stream
+ * @param oriData      input values; exactly r1*r2 elements are read
+ * @param pwrErrRatio  error ratio; the log-domain precision is
+ *                     log2(1 + pwrErrRatio) minus a small margin
+ * @param r1           first dimension, forwarded to SZ_compress_float_2D_MDQ
+ * @param r2           second dimension, forwarded to SZ_compress_float_2D_MDQ
+ * @param outSize      receives the size in bytes of *newByteData
+ * @param min          seeds the largest |log2| magnitude; presumably the
+ *                     smallest value in oriData (confirm with callers)
+ * @param max          seeds the largest |log2| magnitude; presumably the
+ *                     largest value in oriData (confirm with callers)
+ *
+ * If a work buffer cannot be allocated, *newByteData is set to NULL and
+ * *outSize to 0.
+ */
+void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t *outSize, float min, float max){
+        size_t dataLength = r1 * r2;
+        float * log_data = (float *) malloc(dataLength * sizeof(float));
+        /* one byte per element, 1 = negative; calloc supplies the all-zero start */
+        unsigned char * signs = (unsigned char *) calloc(dataLength, 1);
+        /* a zero-length request may legitimately return NULL, so only treat NULL as failure for real sizes */
+        if(dataLength > 0 && (log_data == NULL || signs == NULL)){
+                free(log_data);
+                free(signs);
+                *newByteData = NULL;
+                *outSize = 0;
+                return;
+        }
+        // preprocess
+        /* Seed the largest |log2| magnitude from the supplied bounds; a bound equal to 0 is skipped in favour of the other one. */
+        /* NOTE(review): if min and max are both 0 the seed is log2(0), which is not finite. */
+        float max_abs_log_data;
+        if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+        else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+        else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+        float min_log_data = max_abs_log_data;
+        bool positive = true;
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] < 0){
+                        signs[i] = 1;
+                        log_data[i] = -oriData[i];
+                        positive = false;
+                }
+                else
+                        log_data[i] = oriData[i];
+                /* zeros are left untouched here and patched after the precision is known */
+                if(log_data[i] > 0){
+                        log_data[i] = log2(log_data[i]);
+                        if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+                        if(log_data[i] < min_log_data) min_log_data = log_data[i];
+                }
+        }
+        float valueRangeSize, medianValue_f;
+        computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
+        if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+        /* 1.2e-7 is close to FLT_EPSILON; presumably a margin for float rounding of the log values */
+        double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
+        /* Park every zero input below the smallest log value, more than two precisions away. */
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] == 0){
+                        log_data[i] = min_log_data - 2.0001*realPrecision;
+                }
+        }
+        TightDataPointStorageF* tdps = SZ_compress_float_2D_MDQ(log_data, r1, r2, realPrecision, valueRangeSize, medianValue_f);
+        /* threshold between real log values and the parked zeros; presumably used by the decompressor to restore zeros */
+        tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+        free(log_data);
+        if(!positive){
+                unsigned char * comp_signs;
+                // compress signs
+                unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+                tdps->pwrErrBoundBytes = comp_signs;
+                tdps->pwrErrBoundBytes_size = signSize;
+        }
+        else{
+                /* no negative input: no sign map is stored */
+                tdps->pwrErrBoundBytes = NULL;
+                tdps->pwrErrBoundBytes_size = 0;
+        }
+        free(signs);
+        convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
+        /* Fall back to storing the raw input when compression did not pay off.
+         * oriData holds dataLength elements (that is all this function reads), so
+         * that is the count handed over; the previous dataLength+2 named two
+         * elements past the end of the array.
+         * NOTE(review): if convertTDPStoFlatBytes_float allocated *newByteData, that
+         * buffer is replaced here without being released - confirm. */
+        if(*outSize>dataLength*sizeof(float))
+                SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
+        free_TightDataPointStorageF(tdps);
+}
+/**
+ * Compress an r1 x r2 x r3 float array by moving it to the log2 domain: the
+ * magnitudes are replaced by log2(|x|), compressed with
+ * SZ_compress_float_3D_MDQ under an absolute bound derived from
+ * log2(1 + pwrErrRatio), and the signs are kept in a separate, losslessly
+ * compressed byte map.
+ *
+ * @param newByteData  receives the output byte stream
+ * @param oriData      input values; exactly r1*r2*r3 elements are read
+ * @param pwrErrRatio  error ratio; the log-domain precision is
+ *                     log2(1 + pwrErrRatio) minus a small margin
+ * @param r1           first dimension, forwarded to SZ_compress_float_3D_MDQ
+ * @param r2           second dimension, forwarded to SZ_compress_float_3D_MDQ
+ * @param r3           third dimension, forwarded to SZ_compress_float_3D_MDQ
+ * @param outSize      receives the size in bytes of *newByteData
+ * @param min          seeds the largest |log2| magnitude; presumably the
+ *                     smallest value in oriData (confirm with callers)
+ * @param max          seeds the largest |log2| magnitude; presumably the
+ *                     largest value in oriData (confirm with callers)
+ *
+ * If a work buffer cannot be allocated, *newByteData is set to NULL and
+ * *outSize to 0.
+ */
+void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByteData, float *oriData, double pwrErrRatio, size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max){
+        size_t dataLength = r1 * r2 * r3;
+        float * log_data = (float *) malloc(dataLength * sizeof(float));
+        /* one byte per element, 1 = negative; calloc supplies the all-zero start */
+        unsigned char * signs = (unsigned char *) calloc(dataLength, 1);
+        /* a zero-length request may legitimately return NULL, so only treat NULL as failure for real sizes */
+        if(dataLength > 0 && (log_data == NULL || signs == NULL)){
+                free(log_data);
+                free(signs);
+                *newByteData = NULL;
+                *outSize = 0;
+                return;
+        }
+        // preprocess
+        /* Seed the largest |log2| magnitude from the supplied bounds; a bound equal to 0 is skipped in favour of the other one. */
+        /* NOTE(review): if min and max are both 0 the seed is log2(0), which is not finite. */
+        float max_abs_log_data;
+        if(min == 0) max_abs_log_data = fabs(log2(fabs(max)));
+        else if(max == 0) max_abs_log_data = fabs(log2(fabs(min)));
+        else max_abs_log_data = fabs(log2(fabs(min))) > fabs(log2(fabs(max))) ? fabs(log2(fabs(min))) : fabs(log2(fabs(max)));
+        float min_log_data = max_abs_log_data;
+        bool positive = true;
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] < 0){
+                        signs[i] = 1;
+                        log_data[i] = -oriData[i];
+                        positive = false;
+                }
+                else
+                        log_data[i] = oriData[i];
+                /* zeros are left untouched here and patched after the precision is known */
+                if(log_data[i] > 0){
+                        log_data[i] = log2(log_data[i]);
+                        if(log_data[i] > max_abs_log_data) max_abs_log_data = log_data[i];
+                        if(log_data[i] < min_log_data) min_log_data = log_data[i];
+                }
+        }
+        float valueRangeSize, medianValue_f;
+        computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
+        if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
+        /* 1.2e-7 is close to FLT_EPSILON; presumably a margin for float rounding of the log values */
+        double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
+        /* Park every zero input below the smallest log value, more than two precisions away. */
+        for(size_t i=0; i<dataLength; i++){
+                if(oriData[i] == 0){
+                        log_data[i] = min_log_data - 2.0001*realPrecision;
+                }
+        }
+        TightDataPointStorageF* tdps = SZ_compress_float_3D_MDQ(log_data, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
+        /* threshold between real log values and the parked zeros; presumably used by the decompressor to restore zeros */
+        tdps->minLogValue = min_log_data - 1.0001*realPrecision;
+        free(log_data);
+        if(!positive){
+                unsigned char * comp_signs;
+                // compress signs
+                unsigned long signSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, signs, dataLength, &comp_signs);
+                tdps->pwrErrBoundBytes = comp_signs;
+                tdps->pwrErrBoundBytes_size = signSize;
+        }
+        else{
+                /* no negative input: no sign map is stored */
+                tdps->pwrErrBoundBytes = NULL;
+                tdps->pwrErrBoundBytes_size = 0;
+        }
+        free(signs);
+        convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
+        /* Fall back to storing the raw input when compression did not pay off.
+         * oriData holds dataLength elements (that is all this function reads), so
+         * that is the count handed over; the previous dataLength+2 named two
+         * elements past the end of the array.
+         * NOTE(review): if convertTDPStoFlatBytes_float allocated *newByteData, that
+         * buffer is replaced here without being released - confirm. */
+        if(*outSize>dataLength*sizeof(float))
+                SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
+        free_TightDataPointStorageF(tdps);
+}

TabularUnified thirdparty/SZ/sz/src/sz_int16.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_int16.h"
+#include "utility.h"
 unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/sz_int32.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_int32.h"
+#include "utility.h"
 unsigned int optimize_intervals_int32_1D(int32_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/sz_int64.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_int64.h"
+#include "utility.h"
 unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/sz_int8.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_int8.h"
+#include "utility.h"
 unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/sz_uint16.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_uint16.h"
+#include "utility.h"
 unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/sz_uint32.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_uint32.h"
+#include "utility.h"
 unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/sz_uint64.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_uint64.h"
+#include "utility.h"
 unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/sz_uint8.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "TightDataPointStorageI.h"
 #include "sz_uint8.h"
+#include "utility.h"
 unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, double realPrecision)
 …
                 pred = last3CmprsData[0];
                 predAbsErr = llabs(curData - pred);
                 if(predAbsErr<=checkRadius)
+                if(predAbsErr<checkRadius)
+                {
                         state = (predAbsErr/realPrecision+1)/2;
 …
                 else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
+                {
                         *outSize = zlib_compress5(tmpByteData, tmpOutSize, newByteData, confparams_cpr->gzipMode);
+                        *outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
                         free(tmpByteData);
+                }

TabularUnified thirdparty/SZ/sz/src/szd_double.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_double_pwr.h"
 #include "szd_double_ts.h"
+#include "utility.h"
 int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize)
 …
         if(cmpSize!=12+4+MetaDataByteLength && cmpSize!=12+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
                 if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION)
+                {
                         if(isZlib)
+                        if(confparams_dec->losslessCompressor!=-1)
                                 confparams_dec->szMode = SZ_BEST_COMPRESSION;
                         else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);
 …
+                }
+        }
+        else if (dim == 1)
+                getSnapshotData_double_1D(newData,r1,tdps, errBoundMode);
+        else
+        if (dim == 2)
+                getSnapshotData_double_2D(newData,r2,r1,tdps, errBoundMode);
+        else
+        if (dim == 3)
+                getSnapshotData_double_3D(newData,r3,r2,r1,tdps, errBoundMode);
+        else
+        if (dim == 4)
+                getSnapshotData_double_4D(newData,r4,r3,r2,r1,tdps, errBoundMode);
+        else
+        else
+        {
+                printf("Error: currently support only at most 4 dimensions!\n");
+                status = SZ_DERR;
+        }
+                if(tdps->raBytes_size > 0) //v2.0
+                {
+                        if (dim == 1)
+                                getSnapshotData_double_1D(newData,r1,tdps, errBoundMode);
+                        else if(dim == 2)
+                                decompressDataSeries_double_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes);
+                        else if(dim == 3)
+                                decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes);
+                        else if(dim == 4)
+                                decompressDataSeries_double_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes);
+                        else
+                        {
+                                printf("Error: currently support only at most 4 dimensions!\n");
+                                status = SZ_DERR;
+                        }
+                }
+                else //1.4.13
+                {
+                        if (dim == 1)
+                                getSnapshotData_double_1D(newData,r1,tdps, errBoundMode);
+                        else
+                        if (dim == 2)
+                                getSnapshotData_double_2D(newData,r2,r1,tdps, errBoundMode);
+                        else
+                        if (dim == 3)
+                                getSnapshotData_double_3D(newData,r3,r2,r1,tdps, errBoundMode);
+                        else
+                        if (dim == 4)
+                                getSnapshotData_double_4D(newData,r4,r3,r2,r1,tdps, errBoundMode);
+                        else
+                        {
+                                printf("Error: currently support only at most 4 dimensions!\n");
+                                status = SZ_DERR;
+                        }
+                }
+        }
         free_TightDataPointStorageD2(tdps);
         if(confparams_dec->szMode!=SZ_BEST_SPEED && cmpSize!=12+MetaDataByteLength+exe_params->SZ_SIZE_TYPE)
 …
                         else
+                        {
                                 //decompressDataSeries_double_1D_pwr(data, dataSeriesLength, tdps);
                                 decompressDataSeries_double_1D_pwrgroup(data, dataSeriesLength, tdps);
+                                decompressDataSeries_double_1D_pwr_pre_log(data, dataSeriesLength, tdps);
+                                //decompressDataSeries_double_1D_pwrgroup(data, dataSeriesLength, tdps);
+                        }
                         return;
 …
                                 decompressDataSeries_double_1D(&decmpData, dataSeriesLength, tdps);
                         else
+                                decompressDataSeries_double_1D_pwr(&decmpData, dataSeriesLength, tdps);
+                                //decompressDataSeries_double_1D_pwr(&decmpData, dataSeriesLength, tdps);
+                                decompressDataSeries_double_1D_pwr_pre_log(&decmpData, dataSeriesLength, tdps);
                         // insert the decompressed data
                         size_t k = 0;
 …
+                        }
                         else
+                                decompressDataSeries_double_2D_pwr(data, r1, r2, tdps);
+                                //decompressDataSeries_double_2D_pwr(data, r1, r2, tdps);
+                                decompressDataSeries_double_2D_pwr_pre_log(data, r1, r2, tdps);
                         return;
                 } else {
 …
                                 decompressDataSeries_double_2D(&decmpData, r1, r2, tdps);
                         else
+                                decompressDataSeries_double_2D_pwr(&decmpData, r1, r2, tdps);
+                                //decompressDataSeries_double_2D_pwr(&decmpData, r1, r2, tdps);
+                                decompressDataSeries_double_2D_pwr_pre_log(&decmpData, r1, r2, tdps);
                         // insert the decompressed data
                         size_t k = 0;
 …
+                        }
                         else
+                                decompressDataSeries_double_3D_pwr(data, r1, r2, r3, tdps);
+                                //decompressDataSeries_double_3D_pwr(data, r1, r2, r3, tdps);
+                                decompressDataSeries_double_3D_pwr_pre_log(data, r1, r2, r3, tdps);
                         return;
                 } else {
 …
                                 decompressDataSeries_double_3D(&decmpData, r1, r2, r3, tdps);
                         else
+                                decompressDataSeries_double_3D_pwr(&decmpData, r1, r2, r3, tdps);
+                                //decompressDataSeries_double_3D_pwr(&decmpData, r1, r2, r3, tdps);
+                                decompressDataSeries_double_3D_pwr_pre_log(&decmpData, r1, r2, r3, tdps);
                         // insert the decompressed data
                         size_t k = 0;
 …
                         else
+                        {
+                                decompressDataSeries_double_3D_pwr(data, r1*r2, r3, r4, tdps);
+                                //decompressDataSeries_double_3D_pwr(data, r1*r2, r3, r4, tdps);
+                                decompressDataSeries_double_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps);
                                 //ToDO
                                 //decompressDataSeries_double_4D_pwr(data, r1, r2, r3, r4, tdps);
 …
                                 decompressDataSeries_double_4D(&decmpData, r1, r2, r3, r4, tdps);
                         else
+                                decompressDataSeries_double_3D_pwr(&decmpData, r1*r2, r3, r4, tdps);
+                                //decompressDataSeries_double_3D_pwr(&decmpData, r1*r2, r3, r4, tdps);
+                                decompressDataSeries_double_3D_pwr_pre_log(&decmpData, r1*r2, r3, r4, tdps);
                                 //ToDo
                                 //decompressDataSeries_double_4D_pwr(&decmpData, r1, r2, r3, r4, tdps);
 …
+        }
+}
+void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data){ /*
+         * Decodes a 2D array of r1 x r2 doubles from the byte stream comp_data.
+         *
+         * data      - out: *data receives a buffer of r1 * r2 doubles allocated here with
+         *             malloc; it is not freed in this function (presumably the caller owns it).
+         * r1, r2    - array extents; element (x, y) is written at (*data)[x * r2 + y].
+         * comp_data - in: compressed stream, read sequentially in this order: block size,
+         *             real precision, interval count, Huffman tree of the quantization codes,
+         *             use_mean flag and mean, per-block indicator bits, (if any regression
+         *             block exists) three coefficient sections, unpredictable-value count and
+         *             values, and finally the Huffman-coded quantization codes.
+         *
+         * The array is processed block by block. A block whose indicator is nonzero is
+         * reconstructed from already-decoded neighbours (left + upper - upper-left); a block
+         * whose indicator is zero is reconstructed from a linear function of the in-block
+         * coordinates whose three coefficients are themselves decoded from the stream.
+         *
+         * NOTE(review): no allocation result is checked and no bounds checks are made
+         * against the length of comp_data (the length is not passed in).
+         */
+        size_t dim0_offset = r2; // row stride of the output array
+        size_t num_elements = r1 * r2;
+        *data = (double*)malloc(sizeof(double)*num_elements); // NOTE(review): result is not checked for NULL
+        unsigned char * comp_data_pos = comp_data; // read cursor into the compressed stream
+        size_t block_size = bytesToInt_bigEndian(comp_data_pos); // first field of the stream
+        comp_data_pos += sizeof(int);
+        // calculate block dims
+        size_t num_x, num_y;
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size); // macro defined elsewhere; presumably sets num_x to the number of blocks along r1
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size); // likewise for num_y along r2
+        size_t split_index_x, split_index_y;
+        size_t early_blockcount_x, early_blockcount_y;
+        size_t late_blockcount_x, late_blockcount_y;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); // macro defined elsewhere; blocks with index < split_index use the "early" extent, the rest use the "late" extent (see the loops below)
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        size_t num_blocks = num_x * num_y;
+        double realPrecision = bytesToDouble(comp_data_pos); // quantization step: values are rebuilt as pred + 2 * (code - intvRadius) * realPrecision
+        comp_data_pos += sizeof(double);
+        unsigned int intervals = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        updateQuantizationInfo(intervals); // defined elsewhere; presumably this is what sets exe_params->intvRadius read below - TODO confirm
+        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); // byte length of the serialized Huffman tree that follows the node count
+        comp_data_pos += sizeof(int);
+        int stateNum = 2*intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); // tree used further down to decode the per-element codes
+        comp_data_pos += sizeof(int) + tree_size; // skip the node count and the serialized tree
+        double mean;
+        unsigned char use_mean;
+        memcpy(&use_mean, comp_data_pos, sizeof(unsigned char)); // selects which of the two decoding loops below is run
+        comp_data_pos += sizeof(unsigned char);
+        memcpy(&mean, comp_data_pos, sizeof(double)); // always present in the stream, only used when use_mean is nonzero
+        comp_data_pos += sizeof(double);
+        size_t reg_count = 0; // number of blocks whose indicator is zero (regression blocks)
+        unsigned char * indicator;
+        size_t indicator_bitlength = (num_blocks - 1)/8 + 1; // despite the name this is a byte count: one bit per block, rounded up
+        convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator); // defined elsewhere; indicator is indexed per block below and freed at the end, so presumably it is allocated here with one entry per block
+        comp_data_pos += indicator_bitlength;
+        for(size_t i=0; i<num_blocks; i++){
+                if(!indicator[i]) reg_count ++;
+        }
+        //printf("reg_count: %ld\n", reg_count);
+        int coeff_intvRadius[3];
+        int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); // backing store for the three coeff_type arrays; NOTE(review): not checked for NULL
+        int * coeff_type[3];
+        double precision[3];
+        double * coeff_unpred_data[3];
+        if(reg_count > 0){ // the coefficient sections are only read when at least one regression block exists; otherwise the arrays above stay uninitialized and are never read
+                for(int i=0; i<3; i++){ // one section per regression coefficient
+                        precision[i] = bytesToDouble(comp_data_pos);
+                        comp_data_pos += sizeof(double);
+                        coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); // tree_size, stateNum, huffmanTree and root in this loop shadow the outer variables of the same names
+                        comp_data_pos += sizeof(int);
+                        int stateNum = 2*coeff_intvRadius[i]*2;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+                        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount);
+                        comp_data_pos += sizeof(int) + tree_size;
+                        coeff_type[i] = coeff_result_type + i * num_blocks; // slice i of the shared buffer
+                        size_t typeArray_size = bytesToSize(comp_data_pos); // byte length of the encoded code array that follows
+                        decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); // one code per regression block
+                        comp_data_pos += sizeof(size_t) + typeArray_size;
+                        int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        coeff_unpred_data[i] = (double *) comp_data_pos; // points straight into the stream, no copy; NOTE(review): nothing here guarantees double alignment of this address
+                        comp_data_pos += coeff_unpred_count * sizeof(double);
+                        SZ_ReleaseHuffman(huffmanTree); // releases the inner (coefficient) tree only
+                }
+        }
+        double last_coefficients[3] = {0.0}; // coefficients of the most recent regression block; each new block is coded relative to them
+        int coeff_unpred_data_count[3] = {0}; // per-coefficient read index into coeff_unpred_data
+        int coeff_index = 0; // index of the next regression block in coeff_type[*]
+        updateQuantizationInfo(intervals); // called a second time with the same argument as above
+        size_t total_unpred;
+        memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); // read in native byte order, unlike the ints above which go through bytesToInt_bigEndian
+        comp_data_pos += sizeof(size_t);
+        double * unpred_data = (double *) comp_data_pos; // points straight into the stream; NOTE(review): alignment is not guaranteed here either
+        comp_data_pos += total_unpred * sizeof(double);
+        int * result_type = (int *) malloc(num_elements * sizeof(int)); // one quantization code per output element; NOTE(review): not checked for NULL
+        decode(comp_data_pos, num_elements, root, result_type);
+        SZ_ReleaseHuffman(huffmanTree);
+        int intvRadius = exe_params->intvRadius;
+        int * type; // codes of the block currently being decoded
+        double * data_pos = *data;
+        size_t offset_x, offset_y;
+        size_t current_blockcount_x, current_blockcount_y;
+        size_t cur_unpred_count; // unpredictable values consumed by the current block
+        unsigned char * indicator_pos = indicator;
+        if(use_mean){ // variant in which the code value intvRadius stands for "value equals mean" inside SZ-predicted blocks
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; // first row of block (i, j) in the full array
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; // first column of block (i, j)
+                                data_pos = *data + offset_x * dim0_offset + offset_y;
+                                current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                size_t current_block_elements = current_blockcount_x * current_blockcount_y;
+                                if(*indicator_pos){
+                                        // decompress by SZ
+                                        double * block_data_pos = data_pos;
+                                        double pred;
+                                        size_t index = 0;
+                                        int type_;
+                                        // d11 is current data
+                                        size_t unpredictable_count = 0;
+                                        double d00, d01, d10; // upper-left, upper and left neighbours; first used as 0/1 flags, then overwritten with the neighbour values
+                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        type_ = type[index];
+                                                        if(type_ == intvRadius){
+                                                                *block_data_pos = mean;
+                                                        }
+                                                        else if(type_ == 0){ // code 0: value is taken verbatim from the unpredictable list
+                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                        }
+                                                        else{
+                                                                d00 = d01 = d10 = 1; // 1 = neighbour exists
+                                                                if(i == 0 && ii == 0){ // first row of the whole array: no upper neighbours
+                                                                        d00 = d01 = 0;
+                                                                }
+                                                                if(j == 0 && jj == 0){ // first column of the whole array: no left neighbours
+                                                                        d00 = d10 = 0;
+                                                                }
+                                                                if(d00){
+                                                                        d00 = block_data_pos[- dim0_offset - 1];
+                                                                }
+                                                                if(d01){
+                                                                        d01 = block_data_pos[- dim0_offset];
+                                                                }
+                                                                if(d10){
+                                                                        d10 = block_data_pos[- 1];
+                                                                }
+                                                                if(type_ < intvRadius) type_ += 1; // presumably undoes an encoder-side shift that keeps the code intvRadius free for the mean case - TODO confirm against the compressor
+                                                                pred = d10 + d01 - d00; // missing neighbours contribute 0
+                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                        }
+                                                        index ++;
+                                                        block_data_pos ++;
+                                                }
+                                                block_data_pos += dim0_offset - current_blockcount_y; // jump to the start of the block's next row
+                                        }
+                                        cur_unpred_count = unpredictable_count;
+                                }
+                                else{
+                                        // decompress by regression
+                                        {
+                                                //restore regression coefficients
+                                                double pred;
+                                                int type_;
+                                                for(int e=0; e<3; e++){
+                                                        type_ = coeff_type[e][coeff_index];
+                                                        if (type_ != 0){ // quantized difference from the previous regression block's coefficient
+                                                                pred = last_coefficients[e];
+                                                                last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                        }
+                                                        else{ // code 0: coefficient stored verbatim
+                                                                last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                coeff_unpred_data_count[e] ++;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        {
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                int type_;
+                                                size_t index = 0;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                type_ = type[index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; // linear in the in-block coordinates (ii, jj)
+                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                }
+                                                                else{
+                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                }
+                                                                index ++;
+                                                                block_data_pos ++;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y; // jump to the start of the block's next row
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                }
+                                type += current_block_elements; // codes are stored block after block
+                                indicator_pos ++;
+                                unpred_data += cur_unpred_count; // so the next block indexes its unpredictable values from 0
+                        }
+                }
+        }
+        else{ // same traversal as above, without the mean special case and without the code shift
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; // first row of block (i, j) in the full array
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y; // first column of block (i, j)
+                                data_pos = *data + offset_x * dim0_offset + offset_y;
+                                current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                size_t current_block_elements = current_blockcount_x * current_blockcount_y;
+                                if(*indicator_pos){
+                                        // decompress by SZ
+                                        double * block_data_pos = data_pos;
+                                        double pred;
+                                        size_t index = 0;
+                                        int type_;
+                                        // d11 is current data
+                                        size_t unpredictable_count = 0;
+                                        double d00, d01, d10; // upper-left, upper and left neighbours; first used as 0/1 flags, then overwritten with the neighbour values
+                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        type_ = type[index];
+                                                        if(type_ == 0){ // code 0: value is taken verbatim from the unpredictable list
+                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                        }
+                                                        else{
+                                                                d00 = d01 = d10 = 1; // 1 = neighbour exists
+                                                                if(i == 0 && ii == 0){ // first row of the whole array: no upper neighbours
+                                                                        d00 = d01 = 0;
+                                                                }
+                                                                if(j == 0 && jj == 0){ // first column of the whole array: no left neighbours
+                                                                        d00 = d10 = 0;
+                                                                }
+                                                                if(d00){
+                                                                        d00 = block_data_pos[- dim0_offset - 1];
+                                                                }
+                                                                if(d01){
+                                                                        d01 = block_data_pos[- dim0_offset];
+                                                                }
+                                                                if(d10){
+                                                                        d10 = block_data_pos[- 1];
+                                                                }
+                                                                pred = d10 + d01 - d00; // missing neighbours contribute 0
+                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                        }
+                                                        index ++;
+                                                        block_data_pos ++;
+                                                }
+                                                block_data_pos += dim0_offset - current_blockcount_y; // jump to the start of the block's next row
+                                        }
+                                        cur_unpred_count = unpredictable_count;
+                                }
+                                else{
+                                        // decompress by regression
+                                        {
+                                                //restore regression coefficients
+                                                double pred;
+                                                int type_;
+                                                for(int e=0; e<3; e++){
+                                                        type_ = coeff_type[e][coeff_index];
+                                                        if (type_ != 0){ // quantized difference from the previous regression block's coefficient
+                                                                pred = last_coefficients[e];
+                                                                last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                        }
+                                                        else{ // code 0: coefficient stored verbatim
+                                                                last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                coeff_unpred_data_count[e] ++;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        {
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                int type_;
+                                                size_t index = 0;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                type_ = type[index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; // linear in the in-block coordinates (ii, jj)
+                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                }
+                                                                else{
+                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                }
+                                                                index ++;
+                                                                block_data_pos ++;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y; // jump to the start of the block's next row
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                }
+                                type += current_block_elements; // codes are stored block after block
+                                indicator_pos ++;
+                                unpred_data += cur_unpred_count; // so the next block indexes its unpredictable values from 0
+                        }
+                }
+        }
+        free(coeff_result_type); // temporary buffers only; *data is left allocated and is the function's result
+        free(indicator);
+        free(result_type);
+}
+void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){
+        size_t dim0_offset = r2 * r3;
+        size_t dim1_offset = r3;
+        size_t num_elements = r1 * r2 * r3;
+        *data = (double*)malloc(sizeof(double)*num_elements);
+        unsigned char * comp_data_pos = comp_data;
+        size_t block_size = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        // calculate block dims
+        size_t num_x, num_y, num_z;
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
+        size_t split_index_x, split_index_y, split_index_z;
+        size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
+        size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
+        size_t num_blocks = num_x * num_y * num_z;
+        double realPrecision = bytesToDouble(comp_data_pos);
+        comp_data_pos += sizeof(double);
+        unsigned int intervals = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        updateQuantizationInfo(intervals);
+        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        int stateNum = 2*intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+4, nodeCount);
+        comp_data_pos += sizeof(int) + tree_size;
+        double mean;
+        unsigned char use_mean;
+        memcpy(&use_mean, comp_data_pos, sizeof(unsigned char));
+        comp_data_pos += sizeof(unsigned char);
+        memcpy(&mean, comp_data_pos, sizeof(double));
+        comp_data_pos += sizeof(double);
+        size_t reg_count = 0;
+        unsigned char * indicator;
+        size_t indicator_bitlength = (num_blocks - 1)/8 + 1;
+        convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator);
+        comp_data_pos += indicator_bitlength;
+        for(size_t i=0; i<num_blocks; i++){
+                if(!indicator[i]) reg_count ++;
+        }
+        int coeff_intvRadius[4];
+        int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int));
+        int * coeff_type[4];
+        double precision[4];
+        double * coeff_unpred_data[4];
+        if(reg_count > 0){
+                for(int i=0; i<4; i++){
+                        precision[i] = bytesToDouble(comp_data_pos);
+                        comp_data_pos += sizeof(double);
+                        coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        int stateNum = 2*coeff_intvRadius[i]*2;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+                        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+4, nodeCount);
+                        comp_data_pos += sizeof(int) + tree_size;
+                        coeff_type[i] = coeff_result_type + i * num_blocks;
+                        size_t typeArray_size = bytesToSize(comp_data_pos);
+                        decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]);
+                        comp_data_pos += sizeof(size_t) + typeArray_size;
+                        int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        coeff_unpred_data[i] = (double *) comp_data_pos;
+                        comp_data_pos += coeff_unpred_count * sizeof(double);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        double last_coefficients[4] = {0.0};
+        int coeff_unpred_data_count[4] = {0};
+        int coeff_index = 0;
+        updateQuantizationInfo(intervals);
+        size_t total_unpred;
+        memcpy(&total_unpred, comp_data_pos, sizeof(size_t));
+        comp_data_pos += sizeof(size_t);
+        double * unpred_data = (double *) comp_data_pos;
+        comp_data_pos += total_unpred * sizeof(double);
+        int * result_type = (int *) malloc(num_elements * sizeof(int));
+        decode(comp_data_pos, num_elements, root, result_type);
+        SZ_ReleaseHuffman(huffmanTree);
+        int intvRadius = exe_params->intvRadius;
+        int * type;
+        double * data_pos = *data;
+        size_t offset_x, offset_y, offset_z;
+        size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
+        size_t cur_unpred_count;
+        unsigned char * indicator_pos = indicator;
+        if(use_mean){
+                // type = result_type;
+                // for(size_t i=0; i<num_x; i++){
+                //      for(size_t j=0; j<num_y; j++){
+                //              for(size_t k=0; k<num_z; k++){
+                //                      offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                //                      offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                //                      offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                //                      data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                //                      current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                //                      current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                //                      current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                //                      // type_offset = offset_x * dim0_offset +  offset_y * current_blockcount_x * dim1_offset + offset_z * current_blockcount_x * current_blockcount_y;
+                //                      // type = result_type + type_offset;
+                //                      size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                //                      // index = i * num_y * num_z + j * num_z + k;
+                //                      // printf("i j k: %ld %ld %ld\toffset: %ld %ld %ld\tindicator: %ld\n", i, j, k, offset_x, offset_y, offset_z, indicator[index]);
+                //                      if(*indicator_pos){
+                //                              // decompress by SZ
+                //                              // cur_unpred_count = decompressDataSeries_double_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data);
+                //                              double * block_data_pos = data_pos;
+                //                              double pred;
+                //                              size_t index = 0;
+                //                              int type_;
+                //                              // d111 is current data
+                //                              size_t unpredictable_count = 0;
+                //                              double d000, d001, d010, d011, d100, d101, d110;
+                //                              for(size_t ii=0; ii<current_blockcount_x; ii++){
+                //                                      for(size_t jj=0; jj<current_blockcount_y; jj++){
+                //                                              for(size_t kk=0; kk<current_blockcount_z; kk++){
+                //                                                      type_ = type[index];
+                //                                                      if(type_ == intvRadius){
+                //                                                              *block_data_pos = mean;
+                //                                                      }
+                //                                                      else if(type_ == 0){
+                //                                                              *block_data_pos = unpred_data[unpredictable_count ++];
+                //                                                      }
+                //                                                      else{
+                //                                                              d000 = d001 = d010 = d011 = d100 = d101 = d110 = 1;
+                //                                                              if(i == 0 && ii == 0){
+                //                                                                      d000 = d001 = d010 = d011 = 0;
+                //                                                              }
+                //                                                              if(j == 0 && jj == 0){
+                //                                                                      d000 = d001 = d100 = d101 = 0;
+                //                                                              }
+                //                                                              if(k == 0 && kk == 0){
+                //                                                                      d000 = d010 = d100 = d110 = 0;
+                //                                                              }
+                //                                                              if(d000){
+                //                                                                      d000 = block_data_pos[- dim0_offset - dim1_offset - 1];
+                //                                                              }
+                //                                                              if(d001){
+                //                                                                      d001 = block_data_pos[- dim0_offset - dim1_offset];
+                //                                                              }
+                //                                                              if(d010){
+                //                                                                      d010 = block_data_pos[- dim0_offset - 1];
+                //                                                              }
+                //                                                              if(d011){
+                //                                                                      d011 = block_data_pos[- dim0_offset];
+                //                                                              }
+                //                                                              if(d100){
+                //                                                                      d100 = block_data_pos[- dim1_offset - 1];
+                //                                                              }
+                //                                                              if(d101){
+                //                                                                      d101 = block_data_pos[- dim1_offset];
+                //                                                              }
+                //                                                              if(d110){
+                //                                                                      d110 = block_data_pos[- 1];
+                //                                                              }
+                //                                                              if(type_ < intvRadius) type_ += 1;
+                //                                                              pred = d110 + d101 + d011 - d100 - d010 - d001 + d000;
+                //                                                              *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                //                                                      }
+                //                                                      index ++;
+                //                                                      block_data_pos ++;
+                //                                              }
+                //                                              block_data_pos += dim1_offset - current_blockcount_z;
+                //                                      }
+                //                                      block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                //                              }
+                //                              cur_unpred_count = unpredictable_count;
+                //                      }
+                //                      else{
+                //                              // decompress by regression
+                //                              {
+                //                                      //restore regression coefficients
+                //                                      double pred;
+                //                                      int type_;
+                //                                      for(int e=0; e<4; e++){
+                //                                              // if(i == 0 && j == 0 && k == 19){
+                //                                              //      printf("~\n");
+                //                                              // }
+                //                                              type_ = coeff_type[e][coeff_index];
+                //                                              if (type_ != 0){
+                //                                                      pred = last_coefficients[e];
+                //                                                      last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                //                                              }
+                //                                              else{
+                //                                                      last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                //                                                      coeff_unpred_data_count[e] ++;
+                //                                              }
+                //                                              if(fabs(last_coefficients[e]) > 10000){
+                //                                                      printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]);
+                //                                                      exit(0);
+                //                                              }
+                //                                      }
+                //                                      coeff_index ++;
+                //                              }
+                //                              {
+                //                                      double * block_data_pos = data_pos;
+                //                                      double pred;
+                //                                      int type_;
+                //                                      size_t index = 0;
+                //                                      size_t unpredictable_count = 0;
+                //                                      for(size_t ii=0; ii<current_blockcount_x; ii++){
+                //                                              for(size_t jj=0; jj<current_blockcount_y; jj++){
+                //                                                      for(size_t kk=0; kk<current_blockcount_z; kk++){
+                //                                                              if(block_data_pos - (*data) == 19470788){
+                //                                                                      printf("dec stop\n");
+                //                                                              }
+                //                                                              type_ = type[index];
+                //                                                              if (type_ != 0){
+                //                                                                      pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                //                                                                      *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                //                                                              }
+                //                                                              else{
+                //                                                                      *block_data_pos = unpred_data[unpredictable_count ++];
+                //                                                              }
+                //                                                              index ++;
+                //                                                              block_data_pos ++;
+                //                                                      }
+                //                                                      block_data_pos += dim1_offset - current_blockcount_z;
+                //                                              }
+                //                                              block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                //                                      }
+                //                                      cur_unpred_count = unpredictable_count;
+                //                              }
+                //                      }
+                //                      type += current_block_elements;
+                //                      indicator_pos ++;
+                //                      unpred_data += cur_unpred_count;
+                //                      // decomp_unpred += cur_unpred_count;
+                //                      // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data);
+                //                      // fflush(stdout);
+                //              }
+                //      }
+                // }
+                type = result_type;
+                // i == 0
+                {
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        data_pos = *data;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = 0;
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                // i == 0 j == 0 k != 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }// end j==0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_y * dim1_offset;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                } // end i==0
+                for(size_t i=1; i<num_x; i++){
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        data_pos = *data + offset_x * dim0_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }// end j = 0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                }
+        }
+        else{
+                type = result_type;
+                // i == 0
+                {
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        data_pos = *data;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = 0;
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                // i == 0 j == 0 k != 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }// end j==0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_y * dim1_offset;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                } // end i==0
+                for(size_t i=1; i<num_x; i++){
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        data_pos = *data + offset_x * dim0_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }// end j = 0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                double * block_data_pos = data_pos;
+                                                double pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        double pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        double * block_data_pos = data_pos;
+                                                        double pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                }
+        }
+        free(coeff_result_type);
+        free(indicator);
+        free(result_type);
+}

TabularUnified thirdparty/SZ/sz/src/szd_double_pwr.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "sz.h"
 #include "Huffman.h"
+#include "utility.h"
 //#include "rw.h"
 …
         free(groupID);
+}
+/**
+ * Shared post-processing for the *_pwr_pre_log double decompressors.
+ *
+ * Rewrites values[0 .. dataSeriesLength-1] in place:
+ *   - an entry below tdps->minLogValue becomes 0;
+ *   - any other entry v becomes exp2(v);
+ *   - when tdps->pwrErrBoundBytes_size > 0, tdps->pwrErrBoundBytes is passed
+ *     through sz_lossless_decompress() to obtain one byte per element, and
+ *     every element whose byte is non-zero is negated.
+ *
+ * @param values            buffer filled by the regular decompressor
+ * @param dataSeriesLength  number of elements in values
+ * @param tdps              supplies minLogValue and the optional sign bytes
+ *
+ * NOTE(review): the sign buffer is presumably allocated by
+ * sz_lossless_decompress() (it is released with free() here, as the original
+ * code did) -- confirm against its implementation.
+ */
+static void restorePreLogValues_double(double* values, size_t dataSeriesLength, TightDataPointStorageD* tdps) {
+        double threshold = tdps->minLogValue;
+        // Start from NULL so that a decompression call which stores nothing
+        // cannot leave us reading or freeing an indeterminate pointer.
+        unsigned char * signs = NULL;
+        if(tdps->pwrErrBoundBytes_size > 0)
+                sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength);
+        for(size_t i=0; i<dataSeriesLength; i++){
+                double value = (values[i] < threshold) ? 0 : exp2(values[i]);
+                // Without a sign buffer every value keeps its non-negative magnitude.
+                if(signs != NULL && signs[i]) value = -value;
+                values[i] = value;
+        }
+        // free(NULL) is a no-op, so no guard is needed.
+        free(signs);
+}
+
+/**
+ * Runs decompressDataSeries_double_1D() and then applies
+ * restorePreLogValues_double() to the dataSeriesLength resulting values.
+ *
+ * @param data              receives the output buffer via *data
+ * @param dataSeriesLength  number of elements
+ * @param tdps              parsed compressed-stream descriptor
+ */
+void decompressDataSeries_double_1D_pwr_pre_log(double** data, size_t dataSeriesLength, TightDataPointStorageD* tdps) {
+        decompressDataSeries_double_1D(data, dataSeriesLength, tdps);
+        restorePreLogValues_double(*data, dataSeriesLength, tdps);
+}
+
+/**
+ * Runs decompressDataSeries_double_2D() and then applies
+ * restorePreLogValues_double() to the r1 * r2 resulting values.
+ *
+ * @param data  receives the output buffer via *data
+ * @param r1    first extent passed to the 2D decompressor
+ * @param r2    second extent passed to the 2D decompressor
+ * @param tdps  parsed compressed-stream descriptor
+ */
+void decompressDataSeries_double_2D_pwr_pre_log(double** data, size_t r1, size_t r2, TightDataPointStorageD* tdps) {
+        size_t dataSeriesLength = r1 * r2;
+        decompressDataSeries_double_2D(data, r1, r2, tdps);
+        restorePreLogValues_double(*data, dataSeriesLength, tdps);
+}
+
+/**
+ * Runs decompressDataSeries_double_3D() and then applies
+ * restorePreLogValues_double() to the r1 * r2 * r3 resulting values.
+ *
+ * @param data  receives the output buffer via *data
+ * @param r1    first extent passed to the 3D decompressor
+ * @param r2    second extent passed to the 3D decompressor
+ * @param r3    third extent passed to the 3D decompressor
+ * @param tdps  parsed compressed-stream descriptor
+ */
+void decompressDataSeries_double_3D_pwr_pre_log(double** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageD* tdps) {
+        size_t dataSeriesLength = r1 * r2 * r3;
+        decompressDataSeries_double_3D(data, r1, r2, r3, tdps);
+        restorePreLogValues_double(*data, dataSeriesLength, tdps);
+}
 #pragma GCC diagnostic pop

TabularUnified thirdparty/SZ/sz/src/szd_float.c ¶

-                      r2c47b73
+                      r9ee2ce3
 /**
  *  @file szd_float.c
  *  @author Sheng Di and Dingwen Tao
  *  @date Aug, 2016
+ *  @author Sheng Di, Dingwen Tao, Xin Liang
+ *  @date Aug, 2018
  *  @brief
  *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
 …
 #include "szd_float_pwr.h"
 #include "szd_float_ts.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=8+4+MetaDataByteLength && cmpSize!=8+8+MetaDataByteLength) //4,8 means two posibilities of SZ_SIZE_TYPE
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
                 if(confparams_dec->szMode!=SZ_TEMPORAL_COMPRESSION)
+                {
                         if(isZlib)
+                        if(confparams_dec->losslessCompressor!=-1)
                                 confparams_dec->szMode = SZ_BEST_COMPRESSION;
                         else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);
 …
+                }
+        }
+        else if (dim == 1)
+                getSnapshotData_float_1D(newData,r1,tdps, errBoundMode);
+        else
+        if (dim == 2)
+                getSnapshotData_float_2D(newData,r2,r1,tdps, errBoundMode);
+        else
+        if (dim == 3)
+                getSnapshotData_float_3D(newData,r3,r2,r1,tdps, errBoundMode);
+        else
+        if (dim == 4)
+                getSnapshotData_float_4D(newData,r4,r3,r2,r1,tdps, errBoundMode);
+        else
+        else
+        {
+                printf("Error: currently support only at most 4 dimensions!\n");
+                status = SZ_DERR;
+                if(tdps->raBytes_size > 0) //v2.0
+                {
+                        if (dim == 1)
+                                getSnapshotData_float_1D(newData,r1,tdps, errBoundMode);
+                        else if(dim == 2)
+                                decompressDataSeries_float_2D_nonblocked_with_blocked_regression(newData, r2, r1, tdps->raBytes);
+                        else if(dim == 3)
+                                decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r3, r2, r1, tdps->raBytes);
+                        else if(dim == 4)
+                                decompressDataSeries_float_3D_nonblocked_with_blocked_regression(newData, r4*r3, r2, r1, tdps->raBytes);
+                        else
+                        {
+                                printf("Error: currently support only at most 4 dimensions!\n");
+                                status = SZ_DERR;
+                        }
+                }
+                else //1.4.13
+                {
+                        if (dim == 1)
+                                getSnapshotData_float_1D(newData,r1,tdps, errBoundMode);
+                        else if (dim == 2)
+                                getSnapshotData_float_2D(newData,r2,r1,tdps, errBoundMode);
+                        else if (dim == 3)
+                                getSnapshotData_float_3D(newData,r3,r2,r1,tdps, errBoundMode);
+                        else if (dim == 4)
+                                getSnapshotData_float_4D(newData,r4,r3,r2,r1,tdps, errBoundMode);
+                        else
+                        {
+                                printf("Error: currently support only at most 4 dimensions!\n");
+                                status = SZ_DERR;
+                        }
+                }
+        }
         free_TightDataPointStorageF2(tdps);
 …
                         else
+                        {
                                 //decompressDataSeries_float_1D_pwr(data, dataSeriesLength, tdps);
                                 decompressDataSeries_float_1D_pwrgroup(data, dataSeriesLength, tdps);
+                                decompressDataSeries_float_1D_pwr_pre_log(data, dataSeriesLength, tdps);
+                                //decompressDataSeries_float_1D_pwrgroup(data, dataSeriesLength, tdps);
+                        }
                         return;
 …
                                 decompressDataSeries_float_1D(&decmpData, dataSeriesLength, tdps);
                         else
+                                decompressDataSeries_float_1D_pwr(&decmpData, dataSeriesLength, tdps);
+                                //decompressDataSeries_float_1D_pwr(&decmpData, dataSeriesLength, tdps);
+                                decompressDataSeries_float_1D_pwr_pre_log(&decmpData, dataSeriesLength, tdps);
                         // insert the decompressed data
                         size_t k = 0;
 …
                         else
+                        {
+                                decompressDataSeries_float_2D_pwr(data, r1, r2, tdps);
+                                //decompressDataSeries_float_2D_pwr(data, r1, r2, tdps);
+                                decompressDataSeries_float_2D_pwr_pre_log(data, r1, r2, tdps);
+                        }
 …
                                 decompressDataSeries_float_2D(&decmpData, r1, r2, tdps);
                         else
+                                decompressDataSeries_float_2D_pwr(&decmpData, r1, r2, tdps);
+                                //decompressDataSeries_float_2D_pwr(&decmpData, r1, r2, tdps);
+                                decompressDataSeries_float_2D_pwr_pre_log(&decmpData, r1, r2, tdps);
                         // insert the decompressed data
                         size_t k = 0;
 …
                                                 decompressDataSeries_float_3D(data, r1, r2, r3, tdps);
                                         else
                                                 decompressDataSeries_float_1D_ts(data, r1*r2*r3, multisteps, tdps);
+                                                decompressDataSeries_float_1D_ts(data, dataSeriesLength, multisteps, tdps);
+                                }
                                 else
 …
                         else
+                        {
+                                decompressDataSeries_float_3D_pwr(data, r1, r2, r3, tdps);
+                                //decompressDataSeries_float_3D_pwr(data, r1, r2, r3, tdps);
+                                decompressDataSeries_float_3D_pwr_pre_log(data, r1, r2, r3, tdps);
+                        }
 …
                                 decompressDataSeries_float_3D(&decmpData, r1, r2, r3, tdps);
                         else
+                                decompressDataSeries_float_3D_pwr(&decmpData, r1, r2, r3, tdps);
+                                //decompressDataSeries_float_3D_pwr(&decmpData, r1, r2, r3, tdps);
+                                decompressDataSeries_float_3D_pwr_pre_log(&decmpData, r1, r2, r3, tdps);
                         // insert the decompressed data
                         size_t k = 0;
 …
                         else
+                        {
+                                decompressDataSeries_float_3D_pwr(data, r1*r2, r3, r4, tdps);
+                                //decompressDataSeries_float_3D_pwr(data, r1*r2, r3, r4, tdps);
+                                decompressDataSeries_float_3D_pwr_pre_log(data, r1*r2, r3, r4, tdps);
                                 //ToDO
                                 //decompressDataSeries_float_4D_pwr(data, r1, r2, r3, r4, tdps);
 …
                                 decompressDataSeries_float_4D(&decmpData, r1, r2, r3, r4, tdps);
                         else
+                                decompressDataSeries_float_3D_pwr(&decmpData, r1*r2, r3, r4, tdps);
+                                //decompressDataSeries_float_3D_pwr(&decmpData, r1*r2, r3, r4, tdps);
+                                decompressDataSeries_float_3D_pwr_pre_log(&decmpData, r1*r2, r3, r4, tdps);
                                 //ToDO
                                 //decompressDataSeries_float_4D_pwr(&decompData, r1, r2, r3, r4, tdps);
 …
+}
+void decompressDataSeries_float_2D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, unsigned char* comp_data){ /*
+         * Reconstructs an r1 x r2 float array from the byte stream comp_data.
+         *
+         * data      : out; receives a buffer of r1*r2 floats obtained from malloc.
+         *             This function never frees it (presumably the caller does).
+         * r1, r2    : array extents; r2 is the row stride of the output.
+         * comp_data : in; compressed stream, consumed front to back.
+         *
+         * Stream layout, in the order it is consumed below:
+         *   1. int    block_size                 (bytesToInt_bigEndian)
+         *   2. double realPrecision              (bytesToDouble)
+         *   3. int    intervals
+         *   4. int    tree_size, int nodeCount, tree_size bytes of Huffman tree
+         *   5. 1 byte use_mean, then float mean  (raw memcpy)
+         *   6. per-block indicator bits, (num_blocks-1)/8+1 bytes
+         *   7. only if some indicator is zero: three coefficient sections, each
+         *      double precision, int radius, tree_size/nodeCount/tree,
+         *      size_t typeArray_size + encoded codes, int count + raw floats
+         *   8. size_t total_unpred (raw memcpy), then that many raw floats
+         *   9. Huffman-coded quantization codes for all r1*r2 elements
+         *
+         * A nonzero indicator selects neighbour-based prediction for that block,
+         * a zero indicator selects a per-block linear regression model.
+         *
+         * NOTE(review): none of the malloc results are checked, and nothing here
+         * bounds-checks the cursor against the length of comp_data.
+         */
+        size_t dim0_offset = r2; // row stride used for all output indexing
+        size_t num_elements = r1 * r2;
+        *data = (float*)malloc(sizeof(float)*num_elements);
+        unsigned char * comp_data_pos = comp_data; // read cursor into the stream
+        size_t block_size = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        /* calculate block dims: the macros (defined elsewhere) fill in the number
+         * of blocks per axis, then split_index and the early/late block lengths.
+         * Below, blocks with index < split_index use the early length and the
+         * rest use the late length.
+         */
+        size_t num_x, num_y;
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
+        size_t split_index_x, split_index_y;
+        size_t early_blockcount_x, early_blockcount_y;
+        size_t late_blockcount_x, late_blockcount_y;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        size_t num_blocks = num_x * num_y;
+        double realPrecision = bytesToDouble(comp_data_pos); // quantization step is 2*realPrecision, see the reconstruction lines
+        comp_data_pos += sizeof(double);
+        unsigned int intervals = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        updateQuantizationInfo(intervals); // presumably sets exe_params->intvRadius, which is read further down - TODO confirm
+        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        int stateNum = 2*intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); // this root decodes the element codes at the decode() call after the coefficient sections
+        comp_data_pos += sizeof(int) + tree_size; // skip nodeCount plus the serialized tree
+        float mean;
+        unsigned char use_mean;
+        memcpy(&use_mean, comp_data_pos, sizeof(unsigned char));
+        comp_data_pos += sizeof(unsigned char);
+        memcpy(&mean, comp_data_pos, sizeof(float)); // always present in the stream, even when use_mean is 0
+        comp_data_pos += sizeof(float);
+        size_t reg_count = 0;
+        unsigned char * indicator; // filled in by the call below and freed at the end of this function
+        size_t indicator_bitlength = (num_blocks - 1)/8 + 1; // despite the name this is a byte count: ceil(num_blocks/8)
+        convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator);
+        comp_data_pos += indicator_bitlength;
+        for(size_t i=0; i<num_blocks; i++){
+                if(!indicator[i]) reg_count ++; // zero indicator = block decoded by regression
+        }
+        //printf("reg_count: %ld\n", reg_count);
+        int coeff_intvRadius[3];
+        int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); // three consecutive slices of num_blocks ints, one per coefficient
+        int * coeff_type[3];
+        double precision[3];
+        float * coeff_unpred_data[3];
+        if(reg_count > 0){ // the coefficient sections exist only when at least one block uses regression; otherwise the arrays above stay unset and are never read
+                for(int i=0; i<3; i++){
+                        precision[i] = bytesToDouble(comp_data_pos);
+                        comp_data_pos += sizeof(double);
+                        coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); // tree_size, stateNum, huffmanTree and root here shadow the outer variables of the same names
+                        comp_data_pos += sizeof(int);
+                        int stateNum = 2*coeff_intvRadius[i]*2;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+                        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount);
+                        comp_data_pos += sizeof(int) + tree_size;
+                        coeff_type[i] = coeff_result_type + i * num_blocks;
+                        size_t typeArray_size = bytesToSize(comp_data_pos);
+                        decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); // one code per regression block, so only the first reg_count entries of the slice are written
+                        comp_data_pos += sizeof(size_t) + typeArray_size;
+                        int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        coeff_unpred_data[i] = (float *) comp_data_pos; // points straight into comp_data, no copy. NOTE(review): assumes the cursor is suitably aligned for float - confirm
+                        comp_data_pos += coeff_unpred_count * sizeof(float);
+                        SZ_ReleaseHuffman(huffmanTree); // releases the inner (shadowing) tree only
+                }
+        }
+        float last_coefficients[3] = {0.0}; // coefficients of the previous regression block; each new block is decoded as a delta from these
+        int coeff_unpred_data_count[3] = {0}; // per-coefficient read index into coeff_unpred_data
+        int coeff_index = 0; // index of the next regression block within coeff_type[e]
+        updateQuantizationInfo(intervals); // second call with the same argument; the reason is not visible in this function
+        size_t total_unpred;
+        memcpy(&total_unpred, comp_data_pos, sizeof(size_t));
+        comp_data_pos += sizeof(size_t);
+        float * unpred_data = (float *) comp_data_pos; // raw element values, read in place. NOTE(review): same alignment assumption as for the coefficients - confirm
+        comp_data_pos += total_unpred * sizeof(float);
+        int * result_type = (int *) malloc(num_elements * sizeof(int));
+        decode(comp_data_pos, num_elements, root, result_type); // uses the outer root built from the first tree in the stream
+        SZ_ReleaseHuffman(huffmanTree);
+        int intvRadius = exe_params->intvRadius;
+        int * type; // cursor into result_type; advanced by one block's element count per block
+        float * data_pos = *data;
+        size_t offset_x, offset_y;
+        size_t current_blockcount_x, current_blockcount_y;
+        size_t cur_unpred_count;
+        unsigned char * indicator_pos = indicator;
+        if(use_mean){ // this branch differs from the else branch only in the neighbour-prediction path: code intvRadius means "mean", and codes below intvRadius are shifted by one
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x; // consistent with early blocks being exactly one element longer than late ones - TODO confirm against SZ_COMPUTE_BLOCKCOUNT
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                data_pos = *data + offset_x * dim0_offset + offset_y; // top-left element of block (i, j) in the output
+                                current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                size_t current_block_elements = current_blockcount_x * current_blockcount_y;
+                                if(*indicator_pos){
+                                        /* decompress by SZ: each element is predicted from its left,
+                                         * upper and upper-left neighbours in the output array. The
+                                         * neighbours may lie in previously decoded blocks; only row 0
+                                         * and column 0 of the whole array lack them.
+                                         */
+                                        float * block_data_pos = data_pos;
+                                        float pred;
+                                        size_t index = 0;
+                                        int type_;
+                                        // d11 is current data; d00 = upper-left, d01 = upper, d10 = left neighbour
+                                        size_t unpredictable_count = 0;
+                                        float d00, d01, d10;
+                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        type_ = type[index];
+                                                        if(type_ == intvRadius){ // in this branch the code intvRadius stands for the stored mean value
+                                                                *block_data_pos = mean;
+                                                        }
+                                                        else if(type_ == 0){ // code 0: value stored verbatim in the unpredictable-data section
+                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                        }
+                                                        else{
+                                                                d00 = d01 = d10 = 1; // used as "neighbour exists" flags first, then overwritten with the neighbour values
+                                                                if(i == 0 && ii == 0){ // first row of the whole array: nothing above
+                                                                        d00 = d01 = 0;
+                                                                }
+                                                                if(j == 0 && jj == 0){ // first column of the whole array: nothing to the left
+                                                                        d00 = d10 = 0;
+                                                                }
+                                                                if(d00){
+                                                                        d00 = block_data_pos[- dim0_offset - 1]; // NOTE(review): dim0_offset is size_t, so this negative index relies on unsigned wrap-around - confirm
+                                                                }
+                                                                if(d01){
+                                                                        d01 = block_data_pos[- dim0_offset];
+                                                                }
+                                                                if(d10){
+                                                                        d10 = block_data_pos[- 1];
+                                                                }
+                                                                if(type_ < intvRadius) type_ += 1; // presumably undoes an encoder-side shift that freed intvRadius for the mean marker - TODO confirm
+                                                                pred = d10 + d01 - d00; // missing neighbours contribute 0
+                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                        }
+                                                        index ++;
+                                                        block_data_pos ++;
+                                                }
+                                                block_data_pos += dim0_offset - current_blockcount_y; // jump to the start of the block's next row
+                                        }
+                                        cur_unpred_count = unpredictable_count;
+                                }
+                                else{
+                                        // decompress by regression (same code as in the else branch of use_mean below)
+                                        {
+                                                //restore regression coefficients: nonzero code = quantized delta from the previous regression block, zero code = raw float from the stream
+                                                float pred;
+                                                int type_;
+                                                for(int e=0; e<3; e++){
+                                                        type_ = coeff_type[e][coeff_index];
+                                                        if (type_ != 0){
+                                                                pred = last_coefficients[e];
+                                                                last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                        }
+                                                        else{
+                                                                last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                coeff_unpred_data_count[e] ++;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        {
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                int type_;
+                                                size_t index = 0;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                type_ = type[index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; // linear model in the block-local coordinates (ii, jj)
+                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                }
+                                                                else{
+                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                }
+                                                                index ++;
+                                                                block_data_pos ++;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                }
+                                type += current_block_elements; // codes are stored block by block, in (i, j) visiting order
+                                indicator_pos ++;
+                                unpred_data += cur_unpred_count; // skip the verbatim values this block consumed
+                        }
+                }
+        }
+        else{ // no mean marker: same traversal as above, but code intvRadius has no special meaning and codes are not shifted
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                data_pos = *data + offset_x * dim0_offset + offset_y;
+                                current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                size_t current_block_elements = current_blockcount_x * current_blockcount_y;
+                                if(*indicator_pos){
+                                        // decompress by SZ: prediction from left, upper and upper-left neighbours in the output array
+                                        float * block_data_pos = data_pos;
+                                        float pred;
+                                        size_t index = 0;
+                                        int type_;
+                                        // d11 is current data; d00 = upper-left, d01 = upper, d10 = left neighbour
+                                        size_t unpredictable_count = 0;
+                                        float d00, d01, d10;
+                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                        type_ = type[index];
+                                                        if(type_ == 0){ // code 0: value stored verbatim
+                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                        }
+                                                        else{
+                                                                d00 = d01 = d10 = 1; // "neighbour exists" flags, later overwritten with the neighbour values
+                                                                if(i == 0 && ii == 0){
+                                                                        d00 = d01 = 0;
+                                                                }
+                                                                if(j == 0 && jj == 0){
+                                                                        d00 = d10 = 0;
+                                                                }
+                                                                if(d00){
+                                                                        d00 = block_data_pos[- dim0_offset - 1]; // NOTE(review): negative index built from a size_t, relies on unsigned wrap-around - confirm
+                                                                }
+                                                                if(d01){
+                                                                        d01 = block_data_pos[- dim0_offset];
+                                                                }
+                                                                if(d10){
+                                                                        d10 = block_data_pos[- 1];
+                                                                }
+                                                                pred = d10 + d01 - d00;
+                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                        }
+                                                        index ++;
+                                                        block_data_pos ++;
+                                                }
+                                                block_data_pos += dim0_offset - current_blockcount_y; // jump to the start of the block's next row
+                                        }
+                                        cur_unpred_count = unpredictable_count;
+                                }
+                                else{
+                                        // decompress by regression
+                                        {
+                                                //restore regression coefficients: nonzero code = quantized delta from the previous regression block, zero code = raw float from the stream
+                                                float pred;
+                                                int type_;
+                                                for(int e=0; e<3; e++){
+                                                        type_ = coeff_type[e][coeff_index];
+                                                        if (type_ != 0){
+                                                                pred = last_coefficients[e];
+                                                                last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                        }
+                                                        else{
+                                                                last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                coeff_unpred_data_count[e] ++;
+                                                        }
+                                                }
+                                                coeff_index ++;
+                                        }
+                                        {
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                int type_;
+                                                size_t index = 0;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                type_ = type[index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2]; // linear model in the block-local coordinates (ii, jj)
+                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                }
+                                                                else{
+                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                }
+                                                                index ++;
+                                                                block_data_pos ++;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                }
+                                type += current_block_elements;
+                                indicator_pos ++;
+                                unpred_data += cur_unpred_count;
+                        }
+                }
+        }
+        free(coeff_result_type); // only scratch buffers are released; *data stays allocated for the caller
+        free(indicator);
+        free(result_type);
+}
+void decompressDataSeries_float_3D_nonblocked_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){
+        size_t dim0_offset = r2 * r3;
+        size_t dim1_offset = r3;
+        size_t num_elements = r1 * r2 * r3;
+        *data = (float*)malloc(sizeof(float)*num_elements);
+        unsigned char * comp_data_pos = comp_data;
+        size_t block_size = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        // calculate block dims
+        size_t num_x, num_y, num_z;
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
+        SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
+        size_t split_index_x, split_index_y, split_index_z;
+        size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
+        size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
+        SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
+        SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
+        SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
+        size_t num_blocks = num_x * num_y * num_z;
+        double realPrecision = bytesToDouble(comp_data_pos);
+        comp_data_pos += sizeof(double);
+        unsigned int intervals = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        updateQuantizationInfo(intervals);
+        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        int stateNum = 2*intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount);
+        comp_data_pos += sizeof(int) + tree_size;
+        float mean;
+        unsigned char use_mean;
+        memcpy(&use_mean, comp_data_pos, sizeof(unsigned char));
+        comp_data_pos += sizeof(unsigned char);
+        memcpy(&mean, comp_data_pos, sizeof(float));
+        comp_data_pos += sizeof(float);
+        size_t reg_count = 0;
+        unsigned char * indicator;
+        size_t indicator_bitlength = (num_blocks - 1)/8 + 1;
+        convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator);
+        comp_data_pos += indicator_bitlength;
+        for(size_t i=0; i<num_blocks; i++){
+                if(!indicator[i]) reg_count ++;
+        }
+        int coeff_intvRadius[4];
+        int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int));
+        int * coeff_type[4];
+        double precision[4];
+        float * coeff_unpred_data[4];
+        if(reg_count > 0){
+                for(int i=0; i<4; i++){
+                        precision[i] = bytesToDouble(comp_data_pos);
+                        comp_data_pos += sizeof(double);
+                        coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        int stateNum = 2*coeff_intvRadius[i]*2;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+                        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+                        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount);
+                        comp_data_pos += sizeof(int) + tree_size;
+                        coeff_type[i] = coeff_result_type + i * num_blocks;
+                        size_t typeArray_size = bytesToSize(comp_data_pos);
+                        decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]);
+                        comp_data_pos += sizeof(size_t) + typeArray_size;
+                        int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        coeff_unpred_data[i] = (float *) comp_data_pos;
+                        comp_data_pos += coeff_unpred_count * sizeof(float);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        float last_coefficients[4] = {0.0};
+        int coeff_unpred_data_count[4] = {0};
+        int coeff_index = 0;
+        updateQuantizationInfo(intervals);
+        size_t total_unpred;
+        memcpy(&total_unpred, comp_data_pos, sizeof(size_t));
+        comp_data_pos += sizeof(size_t);
+        float * unpred_data = (float *) comp_data_pos;
+        comp_data_pos += total_unpred * sizeof(float);
+        int * result_type = (int *) malloc(num_elements * sizeof(int));
+        decode(comp_data_pos, num_elements, root, result_type);
+        SZ_ReleaseHuffman(huffmanTree);
+        int intvRadius = exe_params->intvRadius;
+        int * type;
+        float * data_pos = *data;
+        size_t offset_x, offset_y, offset_z;
+        size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
+        size_t cur_unpred_count;
+        unsigned char * indicator_pos = indicator;
+        if(use_mean){
+                // type = result_type;
+                // for(size_t i=0; i<num_x; i++){
+                //      for(size_t j=0; j<num_y; j++){
+                //              for(size_t k=0; k<num_z; k++){
+                //                      offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                //                      offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                //                      offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                //                      data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                //                      current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                //                      current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                //                      current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                //                      // type_offset = offset_x * dim0_offset +  offset_y * current_blockcount_x * dim1_offset + offset_z * current_blockcount_x * current_blockcount_y;
+                //                      // type = result_type + type_offset;
+                //                      size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                //                      // index = i * num_y * num_z + j * num_z + k;
+                //                      // printf("i j k: %ld %ld %ld\toffset: %ld %ld %ld\tindicator: %ld\n", i, j, k, offset_x, offset_y, offset_z, indicator[index]);
+                //                      if(*indicator_pos){
+                //                              // decompress by SZ
+                //                              // cur_unpred_count = decompressDataSeries_float_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data);
+                //                              float * block_data_pos = data_pos;
+                //                              float pred;
+                //                              size_t index = 0;
+                //                              int type_;
+                //                              // d111 is current data
+                //                              size_t unpredictable_count = 0;
+                //                              float d000, d001, d010, d011, d100, d101, d110;
+                //                              for(size_t ii=0; ii<current_blockcount_x; ii++){
+                //                                      for(size_t jj=0; jj<current_blockcount_y; jj++){
+                //                                              for(size_t kk=0; kk<current_blockcount_z; kk++){
+                //                                                      type_ = type[index];
+                //                                                      if(type_ == intvRadius){
+                //                                                              *block_data_pos = mean;
+                //                                                      }
+                //                                                      else if(type_ == 0){
+                //                                                              *block_data_pos = unpred_data[unpredictable_count ++];
+                //                                                      }
+                //                                                      else{
+                //                                                              d000 = d001 = d010 = d011 = d100 = d101 = d110 = 1;
+                //                                                              if(i == 0 && ii == 0){
+                //                                                                      d000 = d001 = d010 = d011 = 0;
+                //                                                              }
+                //                                                              if(j == 0 && jj == 0){
+                //                                                                      d000 = d001 = d100 = d101 = 0;
+                //                                                              }
+                //                                                              if(k == 0 && kk == 0){
+                //                                                                      d000 = d010 = d100 = d110 = 0;
+                //                                                              }
+                //                                                              if(d000){
+                //                                                                      d000 = block_data_pos[- dim0_offset - dim1_offset - 1];
+                //                                                              }
+                //                                                              if(d001){
+                //                                                                      d001 = block_data_pos[- dim0_offset - dim1_offset];
+                //                                                              }
+                //                                                              if(d010){
+                //                                                                      d010 = block_data_pos[- dim0_offset - 1];
+                //                                                              }
+                //                                                              if(d011){
+                //                                                                      d011 = block_data_pos[- dim0_offset];
+                //                                                              }
+                //                                                              if(d100){
+                //                                                                      d100 = block_data_pos[- dim1_offset - 1];
+                //                                                              }
+                //                                                              if(d101){
+                //                                                                      d101 = block_data_pos[- dim1_offset];
+                //                                                              }
+                //                                                              if(d110){
+                //                                                                      d110 = block_data_pos[- 1];
+                //                                                              }
+                //                                                              if(type_ < intvRadius) type_ += 1;
+                //                                                              pred = d110 + d101 + d011 - d100 - d010 - d001 + d000;
+                //                                                              *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                //                                                      }
+                //                                                      index ++;
+                //                                                      block_data_pos ++;
+                //                                              }
+                //                                              block_data_pos += dim1_offset - current_blockcount_z;
+                //                                      }
+                //                                      block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                //                              }
+                //                              cur_unpred_count = unpredictable_count;
+                //                      }
+                //                      else{
+                //                              // decompress by regression
+                //                              {
+                //                                      //restore regression coefficients
+                //                                      float pred;
+                //                                      int type_;
+                //                                      for(int e=0; e<4; e++){
+                //                                              // if(i == 0 && j == 0 && k == 19){
+                //                                              //      printf("~\n");
+                //                                              // }
+                //                                              type_ = coeff_type[e][coeff_index];
+                //                                              if (type_ != 0){
+                //                                                      pred = last_coefficients[e];
+                //                                                      last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                //                                              }
+                //                                              else{
+                //                                                      last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                //                                                      coeff_unpred_data_count[e] ++;
+                //                                              }
+                //                                              if(fabs(last_coefficients[e]) > 10000){
+                //                                                      printf("%d %d %d-%d: pred %.4f type %d precision %.4g last_coefficients %.4g\n", i, j, k, e, pred, type_, precision[e], last_coefficients[e]);
+                //                                                      exit(0);
+                //                                              }
+                //                                      }
+                //                                      coeff_index ++;
+                //                              }
+                //                              {
+                //                                      float * block_data_pos = data_pos;
+                //                                      float pred;
+                //                                      int type_;
+                //                                      size_t index = 0;
+                //                                      size_t unpredictable_count = 0;
+                //                                      for(size_t ii=0; ii<current_blockcount_x; ii++){
+                //                                              for(size_t jj=0; jj<current_blockcount_y; jj++){
+                //                                                      for(size_t kk=0; kk<current_blockcount_z; kk++){
+                //                                                              if(block_data_pos - (*data) == 19470788){
+                //                                                                      printf("dec stop\n");
+                //                                                              }
+                //                                                              type_ = type[index];
+                //                                                              if (type_ != 0){
+                //                                                                      pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                //                                                                      *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                //                                                              }
+                //                                                              else{
+                //                                                                      *block_data_pos = unpred_data[unpredictable_count ++];
+                //                                                              }
+                //                                                              index ++;
+                //                                                              block_data_pos ++;
+                //                                                      }
+                //                                                      block_data_pos += dim1_offset - current_blockcount_z;
+                //                                              }
+                //                                              block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                //                                      }
+                //                                      cur_unpred_count = unpredictable_count;
+                //                              }
+                //                      }
+                //                      type += current_block_elements;
+                //                      indicator_pos ++;
+                //                      unpred_data += cur_unpred_count;
+                //                      // decomp_unpred += cur_unpred_count;
+                //                      // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data);
+                //                      // fflush(stdout);
+                //              }
+                //      }
+                // }
+                type = result_type;
+                // i == 0
+                {
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        data_pos = *data;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = 0;
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                // i == 0 j == 0 k != 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }// end j==0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_y * dim1_offset;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                } // end i==0
+                for(size_t i=1; i<num_x; i++){
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        data_pos = *data + offset_x * dim0_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }// end j = 0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == intvRadius){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                if(type_ < intvRadius) type_ += 1;
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                }
+        }
+        else{
+                type = result_type;
+                // i == 0
+                {
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        data_pos = *data;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = 0;
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                // i == 0 j == 0 k != 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }// end j==0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_y * dim1_offset;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = early_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                // ii == 0
+                                                {
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] - block_data_pos[- dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                for(size_t ii=1; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                } // end i==0
+                for(size_t i=1; i<num_x; i++){
+                        // j == 0
+                        {
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        data_pos = *data + offset_x * dim0_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim0_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = early_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        // jj == 0
+                                                        {
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        for(size_t jj=1; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        } // end j == 0
+                        for(size_t j=1; j<num_y; j++){
+                                // k == 0
+                                {
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = early_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                {
+                                                                        // kk == 0
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim0_offset - dim1_offset];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                for(size_t kk=1; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                } // end k == 0
+                                for(size_t k=1; k<num_z; k++){
+                                        offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
+                                        offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
+                                        offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
+                                        data_pos = *data + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
+                                        current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
+                                        current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
+                                        current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
+                                        size_t current_block_elements = current_blockcount_x * current_blockcount_y * current_blockcount_z;
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                float * block_data_pos = data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                        for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[- 1] + block_data_pos[- dim1_offset] + block_data_pos[- dim0_offset] - block_data_pos[- dim1_offset - 1] - block_data_pos[- dim0_offset - 1] - block_data_pos[- dim0_offset - dim1_offset] + block_data_pos[- dim0_offset - dim1_offset - 1];
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                        block_data_pos ++;
+                                                                }
+                                                                block_data_pos += dim1_offset - current_blockcount_z;
+                                                        }
+                                                        block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e];
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float * block_data_pos = data_pos;
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<current_blockcount_x; ii++){
+                                                                for(size_t jj=0; jj<current_blockcount_y; jj++){
+                                                                        for(size_t kk=0; kk<current_blockcount_z; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3];
+                                                                                        *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        *block_data_pos = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                                block_data_pos ++;
+                                                                        }
+                                                                        block_data_pos += dim1_offset - current_blockcount_z;
+                                                                }
+                                                                block_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        type += current_block_elements;
+                                        unpred_data += cur_unpred_count;
+                                }
+                        }
+                }
+        }
+#ifdef HAVE_TIMECMPR
+        if(confparams_dec->szMode == SZ_TEMPORAL_COMPRESSION)
+                memcpy(multisteps->hist_data, (*data), num_elements*sizeof(float));
+#endif
+        free(coeff_result_type);
+        free(indicator);
+        free(result_type);
+}
+void decompressDataSeries_float_3D_random_access_with_blocked_regression(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ /*
+         * Decodes the byte stream comp_data into a newly malloc'ed array of
+         * r1*r2*r3 floats stored in *data; the array is not freed here
+         * (presumably the caller owns it -- confirm against callers).
+         *
+         * data      : out, receives the decoded array; element (x,y,z) is at
+         *             x*r2*r3 + y*r3 + z.
+         * r1,r2,r3  : array extents.
+         * comp_data : compressed stream, consumed in this order:
+         *             block_size (int), realPrecision (double), intervals (int),
+         *             tree_size (int), nodeCount (int) followed by the Huffman
+         *             tree bytes, use_mean (1 byte), mean (float), one indicator
+         *             bit per block, then -- only if at least one block has
+         *             indicator 0 -- four coefficient sections (precision,
+         *             radius, tree, type codes, unpredictable values), then
+         *             total_unpred (size_t), total_unpred floats and finally the
+         *             Huffman-coded per-element type codes.
+         *
+         * Every block is decoded into a scratch cube of (block_size+1)^3 floats
+         * and then copied into *data, clipped to r1/r2/r3.
+         *
+         * NOTE(review): none of the malloc results are checked, and
+         * unpred_data / coeff_unpred_data alias comp_data through float*
+         * casts, which assumes those offsets are suitably aligned.
+         */
+        size_t dim0_offset = r2 * r3;
+        size_t dim1_offset = r3;
+        size_t num_elements = r1 * r2 * r3;
+        *data = (float*)malloc(sizeof(float)*num_elements);
+        unsigned char * comp_data_pos = comp_data;
+        size_t block_size = bytesToInt_bigEndian(comp_data_pos); // block edge length comes from the stream
+        comp_data_pos += sizeof(int);
+        // calculate block dims
+        size_t num_x, num_y, num_z;
+        num_x = (r1 - 1) / block_size + 1; // number of blocks per axis, rounded up
+        num_y = (r2 - 1) / block_size + 1;
+        num_z = (r3 - 1) / block_size + 1;
+        size_t max_num_block_elements = block_size * block_size * block_size;
+        size_t num_blocks = num_x * num_y * num_z;
+        double realPrecision = bytesToDouble(comp_data_pos);
+        comp_data_pos += sizeof(double);
+        unsigned int intervals = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        updateQuantizationInfo(intervals);
+        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
+        comp_data_pos += sizeof(int);
+        int stateNum = 2*intervals;
+        HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,comp_data_pos+sizeof(int), nodeCount); // tree for the per-element type codes, used by decode() further down
+        comp_data_pos += sizeof(int) + tree_size;
+        float mean;
+        unsigned char use_mean;
+        memcpy(&use_mean, comp_data_pos, sizeof(unsigned char));
+        comp_data_pos += sizeof(unsigned char);
+        memcpy(&mean, comp_data_pos, sizeof(float));
+        comp_data_pos += sizeof(float);
+        size_t reg_count = 0;
+        unsigned char * indicator;
+        size_t indicator_bitlength = (num_blocks - 1)/8 + 1; // bytes needed for one bit per block
+        convertByteArray2IntArray_fast_1b(num_blocks, comp_data_pos, indicator_bitlength, &indicator);
+        comp_data_pos += indicator_bitlength;
+        for(size_t i=0; i<num_blocks; i++){
+                if(!indicator[i]) reg_count ++; // indicator 0 selects the regression path below
+        }
+        int coeff_intvRadius[4];
+        int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int));
+        int * coeff_type[4];
+        double precision[4];
+        float * coeff_unpred_data[4];
+        if(reg_count > 0){ // coefficient sections are only read when some block uses regression
+                for(int i=0; i<4; i++){
+                        precision[i] = bytesToDouble(comp_data_pos);
+                        comp_data_pos += sizeof(double);
+                        coeff_intvRadius[i] = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        int stateNum = 2*coeff_intvRadius[i]*2;
+                        HuffmanTree* huffmanTree = createHuffmanTree(stateNum); // shadows the outer tree; released at the end of this iteration
+                        int nodeCount = bytesToInt_bigEndian(comp_data_pos);
+                        node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+sizeof(int), nodeCount);
+                        comp_data_pos += sizeof(int) + tree_size;
+                        coeff_type[i] = coeff_result_type + i * num_blocks; // coefficient i gets its own num_blocks-sized slice
+                        size_t typeArray_size = bytesToSize(comp_data_pos);
+                        decode(comp_data_pos + sizeof(size_t), reg_count, root, coeff_type[i]); // one code per regression block
+                        comp_data_pos += sizeof(size_t) + typeArray_size;
+                        int coeff_unpred_count = bytesToInt_bigEndian(comp_data_pos);
+                        comp_data_pos += sizeof(int);
+                        coeff_unpred_data[i] = (float *) comp_data_pos; // points into comp_data, nothing is copied
+                        comp_data_pos += coeff_unpred_count * sizeof(float);
+                        SZ_ReleaseHuffman(huffmanTree);
+                }
+        }
+        float last_coefficients[4] = {0.0}; // carried from one regression block to the next
+        int coeff_unpred_data_count[4] = {0};
+        int coeff_index = 0;
+        updateQuantizationInfo(intervals);
+        size_t total_unpred;
+        memcpy(&total_unpred, comp_data_pos, sizeof(size_t)); // copied raw, unlike the bytesTo* helpers used above
+        comp_data_pos += sizeof(size_t);
+        float * unpred_data = (float *) comp_data_pos; // points into comp_data, nothing is copied
+        comp_data_pos += total_unpred * sizeof(float);
+        int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int));
+        decode(comp_data_pos, num_blocks*max_num_block_elements, root, result_type); // block_size^3 codes per block, edge blocks included
+        SZ_ReleaseHuffman(huffmanTree);
+        int intvRadius = exe_params->intvRadius;
+        int * type;
+        float * data_pos = *data;
+        size_t cur_unpred_count;
+        unsigned char * indicator_pos = indicator;
+        int dec_buffer_size = block_size + 1; // one extra layer per axis in front of the block
+        float * dec_buffer = (float *) malloc(dec_buffer_size*dec_buffer_size*dec_buffer_size*sizeof(float));
+        memset(dec_buffer, 0, dec_buffer_size*dec_buffer_size*dec_buffer_size*sizeof(float)); // the extra layer is never written afterwards, so it stays 0
+        float * block_data_pos_x = NULL;
+        float * block_data_pos_y = NULL;
+        float * block_data_pos_z = NULL;
+        int block_dim0_offset = dec_buffer_size*dec_buffer_size;
+        int block_dim1_offset = dec_buffer_size;
+        if(use_mean){ // identical to the else branch except that type code 1 stands for the stored mean
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                for(size_t k=0; k<num_z; k++){
+                                        data_pos = dec_buffer + dec_buffer_size*dec_buffer_size + dec_buffer_size + 1; // element (1,1,1) of the scratch cube
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                // cur_unpred_count = decompressDataSeries_float_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data);
+                                                float * block_data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<block_size; ii++){
+                                                        for(size_t jj=0; jj<block_size; jj++){
+                                                                for(size_t kk=0; kk<block_size; kk++){
+                                                                        block_data_pos = data_pos + ii*block_dim0_offset + jj*block_dim1_offset + kk;
+                                                                        type_ = type[index];
+                                                                        if(type_ == 1){
+                                                                                *block_data_pos = mean;
+                                                                        }
+                                                                        else if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++]; // code 0: value stored verbatim
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[-1] + block_data_pos[-block_dim1_offset]+ block_data_pos[-block_dim0_offset] - block_data_pos[-block_dim1_offset - 1]
+                                                                                                 - block_data_pos[-block_dim0_offset - 1] - block_data_pos[-block_dim0_offset - block_dim1_offset] + block_data_pos[-block_dim0_offset - block_dim1_offset - 1]; // predicted from the 7 preceding neighbours inside the scratch cube
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                }
+                                                        }
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                // if(i == 0 && j == 0 && k == 19){
+                                                                //      printf("~\n");
+                                                                // }
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e]; // previous regression block's coefficient is the prediction
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<block_size; ii++){
+                                                                for(size_t jj=0; jj<block_size; jj++){
+                                                                        for(size_t kk=0; kk<block_size; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; // linear in the in-block coordinates
+                                                                                        data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                        }
+                                                                }
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        unpred_data += cur_unpred_count; // skip the verbatim values this block consumed
+                                        // decomp_unpred += cur_unpred_count;
+                                        // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data);
+                                        // fflush(stdout);
+                                        type += block_size * block_size * block_size; // a full block of codes is consumed even when the block is clipped
+                                        // mv data back
+                                        block_data_pos_x = *data + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size;
+                                        for(int ii=0; ii<block_size; ii++){
+                                                if(i*block_size + ii >= r1) break; // clip at the array boundary
+                                                block_data_pos_y = block_data_pos_x;
+                                                for(int jj=0; jj<block_size; jj++){
+                                                        if(j*block_size + jj >= r2) break;
+                                                        block_data_pos_z = block_data_pos_y;
+                                                        for(int kk=0; kk<block_size; kk++){
+                                                                if(k*block_size + kk >= r3) break;
+                                                                *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk];
+                                                                block_data_pos_z ++;
+                                                        }
+                                                        block_data_pos_y += dim1_offset;
+                                                }
+                                                block_data_pos_x += dim0_offset;
+                                        }
+                                }
+                        }
+                }
+        }
+        else{
+                type = result_type;
+                for(size_t i=0; i<num_x; i++){
+                        for(size_t j=0; j<num_y; j++){
+                                for(size_t k=0; k<num_z; k++){
+                                        data_pos = dec_buffer + dec_buffer_size*dec_buffer_size + dec_buffer_size + 1; // element (1,1,1) of the scratch cube
+                                        if(*indicator_pos){
+                                                // decompress by SZ
+                                                // cur_unpred_count = decompressDataSeries_float_3D_blocked_nonblock_pred(data_pos, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, i, j, k, realPrecision, type, unpred_data);
+                                                float * block_data_pos;
+                                                float pred;
+                                                size_t index = 0;
+                                                int type_;
+                                                size_t unpredictable_count = 0;
+                                                for(size_t ii=0; ii<block_size; ii++){
+                                                        for(size_t jj=0; jj<block_size; jj++){
+                                                                for(size_t kk=0; kk<block_size; kk++){
+                                                                        block_data_pos = data_pos + ii*block_dim0_offset + jj*block_dim1_offset + kk;
+                                                                        type_ = type[index];
+                                                                        if(type_ == 0){
+                                                                                *block_data_pos = unpred_data[unpredictable_count ++]; // code 0: value stored verbatim
+                                                                        }
+                                                                        else{
+                                                                                pred = block_data_pos[-1] + block_data_pos[-block_dim1_offset]+ block_data_pos[-block_dim0_offset] - block_data_pos[-block_dim1_offset - 1]
+                                                                                                 - block_data_pos[-block_dim0_offset - 1] - block_data_pos[-block_dim0_offset - block_dim1_offset] + block_data_pos[-block_dim0_offset - block_dim1_offset - 1]; // predicted from the 7 preceding neighbours inside the scratch cube
+                                                                                *block_data_pos = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                        }
+                                                                        index ++;
+                                                                }
+                                                        }
+                                                }
+                                                cur_unpred_count = unpredictable_count;
+                                        }
+                                        else{
+                                                // decompress by regression
+                                                {
+                                                        //restore regression coefficients
+                                                        float pred;
+                                                        int type_;
+                                                        for(int e=0; e<4; e++){
+                                                                // if(i == 0 && j == 0 && k == 19){
+                                                                //      printf("~\n");
+                                                                // }
+                                                                type_ = coeff_type[e][coeff_index];
+                                                                if (type_ != 0){
+                                                                        pred = last_coefficients[e]; // previous regression block's coefficient is the prediction
+                                                                        last_coefficients[e] = pred + 2 * (type_ - coeff_intvRadius[e]) * precision[e];
+                                                                }
+                                                                else{
+                                                                        last_coefficients[e] = coeff_unpred_data[e][coeff_unpred_data_count[e]];
+                                                                        coeff_unpred_data_count[e] ++;
+                                                                }
+                                                        }
+                                                        coeff_index ++;
+                                                }
+                                                {
+                                                        float pred;
+                                                        int type_;
+                                                        size_t index = 0;
+                                                        size_t unpredictable_count = 0;
+                                                        for(size_t ii=0; ii<block_size; ii++){
+                                                                for(size_t jj=0; jj<block_size; jj++){
+                                                                        for(size_t kk=0; kk<block_size; kk++){
+                                                                                type_ = type[index];
+                                                                                if (type_ != 0){
+                                                                                        pred = last_coefficients[0] * ii + last_coefficients[1] * jj + last_coefficients[2] * kk + last_coefficients[3]; // linear in the in-block coordinates
+                                                                                        data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = pred + 2 * (type_ - intvRadius) * realPrecision;
+                                                                                }
+                                                                                else{
+                                                                                        data_pos[ii*block_dim0_offset + jj*block_dim1_offset + kk] = unpred_data[unpredictable_count ++];
+                                                                                }
+                                                                                index ++;
+                                                                        }
+                                                                }
+                                                        }
+                                                        cur_unpred_count = unpredictable_count;
+                                                }
+                                        }
+                                        indicator_pos ++;
+                                        unpred_data += cur_unpred_count; // skip the verbatim values this block consumed
+                                        // decomp_unpred += cur_unpred_count;
+                                        // printf("block comp done, data_offset from %ld to %ld: diff %ld\n", *data, data_pos, data_pos - *data);
+                                        // fflush(stdout);
+                                        type += block_size * block_size * block_size; // a full block of codes is consumed even when the block is clipped
+                                        // mv data back
+                                        block_data_pos_x = *data + i*block_size * dim0_offset + j*block_size * dim1_offset + k*block_size;
+                                        for(int ii=0; ii<block_size; ii++){
+                                                if(i*block_size + ii >= r1) break; // clip at the array boundary
+                                                block_data_pos_y = block_data_pos_x;
+                                                for(int jj=0; jj<block_size; jj++){
+                                                        if(j*block_size + jj >= r2) break;
+                                                        block_data_pos_z = block_data_pos_y;
+                                                        for(int kk=0; kk<block_size; kk++){
+                                                                if(k*block_size + kk >= r3) break;
+                                                                *block_data_pos_z = data_pos[ii*dec_buffer_size*dec_buffer_size + jj*dec_buffer_size + kk];
+                                                                block_data_pos_z ++;
+                                                        }
+                                                        block_data_pos_y += dim1_offset;
+                                                }
+                                                block_data_pos_x += dim0_offset;
+                                        }
+                                }
+                        }
+                }
+        }
+        free(dec_buffer);
+        free(coeff_result_type);
+        free(indicator);
+        free(result_type);
+}

TabularUnified thirdparty/SZ/sz/src/szd_float_pwr.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "Huffman.h"
 #include "sz_float_pwr.h"
+#include "utility.h"
 //#include "rw.h"
 //
 …
         free(groupID);
+}
+void decompressDataSeries_float_1D_pwr_pre_log(float** data, size_t dataSeriesLength, TightDataPointStorageF* tdps) { /*
+         * Calls decompressDataSeries_float_1D, then rewrites every element of
+         * *data in place: values below tdps->minLogValue become 0, all others are
+         * replaced by exp2(value). When tdps->pwrErrBoundBytes_size > 0, the
+         * pwrErrBoundBytes payload is losslessly decompressed into a per-element
+         * flag array and every element whose flag is non-zero is negated.
+         * NOTE(review): presumably the stream holds log2 of the magnitudes with
+         * the signs stored separately -- confirm against the matching compressor.
+         */
+        decompressDataSeries_float_1D(data, dataSeriesLength, tdps);
+        float threshold = tdps->minLogValue; // decoded values below this are reset to 0
+        if(tdps->pwrErrBoundBytes_size > 0){
+                unsigned char * signs; // set through &signs by sz_lossless_decompress (assumed to allocate it -- TODO confirm); freed below
+                sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); // last argument presumably the expected output size, one byte per element -- TODO confirm
+                for(size_t i=0; i<dataSeriesLength; i++){
+                        if((*data)[i] < threshold) (*data)[i] = 0;
+                        else (*data)[i] = exp2((*data)[i]);
+                        if(signs[i]) (*data)[i] = -((*data)[i]); // non-zero flag: negate the element
+                }
+                free(signs);
+        }
+        else{ // no sign payload: apply the threshold/exp2 mapping only
+                for(size_t i=0; i<dataSeriesLength; i++){
+                        if((*data)[i] < threshold) (*data)[i] = 0;
+                        else (*data)[i] = exp2((*data)[i]);
+                }
+        }
+}
+void decompressDataSeries_float_2D_pwr_pre_log(float** data, size_t r1, size_t r2, TightDataPointStorageF* tdps) { /*
+         * Calls decompressDataSeries_float_2D, then rewrites all r1*r2 elements of
+         * *data in place: values below tdps->minLogValue become 0, all others are
+         * replaced by exp2(value). When tdps->pwrErrBoundBytes_size > 0, the
+         * pwrErrBoundBytes payload is losslessly decompressed into a per-element
+         * flag array and every element whose flag is non-zero is negated.
+         * NOTE(review): presumably the stream holds log2 of the magnitudes with
+         * the signs stored separately -- confirm against the matching compressor.
+         */
+        size_t dataSeriesLength = r1 * r2; // element count; *data is indexed as a flat array below
+        decompressDataSeries_float_2D(data, r1, r2, tdps);
+        float threshold = tdps->minLogValue; // decoded values below this are reset to 0
+        if(tdps->pwrErrBoundBytes_size > 0){
+                unsigned char * signs; // set through &signs by sz_lossless_decompress (assumed to allocate it -- TODO confirm); freed below
+                sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); // last argument presumably the expected output size, one byte per element -- TODO confirm
+                for(size_t i=0; i<dataSeriesLength; i++){
+                        if((*data)[i] < threshold) (*data)[i] = 0;
+                        else (*data)[i] = exp2((*data)[i]);
+                        if(signs[i]) (*data)[i] = -((*data)[i]); // non-zero flag: negate the element
+                }
+                free(signs);
+        }
+        else{ // no sign payload: apply the threshold/exp2 mapping only
+                for(size_t i=0; i<dataSeriesLength; i++){
+                        if((*data)[i] < threshold) (*data)[i] = 0;
+                        else (*data)[i] = exp2((*data)[i]);
+                }
+        }
+}
+void decompressDataSeries_float_3D_pwr_pre_log(float** data, size_t r1, size_t r2, size_t r3, TightDataPointStorageF* tdps) { /*
+         * Calls decompressDataSeries_float_3D, then rewrites all r1*r2*r3 elements
+         * of *data in place: values below tdps->minLogValue become 0, all others
+         * are replaced by exp2(value). When tdps->pwrErrBoundBytes_size > 0, the
+         * pwrErrBoundBytes payload is losslessly decompressed into a per-element
+         * flag array and every element whose flag is non-zero is negated.
+         * NOTE(review): presumably the stream holds log2 of the magnitudes with
+         * the signs stored separately -- confirm against the matching compressor.
+         */
+        size_t dataSeriesLength = r1 * r2 * r3; // element count; *data is indexed as a flat array below
+        decompressDataSeries_float_3D(data, r1, r2, r3, tdps);
+        float threshold = tdps->minLogValue; // decoded values below this are reset to 0
+        if(tdps->pwrErrBoundBytes_size > 0){
+                unsigned char * signs; // set through &signs by sz_lossless_decompress (assumed to allocate it -- TODO confirm); freed below
+                sz_lossless_decompress(confparams_dec->losslessCompressor, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength); // last argument presumably the expected output size, one byte per element -- TODO confirm
+                for(size_t i=0; i<dataSeriesLength; i++){
+                        if((*data)[i] < threshold) (*data)[i] = 0;
+                        else (*data)[i] = exp2((*data)[i]);
+                        if(signs[i]) (*data)[i] = -((*data)[i]); // non-zero flag: negate the element
+                }
+                free(signs);
+        }
+        else{ // no sign payload: apply the threshold/exp2 mapping only
+                for(size_t i=0; i<dataSeriesLength; i++){
+                        if((*data)[i] < threshold) (*data)[i] = 0;
+                        else (*data)[i] = exp2((*data)[i]);
+                }
+        }
+}
 #pragma GCC diagnostic pop

TabularUnified thirdparty/SZ/sz/src/szd_int16.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_int16.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         unsigned char* szTmpBytes;
                 if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+        if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength)
+        {
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szd_int32.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_int32.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szd_int64.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_int64.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szd_int8.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_int8.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szd_uint16.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_uint16.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=4+2+4+MetaDataByteLength && cmpSize!=4+2+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szd_uint32.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_uint32.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=4+4+4+MetaDataByteLength && cmpSize!=4+4+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szd_uint64.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_uint64.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=4+8+4+MetaDataByteLength && cmpSize!=4+8+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szd_uint8.c ¶

-                      r2c47b73
+                      r9ee2ce3
 #include "szd_uint8.h"
 #include "Huffman.h"
+#include "utility.h"
 /**
 …
         if(cmpSize!=4+1+4+MetaDataByteLength && cmpSize!=4+1+8+MetaDataByteLength)
+        {
                 int isZlib = isZlibFormat(cmpBytes[0], cmpBytes[1]);
                 if(isZlib)
+                confparams_dec->losslessCompressor = is_lossless_compressed_data(cmpBytes, cmpSize);
+                if(confparams_dec->losslessCompressor!=-1)
                         confparams_dec->szMode = SZ_BEST_COMPRESSION;
                 else
 …
                         if(targetUncompressSize<MIN_ZLIB_DEC_ALLOMEM_BYTES) //Considering the minimum size
                                 targetUncompressSize = MIN_ZLIB_DEC_ALLOMEM_BYTES;
                         tmpSize = zlib_uncompress5(cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//         (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
+                        tmpSize = sz_lossless_decompress(confparams_dec->losslessCompressor, cmpBytes, (unsigned long)cmpSize, &szTmpBytes, (unsigned long)targetUncompressSize+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE);//               (unsigned long)targetUncompressSize+8: consider the total length under lossless compression mode is actually 3+4+1+targetUncompressSize
                         //szTmpBytes = (unsigned char*)malloc(sizeof(unsigned char)*tmpSize);
                         //memcpy(szTmpBytes, tmpBytes, tmpSize);

TabularUnified thirdparty/SZ/sz/src/szf.c ¶

-                      r2c47b73
+                      r9ee2ce3
 void sz_compress_d1_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, 0, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d2_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d3_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, *r3, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d4_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, *r4, *r3, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d5_float_args_(float* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, *r5, *r4, *r3, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_FLOAT, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d1_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, 0, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d2_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, 0, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d3_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, 0, *r3, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d4_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, 0, *r4, *r3, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
 void sz_compress_d5_double_args_(double* data, unsigned char *bytes, size_t *outSize, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
+{
         unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 1, *r5, *r4, *r3, *r2, *r1);
+        unsigned char *tmp_bytes = SZ_compress_args(SZ_DOUBLE, data, outSize, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
         memcpy(bytes, tmp_bytes, *outSize);
         free(tmp_bytes);
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, 0, *r1);
+        SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
+}
 void sz_batchaddvar_d2_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
+}
 void sz_batchaddvar_d3_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, *r3, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
+}
 void sz_batchaddvar_d4_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, *r4, *r3, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
+}
 void sz_batchaddvar_d5_float_(char* varName, int *len, float* data, int *errBoundMode, float *absErrBound, float *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_FLOAT, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
+}
 void sz_batchaddvar_d1_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, 0, *r1);
+        SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, 0, *r1);
+}
 void sz_batchaddvar_d2_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, 0, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, 0, *r2, *r1);
+}
 void sz_batchaddvar_d3_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, 0, *r3, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, 0, *r3, *r2, *r1);
+}
 void sz_batchaddvar_d4_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0, *r4, *r3, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, 0, *r4, *r3, *r2, *r1);
+}
 void sz_batchaddvar_d5_double_(char* varName, int *len, double* data, int *errBoundMode, double *absErrBound, double *relBoundRatio, size_t *r1, size_t *r2, size_t *r3, size_t *r4, size_t *r5)
 …
         s2[i]=varName[i];
     s2[*len]='\0';
         SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, *r5, *r4, *r3, *r2, *r1);
+        SZ_batchAddVar(s2, SZ_DOUBLE, data, *errBoundMode, *absErrBound, *relBoundRatio, 0.1, *r5, *r4, *r3, *r2, *r1);
+}
 void sz_batchdelvar_c_(char* varName, int *len, int *errState)
 …
         *errState = SZ_batchDelVar(s2);
+}
+/*@deprecated*/
 void sz_batch_compress_c_(unsigned char* bytes, size_t *outSize)
+{
+        unsigned char* tmp_bytes = SZ_batch_compress(outSize);
+        memcpy(bytes, tmp_bytes, *outSize);
+        free(tmp_bytes);
+}
+        //unsigned char* tmp_bytes = SZ_batch_compress(outSize);
+        //memcpy(bytes, tmp_bytes, *outSize);
+        //free(tmp_bytes);
+}
+/*@deprecated*/
 void sz_batch_decompress_c_(unsigned char* bytes, size_t *byteLength, int *ierr)
+{
         SZ_batch_decompress(bytes, *byteLength, ierr);
+        //SZ_batch_decompress(bytes, *byteLength, ierr);
+}

Note: See TracChangeset for help on using the changeset viewer.