[2c47b73] | 1 | #ifndef PASTRIF_H |
---|
| 2 | #define PASTRIF_H |
---|
| 3 | |
---|
| 4 | static inline int64_t pastri_float_quantize(float x, float binSize){ |
---|
| 5 | //Add or sub 0.5, depending on the sign: |
---|
| 6 | x=x/binSize; |
---|
| 7 | |
---|
| 8 | u_UI64I64D u1,half; |
---|
| 9 | u1.d=x; |
---|
| 10 | |
---|
| 11 | half.d=0.5; |
---|
| 12 | |
---|
| 13 | //printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); |
---|
| 14 | //printf("sign(x):0x%lx\n", x); |
---|
| 15 | //printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); |
---|
| 16 | half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); |
---|
| 17 | //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); |
---|
| 18 | return (int64_t)(x + half.d); |
---|
| 19 | } |
---|
| 20 | |
---|
| 21 | static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_blockParams* bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){ |
---|
| 22 | //Find the pattern. |
---|
| 23 | //First, find the extremum point: |
---|
| 24 | float absExt=0; //Absolute value of Extremum |
---|
| 25 | int extIdx=-1; //Index of Extremum |
---|
| 26 | bp->nonZeros=0; |
---|
| 27 | int i,sb; |
---|
| 28 | for(i=0;i<p->bSize;i++){ |
---|
| 29 | //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG |
---|
| 30 | if(abs_FastD(data[i])>p->usedEb){ |
---|
| 31 | bp->nonZeros++; |
---|
| 32 | //if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG |
---|
| 33 | } |
---|
| 34 | if(abs_FastD(data[i])>absExt){ |
---|
| 35 | absExt=abs_FastD(data[i]); |
---|
| 36 | extIdx=i; |
---|
| 37 | } |
---|
| 38 | } |
---|
| 39 | int patternIdx; //Starting Index of Pattern |
---|
| 40 | patternIdx=(extIdx/p->sbSize)*p->sbSize; |
---|
| 41 | |
---|
| 42 | float patternExt=data[extIdx]; |
---|
| 43 | bp->binSize=2*p->usedEb; |
---|
| 44 | |
---|
| 45 | //if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG |
---|
| 46 | //if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG |
---|
| 47 | |
---|
| 48 | //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG |
---|
| 49 | |
---|
| 50 | //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! |
---|
| 51 | |
---|
| 52 | |
---|
| 53 | for(i=0;i<p->sbSize;i++){ |
---|
| 54 | patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize); |
---|
| 55 | if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} |
---|
| 56 | } |
---|
| 57 | |
---|
| 58 | bp->patternBits=bitsNeeded_float((abs_FastD(patternExt)/bp->binSize)+1)+1; |
---|
| 59 | bp->scaleBits=bp->patternBits; |
---|
| 60 | bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1); |
---|
| 61 | //if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG |
---|
| 62 | //if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG |
---|
| 63 | if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG |
---|
| 64 | |
---|
| 65 | //Calculate Scales. |
---|
| 66 | //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values. |
---|
| 67 | int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum. |
---|
| 68 | //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! |
---|
| 69 | int patternExtZero=(patternExt==0); |
---|
| 70 | //if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG |
---|
| 71 | for(sb=0;sb<p->sbNum;sb++){ |
---|
| 72 | //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; |
---|
| 73 | //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt; |
---|
| 74 | //assert(scales[sb]<=1); |
---|
| 75 | scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); |
---|
| 76 | if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} |
---|
| 77 | } |
---|
| 78 | //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG |
---|
| 79 | |
---|
| 80 | //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. |
---|
| 81 | |
---|
| 82 | //uint64_t wVal; |
---|
| 83 | bp->ECQExt=0; |
---|
| 84 | int _1DIdx; |
---|
| 85 | bp->ECQ1s=0; |
---|
| 86 | bp->ECQOthers=0; |
---|
| 87 | float PS_binSize=bp->scalesBinSize*bp->binSize; |
---|
| 88 | for(sb=0;sb<p->sbNum;sb++){ |
---|
| 89 | for(i=0;i<p->sbSize;i++){ |
---|
| 90 | _1DIdx=sb*p->sbSize+i; |
---|
| 91 | ECQ[_1DIdx]=pastri_float_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize ); |
---|
| 92 | float absECQ=abs_FastD(ECQ[_1DIdx]); |
---|
| 93 | if(absECQ > bp->ECQExt) |
---|
| 94 | bp->ECQExt=absECQ; |
---|
| 95 | //if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG |
---|
| 96 | switch (ECQ[_1DIdx]){ |
---|
| 97 | case 0: |
---|
| 98 | //ECQ0s++; //Currently not needed |
---|
| 99 | break; |
---|
| 100 | case 1: |
---|
| 101 | bp->ECQ1s++; |
---|
| 102 | break; |
---|
| 103 | case -1: |
---|
| 104 | bp->ECQ1s++; |
---|
| 105 | break; |
---|
| 106 | default: |
---|
| 107 | bp->ECQOthers++; |
---|
| 108 | break; |
---|
| 109 | } |
---|
| 110 | } |
---|
| 111 | } |
---|
| 112 | |
---|
| 113 | /* |
---|
| 114 | //DEBUG: Self-check. Remove this later. |
---|
| 115 | for(sb=0;sb<p->sbNum;sb++){ |
---|
| 116 | for(i=0;i<p->sbSize;i++){ |
---|
| 117 | _1DIdx=sb*p->sbSize+i; |
---|
| 118 | float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; |
---|
| 119 | if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ |
---|
| 120 | printf("p->usedEb=%.6e\n",p->usedEb); |
---|
| 121 | printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); |
---|
| 122 | assert(0); |
---|
| 123 | } |
---|
| 124 | } |
---|
| 125 | } |
---|
| 126 | */ |
---|
| 127 | } |
---|
| 128 | |
---|
| 129 | static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){ |
---|
| 130 | bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1; |
---|
| 131 | bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); |
---|
| 132 | //(*numOutBytes)=0; |
---|
| 133 | |
---|
| 134 | int i; |
---|
| 135 | |
---|
| 136 | //Encode: 3 options: |
---|
| 137 | //Compressed, Sparse ECQ |
---|
| 138 | //Compressed, Non-Sparse ECQ |
---|
| 139 | //Uncompressed, Sparse Data |
---|
| 140 | //Uncompressed, Non-spsarse Data |
---|
| 141 | |
---|
| 142 | unsigned int UCSparseBits; //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data} |
---|
| 143 | unsigned int UCNonSparseBits; //Uncompressed, NonSparse bits. Includes: mode, data |
---|
| 144 | unsigned int CSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} |
---|
| 145 | unsigned int CNonSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ} |
---|
| 146 | //int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier! |
---|
| 147 | |
---|
| 148 | //Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2} |
---|
| 149 | //ECQ0s is actually not needed, but others are needed. |
---|
| 150 | |
---|
| 151 | UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16); //64 bits for 4 indexes, 64 bit for data. |
---|
| 152 | UCNonSparseBits = p->dataSize*(1 + p->bSize*8); |
---|
| 153 | bp->numOutliers=bp->ECQ1s+bp->ECQOthers; |
---|
| 154 | if(bp->ECQBits==2){ |
---|
| 155 | CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits); |
---|
| 156 | CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ; //Or: ECQ0s+ECQ1s*2; |
---|
| 157 | }else{ //ECQBits>2 |
---|
| 158 | CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits); |
---|
| 159 | //CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits); |
---|
| 160 | CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits); |
---|
| 161 | } |
---|
| 162 | |
---|
| 163 | int UCSparseBytes=(UCSparseBits+7)/8; |
---|
| 164 | int UCNonSparseBytes=(UCNonSparseBits+7)/8; |
---|
| 165 | int CSparseBytes=(CSparseBits+7)/8; |
---|
| 166 | int CNonSparseBytes=(CNonSparseBits+7)/8; |
---|
| 167 | uint64_t bitPos=0; |
---|
| 168 | uint64_t bytePos=0; |
---|
| 169 | int i0,i1,i2,i3; |
---|
| 170 | int _1DIdx; |
---|
| 171 | |
---|
| 172 | //*(uint16_t*)(&outBuf[1])=p->idxOffset[0]; |
---|
| 173 | //*(uint16_t*)(&outBuf[3])=p->idxOffset[1]; |
---|
| 174 | //*(uint16_t*)(&outBuf[5])=p->idxOffset[2]; |
---|
| 175 | //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; |
---|
| 176 | |
---|
| 177 | if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG |
---|
| 178 | if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG |
---|
| 179 | |
---|
| 180 | //**************************************************************************************** |
---|
| 181 | //if(0){ //DEBUG |
---|
| 182 | //W:UCSparse |
---|
| 183 | if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){ |
---|
| 184 | //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data |
---|
| 185 | *numOutBytes=UCSparseBytes; |
---|
| 186 | if(D_G){printf("UCSparse\n");} //DEBUG |
---|
| 187 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
| 188 | outBuf[0]=0; //mode |
---|
| 189 | |
---|
| 190 | //*(uint16_t*)(&outBuf[9])=nonZeros; |
---|
| 191 | //bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11. |
---|
| 192 | *(uint16_t*)(&outBuf[1])=bp->nonZeros; |
---|
| 193 | bytePos=3;//0:mode, 2-3:NonZeros. So start from 3. |
---|
| 194 | |
---|
| 195 | for(i0=0;i0<p->idxRange[0];i0++) |
---|
| 196 | for(i1=0;i1<p->idxRange[1];i1++) |
---|
| 197 | for(i2=0;i2<p->idxRange[2];i2++) |
---|
| 198 | for(i3=0;i3<p->idxRange[3];i3++){ |
---|
| 199 | _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; |
---|
| 200 | if(abs_FastD(data[_1DIdx])>p->usedEb){ |
---|
| 201 | //*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0]; |
---|
| 202 | *(uint16_t*)(&outBuf[bytePos])=i0; |
---|
| 203 | bytePos+=2; |
---|
| 204 | //*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1]; |
---|
| 205 | *(uint16_t*)(&outBuf[bytePos])=i1; |
---|
| 206 | bytePos+=2; |
---|
| 207 | //*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2]; |
---|
| 208 | *(uint16_t*)(&outBuf[bytePos])=i2; |
---|
| 209 | bytePos+=2; |
---|
| 210 | //*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3]; |
---|
| 211 | *(uint16_t*)(&outBuf[bytePos])=i3; |
---|
| 212 | bytePos+=2; |
---|
| 213 | |
---|
| 214 | *(float*)(&outBuf[bytePos])=data[_1DIdx]; |
---|
| 215 | bytePos+=p->dataSize; |
---|
| 216 | } |
---|
| 217 | } |
---|
| 218 | |
---|
| 219 | if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG |
---|
| 220 | |
---|
| 221 | //**************************************************************************************** |
---|
| 222 | //}else if(0){ //DEBUG |
---|
| 223 | //W:UCNonSparse |
---|
| 224 | }else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){ |
---|
| 225 | //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data |
---|
| 226 | *numOutBytes=UCNonSparseBytes; |
---|
| 227 | if(D_G){printf("UCNonSparse\n");} //DEBUG |
---|
| 228 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
| 229 | outBuf[0]=1; //mode |
---|
| 230 | |
---|
| 231 | //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9); |
---|
| 232 | memcpy(&outBuf[1], data, p->bSize*p->dataSize); |
---|
| 233 | |
---|
| 234 | if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG |
---|
| 235 | /* |
---|
| 236 | for(i=0;i<UCNonSparseBytes-17;i++){ |
---|
| 237 | printf("%d ",inBuf[p->bSize*8+i]); |
---|
| 238 | } |
---|
| 239 | printf("\n"); |
---|
| 240 | for(i=0;i<UCNonSparseBytes-17;i++){ |
---|
| 241 | printf("%d ",outBuf[17+i]); |
---|
| 242 | } |
---|
| 243 | printf("\n"); |
---|
| 244 | */ |
---|
| 245 | //**************************************************************************************** |
---|
| 246 | //}else if(1){ //DEBUG |
---|
| 247 | //W:CSparse |
---|
| 248 | }else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){ |
---|
| 249 | //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} |
---|
| 250 | *numOutBytes=CSparseBytes; |
---|
| 251 | if(D_G){printf("CSparse\n");} //DEBUG |
---|
| 252 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
| 253 | //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG |
---|
| 254 | outBuf[0]=2; //mode |
---|
| 255 | |
---|
| 256 | ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. |
---|
| 257 | //outBuf[13]=patternBits; |
---|
| 258 | //outBuf[14]=ECQBits; |
---|
| 259 | ////Currently, we are at the end of 15th byte. |
---|
| 260 | //*(uint16_t*)(&outBuf[15])=numOutliers; |
---|
| 261 | //bitPos=17*8; //Currently, we are at the end of 17th byte. |
---|
| 262 | |
---|
| 263 | //outBuf bytes [1:4] are reserved for compressedBytes. |
---|
| 264 | outBuf[5]=bp->patternBits; |
---|
| 265 | outBuf[6]=bp->ECQBits; |
---|
| 266 | //Currently, we are at the end of 7th byte. |
---|
| 267 | |
---|
| 268 | *(uint16_t*)(&outBuf[7])=bp->numOutliers; |
---|
| 269 | //Now, we are at the end of 9th byte. |
---|
| 270 | bitPos=9*8; |
---|
| 271 | |
---|
| 272 | //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG |
---|
| 273 | |
---|
| 274 | for(i=0;i<p->sbSize;i++){ |
---|
| 275 | writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point |
---|
| 276 | } |
---|
| 277 | //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG |
---|
| 278 | for(i=0;i<p->sbNum;i++){ |
---|
| 279 | writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale |
---|
| 280 | } |
---|
| 281 | //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG |
---|
| 282 | //if(DEBUG)printf("ECQBits:%d\n",ECQBits); |
---|
| 283 | switch(bp->ECQBits){ |
---|
| 284 | case 2: |
---|
| 285 | for(i=0;i<p->bSize;i++){ |
---|
| 286 | switch(ECQ[i]){ |
---|
| 287 | case 0: |
---|
| 288 | break; |
---|
| 289 | case 1: |
---|
| 290 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG |
---|
| 291 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
| 292 | //writeBits_Fast(outBuf,&bitPos,2,0x10); |
---|
| 293 | //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 |
---|
| 294 | //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 |
---|
| 295 | writeBits_Fast(outBuf,&bitPos,1,0);//0x00 |
---|
| 296 | break; |
---|
| 297 | case -1: |
---|
| 298 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG |
---|
| 299 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
| 300 | //writeBits_Fast(outBuf,&bitPos,2,0x11); |
---|
| 301 | //writeBits_Fast(outBuf,&bitPos,2,1);//0x01 |
---|
| 302 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 303 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
| 304 | break; |
---|
| 305 | default: |
---|
| 306 | assert(0); |
---|
| 307 | break; |
---|
| 308 | } |
---|
| 309 | } |
---|
| 310 | break; |
---|
| 311 | default: //ECQBits>2 |
---|
| 312 | for(i=0;i<p->bSize;i++){ |
---|
| 313 | switch(ECQ[i]){ |
---|
| 314 | case 0: |
---|
| 315 | break; |
---|
| 316 | case 1: |
---|
| 317 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG |
---|
| 318 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
| 319 | //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 |
---|
| 320 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 321 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 322 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 323 | break; |
---|
| 324 | case -1: |
---|
| 325 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG |
---|
| 326 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
| 327 | //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 |
---|
| 328 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 329 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 330 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
| 331 | break; |
---|
| 332 | default: |
---|
| 333 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG |
---|
| 334 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
| 335 | //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]); |
---|
| 336 | //writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits)); |
---|
| 337 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 338 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
| 339 | writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); |
---|
| 340 | break; |
---|
| 341 | } |
---|
| 342 | } |
---|
| 343 | break; |
---|
| 344 | } |
---|
| 345 | |
---|
| 346 | //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG |
---|
| 347 | if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG |
---|
| 348 | |
---|
| 349 | |
---|
| 350 | uint32_t bytePos=(bitPos+7)/8; |
---|
| 351 | //*(uint32_t*)(&outBuf[9])=bytePos; |
---|
| 352 | *(uint32_t*)(&outBuf[1])=bytePos; |
---|
| 353 | |
---|
| 354 | if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG |
---|
| 355 | if(D_G){assert(bitPos==CSparseBits);} |
---|
| 356 | |
---|
| 357 | //**************************************************************************************** |
---|
| 358 | //W:CNonSparse |
---|
| 359 | }else { |
---|
| 360 | //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} |
---|
| 361 | *numOutBytes=CNonSparseBytes; |
---|
| 362 | if(D_G){printf("CNonSparse\n");} //DEBUG |
---|
| 363 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
| 364 | //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG |
---|
| 365 | outBuf[0]=3; //mode |
---|
| 366 | |
---|
| 367 | ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. |
---|
| 368 | //outBuf[13]=patternBits; |
---|
| 369 | //outBuf[14]=ECQBits; |
---|
| 370 | //bitPos=15*8; //Currently, we are at the end of 15th byte. |
---|
| 371 | |
---|
| 372 | //outBuf bytes [1:4] are reserved for compressedBytes. |
---|
| 373 | outBuf[5]=bp->patternBits; |
---|
| 374 | outBuf[6]=bp->ECQBits; |
---|
| 375 | bitPos=7*8; //Currently, we are at the end of 7th byte. |
---|
| 376 | |
---|
| 377 | //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG |
---|
| 378 | |
---|
| 379 | for(i=0;i<p->sbSize;i++){ |
---|
| 380 | writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point |
---|
| 381 | } |
---|
| 382 | //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG |
---|
| 383 | for(i=0;i<p->sbNum;i++){ |
---|
| 384 | writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale |
---|
| 385 | } |
---|
| 386 | //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG |
---|
| 387 | //if(DEBUG)printf("ECQBits:%d\n",ECQBits); |
---|
| 388 | switch(bp->ECQBits){ |
---|
| 389 | case 2: |
---|
| 390 | for(i=0;i<p->bSize;i++){ |
---|
| 391 | switch(ECQ[i]){ |
---|
| 392 | case 0: |
---|
| 393 | //if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG |
---|
| 394 | writeBits_Fast(outBuf,&bitPos,1,1);//0x1 |
---|
| 395 | break; |
---|
| 396 | case 1: |
---|
| 397 | //if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG |
---|
| 398 | //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 |
---|
| 399 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 400 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 401 | break; |
---|
| 402 | case -1: |
---|
| 403 | //if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG |
---|
| 404 | //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 |
---|
| 405 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 406 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
| 407 | break; |
---|
| 408 | default: |
---|
| 409 | assert(0); |
---|
| 410 | break; |
---|
| 411 | } |
---|
| 412 | } |
---|
| 413 | break; |
---|
| 414 | default: //ECQBits>2 |
---|
| 415 | //if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG |
---|
| 416 | for(i=0;i<p->bSize;i++){ |
---|
| 417 | //if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
| 418 | //if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG |
---|
| 419 | //if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG |
---|
| 420 | switch(ECQ[i]){ |
---|
| 421 | case 0: |
---|
| 422 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG |
---|
| 423 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
| 424 | //temp1=bitPos; |
---|
| 425 | writeBits_Fast(outBuf,&bitPos,1,1); //0x1 |
---|
| 426 | //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 |
---|
| 427 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
| 428 | break; |
---|
| 429 | case 1: |
---|
| 430 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG |
---|
| 431 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
| 432 | //temp1=bitPos; |
---|
| 433 | //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 |
---|
| 434 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 435 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 436 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 437 | //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 |
---|
| 438 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
| 439 | break; |
---|
| 440 | case -1: |
---|
| 441 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG |
---|
| 442 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
| 443 | //temp1=bitPos; |
---|
| 444 | //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 |
---|
| 445 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 446 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 447 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
| 448 | //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 |
---|
| 449 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
| 450 | break; |
---|
| 451 | default: |
---|
| 452 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG |
---|
| 453 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
| 454 | //temp1=bitPos; |
---|
| 455 | //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 |
---|
| 456 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
| 457 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
| 458 | //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 |
---|
| 459 | writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); |
---|
| 460 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
| 461 | break; |
---|
| 462 | } |
---|
| 463 | } |
---|
| 464 | break; |
---|
| 465 | } |
---|
| 466 | |
---|
| 467 | //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG |
---|
| 468 | if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG |
---|
| 469 | |
---|
| 470 | |
---|
| 471 | |
---|
| 472 | uint32_t bytePos=(bitPos+7)/8; |
---|
| 473 | //*(uint32_t*)(&outBuf[9])=bytePos; |
---|
| 474 | *(uint32_t*)(&outBuf[1])=bytePos; |
---|
| 475 | |
---|
| 476 | if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG |
---|
| 477 | if(D_G){assert(bitPos==CNonSparseBits);} |
---|
| 478 | |
---|
| 479 | } |
---|
| 480 | //for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG |
---|
| 481 | |
---|
| 482 | } |
---|
| 483 | static inline int pastri_float_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){ |
---|
| 484 | pastri_blockParams bp; |
---|
| 485 | |
---|
| 486 | if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG |
---|
| 487 | if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG |
---|
| 488 | if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG |
---|
| 489 | if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG |
---|
| 490 | |
---|
| 491 | int64_t patternQ[MAX_PS_SIZE]; |
---|
| 492 | int64_t scalesQ[MAX_PS_SIZE]; |
---|
| 493 | int64_t ECQ[MAX_BLOCK_SIZE]; |
---|
| 494 | |
---|
| 495 | float *data; |
---|
| 496 | data=(float*)inBuf; |
---|
| 497 | |
---|
| 498 | //STEP 0: PREPROCESSING: |
---|
| 499 | //This step can include flattening the block, determining the period, etc. |
---|
| 500 | //Currently not needed. |
---|
| 501 | |
---|
| 502 | //STEP 1: PATTERN MATCH |
---|
| 503 | pastri_float_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ); |
---|
| 504 | |
---|
| 505 | //STEP 2: ENCODING(Include QUANTIZE) |
---|
| 506 | pastri_float_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes); |
---|
| 507 | |
---|
| 508 | |
---|
| 509 | return 0; |
---|
| 510 | } |
---|
| 511 | |
---|
/*
 * Map a quantization index back to a value: the index q multiplied by the
 * bin width binSize, computed in single precision.
 */
static inline float pastri_float_InverseQuantization(int64_t q, float binSize){
  const float restored=(float)q*binSize;
  return restored;
}
---|
| 515 | |
---|
| 516 | static inline void pastri_float_PredictData(pastri_params *p,pastri_blockParams *bp,float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ |
---|
| 517 | int j; |
---|
| 518 | float PS_binSize=bp->scalesBinSize*bp->binSize; |
---|
| 519 | for(j=0;j<p->bSize;j++){ |
---|
| 520 | //data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize; |
---|
| 521 | data[j]=pastri_float_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_float_InverseQuantization(ECQ[j],bp->binSize); |
---|
| 522 | } |
---|
| 523 | } |
---|
| 524 | |
---|
| 525 | static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ |
---|
| 526 | int j; |
---|
| 527 | bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); |
---|
| 528 | //float *data=(float*)(outBuf+p->bSize*8); |
---|
| 529 | float *data=(float*)(outBuf); |
---|
| 530 | int i0,i1,i2,i3; |
---|
| 531 | //uint16_t *idx0,*idx1,*idx2,*idx3; |
---|
| 532 | int _1DIdx; |
---|
| 533 | |
---|
| 534 | int64_t ECQTemp; |
---|
| 535 | uint64_t bytePos=0; |
---|
| 536 | uint64_t bitPos=0; |
---|
| 537 | uint64_t temp,temp2; |
---|
| 538 | //int sb,localIdx; |
---|
| 539 | |
---|
| 540 | |
---|
| 541 | //idx0=(uint16_t*)(outBuf ); |
---|
| 542 | //idx1=(uint16_t*)(outBuf+p->bSize*2); |
---|
| 543 | //idx2=(uint16_t*)(outBuf+p->bSize*4); |
---|
| 544 | //idx3=(uint16_t*)(outBuf+p->bSize*6); |
---|
| 545 | //p->idxOffset[0]=*(uint32_t*)(&inBuf[1]); |
---|
| 546 | //p->idxOffset[1]=*(uint32_t*)(&inBuf[3]); |
---|
| 547 | //p->idxOffset[2]=*(uint32_t*)(&inBuf[5]); |
---|
| 548 | //p->idxOffset[3]=*(uint32_t*)(&inBuf[7]); |
---|
| 549 | /* |
---|
| 550 | for(i0=0;i0<p->idxRange[0];i0++) |
---|
| 551 | for(i1=0;i1<p->idxRange[1];i1++) |
---|
| 552 | for(i2=0;i2<p->idxRange[2];i2++) |
---|
| 553 | for(i3=0;i3<p->idxRange[3];i3++){ |
---|
| 554 | //_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3; |
---|
| 555 | _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; |
---|
| 556 | idx0[_1DIdx]=i0+1+p->idxOffset[0]; |
---|
| 557 | idx1[_1DIdx]=i1+1+p->idxOffset[1]; |
---|
| 558 | idx2[_1DIdx]=i2+1+p->idxOffset[2]; |
---|
| 559 | idx3[_1DIdx]=i3+1+p->idxOffset[3]; |
---|
| 560 | } |
---|
| 561 | */ |
---|
| 562 | |
---|
| 563 | //*numOutBytes=p->bSize*16; |
---|
| 564 | |
---|
| 565 | //inBuf[0] is "mode" |
---|
| 566 | switch(inBuf[0]){ |
---|
| 567 | //R:UCSparse |
---|
| 568 | case 0: |
---|
| 569 | if(D_G){printf("\nDC:UCSparse\n");} //DEBUG |
---|
| 570 | //bp->nonZeros=*(uint16_t*)(&inBuf[9]); |
---|
| 571 | //bytePos=11; |
---|
| 572 | bp->nonZeros=*(uint16_t*)(&inBuf[1]); |
---|
| 573 | bytePos=3; |
---|
| 574 | for(j=0;j<p->bSize;j++){ |
---|
| 575 | data[j]=0; |
---|
| 576 | } |
---|
| 577 | for(j=0;j<bp->nonZeros;j++){ |
---|
| 578 | //i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0 |
---|
| 579 | i0=*(uint16_t*)(&inBuf[bytePos]); //i0 |
---|
| 580 | bytePos+=2; |
---|
| 581 | //i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1 |
---|
| 582 | i1=*(uint16_t*)(&inBuf[bytePos]); //i1 |
---|
| 583 | bytePos+=2; |
---|
| 584 | //i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2 |
---|
| 585 | i2=*(uint16_t*)(&inBuf[bytePos]); //i2 |
---|
| 586 | bytePos+=2; |
---|
| 587 | //i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3 |
---|
| 588 | i3=*(uint16_t*)(&inBuf[bytePos]); //i3 |
---|
| 589 | bytePos+=2; |
---|
| 590 | _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; |
---|
| 591 | data[_1DIdx]=*(float*)(&inBuf[bytePos]); |
---|
| 592 | bytePos+=8; |
---|
| 593 | } |
---|
| 594 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
| 595 | break; |
---|
| 596 | //R:UCNonSparse |
---|
| 597 | case 1: |
---|
| 598 | if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG |
---|
| 599 | //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); |
---|
| 600 | memcpy(data, &inBuf[1], p->bSize*8); |
---|
| 601 | bytePos=p->bSize*8; |
---|
| 602 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
| 603 | break; |
---|
| 604 | //R:CSparse |
---|
| 605 | case 2: |
---|
| 606 | if(D_G){printf("\nDC:CSparse\n");} //DEBUG |
---|
| 607 | //for(j=0;j<p->bSize;j++){ |
---|
| 608 | // data[j]=0; |
---|
| 609 | //} |
---|
| 610 | |
---|
| 611 | //bp->patternBits=inBuf[13]; |
---|
| 612 | //bp->ECQBits=inBuf[14]; |
---|
| 613 | |
---|
| 614 | bp->patternBits=inBuf[5]; |
---|
| 615 | bp->ECQBits=inBuf[6]; |
---|
| 616 | |
---|
| 617 | if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG |
---|
| 618 | |
---|
| 619 | //bp->numOutliers=*(uint16_t*)(&inBuf[15]); |
---|
| 620 | //bitPos=17*8; |
---|
| 621 | bp->numOutliers=*(uint16_t*)(&inBuf[7]); |
---|
| 622 | bitPos=9*8; |
---|
| 623 | if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG |
---|
| 624 | |
---|
| 625 | bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); |
---|
| 626 | |
---|
| 627 | bp->binSize=p->usedEb*2; |
---|
| 628 | |
---|
| 629 | if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG |
---|
| 630 | |
---|
| 631 | for(j=0;j<p->sbSize;j++){ |
---|
| 632 | patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point |
---|
| 633 | if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} |
---|
| 634 | } |
---|
| 635 | for(j=0;j<p->sbNum;j++){ |
---|
| 636 | scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale |
---|
| 637 | if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} |
---|
| 638 | } |
---|
| 639 | |
---|
| 640 | /* //Splitting |
---|
| 641 | for(j=0;j<p->bSize;j++){ |
---|
| 642 | data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; |
---|
| 643 | } |
---|
| 644 | */ |
---|
| 645 | for(j=0;j<p->bSize;j++){ |
---|
| 646 | ECQ[j]=0; |
---|
| 647 | } |
---|
| 648 | switch(bp->ECQBits){ |
---|
| 649 | case 2: |
---|
| 650 | for(j=0;j<bp->numOutliers;j++){ |
---|
| 651 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG |
---|
| 652 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG |
---|
| 653 | |
---|
| 654 | _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
| 655 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
| 656 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
| 657 | //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); |
---|
| 658 | //continue; |
---|
| 659 | //sb=_1DIdx/p->sbSize; |
---|
| 660 | //localIdx=_1DIdx%p->sbSize; |
---|
| 661 | |
---|
| 662 | ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting |
---|
| 663 | ECQ[_1DIdx]=ECQTemp; |
---|
| 664 | |
---|
| 665 | //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG |
---|
| 666 | } |
---|
| 667 | break; |
---|
| 668 | default: //bp->ECQBits>2 |
---|
| 669 | if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG |
---|
| 670 | |
---|
| 671 | for(j=0;j<bp->numOutliers;j++){ |
---|
| 672 | _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
| 673 | //sb=_1DIdx/p->sbSize; |
---|
| 674 | //localIdx=_1DIdx%p->sbSize; |
---|
| 675 | temp=readBits_UI64(inBuf,&bitPos,1); |
---|
| 676 | //if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG |
---|
| 677 | switch(temp){ |
---|
| 678 | case 0: //+-1 |
---|
| 679 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
| 680 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
| 681 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
| 682 | //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); |
---|
| 683 | break; |
---|
| 684 | case 1: //Others |
---|
| 685 | ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); |
---|
| 686 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
| 687 | //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); |
---|
| 688 | break; |
---|
| 689 | //default: |
---|
| 690 | // printf("ERROR: Bad 2-bit value: 0x%lx",temp); |
---|
| 691 | // assert(0); //AMG |
---|
| 692 | // break; |
---|
| 693 | } |
---|
| 694 | |
---|
| 695 | //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting |
---|
| 696 | ECQ[_1DIdx]=ECQTemp; |
---|
| 697 | |
---|
| 698 | //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG |
---|
| 699 | } |
---|
| 700 | break; |
---|
| 701 | } |
---|
| 702 | //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] |
---|
| 703 | //patternQ=(int64_t*)(inBuf+15); |
---|
| 704 | //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); |
---|
| 705 | |
---|
| 706 | bytePos=(bitPos+7)/8; |
---|
| 707 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
| 708 | |
---|
| 709 | //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) |
---|
| 710 | pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); |
---|
| 711 | |
---|
| 712 | break; |
---|
| 713 | //R:CNonSparse |
---|
| 714 | case 3: |
---|
| 715 | if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG |
---|
| 716 | |
---|
| 717 | //for(j=0;j<p->bSize;j++){ |
---|
| 718 | // data[j]=0; |
---|
| 719 | //} |
---|
| 720 | |
---|
| 721 | //bp->patternBits=inBuf[13]; |
---|
| 722 | //bp->ECQBits=inBuf[14]; |
---|
| 723 | |
---|
| 724 | bp->patternBits=inBuf[5]; |
---|
| 725 | bp->ECQBits=inBuf[6]; |
---|
| 726 | |
---|
| 727 | if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG |
---|
| 728 | |
---|
| 729 | //bitPos=15*8; |
---|
| 730 | bitPos=7*8; |
---|
| 731 | |
---|
| 732 | bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); |
---|
| 733 | bp->binSize=p->usedEb*2; |
---|
| 734 | |
---|
| 735 | if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG |
---|
| 736 | |
---|
| 737 | for(j=0;j<p->sbSize;j++){ |
---|
| 738 | patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point |
---|
| 739 | if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} |
---|
| 740 | } |
---|
| 741 | for(j=0;j<p->sbNum;j++){ |
---|
| 742 | scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale |
---|
| 743 | if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} |
---|
| 744 | } |
---|
| 745 | /* //Splitting |
---|
| 746 | for(j=0;j<p->bSize;j++){ |
---|
| 747 | data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; |
---|
| 748 | //if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} |
---|
| 749 | } |
---|
| 750 | */ |
---|
| 751 | switch(bp->ECQBits){ |
---|
| 752 | case 2: |
---|
| 753 | for(j=0;j<p->bSize;j++){ |
---|
| 754 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG |
---|
| 755 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG |
---|
| 756 | //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
| 757 | temp=readBits_UI64(inBuf,&bitPos,1); |
---|
| 758 | switch(temp){ |
---|
| 759 | case 0: |
---|
| 760 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
| 761 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
| 762 | break; |
---|
| 763 | case 1: |
---|
| 764 | ECQTemp=0; |
---|
| 765 | break; |
---|
| 766 | default: |
---|
| 767 | assert(0); |
---|
| 768 | break; |
---|
| 769 | } |
---|
| 770 | |
---|
| 771 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
| 772 | //continue; |
---|
| 773 | //sb=_1DIdx/p->sbSize; |
---|
| 774 | //localIdx=_1DIdx%p->sbSize; |
---|
| 775 | |
---|
| 776 | //data[j]-=ECQTemp*bp->binSize; //Splitting |
---|
| 777 | ECQ[j]=ECQTemp; |
---|
| 778 | |
---|
| 779 | //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG |
---|
| 780 | } |
---|
| 781 | break; |
---|
| 782 | default: //bp->ECQBits>2 |
---|
| 783 | //if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); |
---|
| 784 | |
---|
| 785 | for(j=0;j<p->bSize;j++){ |
---|
| 786 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
| 787 | //if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); |
---|
| 788 | |
---|
| 789 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG |
---|
| 790 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG |
---|
| 791 | //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
| 792 | temp=readBits_UI64(inBuf,&bitPos,1); |
---|
| 793 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
| 794 | switch(temp){ |
---|
| 795 | case 0: |
---|
| 796 | //if(DEBUG)printf("Read:0"); |
---|
| 797 | temp2=readBits_UI64(inBuf,&bitPos,1); |
---|
| 798 | switch(temp2){ |
---|
| 799 | case 0: |
---|
| 800 | //if(DEBUG)printf("0"); |
---|
| 801 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
| 802 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
| 803 | //if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); |
---|
| 804 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
| 805 | //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); |
---|
| 806 | break; |
---|
| 807 | case 1: |
---|
| 808 | //if(DEBUG)printf("1\n"); |
---|
| 809 | ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); |
---|
| 810 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
| 811 | //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); |
---|
| 812 | break; |
---|
| 813 | default: |
---|
| 814 | assert(0); |
---|
| 815 | break; |
---|
| 816 | } |
---|
| 817 | break; |
---|
| 818 | case 1: |
---|
| 819 | //if(DEBUG)printf("Read:1\n"); |
---|
| 820 | ECQTemp=0; |
---|
| 821 | //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); |
---|
| 822 | break; |
---|
| 823 | default: |
---|
| 824 | assert(0); |
---|
| 825 | break; |
---|
| 826 | } |
---|
| 827 | |
---|
| 828 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
| 829 | //continue; |
---|
| 830 | //sb=_1DIdx/p->sbSize; |
---|
| 831 | //localIdx=_1DIdx%p->sbSize; |
---|
| 832 | |
---|
| 833 | //data[j]-=ECQTemp*bp->binSize; //Splitting |
---|
| 834 | ECQ[j]=ECQTemp; |
---|
| 835 | |
---|
| 836 | //if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG |
---|
| 837 | } |
---|
| 838 | break; |
---|
| 839 | } |
---|
| 840 | //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] |
---|
| 841 | //patternQ=(int64_t*)(inBuf+15); |
---|
| 842 | //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); |
---|
| 843 | bytePos=(bitPos+7)/8; |
---|
| 844 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
| 845 | |
---|
| 846 | //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) |
---|
| 847 | pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); |
---|
| 848 | break; |
---|
| 849 | |
---|
| 850 | default: |
---|
| 851 | assert(0); |
---|
| 852 | break; |
---|
| 853 | } |
---|
| 854 | (*numReadBytes)=bytePos; |
---|
| 855 | } |
---|
| 856 | |
---|
| 857 | static inline void pastri_float_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){ |
---|
| 858 | int64_t patternQ[MAX_PS_SIZE]; |
---|
| 859 | int64_t scalesQ[MAX_PS_SIZE]; |
---|
| 860 | int64_t ECQ[MAX_BLOCK_SIZE]; |
---|
| 861 | |
---|
| 862 | pastri_blockParams bp; |
---|
| 863 | |
---|
| 864 | //STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION)) |
---|
| 865 | //(Further steps are called inside pastri_float_Decode function) |
---|
| 866 | pastri_float_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ); |
---|
| 867 | |
---|
| 868 | return; |
---|
| 869 | } |
---|
| 870 | |
---|
| 871 | //inBuf vs Decompressed |
---|
| 872 | static inline int pastri_float_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){ |
---|
| 873 | int i; |
---|
| 874 | |
---|
| 875 | float *data=(float*)(inBuf); |
---|
| 876 | float *data_dc=(float*)(DC); |
---|
| 877 | |
---|
| 878 | //Comparing Indexes: |
---|
| 879 | /* |
---|
| 880 | for(i=0;i<p->bSize;i++){ |
---|
| 881 | if(idx0[i]!=idx0_dc[i]){ |
---|
| 882 | printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); |
---|
| 883 | assert(0); |
---|
| 884 | } |
---|
| 885 | if(idx1[i]!=idx1_dc[i]){ |
---|
| 886 | printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); |
---|
| 887 | assert(0); |
---|
| 888 | } |
---|
| 889 | if(idx2[i]!=idx2_dc[i]){ |
---|
| 890 | printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); |
---|
| 891 | assert(0); |
---|
| 892 | } |
---|
| 893 | if(idx3[i]!=idx3_dc[i]){ |
---|
| 894 | printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); |
---|
| 895 | assert(0); |
---|
| 896 | } |
---|
| 897 | } |
---|
| 898 | */ |
---|
| 899 | |
---|
| 900 | //Comparing Data: |
---|
| 901 | for(i=0;i<p->bSize;i++){ |
---|
| 902 | if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ |
---|
| 903 | printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); |
---|
| 904 | assert(0); |
---|
| 905 | } |
---|
| 906 | } |
---|
| 907 | return 0; |
---|
| 908 | } |
---|
| 909 | |
---|
| 910 | |
---|
| 911 | #endif |
---|