1 | #ifndef PASTRIF_H |
---|
2 | #define PASTRIF_H |
---|
3 | |
---|
4 | static inline int64_t pastri_float_quantize(float x, float binSize){ |
---|
5 | //Add or sub 0.5, depending on the sign: |
---|
6 | x=x/binSize; |
---|
7 | |
---|
8 | u_UI64I64D u1,half; |
---|
9 | u1.d=x; |
---|
10 | |
---|
11 | half.d=0.5; |
---|
12 | |
---|
13 | //printf("pastri_float_quantize:\nx=%lf x=0x%lx\n",x,(*((uint64_t *)(&x)))); |
---|
14 | //printf("sign(x):0x%lx\n", x); |
---|
15 | //printf("0.5:0x%lx\n", (*((uint64_t *)(&half)))); |
---|
16 | half.ui64 |= (u1.ui64 & (uint64_t)0x8000000000000000); |
---|
17 | //printf("sign(x)*0.5:0x%lx\n", (*((uint64_t *)(&half)))); |
---|
18 | return (int64_t)(x + half.d); |
---|
19 | } |
---|
20 | |
---|
21 | static inline void pastri_float_PatternMatch(float*data,pastri_params* p,pastri_blockParams* bp,int64_t* patternQ,int64_t *scalesQ, int64_t* ECQ){ |
---|
22 | //Find the pattern. |
---|
23 | //First, find the extremum point: |
---|
24 | float absExt=0; //Absolute value of Extremum |
---|
25 | int extIdx=-1; //Index of Extremum |
---|
26 | bp->nonZeros=0; |
---|
27 | int i,sb; |
---|
28 | for(i=0;i<p->bSize;i++){ |
---|
29 | //printf("data[%d] = %.16lf\n",i,data[i]);//DEBUG |
---|
30 | if(abs_FastD(data[i])>p->usedEb){ |
---|
31 | bp->nonZeros++; |
---|
32 | //if(DEBUG)printf("data[%d]:%.6e\n",i,data[i]); //DEBUG |
---|
33 | } |
---|
34 | if(abs_FastD(data[i])>absExt){ |
---|
35 | absExt=abs_FastD(data[i]); |
---|
36 | extIdx=i; |
---|
37 | } |
---|
38 | } |
---|
39 | int patternIdx; //Starting Index of Pattern |
---|
40 | patternIdx=(extIdx/p->sbSize)*p->sbSize; |
---|
41 | |
---|
42 | float patternExt=data[extIdx]; |
---|
43 | bp->binSize=2*p->usedEb; |
---|
44 | |
---|
45 | //if(DEBUG){printf("Extremum : data[%d] = %.6e\n",extIdx,patternExt);} //DEBUG |
---|
46 | //if(DEBUG){printf("patternIdx: %d\n",patternIdx);} //DEBUG |
---|
47 | |
---|
48 | //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("pattern[%d]=data[%d]=%.6e Quantized:%d\n",i,patternIdx+i,data[patternIdx+i],pastri_float_quantize(data[patternIdx+i]/binSize) );} }//DEBUG |
---|
49 | |
---|
50 | //int64_t *patternQ=(int64_t*)(outBuf+15); //Possible Improvement! |
---|
51 | |
---|
52 | |
---|
53 | for(i=0;i<p->sbSize;i++){ |
---|
54 | patternQ[i]=pastri_float_quantize(data[patternIdx+i],bp->binSize); |
---|
55 | if(D_W){printf("patternQ[%d]=%ld\n",i,patternQ[i]);} |
---|
56 | } |
---|
57 | |
---|
58 | bp->patternBits=bitsNeeded_float((abs_FastD(patternExt)/bp->binSize)+1)+1; |
---|
59 | bp->scaleBits=bp->patternBits; |
---|
60 | bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->scaleBits-1))-1); |
---|
61 | //if(DEBUG){printf("(patternExt/binSize)+1: %.6e\n",(patternExt/binSize)+1);} //DEBUG |
---|
62 | //if(DEBUG){printf("scaleBits=patternBits: %d\n",scaleBits);} //DEBUG |
---|
63 | if(D_W){printf("scalesBinSize: %.6e\n",bp->scalesBinSize);} //DEBUG |
---|
64 | |
---|
65 | //Calculate Scales. |
---|
66 | //The index part of the input buffer will be reused to hold Scale, Pattern, etc. values. |
---|
67 | int localExtIdx=extIdx%p->sbSize; //Local extremum index. This is not the actual extremum of the current sb, but rather the index that correspond to the global (block) extremum. |
---|
68 | //int64_t *scalesQ=(int64_t*)(outBuf+15+p->sbSize*8); //Possible Improvement! |
---|
69 | int patternExtZero=(patternExt==0); |
---|
70 | //if(DEBUG){printf("patternExtZero: %d\n",patternExtZero);} //DEBUG |
---|
71 | for(sb=0;sb<p->sbNum;sb++){ |
---|
72 | //scales[sb]=data[sb*p->sbSize+localExtIdx]/patternExt; |
---|
73 | //scales[sb]=patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt; |
---|
74 | //assert(scales[sb]<=1); |
---|
75 | scalesQ[sb]=pastri_float_quantize((patternExtZero ? 0 : data[sb*p->sbSize+localExtIdx]/patternExt),bp->scalesBinSize); |
---|
76 | if(D_W){printf("scalesQ[%d]=%ld\n",sb,scalesQ[sb]);} |
---|
77 | } |
---|
78 | //if(DEBUG){for(i=0;i<p->sbSize;i++){printf("scalesQ[%d]=%ld \n",i,scalesQ[i]);}} //DEBUG |
---|
79 | |
---|
80 | //int64_t *ECQ=(int64_t*)(outBuf+p->bSize*8); //ECQ is written into outBuf, just be careful when handling it. |
---|
81 | |
---|
82 | //uint64_t wVal; |
---|
83 | bp->ECQExt=0; |
---|
84 | int _1DIdx; |
---|
85 | bp->ECQ1s=0; |
---|
86 | bp->ECQOthers=0; |
---|
87 | float PS_binSize=bp->scalesBinSize*bp->binSize; |
---|
88 | for(sb=0;sb<p->sbNum;sb++){ |
---|
89 | for(i=0;i<p->sbSize;i++){ |
---|
90 | _1DIdx=sb*p->sbSize+i; |
---|
91 | ECQ[_1DIdx]=pastri_float_quantize( (scalesQ[sb]*patternQ[i]*PS_binSize-data[_1DIdx]),bp->binSize ); |
---|
92 | float absECQ=abs_FastD(ECQ[_1DIdx]); |
---|
93 | if(absECQ > bp->ECQExt) |
---|
94 | bp->ECQExt=absECQ; |
---|
95 | //if(DEBUG){printf("EC[%d]: %.6e Quantized:%ld \n",_1DIdx,(scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-data[_1DIdx]),ECQ[_1DIdx]);} //DEBUG |
---|
96 | switch (ECQ[_1DIdx]){ |
---|
97 | case 0: |
---|
98 | //ECQ0s++; //Currently not needed |
---|
99 | break; |
---|
100 | case 1: |
---|
101 | bp->ECQ1s++; |
---|
102 | break; |
---|
103 | case -1: |
---|
104 | bp->ECQ1s++; |
---|
105 | break; |
---|
106 | default: |
---|
107 | bp->ECQOthers++; |
---|
108 | break; |
---|
109 | } |
---|
110 | } |
---|
111 | } |
---|
112 | |
---|
113 | /* |
---|
114 | //DEBUG: Self-check. Remove this later. |
---|
115 | for(sb=0;sb<p->sbNum;sb++){ |
---|
116 | for(i=0;i<p->sbSize;i++){ |
---|
117 | _1DIdx=sb*p->sbSize+i; |
---|
118 | float decompressed=scalesQ[sb]*patternQ[i]*scalesBinSize*binSize-ECQ[_1DIdx]*binSize; |
---|
119 | if(abs_FastD(decompressed-data[_1DIdx])>(p->usedEb)){ |
---|
120 | printf("p->usedEb=%.6e\n",p->usedEb); |
---|
121 | printf("data[%d]=%.6e decompressed[%d]=%.6e diff=%.6e\n",_1DIdx,data[_1DIdx],_1DIdx,decompressed,abs_FastD(data[_1DIdx]-decompressed)); |
---|
122 | assert(0); |
---|
123 | } |
---|
124 | } |
---|
125 | } |
---|
126 | */ |
---|
127 | } |
---|
128 | |
---|
129 | static inline void pastri_float_Encode(float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ,pastri_params *p,pastri_blockParams* bp,unsigned char* outBuf,int *numOutBytes){ |
---|
130 | bp->ECQBits=bitsNeeded_UI64(bp->ECQExt)+1; |
---|
131 | bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); |
---|
132 | //(*numOutBytes)=0; |
---|
133 | |
---|
134 | int i; |
---|
135 | |
---|
136 | //Encode: 3 options: |
---|
137 | //Compressed, Sparse ECQ |
---|
138 | //Compressed, Non-Sparse ECQ |
---|
139 | //Uncompressed, Sparse Data |
---|
140 | //Uncompressed, Non-spsarse Data |
---|
141 | |
---|
142 | unsigned int UCSparseBits; //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, nonZeros, {indexes, data} |
---|
143 | unsigned int UCNonSparseBits; //Uncompressed, NonSparse bits. Includes: mode, data |
---|
144 | unsigned int CSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} |
---|
145 | unsigned int CNonSparseBits; //Includes: mode, compressedBytes, patternBits, ECQBits,P, S, {ECQ} |
---|
146 | //int BOOKKEEPINGBITS=120; //Includes: mode, compressedBytes, patternBits, ECQBits (8+64+32+8+8) //Moved to much earlier! |
---|
147 | |
---|
148 | //Consider: ECQ0s, ECQ1s, ECQOthers. Number of following values in ECQ: {0}, {1,-1}, { val<=-2, val>=2} |
---|
149 | //ECQ0s is actually not needed, but others are needed. |
---|
150 | |
---|
151 | UCSparseBits = p->dataSize*(1 + 2 + bp->nonZeros*16); //64 bits for 4 indexes, 64 bit for data. |
---|
152 | UCNonSparseBits = p->dataSize*(1 + p->bSize*8); |
---|
153 | bp->numOutliers=bp->ECQ1s+bp->ECQOthers; |
---|
154 | if(bp->ECQBits==2){ |
---|
155 | CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(1+bp->_1DIdxBits); |
---|
156 | CNonSparseBits = p->dataSize*(1+4+1+1) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s ; //Or: ECQ0s+ECQ1s*2; |
---|
157 | }else{ //ECQBits>2 |
---|
158 | CSparseBits = p->dataSize*(1+4+1+1+2) + bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + bp->ECQ1s*(2+bp->_1DIdxBits) + bp->ECQOthers*(1+bp->_1DIdxBits+bp->ECQBits); |
---|
159 | //CNonSparseBits = 8+32+8+8+ patternBits*p->sbSize + scaleBits*p->sbNum + p->bSize + ECQ0s + ECQ1s*3 + ECQOthers*(2+ECQBits); |
---|
160 | CNonSparseBits = p->dataSize*(1+4+1+1)+ bp->patternBits*p->sbSize + bp->scaleBits*p->sbNum + p->bSize + bp->ECQ1s*2 + bp->ECQOthers*(1+bp->ECQBits); |
---|
161 | } |
---|
162 | |
---|
163 | int UCSparseBytes=(UCSparseBits+7)/8; |
---|
164 | int UCNonSparseBytes=(UCNonSparseBits+7)/8; |
---|
165 | int CSparseBytes=(CSparseBits+7)/8; |
---|
166 | int CNonSparseBytes=(CNonSparseBits+7)/8; |
---|
167 | uint64_t bitPos=0; |
---|
168 | uint64_t bytePos=0; |
---|
169 | int i0,i1,i2,i3; |
---|
170 | int _1DIdx; |
---|
171 | |
---|
172 | //*(uint16_t*)(&outBuf[1])=p->idxOffset[0]; |
---|
173 | //*(uint16_t*)(&outBuf[3])=p->idxOffset[1]; |
---|
174 | //*(uint16_t*)(&outBuf[5])=p->idxOffset[2]; |
---|
175 | //*(uint16_t*)(&outBuf[7])=p->idxOffset[3]; |
---|
176 | |
---|
177 | if(D_W){printf("ECQ0s:%d ECQ1s:%d ECQOthers:%d Total:%d\n",p->bSize-bp->ECQ1s-bp->ECQOthers,bp->ECQ1s,bp->ECQOthers,p->bSize);} //DEBUG |
---|
178 | if(D_W){printf("numOutliers:%d\n",bp->numOutliers);} //DEBUG |
---|
179 | |
---|
180 | //**************************************************************************************** |
---|
181 | //if(0){ //DEBUG |
---|
182 | //W:UCSparse |
---|
183 | if((UCSparseBytes<UCNonSparseBytes) && (UCSparseBytes<CSparseBytes) && (UCSparseBytes<CNonSparseBytes) ){ |
---|
184 | //Uncompressed, Sparse bits. Just like the original GAMESS data. Includes: mode, indexOffsets, nonZeros, indexes, data |
---|
185 | *numOutBytes=UCSparseBytes; |
---|
186 | if(D_G){printf("UCSparse\n");} //DEBUG |
---|
187 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
188 | outBuf[0]=0; //mode |
---|
189 | |
---|
190 | //*(uint16_t*)(&outBuf[9])=nonZeros; |
---|
191 | //bytePos=11;//0:mode, 1-8:indexOffsets 9-10:NonZeros. So start from 11. |
---|
192 | *(uint16_t*)(&outBuf[1])=bp->nonZeros; |
---|
193 | bytePos=3;//0:mode, 2-3:NonZeros. So start from 3. |
---|
194 | |
---|
195 | for(i0=0;i0<p->idxRange[0];i0++) |
---|
196 | for(i1=0;i1<p->idxRange[1];i1++) |
---|
197 | for(i2=0;i2<p->idxRange[2];i2++) |
---|
198 | for(i3=0;i3<p->idxRange[3];i3++){ |
---|
199 | _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; |
---|
200 | if(abs_FastD(data[_1DIdx])>p->usedEb){ |
---|
201 | //*(uint16_t*)(&outBuf[bytePos])=i0+1+p->idxOffset[0]; |
---|
202 | *(uint16_t*)(&outBuf[bytePos])=i0; |
---|
203 | bytePos+=2; |
---|
204 | //*(uint16_t*)(&outBuf[bytePos])=i1+1+p->idxOffset[1]; |
---|
205 | *(uint16_t*)(&outBuf[bytePos])=i1; |
---|
206 | bytePos+=2; |
---|
207 | //*(uint16_t*)(&outBuf[bytePos])=i2+1+p->idxOffset[2]; |
---|
208 | *(uint16_t*)(&outBuf[bytePos])=i2; |
---|
209 | bytePos+=2; |
---|
210 | //*(uint16_t*)(&outBuf[bytePos])=i3+1+p->idxOffset[3]; |
---|
211 | *(uint16_t*)(&outBuf[bytePos])=i3; |
---|
212 | bytePos+=2; |
---|
213 | |
---|
214 | *(float*)(&outBuf[bytePos])=data[_1DIdx]; |
---|
215 | bytePos+=p->dataSize; |
---|
216 | } |
---|
217 | } |
---|
218 | |
---|
219 | if(D_G)printf("UCSparseBytes:%d \n",UCSparseBytes); //DEBUG |
---|
220 | |
---|
221 | //**************************************************************************************** |
---|
222 | //}else if(0){ //DEBUG |
---|
223 | //W:UCNonSparse |
---|
224 | }else if((UCNonSparseBytes<UCSparseBytes) && (UCNonSparseBytes<CSparseBytes) && (UCNonSparseBytes<CNonSparseBytes) ){ |
---|
225 | //Uncompressed, NonSparse bits. Includes: mode, indexOffsets, data |
---|
226 | *numOutBytes=UCNonSparseBytes; |
---|
227 | if(D_G){printf("UCNonSparse\n");} //DEBUG |
---|
228 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
229 | outBuf[0]=1; //mode |
---|
230 | |
---|
231 | //memcpy(&outBuf[9], &inBuf[p->bSize*8], UCNonSparseBytes-9); |
---|
232 | memcpy(&outBuf[1], data, p->bSize*p->dataSize); |
---|
233 | |
---|
234 | if(D_G)printf("UCNonSparseBytes:%d \n",UCNonSparseBytes); //DEBUG |
---|
235 | /* |
---|
236 | for(i=0;i<UCNonSparseBytes-17;i++){ |
---|
237 | printf("%d ",inBuf[p->bSize*8+i]); |
---|
238 | } |
---|
239 | printf("\n"); |
---|
240 | for(i=0;i<UCNonSparseBytes-17;i++){ |
---|
241 | printf("%d ",outBuf[17+i]); |
---|
242 | } |
---|
243 | printf("\n"); |
---|
244 | */ |
---|
245 | //**************************************************************************************** |
---|
246 | //}else if(1){ //DEBUG |
---|
247 | //W:CSparse |
---|
248 | }else if((CSparseBytes<UCNonSparseBytes) && (CSparseBytes<UCSparseBytes) && (CSparseBytes<CNonSparseBytes) ){ |
---|
249 | //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,numOutliers,P, S, {Indexes(Sparse), ECQ} |
---|
250 | *numOutBytes=CSparseBytes; |
---|
251 | if(D_G){printf("CSparse\n");} //DEBUG |
---|
252 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
253 | //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG |
---|
254 | outBuf[0]=2; //mode |
---|
255 | |
---|
256 | ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. |
---|
257 | //outBuf[13]=patternBits; |
---|
258 | //outBuf[14]=ECQBits; |
---|
259 | ////Currently, we are at the end of 15th byte. |
---|
260 | //*(uint16_t*)(&outBuf[15])=numOutliers; |
---|
261 | //bitPos=17*8; //Currently, we are at the end of 17th byte. |
---|
262 | |
---|
263 | //outBuf bytes [1:4] are reserved for compressedBytes. |
---|
264 | outBuf[5]=bp->patternBits; |
---|
265 | outBuf[6]=bp->ECQBits; |
---|
266 | //Currently, we are at the end of 7th byte. |
---|
267 | |
---|
268 | *(uint16_t*)(&outBuf[7])=bp->numOutliers; |
---|
269 | //Now, we are at the end of 9th byte. |
---|
270 | bitPos=9*8; |
---|
271 | |
---|
272 | //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG |
---|
273 | |
---|
274 | for(i=0;i<p->sbSize;i++){ |
---|
275 | writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point |
---|
276 | } |
---|
277 | //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG |
---|
278 | for(i=0;i<p->sbNum;i++){ |
---|
279 | writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale |
---|
280 | } |
---|
281 | //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG |
---|
282 | //if(DEBUG)printf("ECQBits:%d\n",ECQBits); |
---|
283 | switch(bp->ECQBits){ |
---|
284 | case 2: |
---|
285 | for(i=0;i<p->bSize;i++){ |
---|
286 | switch(ECQ[i]){ |
---|
287 | case 0: |
---|
288 | break; |
---|
289 | case 1: |
---|
290 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x0\n",i,ECQ[i]); //DEBUG |
---|
291 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
292 | //writeBits_Fast(outBuf,&bitPos,2,0x10); |
---|
293 | //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 |
---|
294 | //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 |
---|
295 | writeBits_Fast(outBuf,&bitPos,1,0);//0x00 |
---|
296 | break; |
---|
297 | case -1: |
---|
298 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG |
---|
299 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
300 | //writeBits_Fast(outBuf,&bitPos,2,0x11); |
---|
301 | //writeBits_Fast(outBuf,&bitPos,2,1);//0x01 |
---|
302 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
303 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
304 | break; |
---|
305 | default: |
---|
306 | assert(0); |
---|
307 | break; |
---|
308 | } |
---|
309 | } |
---|
310 | break; |
---|
311 | default: //ECQBits>2 |
---|
312 | for(i=0;i<p->bSize;i++){ |
---|
313 | switch(ECQ[i]){ |
---|
314 | case 0: |
---|
315 | break; |
---|
316 | case 1: |
---|
317 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x00\n",i,ECQ[i]); //DEBUG |
---|
318 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
319 | //writeBits_Fast(outBuf,&bitPos,3,0);//0x000 |
---|
320 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
321 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
322 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
323 | break; |
---|
324 | case -1: |
---|
325 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01\n",i,ECQ[i]); //DEBUG |
---|
326 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
327 | //writeBits_Fast(outBuf,&bitPos,3,1);//0x001 |
---|
328 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
329 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
330 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
331 | break; |
---|
332 | default: |
---|
333 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1 0x%lx\n",i,ECQ[i],ECQ[i]); //DEBUG |
---|
334 | writeBits_Fast(outBuf,&bitPos,bp->_1DIdxBits,i); |
---|
335 | //writeBits_Fast(outBuf,&bitPos,2+ECQBits,((uint64_t)0x11<<ECQBits)|ECQ[i]); |
---|
336 | //writeBits_Fast(outBuf,&bitPos,2+ECQBits,(ECQ[i]&((uint64_t)0x00<<ECQBits))|((uint64_t)0x01<<ECQBits)); |
---|
337 | //writeBits_Fast(outBuf,&bitPos,1,0); |
---|
338 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
339 | writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); |
---|
340 | break; |
---|
341 | } |
---|
342 | } |
---|
343 | break; |
---|
344 | } |
---|
345 | |
---|
346 | //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG |
---|
347 | if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG |
---|
348 | |
---|
349 | |
---|
350 | uint32_t bytePos=(bitPos+7)/8; |
---|
351 | //*(uint32_t*)(&outBuf[9])=bytePos; |
---|
352 | *(uint32_t*)(&outBuf[1])=bytePos; |
---|
353 | |
---|
354 | if(D_G)printf("bitPos:%ld CSparseBits:%d bytePos:%d CSparseBytes:%d\n",bitPos,CSparseBits,bytePos,CSparseBytes); //DEBUG |
---|
355 | if(D_G){assert(bitPos==CSparseBits);} |
---|
356 | |
---|
357 | //**************************************************************************************** |
---|
358 | //W:CNonSparse |
---|
359 | }else { |
---|
360 | //Includes: mode, indexOffsets, compressedBytes, patternBits, ECQBits,P, S, {ECQ} |
---|
361 | *numOutBytes=CNonSparseBytes; |
---|
362 | if(D_G){printf("CNonSparse\n");} //DEBUG |
---|
363 | if(D_G)printf("ECQBits:%d\n",bp->ECQBits); //DEBUG |
---|
364 | //if(DEBUG){printf("patternBits:%d _1DIdxBits:%d\n",patternBits,_1DIdxBits);} //DEBUG |
---|
365 | outBuf[0]=3; //mode |
---|
366 | |
---|
367 | ////outBuf bytes [1:8] are indexOffsets, which are already written. outBuf bytes [9:12] are reserved for compressedBytes. |
---|
368 | //outBuf[13]=patternBits; |
---|
369 | //outBuf[14]=ECQBits; |
---|
370 | //bitPos=15*8; //Currently, we are at the end of 15th byte. |
---|
371 | |
---|
372 | //outBuf bytes [1:4] are reserved for compressedBytes. |
---|
373 | outBuf[5]=bp->patternBits; |
---|
374 | outBuf[6]=bp->ECQBits; |
---|
375 | bitPos=7*8; //Currently, we are at the end of 7th byte. |
---|
376 | |
---|
377 | //if(DEBUG){printf("bitPos_B:%ld\n",bitPos);} //DEBUG |
---|
378 | |
---|
379 | for(i=0;i<p->sbSize;i++){ |
---|
380 | writeBits_Fast(outBuf,&bitPos,bp->patternBits,patternQ[i]);//Pattern point |
---|
381 | } |
---|
382 | //if(DEBUG){printf("bitPos_P:%ld\n",bitPos);} //DEBUG |
---|
383 | for(i=0;i<p->sbNum;i++){ |
---|
384 | writeBits_Fast(outBuf,&bitPos,bp->scaleBits,scalesQ[i]);//Scale |
---|
385 | } |
---|
386 | //if(DEBUG){printf("bitPos_S:%ld\n",bitPos);} //DEBUG |
---|
387 | //if(DEBUG)printf("ECQBits:%d\n",ECQBits); |
---|
388 | switch(bp->ECQBits){ |
---|
389 | case 2: |
---|
390 | for(i=0;i<p->bSize;i++){ |
---|
391 | switch(ECQ[i]){ |
---|
392 | case 0: |
---|
393 | //if(DEBUG)printf("Index:%d ECQ:%d Written:0x1\n",i,ECQ[i]); //DEBUG |
---|
394 | writeBits_Fast(outBuf,&bitPos,1,1);//0x1 |
---|
395 | break; |
---|
396 | case 1: |
---|
397 | //if(DEBUG)printf("Index:%d ECQ:%d Written:0x00\n",i,ECQ[i]); //DEBUG |
---|
398 | //writeBits_Fast(outBuf,&bitPos,2,0);//0x00 |
---|
399 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
400 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
401 | break; |
---|
402 | case -1: |
---|
403 | //if(DEBUG)printf("Index:%d ECQ:%d Written:0x01\n",i,ECQ[i]); //DEBUG |
---|
404 | //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 |
---|
405 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
406 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
407 | break; |
---|
408 | default: |
---|
409 | assert(0); |
---|
410 | break; |
---|
411 | } |
---|
412 | } |
---|
413 | break; |
---|
414 | default: //ECQBits>2 |
---|
415 | //if(DEBUG) printf("AMG_W1:bitPos:%ld\n",bitPos); //DEBUG |
---|
416 | for(i=0;i<p->bSize;i++){ |
---|
417 | //if(DEBUG){printf("AMG_W3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
418 | //if(DEBUG) printf("AMG_W2:bitPos:%ld\n",bitPos); //DEBUG |
---|
419 | //if(DEBUG) printf("ECQ[%d]:%ld\n",i,ECQ[i]); //DEBUG |
---|
420 | switch(ECQ[i]){ |
---|
421 | case 0: |
---|
422 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x1\n",i,ECQ[i]); //DEBUG |
---|
423 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
424 | //temp1=bitPos; |
---|
425 | writeBits_Fast(outBuf,&bitPos,1,1); //0x1 |
---|
426 | //wVal=1; writeBits_Fast(outBuf,&bitPos,1,wVal); //0x1 |
---|
427 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
428 | break; |
---|
429 | case 1: |
---|
430 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x000\n",i,ECQ[i]); //DEBUG |
---|
431 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
432 | //temp1=bitPos; |
---|
433 | //writeBits_Fast(outBuf,&bitPos,3,0); //0x000 |
---|
434 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
435 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
436 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
437 | //wVal=0; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x000 |
---|
438 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
439 | break; |
---|
440 | case -1: |
---|
441 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x001\n",i,ECQ[i]); //DEBUG |
---|
442 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
443 | //temp1=bitPos; |
---|
444 | //writeBits_Fast(outBuf,&bitPos,3,8); //0x001 |
---|
445 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
446 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
447 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
448 | //wVal=8; writeBits_Fast(outBuf,&bitPos,3,wVal); //0x001 |
---|
449 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
450 | break; |
---|
451 | default: |
---|
452 | //if(DEBUG)printf("Index:%d ECQ:%ld Written:0x01 0x%lx\n",i,ECQ[i]); //DEBUG |
---|
453 | //if(DEBUG){printf("AMG_WB3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&outBuf[bitPos/8]));}; //DEBUG |
---|
454 | //temp1=bitPos; |
---|
455 | //writeBits_Fast(outBuf,&bitPos,2,2); //0x01 |
---|
456 | writeBits_Fast(outBuf,&bitPos,1,0); |
---|
457 | writeBits_Fast(outBuf,&bitPos,1,1); |
---|
458 | //wVal=2; writeBits_Fast(outBuf,&bitPos,2,wVal); //0x01 |
---|
459 | writeBits_Fast(outBuf,&bitPos,bp->ECQBits,ECQ[i]); |
---|
460 | //if(DEBUG){printf("AMG_WA3:bitPos:%ld buffer[%ld]=0x%lx\n",temp1,temp1/8,*(uint64_t*)(&outBuf[temp1/8]));}; //DEBUG |
---|
461 | break; |
---|
462 | } |
---|
463 | } |
---|
464 | break; |
---|
465 | } |
---|
466 | |
---|
467 | //if(DEBUG){printf("bitPos_E:%ld\n",bitPos);} //DEBUG |
---|
468 | if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: ECQBits:%d numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG |
---|
469 | |
---|
470 | |
---|
471 | |
---|
472 | uint32_t bytePos=(bitPos+7)/8; |
---|
473 | //*(uint32_t*)(&outBuf[9])=bytePos; |
---|
474 | *(uint32_t*)(&outBuf[1])=bytePos; |
---|
475 | |
---|
476 | if(D_G)printf("bitPos:%ld CNonSparseBits:%d bytePos:%d CNonSparseBytes:%d\n",bitPos,CNonSparseBits,bytePos,CNonSparseBytes); //DEBUG |
---|
477 | if(D_G){assert(bitPos==CNonSparseBits);} |
---|
478 | |
---|
479 | } |
---|
480 | //for(i=213;i<233;i++)if(DEBUG)printf("AMG_WE:bitPos:%d buffer[%d]=0x%lx\n",i*8,i,*(uint64_t*)(&outBuf[i])); //DEBUG |
---|
481 | |
---|
482 | } |
---|
483 | static inline int pastri_float_Compress(unsigned char*inBuf,pastri_params *p,unsigned char*outBuf,int *numOutBytes){ |
---|
484 | pastri_blockParams bp; |
---|
485 | |
---|
486 | if(D_G2){printf("Parameters: dataSize:%d\n",p->dataSize);} //DEBUG |
---|
487 | if(D_G2){printf("Parameters: bfs:%d %d %d %d originalEb:%.3e\n",p->bf[0],p->bf[1],p->bf[2],p->bf[3],p->usedEb);} //DEBUG |
---|
488 | if(D_G2){printf("Parameters: idxRanges:%d %d %d %d\n",p->idxRange[0],p->idxRange[1],p->idxRange[2],p->idxRange[3]);} //DEBUG |
---|
489 | if(D_G2){printf("Parameters: sbSize:%d sbNum:%d bSize:%d\n",p->sbSize,p->sbNum,p->bSize); }//DEBUG |
---|
490 | |
---|
491 | int64_t patternQ[MAX_PS_SIZE]; |
---|
492 | int64_t scalesQ[MAX_PS_SIZE]; |
---|
493 | int64_t ECQ[MAX_BLOCK_SIZE]; |
---|
494 | |
---|
495 | float *data; |
---|
496 | data=(float*)inBuf; |
---|
497 | |
---|
498 | //STEP 0: PREPROCESSING: |
---|
499 | //This step can include flattening the block, determining the period, etc. |
---|
500 | //Currently not needed. |
---|
501 | |
---|
502 | //STEP 1: PATTERN MATCH |
---|
503 | pastri_float_PatternMatch(data,p,&bp,patternQ,scalesQ,ECQ); |
---|
504 | |
---|
505 | //STEP 2: ENCODING(Include QUANTIZE) |
---|
506 | pastri_float_Encode(data,patternQ,scalesQ,ECQ,p,&bp,outBuf,numOutBytes); |
---|
507 | |
---|
508 | |
---|
509 | return 0; |
---|
510 | } |
---|
511 | |
---|
/*
 * Undo quantization: convert a bin index back to a value by scaling it with
 * the bin width.
 *
 * q        quantized bin index
 * binSize  bin width used when quantizing
 * returns  q multiplied by binSize, as a float
 */
static inline float pastri_float_InverseQuantization(int64_t q, float binSize){
  const float binIndex=(float)q;
  return binIndex*binSize;
}
---|
515 | |
---|
516 | static inline void pastri_float_PredictData(pastri_params *p,pastri_blockParams *bp,float *data,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ |
---|
517 | int j; |
---|
518 | float PS_binSize=bp->scalesBinSize*bp->binSize; |
---|
519 | for(j=0;j<p->bSize;j++){ |
---|
520 | //data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*PS_binSize - ECQ[j]*bp->binSize; |
---|
521 | data[j]=pastri_float_InverseQuantization(scalesQ[j/p->sbSize]*patternQ[j%p->sbSize],PS_binSize) - pastri_float_InverseQuantization(ECQ[j],bp->binSize); |
---|
522 | } |
---|
523 | } |
---|
524 | |
---|
525 | static inline void pastri_float_Decode(unsigned char*inBuf,pastri_params *p,pastri_blockParams *bp,unsigned char*outBuf,int *numReadBytes,int64_t* patternQ,int64_t* scalesQ,int64_t* ECQ){ |
---|
526 | int j; |
---|
527 | bp->_1DIdxBits=bitsNeeded_UI64(p->bSize); |
---|
528 | //float *data=(float*)(outBuf+p->bSize*8); |
---|
529 | float *data=(float*)(outBuf); |
---|
530 | int i0,i1,i2,i3; |
---|
531 | //uint16_t *idx0,*idx1,*idx2,*idx3; |
---|
532 | int _1DIdx; |
---|
533 | |
---|
534 | int64_t ECQTemp; |
---|
535 | uint64_t bytePos=0; |
---|
536 | uint64_t bitPos=0; |
---|
537 | uint64_t temp,temp2; |
---|
538 | //int sb,localIdx; |
---|
539 | |
---|
540 | |
---|
541 | //idx0=(uint16_t*)(outBuf ); |
---|
542 | //idx1=(uint16_t*)(outBuf+p->bSize*2); |
---|
543 | //idx2=(uint16_t*)(outBuf+p->bSize*4); |
---|
544 | //idx3=(uint16_t*)(outBuf+p->bSize*6); |
---|
545 | //p->idxOffset[0]=*(uint32_t*)(&inBuf[1]); |
---|
546 | //p->idxOffset[1]=*(uint32_t*)(&inBuf[3]); |
---|
547 | //p->idxOffset[2]=*(uint32_t*)(&inBuf[5]); |
---|
548 | //p->idxOffset[3]=*(uint32_t*)(&inBuf[7]); |
---|
549 | /* |
---|
550 | for(i0=0;i0<p->idxRange[0];i0++) |
---|
551 | for(i1=0;i1<p->idxRange[1];i1++) |
---|
552 | for(i2=0;i2<p->idxRange[2];i2++) |
---|
553 | for(i3=0;i3<p->idxRange[3];i3++){ |
---|
554 | //_1DIdx=i0*p->idxRange[1]*p->idxRange[2]*p->idxRange[3]+i1*p->idxRange[2]*p->idxRange[3]+i2*p->idxRange[3]+i3; |
---|
555 | _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; |
---|
556 | idx0[_1DIdx]=i0+1+p->idxOffset[0]; |
---|
557 | idx1[_1DIdx]=i1+1+p->idxOffset[1]; |
---|
558 | idx2[_1DIdx]=i2+1+p->idxOffset[2]; |
---|
559 | idx3[_1DIdx]=i3+1+p->idxOffset[3]; |
---|
560 | } |
---|
561 | */ |
---|
562 | |
---|
563 | //*numOutBytes=p->bSize*16; |
---|
564 | |
---|
565 | //inBuf[0] is "mode" |
---|
566 | switch(inBuf[0]){ |
---|
567 | //R:UCSparse |
---|
568 | case 0: |
---|
569 | if(D_G){printf("\nDC:UCSparse\n");} //DEBUG |
---|
570 | //bp->nonZeros=*(uint16_t*)(&inBuf[9]); |
---|
571 | //bytePos=11; |
---|
572 | bp->nonZeros=*(uint16_t*)(&inBuf[1]); |
---|
573 | bytePos=3; |
---|
574 | for(j=0;j<p->bSize;j++){ |
---|
575 | data[j]=0; |
---|
576 | } |
---|
577 | for(j=0;j<bp->nonZeros;j++){ |
---|
578 | //i0=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[0]; //i0 |
---|
579 | i0=*(uint16_t*)(&inBuf[bytePos]); //i0 |
---|
580 | bytePos+=2; |
---|
581 | //i1=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[1]; //i1 |
---|
582 | i1=*(uint16_t*)(&inBuf[bytePos]); //i1 |
---|
583 | bytePos+=2; |
---|
584 | //i2=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[2]; //i2 |
---|
585 | i2=*(uint16_t*)(&inBuf[bytePos]); //i2 |
---|
586 | bytePos+=2; |
---|
587 | //i3=*(uint16_t*)(&inBuf[bytePos])-1-p->idxOffset[3]; //i3 |
---|
588 | i3=*(uint16_t*)(&inBuf[bytePos]); //i3 |
---|
589 | bytePos+=2; |
---|
590 | _1DIdx=p->idxRange[3]*(i2+p->idxRange[2]*(i1+i0*p->idxRange[1]))+i3; |
---|
591 | data[_1DIdx]=*(float*)(&inBuf[bytePos]); |
---|
592 | bytePos+=8; |
---|
593 | } |
---|
594 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
595 | break; |
---|
596 | //R:UCNonSparse |
---|
597 | case 1: |
---|
598 | if(D_G){printf("\nDC:UCNonSparse\n");} //DEBUG |
---|
599 | //memcpy(&outBuf[p->bSize*8], &inBuf[9], p->bSize*8); |
---|
600 | memcpy(data, &inBuf[1], p->bSize*8); |
---|
601 | bytePos=p->bSize*8; |
---|
602 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
603 | break; |
---|
604 | //R:CSparse |
---|
605 | case 2: |
---|
606 | if(D_G){printf("\nDC:CSparse\n");} //DEBUG |
---|
607 | //for(j=0;j<p->bSize;j++){ |
---|
608 | // data[j]=0; |
---|
609 | //} |
---|
610 | |
---|
611 | //bp->patternBits=inBuf[13]; |
---|
612 | //bp->ECQBits=inBuf[14]; |
---|
613 | |
---|
614 | bp->patternBits=inBuf[5]; |
---|
615 | bp->ECQBits=inBuf[6]; |
---|
616 | |
---|
617 | if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG |
---|
618 | |
---|
619 | //bp->numOutliers=*(uint16_t*)(&inBuf[15]); |
---|
620 | //bitPos=17*8; |
---|
621 | bp->numOutliers=*(uint16_t*)(&inBuf[7]); |
---|
622 | bitPos=9*8; |
---|
623 | if(D_R){printf("bp->numOutliers:%d\n",bp->numOutliers);} //DEBUG |
---|
624 | |
---|
625 | bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); |
---|
626 | |
---|
627 | bp->binSize=p->usedEb*2; |
---|
628 | |
---|
629 | if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG |
---|
630 | |
---|
631 | for(j=0;j<p->sbSize;j++){ |
---|
632 | patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point |
---|
633 | if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} |
---|
634 | } |
---|
635 | for(j=0;j<p->sbNum;j++){ |
---|
636 | scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale |
---|
637 | if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} |
---|
638 | } |
---|
639 | |
---|
640 | /* //Splitting |
---|
641 | for(j=0;j<p->bSize;j++){ |
---|
642 | data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; |
---|
643 | } |
---|
644 | */ |
---|
645 | for(j=0;j<p->bSize;j++){ |
---|
646 | ECQ[j]=0; |
---|
647 | } |
---|
648 | switch(bp->ECQBits){ |
---|
649 | case 2: |
---|
650 | for(j=0;j<bp->numOutliers;j++){ |
---|
651 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG |
---|
652 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG |
---|
653 | |
---|
654 | _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
655 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
656 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
657 | //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); |
---|
658 | //continue; |
---|
659 | //sb=_1DIdx/p->sbSize; |
---|
660 | //localIdx=_1DIdx%p->sbSize; |
---|
661 | |
---|
662 | ////data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting |
---|
663 | ECQ[_1DIdx]=ECQTemp; |
---|
664 | |
---|
665 | //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG |
---|
666 | } |
---|
667 | break; |
---|
668 | default: //bp->ECQBits>2 |
---|
669 | if(D_C){if(!((bp->ECQBits>=2)||((bp->ECQBits==1) && (bp->numOutliers==0)))){printf("ERROR: bp->ECQBits:%d bp->numOutliers:%d This should not have happened!\n",bp->ECQBits,bp->numOutliers);assert(0);}} //DEBUG |
---|
670 | |
---|
671 | for(j=0;j<bp->numOutliers;j++){ |
---|
672 | _1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
673 | //sb=_1DIdx/p->sbSize; |
---|
674 | //localIdx=_1DIdx%p->sbSize; |
---|
675 | temp=readBits_UI64(inBuf,&bitPos,1); |
---|
676 | //if(DEBUG){printf("temp:%ld\n",temp);} //DEBUG |
---|
677 | switch(temp){ |
---|
678 | case 0: //+-1 |
---|
679 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
680 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
681 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
682 | //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); |
---|
683 | break; |
---|
684 | case 1: //Others |
---|
685 | ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); |
---|
686 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
687 | //if(D_R)printf("R:ECQ[%d]: %ld \n",_1DIdx,ECQTemp); |
---|
688 | break; |
---|
689 | //default: |
---|
690 | // printf("ERROR: Bad 2-bit value: 0x%lx",temp); |
---|
691 | // assert(0); //AMG |
---|
692 | // break; |
---|
693 | } |
---|
694 | |
---|
695 | //data[_1DIdx]-=ECQTemp*bp->binSize;//Splitting |
---|
696 | ECQ[_1DIdx]=ECQTemp; |
---|
697 | |
---|
698 | //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG |
---|
699 | } |
---|
700 | break; |
---|
701 | } |
---|
702 | //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] |
---|
703 | //patternQ=(int64_t*)(inBuf+15); |
---|
704 | //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); |
---|
705 | |
---|
706 | bytePos=(bitPos+7)/8; |
---|
707 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
708 | |
---|
709 | //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) |
---|
710 | pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); |
---|
711 | |
---|
712 | break; |
---|
713 | //R:CNonSparse |
---|
714 | case 3: |
---|
715 | if(D_G){printf("\nDC:CNonSparse\n");} //DEBUG |
---|
716 | |
---|
717 | //for(j=0;j<p->bSize;j++){ |
---|
718 | // data[j]=0; |
---|
719 | //} |
---|
720 | |
---|
721 | //bp->patternBits=inBuf[13]; |
---|
722 | //bp->ECQBits=inBuf[14]; |
---|
723 | |
---|
724 | bp->patternBits=inBuf[5]; |
---|
725 | bp->ECQBits=inBuf[6]; |
---|
726 | |
---|
727 | if(D_R){printf("bp->patternBits:%d bp->ECQBits:%d bp->_1DIdxBits:%d\n",bp->patternBits,bp->ECQBits,bp->_1DIdxBits);} //DEBUG |
---|
728 | |
---|
729 | //bitPos=15*8; |
---|
730 | bitPos=7*8; |
---|
731 | |
---|
732 | bp->scalesBinSize=1/(float)(((uint64_t)1<<(bp->patternBits-1))-1); |
---|
733 | bp->binSize=p->usedEb*2; |
---|
734 | |
---|
735 | if(D_R){printf("bp->scalesBinSize:%.6e bp->binSize:%.6e bp->scalesBinSize*bp->binSize:%.6e\n",bp->scalesBinSize,bp->binSize,bp->scalesBinSize*bp->binSize);} //DEBUG |
---|
736 | |
---|
737 | for(j=0;j<p->sbSize;j++){ |
---|
738 | patternQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Pattern point |
---|
739 | if(D_R){printf("R:patternQ[%d]=%ld\n",j,patternQ[j]);} |
---|
740 | } |
---|
741 | for(j=0;j<p->sbNum;j++){ |
---|
742 | scalesQ[j]=readBits_I64(inBuf,&bitPos,bp->patternBits);//Scale |
---|
743 | if(D_R){printf("R:scalesQ[%d]=%ld\n",j,scalesQ[j]);} |
---|
744 | } |
---|
745 | /* //Splitting |
---|
746 | for(j=0;j<p->bSize;j++){ |
---|
747 | data[j]=scalesQ[j/p->sbSize]*patternQ[j%p->sbSize]*bp->scalesBinSize*bp->binSize; |
---|
748 | //if(DEBUG){printf("DC:PS[%d]=%.6e\n",j,data[j]);} |
---|
749 | } |
---|
750 | */ |
---|
751 | switch(bp->ECQBits){ |
---|
752 | case 2: |
---|
753 | for(j=0;j<p->bSize;j++){ |
---|
754 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG |
---|
755 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG |
---|
756 | //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
757 | temp=readBits_UI64(inBuf,&bitPos,1); |
---|
758 | switch(temp){ |
---|
759 | case 0: |
---|
760 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
761 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
762 | break; |
---|
763 | case 1: |
---|
764 | ECQTemp=0; |
---|
765 | break; |
---|
766 | default: |
---|
767 | assert(0); |
---|
768 | break; |
---|
769 | } |
---|
770 | |
---|
771 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
772 | //continue; |
---|
773 | //sb=_1DIdx/p->sbSize; |
---|
774 | //localIdx=_1DIdx%p->sbSize; |
---|
775 | |
---|
776 | //data[j]-=ECQTemp*bp->binSize; //Splitting |
---|
777 | ECQ[j]=ECQTemp; |
---|
778 | |
---|
779 | //if(DEBUG){printf("decompressed[%d]:%.6e\n",_1DIdx,data[_1DIdx]);} //DEBUG |
---|
780 | } |
---|
781 | break; |
---|
782 | default: //bp->ECQBits>2 |
---|
783 | //if(DEBUG)printf("AMG_R1:bitPos: %ld\n",bitPos); |
---|
784 | |
---|
785 | for(j=0;j<p->bSize;j++){ |
---|
786 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
787 | //if(DEBUG)printf("AMG_R2:bitPos: %ld\n",bitPos); |
---|
788 | |
---|
789 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits));} //DEBUG |
---|
790 | //if(DEBUG){printf("readBits_UI64:%ld\n",readBits_I64(inBuf,&bitPos,2));} //DEBUG |
---|
791 | //_1DIdx=readBits_UI64(inBuf,&bitPos,bp->_1DIdxBits); |
---|
792 | temp=readBits_UI64(inBuf,&bitPos,1); |
---|
793 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
794 | switch(temp){ |
---|
795 | case 0: |
---|
796 | //if(DEBUG)printf("Read:0"); |
---|
797 | temp2=readBits_UI64(inBuf,&bitPos,1); |
---|
798 | switch(temp2){ |
---|
799 | case 0: |
---|
800 | //if(DEBUG)printf("0"); |
---|
801 | ECQTemp=readBits_I64(inBuf,&bitPos,1); |
---|
802 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
803 | //if(DEBUG)printf("R:ECQTemp:%ld\n",ECQTemp); |
---|
804 | ECQTemp= ((ECQTemp<<63)>>63)|(uint64_t)0x1; |
---|
805 | //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); |
---|
806 | break; |
---|
807 | case 1: |
---|
808 | //if(DEBUG)printf("1\n"); |
---|
809 | ECQTemp=readBits_I64(inBuf,&bitPos,bp->ECQBits); |
---|
810 | //if(DEBUG){printf("AMG_R3:bitPos:%ld buffer[%ld]=0x%lx\n",bitPos,bitPos/8,*(uint64_t*)(&inBuf[bitPos/8]));}; //DEBUG |
---|
811 | //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); |
---|
812 | break; |
---|
813 | default: |
---|
814 | assert(0); |
---|
815 | break; |
---|
816 | } |
---|
817 | break; |
---|
818 | case 1: |
---|
819 | //if(DEBUG)printf("Read:1\n"); |
---|
820 | ECQTemp=0; |
---|
821 | //if(DEBUG)printf("R:ECQ[%d]: %ld\n",j,ECQTemp); |
---|
822 | break; |
---|
823 | default: |
---|
824 | assert(0); |
---|
825 | break; |
---|
826 | } |
---|
827 | |
---|
828 | //if(DEBUG){printf("_1DIdx:%ld ECQTemp:0x%ld\n",_1DIdx,ECQTemp);} //DEBUG |
---|
829 | //continue; |
---|
830 | //sb=_1DIdx/p->sbSize; |
---|
831 | //localIdx=_1DIdx%p->sbSize; |
---|
832 | |
---|
833 | //data[j]-=ECQTemp*bp->binSize; //Splitting |
---|
834 | ECQ[j]=ECQTemp; |
---|
835 | |
---|
836 | //if(DEBUG){printf("DC:data[%d]:%.6e\n",j,data[j]);} //DEBUG |
---|
837 | } |
---|
838 | break; |
---|
839 | } |
---|
840 | //static inline uint64_t readBits_UI64(unsigned char* buffer,uint64_t *bitPosPtr,uint64_t numBits){ // numBits must be in range [0:56] |
---|
841 | //patternQ=(int64_t*)(inBuf+15); |
---|
842 | //scalesQ=(int64_t*)(inBuf+15+p->sbSize*8); |
---|
843 | bytePos=(bitPos+7)/8; |
---|
844 | if(D_G){printf("\nDC:bytePos:%ld\n",bytePos);} //DEBUG |
---|
845 | |
---|
846 | //STEP 2: PREDICT DATA(Includes INVERSE QUANTIZATION) |
---|
847 | pastri_float_PredictData(p,bp,data,patternQ,scalesQ,ECQ); |
---|
848 | break; |
---|
849 | |
---|
850 | default: |
---|
851 | assert(0); |
---|
852 | break; |
---|
853 | } |
---|
854 | (*numReadBytes)=bytePos; |
---|
855 | } |
---|
856 | |
---|
857 | static inline void pastri_float_Decompress(unsigned char*inBuf,int dataSize,pastri_params *p,unsigned char*outBuf,int *numReadBytes){ |
---|
858 | int64_t patternQ[MAX_PS_SIZE]; |
---|
859 | int64_t scalesQ[MAX_PS_SIZE]; |
---|
860 | int64_t ECQ[MAX_BLOCK_SIZE]; |
---|
861 | |
---|
862 | pastri_blockParams bp; |
---|
863 | |
---|
864 | //STEP 1: DECODE (Includes PREDICT DATA(Includes INVERSE QUANTIZATION)) |
---|
865 | //(Further steps are called inside pastri_float_Decode function) |
---|
866 | pastri_float_Decode(inBuf,p,&bp,outBuf,numReadBytes,patternQ,scalesQ,ECQ); |
---|
867 | |
---|
868 | return; |
---|
869 | } |
---|
870 | |
---|
871 | //inBuf vs Decompressed |
---|
872 | static inline int pastri_float_Check(unsigned char*inBuf,int dataSize,unsigned char*DC,pastri_params *p){ |
---|
873 | int i; |
---|
874 | |
---|
875 | float *data=(float*)(inBuf); |
---|
876 | float *data_dc=(float*)(DC); |
---|
877 | |
---|
878 | //Comparing Indexes: |
---|
879 | /* |
---|
880 | for(i=0;i<p->bSize;i++){ |
---|
881 | if(idx0[i]!=idx0_dc[i]){ |
---|
882 | printf("idx0[%d]=%d != %d=idx0_dc[%d]",i,idx0[i],idx0_dc[i],i); |
---|
883 | assert(0); |
---|
884 | } |
---|
885 | if(idx1[i]!=idx1_dc[i]){ |
---|
886 | printf("idx1[%d]=%d != %d=idx1_dc[%d]",i,idx1[i],idx1_dc[i],i); |
---|
887 | assert(0); |
---|
888 | } |
---|
889 | if(idx2[i]!=idx2_dc[i]){ |
---|
890 | printf("idx2[%d]=%d != %d=idx2_dc[%d]",i,idx2[i],idx2_dc[i],i); |
---|
891 | assert(0); |
---|
892 | } |
---|
893 | if(idx3[i]!=idx3_dc[i]){ |
---|
894 | printf("idx3[%d]=%d != %d=idx3_dc[%d]",i,idx3[i],idx3_dc[i],i); |
---|
895 | assert(0); |
---|
896 | } |
---|
897 | } |
---|
898 | */ |
---|
899 | |
---|
900 | //Comparing Data: |
---|
901 | for(i=0;i<p->bSize;i++){ |
---|
902 | if(abs_FastD(data[i]-data_dc[i])>p->usedEb){ |
---|
903 | printf("|data[%d]-data_dc[%d]|>originalEb : %.3e - %.3e = %.3e > %.3e\n",i,i,data[i],data_dc[i],abs_FastD(data[i]-data_dc[i]),p->usedEb); |
---|
904 | assert(0); |
---|
905 | } |
---|
906 | } |
---|
907 | return 0; |
---|
908 | } |
---|
909 | |
---|
910 | |
---|
911 | #endif |
---|