Context Navigation

source: thirdparty/SZ/sz/src/sz_float.c @ 9ee2ce3

Revision 9ee2ce3, 240.4 KB checked in by Hal Finkel <hfinkel@…>, 6 years ago (diff)
importing new SZ files
Property mode set to `100644`

Line
1	/**
2	* @file sz_float.c
3	* @author Sheng Di, Dingwen Tao, Xin Liang
4	* @date Aug, 2016
5	* @brief SZ_Init, Compression and Decompression functions
6	* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
7	* See COPYRIGHT in top-level directory.
8	*/
9
10
11	#include <stdio.h>
12	#include <stdlib.h>
13	#include <string.h>
14	#include <unistd.h>
15	#include <math.h>
16	#include "sz.h"
17	#include "CompressElement.h"
18	#include "DynamicByteArray.h"
19	#include "DynamicIntArray.h"
20	#include "TightDataPointStorageF.h"
21	#include "sz_float.h"
22	#include "sz_float_pwr.h"
23	#include "szd_float.h"
24	#include "szd_float_pwr.h"
25	#include "zlib.h"
26	#include "rw.h"
27	#include "sz_float_ts.h"
28	#include "utility.h"
29
/**
 * "Skip" compression: hand back a heap copy of the raw float buffer.
 *
 * @param data       input values (dataLength elements)
 * @param dataLength number of floats in data
 * @param outSize    receives the size of the returned buffer in bytes (0 on failure)
 * @return a newly malloc'ed buffer owned by the caller, or NULL if the allocation failed
 */
unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize)
{
	size_t byteLength = dataLength*sizeof(float);
	unsigned char* out = (unsigned char*)malloc(byteLength);
	if(out==NULL)
	{
		/* nothing was produced; report an empty result instead of copying into NULL */
		*outSize = 0;
		return NULL;
	}
	memcpy(out, data, byteLength);
	*outSize = byteLength;
	return out;
}
37	unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision)
38	{
39	size_t i = 0, radiusIndex;
40	float pred_value = 0, pred_err;
41	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
42	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
43	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
44	for(i=2;i<dataLength;i++)
45	{
46	if(i%confparams_cpr->sampleDistance==0)
47	{
48	//pred_value = 2*oriData[i-1] - oriData[i-2];
49	pred_value = oriData[i-1];
50	pred_err = fabs(pred_value - oriData[i]);
51	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
52	if(radiusIndex>=confparams_cpr->maxRangeRadius)
53	radiusIndex = confparams_cpr->maxRangeRadius - 1;
54	intervals[radiusIndex]++;
55	}
56	}
57	//compute the appropriate number
58	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
59	size_t sum = 0;
60	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
61	{
62	sum += intervals[i];
63	if(sum>targetCount)
64	break;
65	}
66	if(i>=confparams_cpr->maxRangeRadius)
67	i = confparams_cpr->maxRangeRadius-1;
68
69	unsigned int accIntervals = 2*(i+1);
70	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
71
72	if(powerOf2<32)
73	powerOf2 = 32;
74
75	free(intervals);
76	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
77	return powerOf2;
78	}
79
80	unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, double realPrecision)
81	{
82	size_t i,j, index;
83	size_t radiusIndex;
84	float pred_value = 0, pred_err;
85	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
86	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
87	size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance;
88
89	//float max = oriData[0];
90	//float min = oriData[0];
91
92	for(i=1;i<r1;i++)
93	{
94	for(j=1;j<r2;j++)
95	{
96	if((i+j)%confparams_cpr->sampleDistance==0)
97	{
98	index = i*r2+j;
99	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
100	pred_err = fabs(pred_value - oriData[index]);
101	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
102	if(radiusIndex>=confparams_cpr->maxRangeRadius)
103	radiusIndex = confparams_cpr->maxRangeRadius - 1;
104	intervals[radiusIndex]++;
105
106	// if (max < oriData[index]) max = oriData[index];
107	// if (min > oriData[index]) min = oriData[index];
108	}
109	}
110	}
111	//compute the appropriate number
112	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
113	size_t sum = 0;
114	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
115	{
116	sum += intervals[i];
117	if(sum>targetCount)
118	break;
119	}
120	if(i>=confparams_cpr->maxRangeRadius)
121	i = confparams_cpr->maxRangeRadius-1;
122	unsigned int accIntervals = 2*(i+1);
123	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
124
125	if(powerOf2<32)
126	powerOf2 = 32;
127
128	// struct timeval costStart, costEnd;
129	// double cost_est = 0;
130	//
131	// gettimeofday(&costStart, NULL);
132	//
133	// //compute estimate of bit-rate and distortion
134	// double est_br = 0;
135	// double est_psnr = 0;
136	// double c1 = log2(targetCount)+1;
137	// double c2 = -20.0log10(realPrecision) + 20.0log10(max-min) + 10.0*log10(3);
138	//
139	// for (i = 0; i < powerOf2/2; i++)
140	// {
141	// int count = intervals[i];
142	// if (count != 0)
143	// est_br += count*log2(count);
144	// est_psnr += count;
145	// }
146	//
147	// //compute estimate of bit-rate
148	// est_br -= c1*est_psnr;
149	// est_br /= totalSampleSize;
150	// est_br = -est_br;
151	//
152	// //compute estimate of psnr
153	// est_psnr /= totalSampleSize;
154	// printf ("sum of P(i) = %lf\n", est_psnr);
155	// est_psnr = -10.0*log10(est_psnr);
156	// est_psnr += c2;
157	//
158	// printf ("estimate bitrate = %.2f\n", est_br);
159	// printf ("estimate psnr = %.2f\n",est_psnr);
160	//
161	// gettimeofday(&costEnd, NULL);
162	// cost_est = ((costEnd.tv_sec1000000+costEnd.tv_usec)-(costStart.tv_sec1000000+costStart.tv_usec))/1000000.0;
163	//
164	// printf ("analysis time = %f\n", cost_est);
165
166	free(intervals);
167	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
168	return powerOf2;
169	}
170
171	unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
172	{
173	size_t i,j,k, index;
174	size_t radiusIndex;
175	size_t r23=r2*r3;
176	float pred_value = 0, pred_err;
177	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
178	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
179	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)/confparams_cpr->sampleDistance;
180
181	//float max = oriData[0];
182	//float min = oriData[0];
183
184	for(i=1;i<r1;i++)
185	{
186	for(j=1;j<r2;j++)
187	{
188	for(k=1;k<r3;k++)
189	{
190	if((i+j+k)%confparams_cpr->sampleDistance==0)
191	{
192	index = ir23+jr3+k;
193	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
194	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
195	pred_err = fabs(pred_value - oriData[index]);
196	radiusIndex = (pred_err/realPrecision+1)/2;
197	if(radiusIndex>=confparams_cpr->maxRangeRadius)
198	{
199	radiusIndex = confparams_cpr->maxRangeRadius - 1;
200	//printf("radiusIndex=%d\n", radiusIndex);
201	}
202	intervals[radiusIndex]++;
203
204	// if (max < oriData[index]) max = oriData[index];
205	// if (min > oriData[index]) min = oriData[index];
206	}
207	}
208	}
209	}
210	//compute the appropriate number
211	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
212	size_t sum = 0;
213	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
214	{
215	sum += intervals[i];
216	if(sum>targetCount)
217	break;
218	}
219	if(i>=confparams_cpr->maxRangeRadius)
220	i = confparams_cpr->maxRangeRadius-1;
221	unsigned int accIntervals = 2*(i+1);
222	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
223
224	if(powerOf2<32)
225	powerOf2 = 32;
226
227	// struct timeval costStart, costEnd;
228	// double cost_est = 0;
229	//
230	// gettimeofday(&costStart, NULL);
231	//
232	// //compute estimate of bit-rate and distortion
233	// double est_br = 0;
234	// double est_psnr = 0;
235	// double c1 = log2(targetCount)+1;
236	// double c2 = -20.0log10(realPrecision) + 20.0log10(max-min) + 10.0*log10(3);
237	//
238	// for (i = 0; i < powerOf2/2; i++)
239	// {
240	// int count = intervals[i];
241	// if (count != 0)
242	// est_br += count*log2(count);
243	// est_psnr += count;
244	// }
245	//
246	// //compute estimate of bit-rate
247	// est_br -= c1*est_psnr;
248	// est_br /= totalSampleSize;
249	// est_br = -est_br;
250	//
251	// //compute estimate of psnr
252	// est_psnr /= totalSampleSize;
253	// printf ("sum of P(i) = %lf\n", est_psnr);
254	// est_psnr = -10.0*log10(est_psnr);
255	// est_psnr += c2;
256	//
257	// printf ("estimate bitrate = %.2f\n", est_br);
258	// printf ("estimate psnr = %.2f\n",est_psnr);
259	//
260	// gettimeofday(&costEnd, NULL);
261	// cost_est = ((costEnd.tv_sec1000000+costEnd.tv_usec)-(costStart.tv_sec1000000+costStart.tv_usec))/1000000.0;
262	//
263	// printf ("analysis time = %f\n", cost_est);
264
265	free(intervals);
266	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
267	return powerOf2;
268	}
269
270
271	unsigned int optimize_intervals_float_4D(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision)
272	{
273	size_t i,j,k,l, index;
274	size_t radiusIndex;
275	size_t r234=r2r3r4;
276	size_t r34=r3*r4;
277	float pred_value = 0, pred_err;
278	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
279	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
280	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)*(r4-1)/confparams_cpr->sampleDistance;
281	for(i=1;i<r1;i++)
282	{
283	for(j=1;j<r2;j++)
284	{
285	for(k=1;k<r3;k++)
286	{
287	for (l=1;l<r4;l++)
288	{
289	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
290	{
291	index = ir234+jr34+k*r4+l;
292	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34]
293	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
294	pred_err = fabs(pred_value - oriData[index]);
295	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
296	if(radiusIndex>=confparams_cpr->maxRangeRadius)
297	radiusIndex = confparams_cpr->maxRangeRadius - 1;
298	intervals[radiusIndex]++;
299	}
300	}
301	}
302	}
303	}
304	//compute the appropriate number
305	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
306	size_t sum = 0;
307	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
308	{
309	sum += intervals[i];
310	if(sum>targetCount)
311	break;
312	}
313	if(i>=confparams_cpr->maxRangeRadius)
314	i = confparams_cpr->maxRangeRadius-1;
315
316	unsigned int accIntervals = 2*(i+1);
317	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
318
319	if(powerOf2<32)
320	powerOf2 = 32;
321
322	free(intervals);
323	return powerOf2;
324	}
325
326	TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData,
327	size_t dataLength, double realPrecision, float valueRangeSize, float medianValue_f)
328	{
329	#ifdef HAVE_TIMECMPR
330	float* decData = NULL;
331	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
332	decData = (float*)(multisteps->hist_data);
333	#endif
334
335	unsigned int quantization_intervals;
336	if(exe_params->optQuantMode==1)
337	quantization_intervals = optimize_intervals_float_1D_opt(oriData, dataLength, realPrecision);
338	else
339	quantization_intervals = exe_params->intvCapacity;
340	updateQuantizationInfo(quantization_intervals);
341
342	size_t i;
343	int reqLength;
344	float medianValue = medianValue_f;
345	short radExpo = getExponent_float(valueRangeSize/2);
346
347	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
348
349	int* type = (int) malloc(dataLengthsizeof(int));
350
351	float* spaceFillingValue = oriData; //
352
353	DynamicIntArray *exactLeadNumArray;
354	new_DIA(&exactLeadNumArray, DynArrayInitLen);
355
356	DynamicByteArray *exactMidByteArray;
357	new_DBA(&exactMidByteArray, DynArrayInitLen);
358
359	DynamicIntArray *resiBitArray;
360	new_DIA(&resiBitArray, DynArrayInitLen);
361
362	unsigned char preDataBytes[4];
363	intToBytes_bigEndian(preDataBytes, 0);
364
365	int reqBytesLength = reqLength/8;
366	int resiBitsLength = reqLength%8;
367	float last3CmprsData[3] = {0};
368
369	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
370	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
371
372	//add the first data
373	type[0] = 0;
374	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
375	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
376	memcpy(preDataBytes,vce->curBytes,4);
377	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
378	listAdd_float(last3CmprsData, vce->data);
379	#ifdef HAVE_TIMECMPR
380	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
381	decData[0] = vce->data;
382	#endif
383
384	//add the second data
385	type[1] = 0;
386	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
387	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
388	memcpy(preDataBytes,vce->curBytes,4);
389	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
390	listAdd_float(last3CmprsData, vce->data);
391	#ifdef HAVE_TIMECMPR
392	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
393	decData[1] = vce->data;
394	#endif
395	int state;
396	double checkRadius;
397	float curData;
398	float pred;
399	float predAbsErr;
400	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
401	double interval = 2*realPrecision;
402
403	for(i=2;i<dataLength;i++)
404	{
405	curData = spaceFillingValue[i];
406	//pred = 2*last3CmprsData[0] - last3CmprsData[1];
407	pred = last3CmprsData[0];
408	predAbsErr = fabs(curData - pred);
409	if(predAbsErr<checkRadius)
410	{
411	state = (predAbsErr/realPrecision+1)/2;
412	if(curData>=pred)
413	{
414	type[i] = exe_params->intvRadius+state;
415	pred = pred + state*interval;
416	}
417	else //curData<pred
418	{
419	type[i] = exe_params->intvRadius-state;
420	pred = pred - state*interval;
421	}
422
423	//double-check the prediction error in case of machine-epsilon impact
424	if(fabs(curData-pred)>realPrecision)
425	{
426	type[i] = 0;
427	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
428	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
429	memcpy(preDataBytes,vce->curBytes,4);
430	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
431
432	listAdd_float(last3CmprsData, vce->data);
433	#ifdef HAVE_TIMECMPR
434	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
435	decData[i] = vce->data;
436	#endif
437	}
438	else
439	{
440	listAdd_float(last3CmprsData, pred);
441	#ifdef HAVE_TIMECMPR
442	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
443	decData[i] = pred;
444	#endif
445	}
446	continue;
447	}
448
449	//unpredictable data processing
450	type[i] = 0;
451	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
452	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
453	memcpy(preDataBytes,vce->curBytes,4);
454	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
455
456	listAdd_float(last3CmprsData, vce->data);
457	#ifdef HAVE_TIMECMPR
458	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
459	decData[i] = vce->data;
460	#endif
461
462	}//end of for
463
464	// char* expSegmentsInBytes;
465	// int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes);
466	size_t exactDataNum = exactLeadNumArray->size;
467
468	TightDataPointStorageF* tdps;
469
470	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
471	type, exactMidByteArray->array, exactMidByteArray->size,
472	exactLeadNumArray->array,
473	resiBitArray->array, resiBitArray->size,
474	resiBitsLength,
475	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
476
477	//sdi:Debug
478	/* int sum =0;
479	for(i=0;i<dataLength;i++)
480	if(type[i]==0) sum++;
481	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
482
483	//free memory
484	free_DIA(exactLeadNumArray);
485	free_DIA(resiBitArray);
486	free(type);
487	free(vce);
488	free(lce);
489	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
490
491	return tdps;
492	}
493
494	void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, TightDataPointStorageF* tdps,
495	unsigned char** newByteData, size_t *outSize)
496	{
497	int floatSize=sizeof(float);
498	size_t k = 0, i;
499	tdps->isLossless = 1;
500	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + floatSize*dataLength;
501	newByteData = (unsigned char)malloc(totalByteLength);
502
503	unsigned char dsLengthBytes[8];
504	for (i = 0; i < 3; i++)//3
505	(*newByteData)[k++] = versionNumber[i];
506
507	if(exe_params->SZ_SIZE_TYPE==4)//1
508	(*newByteData)[k++] = 16; //00010000
509	else
510	(*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8
511
512	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
513	k = k + MetaDataByteLength;
514
515	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
516	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
517	(*newByteData)[k++] = dsLengthBytes[i];
518
519	if(sysEndianType==BIG_ENDIAN_SYSTEM)
520	memcpy((newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLengthfloatSize);
521	else
522	{
523	unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
524	for(i=0;i<dataLength;i++,p+=floatSize)
525	floatToBytes(p, oriData[i]);
526	}
527	*outSize = totalByteLength;
528	}
529
530	char SZ_compress_args_float_NoCkRngeNoGzip_1D(unsigned char** newByteData, float *oriData,
531	size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f)
532	{
533	char compressionType = 0;
534	TightDataPointStorageF* tdps = NULL;
535
536	#ifdef HAVE_TIMECMPR
537	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
538	{
539	int timestep = sz_tsc->currentStep;
540	if(timestep % confparams_cpr->snapshotCmprStep != 0)
541	{
542	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
543	compressionType = 1; //time-series based compression
544	}
545	else
546	{
547	tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
548	compressionType = 0; //snapshot-based compression
549	multisteps->lastSnapshotStep = timestep;
550	}
551	}
552	else
553	#endif
554	tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
555
556	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
557
558	if(outSize>dataLengthsizeof(float))
559	SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize);
560
561	free_TightDataPointStorageF(tdps);
562	return compressionType;
563	}
564
565	TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size_t r2, double realPrecision, float valueRangeSize, float medianValue_f)
566	{
567	#ifdef HAVE_TIMECMPR
568	float* decData = NULL;
569	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
570	decData = (float*)(multisteps->hist_data);
571	#endif
572
573	unsigned int quantization_intervals;
574	if(exe_params->optQuantMode==1)
575	{
576	quantization_intervals = optimize_intervals_float_2D_opt(oriData, r1, r2, realPrecision);
577	updateQuantizationInfo(quantization_intervals);
578	}
579	else
580	quantization_intervals = exe_params->intvCapacity;
581	size_t i,j;
582	int reqLength;
583	float pred1D, pred2D;
584	float diff = 0.0;
585	double itvNum = 0;
586	float P0, P1;
587
588	size_t dataLength = r1*r2;
589
590	P0 = (float)malloc(r2sizeof(float));
591	memset(P0, 0, r2*sizeof(float));
592	P1 = (float)malloc(r2sizeof(float));
593	memset(P1, 0, r2*sizeof(float));
594
595	float medianValue = medianValue_f;
596	short radExpo = getExponent_float(valueRangeSize/2);
597	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
598
599	int* type = (int) malloc(dataLengthsizeof(int));
600	//type[dataLength]=0;
601
602	float* spaceFillingValue = oriData; //
603
604	DynamicIntArray *exactLeadNumArray;
605	new_DIA(&exactLeadNumArray, DynArrayInitLen);
606
607	DynamicByteArray *exactMidByteArray;
608	new_DBA(&exactMidByteArray, DynArrayInitLen);
609
610	DynamicIntArray *resiBitArray;
611	new_DIA(&resiBitArray, DynArrayInitLen);
612
613	type[0] = 0;
614	unsigned char preDataBytes[4];
615	intToBytes_bigEndian(preDataBytes, 0);
616
617	int reqBytesLength = reqLength/8;
618	int resiBitsLength = reqLength%8;
619
620	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
621	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
622
623	/* Process Row-0 data 0*/
624	type[0] = 0;
625	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
626	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
627	memcpy(preDataBytes,vce->curBytes,4);
628	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
629	P1[0] = vce->data;
630	#ifdef HAVE_TIMECMPR
631	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
632	decData[0] = vce->data;
633	#endif
634
635	float curData;
636
637	/* Process Row-0 data 1*/
638	pred1D = P1[0];
639	curData = spaceFillingValue[1];
640	diff = curData - pred1D;
641
642	itvNum = fabs(diff)/realPrecision + 1;
643
644	if (itvNum < exe_params->intvCapacity)
645	{
646	if (diff < 0) itvNum = -itvNum;
647	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
648	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
649
650	//ganrantee comporession error against the case of machine-epsilon
651	if(fabs(spaceFillingValue[1]-P1[1])>realPrecision)
652	{
653	type[1] = 0;
654	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
655	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
656	memcpy(preDataBytes,vce->curBytes,4);
657	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
658
659	P1[1] = vce->data;
660	}
661	}
662	else
663	{
664	type[1] = 0;
665	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
666	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
667	memcpy(preDataBytes,vce->curBytes,4);
668	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
669	P1[1] = vce->data;
670	}
671	#ifdef HAVE_TIMECMPR
672	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
673	decData[1] = P1[1];
674	#endif
675
676	/* Process Row-0 data 2 --> data r2-1 */
677	for (j = 2; j < r2; j++)
678	{
679	pred1D = 2*P1[j-1] - P1[j-2];
680	curData = spaceFillingValue[j];
681	diff = curData - pred1D;
682
683	itvNum = fabs(diff)/realPrecision + 1;
684
685	if (itvNum < exe_params->intvCapacity)
686	{
687	if (diff < 0) itvNum = -itvNum;
688	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
689	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
690
691	//ganrantee comporession error against the case of machine-epsilon
692	if(fabs(curData-P1[j])>realPrecision)
693	{
694	type[j] = 0;
695	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
696	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
697	memcpy(preDataBytes,vce->curBytes,4);
698	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
699
700	P1[j] = vce->data;
701	}
702	}
703	else
704	{
705	type[j] = 0;
706	compressSingleFloatValue(vce,curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
707	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
708	memcpy(preDataBytes,vce->curBytes,4);
709	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
710	P1[j] = vce->data;
711	}
712	#ifdef HAVE_TIMECMPR
713	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
714	decData[j] = P1[j];
715	#endif
716	}
717
718	/* Process Row-1 --> Row-r1-1 */
719	size_t index;
720	for (i = 1; i < r1; i++)
721	{
722	/* Process row-i data 0 */
723	index = i*r2;
724	pred1D = P1[0];
725	curData = spaceFillingValue[index];
726	diff = curData - pred1D;
727
728	itvNum = fabs(diff)/realPrecision + 1;
729
730	if (itvNum < exe_params->intvCapacity)
731	{
732	if (diff < 0) itvNum = -itvNum;
733	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
734	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
735
736	//ganrantee comporession error against the case of machine-epsilon
737	if(fabs(curData-P0[0])>realPrecision)
738	{
739	type[index] = 0;
740	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
741	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
742	memcpy(preDataBytes,vce->curBytes,4);
743	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
744
745	P0[0] = vce->data;
746	}
747	}
748	else
749	{
750	type[index] = 0;
751	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
752	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
753	memcpy(preDataBytes,vce->curBytes,4);
754	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
755	P0[0] = vce->data;
756	}
757	#ifdef HAVE_TIMECMPR
758	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
759	decData[index] = P0[0];
760	#endif
761
762	/* Process row-i data 1 --> r2-1*/
763	for (j = 1; j < r2; j++)
764	{
765	index = i*r2+j;
766	pred2D = P0[j-1] + P1[j] - P1[j-1];
767
768	curData = spaceFillingValue[index];
769	diff = curData - pred2D;
770
771	itvNum = fabs(diff)/realPrecision + 1;
772
773	if (itvNum < exe_params->intvCapacity)
774	{
775	if (diff < 0) itvNum = -itvNum;
776	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
777	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
778
779	//ganrantee comporession error against the case of machine-epsilon
780	if(fabs(curData-P0[j])>realPrecision)
781	{
782	type[index] = 0;
783	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
784	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
785	memcpy(preDataBytes,vce->curBytes,4);
786	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
787
788	P0[j] = vce->data;
789	}
790	}
791	else
792	{
793	type[index] = 0;
794	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
795	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
796	memcpy(preDataBytes,vce->curBytes,4);
797	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
798	P0[j] = vce->data;
799	}
800	#ifdef HAVE_TIMECMPR
801	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
802	decData[index] = P0[j];
803	#endif
804	}
805
806	float *Pt;
807	Pt = P1;
808	P1 = P0;
809	P0 = Pt;
810	}
811
812	if(r2!=1)
813	free(P0);
814	free(P1);
815	size_t exactDataNum = exactLeadNumArray->size;
816
817	TightDataPointStorageF* tdps;
818
819	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
820	type, exactMidByteArray->array, exactMidByteArray->size,
821	exactLeadNumArray->array,
822	resiBitArray->array, resiBitArray->size,
823	resiBitsLength,
824	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
825
826	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
827	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
828
829	// for(i = 3800;i<3844;i++)
830	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
831
832	//free memory
833	free_DIA(exactLeadNumArray);
834	free_DIA(resiBitArray);
835	free(type);
836	free(vce);
837	free(lce);
838	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
839
840	return tdps;
841	}
842
843	/**
844	*
845	* Note: @r1 is high dimension
846	* @r2 is low dimension
847	* */
848	char SZ_compress_args_float_NoCkRngeNoGzip_2D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
849	{
850	size_t dataLength = r1*r2;
851	char compressionType = 0;
852	TightDataPointStorageF* tdps = NULL;
853
854	#ifdef HAVE_TIMECMPR
855	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
856	{
857	int timestep = sz_tsc->currentStep;
858	if(timestep % confparams_cpr->snapshotCmprStep != 0)
859	{
860	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
861	compressionType = 1; //time-series based compression
862	}
863	else
864	{
865	tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
866	compressionType = 0; //snapshot-based compression
867	multisteps->lastSnapshotStep = timestep;
868	}
869	}
870	else
871	#endif
872	tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
873
874	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
875
876	if(outSize>dataLengthsizeof(float))
877	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
878
879	free_TightDataPointStorageF(tdps);
880
881	return compressionType;
882	}
883
884	TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float valueRangeSize, float medianValue_f)
885	{
886	#ifdef HAVE_TIMECMPR
887	float* decData = NULL;
888	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
889	decData = (float*)(multisteps->hist_data);
890	#endif
891
892	unsigned int quantization_intervals;
893	if(exe_params->optQuantMode==1)
894	{
895	quantization_intervals = optimize_intervals_float_3D_opt(oriData, r1, r2, r3, realPrecision);
896	updateQuantizationInfo(quantization_intervals);
897	}
898	else
899	quantization_intervals = exe_params->intvCapacity;
900	size_t i,j,k;
901	int reqLength;
902	float pred1D, pred2D, pred3D;
903	float diff = 0.0;
904	double itvNum = 0;
905	float P0, P1;
906
907	size_t dataLength = r1r2r3;
908	size_t r23 = r2*r3;
909	P0 = (float)malloc(r23sizeof(float));
910	P1 = (float)malloc(r23sizeof(float));
911
912	float medianValue = medianValue_f;
913	short radExpo = getExponent_float(valueRangeSize/2);
914	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
915
916	int* type = (int) malloc(dataLengthsizeof(int));
917
918	float* spaceFillingValue = oriData; //
919
920	DynamicIntArray *exactLeadNumArray;
921	new_DIA(&exactLeadNumArray, DynArrayInitLen);
922
923	DynamicByteArray *exactMidByteArray;
924	new_DBA(&exactMidByteArray, DynArrayInitLen);
925
926	DynamicIntArray *resiBitArray;
927	new_DIA(&resiBitArray, DynArrayInitLen);
928
929	unsigned char preDataBytes[4];
930	intToBytes_bigEndian(preDataBytes, 0);
931
932	int reqBytesLength = reqLength/8;
933	int resiBitsLength = reqLength%8;
934
935	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
936	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
937
938
939	/////////////////////////// Process layer-0 ///////////////////////////
940	/* Process Row-0 data 0*/
941	type[0] = 0;
942	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
943	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
944	memcpy(preDataBytes,vce->curBytes,4);
945	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
946	P1[0] = vce->data;
947	#ifdef HAVE_TIMECMPR
948	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
949	decData[0] = P1[0];
950	#endif
951
952	float curData;
953
954	/* Process Row-0 data 1*/
955	pred1D = P1[0];
956	curData = spaceFillingValue[1];
957	diff = curData - pred1D;
958
959	itvNum = fabs(diff)/realPrecision + 1;
960
961	if (itvNum < exe_params->intvCapacity)
962	{
963	if (diff < 0) itvNum = -itvNum;
964	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
965	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
966
967	//ganrantee comporession error against the case of machine-epsilon
968	if(fabs(curData-P1[1])>realPrecision)
969	{
970	type[1] = 0;
971	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
972	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
973	memcpy(preDataBytes,vce->curBytes,4);
974	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
975
976	P1[1] = vce->data;
977	}
978	}
979	else
980	{
981	type[1] = 0;
982	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
983	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
984	memcpy(preDataBytes,vce->curBytes,4);
985	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
986	P1[1] = vce->data;
987	}
988	#ifdef HAVE_TIMECMPR
989	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
990	decData[1] = P1[1];
991	#endif
992
993	/* Process Row-0 data 2 --> data r3-1 */
994	for (j = 2; j < r3; j++)
995	{
996	pred1D = 2*P1[j-1] - P1[j-2];
997	curData = spaceFillingValue[j];
998	diff = curData - pred1D;
999
1000	itvNum = fabs(diff)/realPrecision + 1;
1001
1002	if (itvNum < exe_params->intvCapacity)
1003	{
1004	if (diff < 0) itvNum = -itvNum;
1005	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
1006	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
1007
1008	//ganrantee comporession error against the case of machine-epsilon
1009	if(fabs(curData-P1[j])>realPrecision)
1010	{
1011	type[j] = 0;
1012	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1013	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1014	memcpy(preDataBytes,vce->curBytes,4);
1015	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1016
1017	P1[j] = vce->data;
1018	}
1019	}
1020	else
1021	{
1022	type[j] = 0;
1023	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1024	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1025	memcpy(preDataBytes,vce->curBytes,4);
1026	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1027	P1[j] = vce->data;
1028	}
1029	#ifdef HAVE_TIMECMPR
1030	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1031	decData[j] = P1[j];
1032	#endif
1033	}
1034
1035	/* Process Row-1 --> Row-r2-1 */
1036	size_t index;
1037	for (i = 1; i < r2; i++)
1038	{
1039	/* Process row-i data 0 */
1040	index = i*r3;
1041	pred1D = P1[index-r3];
1042	curData = spaceFillingValue[index];
1043	diff = curData - pred1D;
1044
1045	itvNum = fabs(diff)/realPrecision + 1;
1046
1047	if (itvNum < exe_params->intvCapacity)
1048	{
1049	if (diff < 0) itvNum = -itvNum;
1050	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1051	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1052
1053	//ganrantee comporession error against the case of machine-epsilon
1054	if(fabs(curData-P1[index])>realPrecision)
1055	{
1056	type[index] = 0;
1057	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1058	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1059	memcpy(preDataBytes,vce->curBytes,4);
1060	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1061
1062	P1[index] = vce->data;
1063	}
1064	}
1065	else
1066	{
1067	type[index] = 0;
1068	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1069	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1070	memcpy(preDataBytes,vce->curBytes,4);
1071	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1072	P1[index] = vce->data;
1073	}
1074	#ifdef HAVE_TIMECMPR
1075	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1076	decData[index] = P1[index];
1077	#endif
1078
1079	/* Process row-i data 1 --> data r3-1*/
1080	for (j = 1; j < r3; j++)
1081	{
1082	index = i*r3+j;
1083	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
1084
1085	curData = spaceFillingValue[index];
1086	diff = curData - pred2D;
1087
1088	itvNum = fabs(diff)/realPrecision + 1;
1089
1090	if (itvNum < exe_params->intvCapacity)
1091	{
1092	if (diff < 0) itvNum = -itvNum;
1093	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1094	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1095
1096	//ganrantee comporession error against the case of machine-epsilon
1097	if(fabs(curData-P1[index])>realPrecision)
1098	{
1099	type[index] = 0;
1100	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1101	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1102	memcpy(preDataBytes,vce->curBytes,4);
1103	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1104
1105	P1[index] = vce->data;
1106	}
1107	}
1108	else
1109	{
1110	type[index] = 0;
1111	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1112	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1113	memcpy(preDataBytes,vce->curBytes,4);
1114	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1115	P1[index] = vce->data;
1116	}
1117	#ifdef HAVE_TIMECMPR
1118	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1119	decData[index] = P1[index];
1120	#endif
1121	}
1122	}
1123
1124
1125	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
1126
1127	for (k = 1; k < r1; k++)
1128	{
1129	/* Process Row-0 data 0*/
1130	index = k*r23;
1131	pred1D = P1[0];
1132	curData = spaceFillingValue[index];
1133	diff = curData - pred1D;
1134
1135	itvNum = fabs(diff)/realPrecision + 1;
1136
1137	if (itvNum < exe_params->intvCapacity)
1138	{
1139	if (diff < 0) itvNum = -itvNum;
1140	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1141	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1142
1143	//ganrantee comporession error against the case of machine-epsilon
1144	if(fabs(curData-P0[0])>realPrecision)
1145	{
1146	type[index] = 0;
1147	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1148	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1149	memcpy(preDataBytes,vce->curBytes,4);
1150	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1151
1152	P0[0] = vce->data;
1153	}
1154	}
1155	else
1156	{
1157	type[index] = 0;
1158	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1159	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1160	memcpy(preDataBytes,vce->curBytes,4);
1161	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1162	P0[0] = vce->data;
1163	}
1164	#ifdef HAVE_TIMECMPR
1165	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1166	decData[index] = P0[0];
1167	#endif
1168
1169	/* Process Row-0 data 1 --> data r3-1 */
1170	for (j = 1; j < r3; j++)
1171	{
1172	//index = kr2r3+j;
1173	index ++;
1174	pred2D = P0[j-1] + P1[j] - P1[j-1];
1175	curData = spaceFillingValue[index];
1176	diff = spaceFillingValue[index] - pred2D;
1177
1178	itvNum = fabs(diff)/realPrecision + 1;
1179
1180	if (itvNum < exe_params->intvCapacity)
1181	{
1182	if (diff < 0) itvNum = -itvNum;
1183	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1184	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1185	//ganrantee comporession error against the case of machine-epsilon
1186	if(fabs(curData-P0[j])>realPrecision)
1187	{
1188	type[index] = 0;
1189	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1190	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1191	memcpy(preDataBytes,vce->curBytes,4);
1192	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1193
1194	P0[j] = vce->data;
1195	}
1196	}
1197	else
1198	{
1199	type[index] = 0;
1200	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1201	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1202	memcpy(preDataBytes,vce->curBytes,4);
1203	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1204	P0[j] = vce->data;
1205	}
1206	#ifdef HAVE_TIMECMPR
1207	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1208	decData[index] = P0[j];
1209	#endif
1210	}
1211
1212	/* Process Row-1 --> Row-r2-1 */
1213	size_t index2D;
1214	for (i = 1; i < r2; i++)
1215	{
1216	/* Process Row-i data 0 */
1217	index = kr23 + ir3;
1218	index2D = i*r3;
1219	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
1220	curData = spaceFillingValue[index];
1221	diff = spaceFillingValue[index] - pred2D;
1222
1223	itvNum = fabs(diff)/realPrecision + 1;
1224
1225	if (itvNum < exe_params->intvCapacity)
1226	{
1227	if (diff < 0) itvNum = -itvNum;
1228	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1229	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1230	//ganrantee comporession error against the case of machine-epsilon
1231	if(fabs(curData-P0[index2D])>realPrecision)
1232	{
1233	type[index] = 0;
1234	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1235	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1236	memcpy(preDataBytes,vce->curBytes,4);
1237	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1238
1239	P0[index2D] = vce->data;
1240	}
1241	}
1242	else
1243	{
1244	type[index] = 0;
1245	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1246	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1247	memcpy(preDataBytes,vce->curBytes,4);
1248	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1249	P0[index2D] = vce->data;
1250	}
1251	#ifdef HAVE_TIMECMPR
1252	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1253	decData[index] = P0[index2D];
1254	#endif
1255
1256	/* Process Row-i data 1 --> data r3-1 */
1257	for (j = 1; j < r3; j++)
1258	{
1259	// if(k==63&&i==43&&j==27)
1260	// printf("i=%d\n", i);
1261	//index = kr2r3 + i*r3 + j;
1262	index ++;
1263	index2D = i*r3 + j;
1264	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
1265	curData = spaceFillingValue[index];
1266	diff = curData - pred3D;
1267
1268	itvNum = fabs(diff)/realPrecision + 1;
1269
1270	if (itvNum < exe_params->intvCapacity)
1271	{
1272	if (diff < 0) itvNum = -itvNum;
1273	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1274	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1275
1276	//ganrantee comporession error against the case of machine-epsilon
1277	if(fabs(curData-P0[index2D])>realPrecision)
1278	{
1279	type[index] = 0;
1280	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1281	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1282	memcpy(preDataBytes,vce->curBytes,4);
1283	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1284
1285	P0[index2D] = vce->data;
1286	}
1287	}
1288	else
1289	{
1290	type[index] = 0;
1291	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1292	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1293	memcpy(preDataBytes,vce->curBytes,4);
1294	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1295	P0[index2D] = vce->data;
1296	}
1297	#ifdef HAVE_TIMECMPR
1298	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1299	decData[index] = P0[index2D];
1300	#endif
1301	}
1302	}
1303
1304	float *Pt;
1305	Pt = P1;
1306	P1 = P0;
1307	P0 = Pt;
1308	}
1309	if(r23!=1)
1310	free(P0);
1311	free(P1);
1312	size_t exactDataNum = exactLeadNumArray->size;
1313
1314	TightDataPointStorageF* tdps;
1315
1316	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
1317	type, exactMidByteArray->array, exactMidByteArray->size,
1318	exactLeadNumArray->array,
1319	resiBitArray->array, resiBitArray->size,
1320	resiBitsLength,
1321	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1322
1323	//sdi:Debug
1324	/* int sum =0;
1325	for(i=0;i<dataLength;i++)
1326	if(type[i]==0) sum++;
1327	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
1328
1329
1330	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
1331	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
1332
1333	//free memory
1334	free_DIA(exactLeadNumArray);
1335	free_DIA(resiBitArray);
1336	free(type);
1337	free(vce);
1338	free(lce);
1339	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1340
1341	return tdps;
1342	}
1343
1344	char SZ_compress_args_float_NoCkRngeNoGzip_3D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
1345	{
1346	size_t dataLength = r1r2r3;
1347	char compressionType = 0;
1348	TightDataPointStorageF* tdps = NULL;
1349
1350	#ifdef HAVE_TIMECMPR
1351	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1352	{
1353	int timestep = sz_tsc->currentStep;
1354	if(timestep % confparams_cpr->snapshotCmprStep != 0)
1355	{
1356	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
1357	compressionType = 1; //time-series based compression
1358	}
1359	else
1360	{
1361	if(sz_with_regression == SZ_NO_REGRESSION)
1362	tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
1363	else
1364	*newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
1365	compressionType = 0; //snapshot-based compression
1366	multisteps->lastSnapshotStep = timestep;
1367	}
1368	}
1369	else
1370	#endif
1371	tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
1372
1373	if(tdps!=NULL)
1374	{
1375	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
1376	if(outSize>dataLengthsizeof(float))
1377	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1378	free_TightDataPointStorageF(tdps);
1379	}
1380
1381	return compressionType;
1382	}
1383
1384
1385	TightDataPointStorageF* SZ_compress_float_4D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, float valueRangeSize, float medianValue_f)
1386	{
1387	unsigned int quantization_intervals;
1388	if(exe_params->optQuantMode==1)
1389	{
1390	quantization_intervals = optimize_intervals_float_4D(oriData, r1, r2, r3, r4, realPrecision);
1391	updateQuantizationInfo(quantization_intervals);
1392	}
1393	else
1394	quantization_intervals = exe_params->intvCapacity;
1395
1396	size_t i,j,k;
1397	int reqLength;
1398	float pred1D, pred2D, pred3D;
1399	float diff = 0.0;
1400	double itvNum = 0;
1401	float P0, P1;
1402
1403	size_t dataLength = r1r2r3*r4;
1404
1405	size_t r234 = r2r3r4;
1406	size_t r34 = r3*r4;
1407
1408	P0 = (float)malloc(r34sizeof(float));
1409	P1 = (float)malloc(r34sizeof(float));
1410
1411	float medianValue = medianValue_f;
1412	short radExpo = getExponent_float(valueRangeSize/2);
1413	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
1414
1415	int* type = (int) malloc(dataLengthsizeof(int));
1416
1417	float* spaceFillingValue = oriData; //
1418
1419	DynamicIntArray *exactLeadNumArray;
1420	new_DIA(&exactLeadNumArray, DynArrayInitLen);
1421
1422	DynamicByteArray *exactMidByteArray;
1423	new_DBA(&exactMidByteArray, DynArrayInitLen);
1424
1425	DynamicIntArray *resiBitArray;
1426	new_DIA(&resiBitArray, DynArrayInitLen);
1427
1428	unsigned char preDataBytes[4];
1429	intToBytes_bigEndian(preDataBytes, 0);
1430
1431	int reqBytesLength = reqLength/8;
1432	int resiBitsLength = reqLength%8;
1433
1434	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
1435	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
1436
1437
1438	size_t l;
1439	for (l = 0; l < r1; l++)
1440	{
1441
1442	/////////////////////////// Process layer-0 ///////////////////////////
1443	/* Process Row-0 data 0*/
1444	size_t index = l*r234;
1445	size_t index2D = 0;
1446
1447	type[index] = 0;
1448	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1449	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1450	memcpy(preDataBytes,vce->curBytes,4);
1451	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1452	P1[index2D] = vce->data;
1453
1454	/* Process Row-0 data 1*/
1455	index = l*r234+1;
1456	index2D = 1;
1457
1458	pred1D = P1[index2D-1];
1459	diff = spaceFillingValue[index] - pred1D;
1460
1461	itvNum = fabs(diff)/realPrecision + 1;
1462
1463	if (itvNum < exe_params->intvCapacity)
1464	{
1465	if (diff < 0) itvNum = -itvNum;
1466	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1467	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1468	}
1469	else
1470	{
1471	type[index] = 0;
1472	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1473	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1474	memcpy(preDataBytes,vce->curBytes,4);
1475	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1476	P1[index2D] = vce->data;
1477	}
1478
1479	/* Process Row-0 data 2 --> data r4-1 */
1480	for (j = 2; j < r4; j++)
1481	{
1482	index = l*r234+j;
1483	index2D = j;
1484
1485	pred1D = 2*P1[index2D-1] - P1[index2D-2];
1486	diff = spaceFillingValue[index] - pred1D;
1487
1488	itvNum = fabs(diff)/realPrecision + 1;
1489
1490	if (itvNum < exe_params->intvCapacity)
1491	{
1492	if (diff < 0) itvNum = -itvNum;
1493	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1494	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1495	}
1496	else
1497	{
1498	type[index] = 0;
1499	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1500	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1501	memcpy(preDataBytes,vce->curBytes,4);
1502	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1503	P1[index2D] = vce->data;
1504	}
1505	}
1506
1507	/* Process Row-1 --> Row-r3-1 */
1508	for (i = 1; i < r3; i++)
1509	{
1510	/* Process row-i data 0 */
1511	index = lr234+ir4;
1512	index2D = i*r4;
1513
1514	pred1D = P1[index2D-r4];
1515	diff = spaceFillingValue[index] - pred1D;
1516
1517	itvNum = fabs(diff)/realPrecision + 1;
1518
1519	if (itvNum < exe_params->intvCapacity)
1520	{
1521	if (diff < 0) itvNum = -itvNum;
1522	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1523	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1524	}
1525	else
1526	{
1527	type[index] = 0;
1528	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1529	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1530	memcpy(preDataBytes,vce->curBytes,4);
1531	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1532	P1[index2D] = vce->data;
1533	}
1534
1535	/* Process row-i data 1 --> data r4-1*/
1536	for (j = 1; j < r4; j++)
1537	{
1538	index = lr234+ir4+j;
1539	index2D = i*r4+j;
1540
1541	pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1];
1542
1543	diff = spaceFillingValue[index] - pred2D;
1544
1545	itvNum = fabs(diff)/realPrecision + 1;
1546
1547	if (itvNum < exe_params->intvCapacity)
1548	{
1549	if (diff < 0) itvNum = -itvNum;
1550	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1551	P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1552	}
1553	else
1554	{
1555	type[index] = 0;
1556	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1557	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1558	memcpy(preDataBytes,vce->curBytes,4);
1559	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1560	P1[index2D] = vce->data;
1561	}
1562	}
1563	}
1564
1565
1566	/////////////////////////// Process layer-1 --> layer-r2-1 ///////////////////////////
1567
1568	for (k = 1; k < r2; k++)
1569	{
1570	/* Process Row-0 data 0*/
1571	index = lr234+kr34;
1572	index2D = 0;
1573
1574	pred1D = P1[index2D];
1575	diff = spaceFillingValue[index] - pred1D;
1576
1577	itvNum = fabs(diff)/realPrecision + 1;
1578
1579	if (itvNum < exe_params->intvCapacity)
1580	{
1581	if (diff < 0) itvNum = -itvNum;
1582	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1583	P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1584	}
1585	else
1586	{
1587	type[index] = 0;
1588	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1589	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1590	memcpy(preDataBytes,vce->curBytes,4);
1591	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1592	P0[index2D] = vce->data;
1593	}
1594
1595	/* Process Row-0 data 1 --> data r4-1 */
1596	for (j = 1; j < r4; j++)
1597	{
1598	index = lr234+kr34+j;
1599	index2D = j;
1600
1601	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
1602	diff = spaceFillingValue[index] - pred2D;
1603
1604	itvNum = fabs(diff)/realPrecision + 1;
1605
1606	if (itvNum < exe_params->intvCapacity)
1607	{
1608	if (diff < 0) itvNum = -itvNum;
1609	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1610	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1611	}
1612	else
1613	{
1614	type[index] = 0;
1615	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1616	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1617	memcpy(preDataBytes,vce->curBytes,4);
1618	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1619	P0[index2D] = vce->data;
1620	}
1621	}
1622
1623	/* Process Row-1 --> Row-r3-1 */
1624	for (i = 1; i < r3; i++)
1625	{
1626	/* Process Row-i data 0 */
1627	index = lr234+kr34+i*r4;
1628	index2D = i*r4;
1629
1630	pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4];
1631	diff = spaceFillingValue[index] - pred2D;
1632
1633	itvNum = fabs(diff)/realPrecision + 1;
1634
1635	if (itvNum < exe_params->intvCapacity)
1636	{
1637	if (diff < 0) itvNum = -itvNum;
1638	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1639	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1640	}
1641	else
1642	{
1643	type[index] = 0;
1644	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1645	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1646	memcpy(preDataBytes,vce->curBytes,4);
1647	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1648	P0[index2D] = vce->data;
1649	}
1650
1651	/* Process Row-i data 1 --> data r4-1 */
1652	for (j = 1; j < r4; j++)
1653	{
1654	index = lr234+kr34+i*r4+j;
1655	index2D = i*r4+j;
1656
1657	pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1];
1658	diff = spaceFillingValue[index] - pred3D;
1659
1660
1661	itvNum = fabs(diff)/realPrecision + 1;
1662
1663	if (itvNum < exe_params->intvCapacity)
1664	{
1665	if (diff < 0) itvNum = -itvNum;
1666	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1667	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1668	}
1669	else
1670	{
1671	type[index] = 0;
1672	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1673	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1674	memcpy(preDataBytes,vce->curBytes,4);
1675	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1676	P0[index2D] = vce->data;
1677	}
1678	}
1679	}
1680
1681	float *Pt;
1682	Pt = P1;
1683	P1 = P0;
1684	P0 = Pt;
1685	}
1686	}
1687
1688	free(P0);
1689	free(P1);
1690	size_t exactDataNum = exactLeadNumArray->size;
1691
1692	TightDataPointStorageF* tdps;
1693
1694	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
1695	type, exactMidByteArray->array, exactMidByteArray->size,
1696	exactLeadNumArray->array,
1697	resiBitArray->array, resiBitArray->size,
1698	resiBitsLength,
1699	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1700
1701	//free memory
1702	free_DIA(exactLeadNumArray);
1703	free_DIA(resiBitArray);
1704	free(type);
1705	free(vce);
1706	free(lce);
1707	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1708
1709	return tdps;
1710	}
1711
1712	char SZ_compress_args_float_NoCkRngeNoGzip_4D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
1713	{
1714	TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_f);
1715
1716	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
1717
1718	int dataLength = r1r2r3*r4;
1719	if(outSize>dataLengthsizeof(float))
1720	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1721
1722	free_TightDataPointStorageF(tdps);
1723
1724	return 0;
1725	}
1726
1727	void SZ_compress_args_float_withinRange(unsigned char** newByteData, float oriData, size_t dataLength, size_t outSize)
1728	{
1729	TightDataPointStorageF* tdps = (TightDataPointStorageF*) malloc(sizeof(TightDataPointStorageF));
1730	tdps->rtypeArray = NULL;
1731	tdps->typeArray = NULL;
1732	tdps->leadNumArray = NULL;
1733	tdps->residualMidBits = NULL;
1734
1735	tdps->allSameData = 1;
1736	tdps->dataSeriesLength = dataLength;
1737	tdps->exactMidBytes = (unsigned char)malloc(sizeof(unsigned char)4);
1738	tdps->pwrErrBoundBytes = NULL;
1739	tdps->isLossless = 0;
1740	float value = oriData[0];
1741	floatToBytes(tdps->exactMidBytes, value);
1742	tdps->exactMidBytes_size = 4;
1743
1744	size_t tmpOutSize;
1745	//unsigned char *tmpByteData;
1746	convertTDPStoFlatBytes_float(tdps, newByteData, &tmpOutSize);
1747
1748	//newByteData = (unsigned char)malloc(sizeof(unsigned char)*12); //for floating-point data (1+3+4+4)
1749	//memcpy(*newByteData, tmpByteData, 12);
1750	*outSize = tmpOutSize; //8+SZ_SIZE_TYPE; //8==3+1+4(float_size)
1751	free_TightDataPointStorageF(tdps);
1752	}
1753
1754	int SZ_compress_args_float_wRngeNoGzip(unsigned char** newByteData, float *oriData,
1755	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1756	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
1757	{
1758	int status = SZ_SCES;
1759	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1760	float valueRangeSize = 0, medianValue = 0;
1761
1762	float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
1763	float max = min+valueRangeSize;
1764	double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1765
1766	if(valueRangeSize <= realPrecision)
1767	{
1768	SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
1769	}
1770	else
1771	{
1772	// SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize);
1773	if(r5==0&&r4==0&&r3==0&&r2==0)
1774	{
1775	if(errBoundMode>=PW_REL)
1776	{
1777	SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
1778	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
1779	}
1780	else
1781	SZ_compress_args_float_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
1782	}
1783	else if(r5==0&&r4==0&&r3==0)
1784	{
1785	if(errBoundMode>=PW_REL)
1786	SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max);
1787	else
1788	SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1789	}
1790	else if(r5==0&&r4==0)
1791	{
1792	if(errBoundMode>=PW_REL)
1793	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max);
1794	else
1795	SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1796	}
1797	else if(r5==0)
1798	{
1799	if(errBoundMode>=PW_REL)
1800	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max);
1801	else
1802	SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1803	}
1804	}
1805	return status;
1806	}
1807
1808	int SZ_compress_args_float(unsigned char** newByteData, float *oriData,
1809	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1810	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
1811	{
1812	confparams_cpr->errorBoundMode = errBoundMode;
1813	if(errBoundMode==PW_REL)
1814	{
1815	confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
1816	//confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
1817	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE && r3 != 0 )
1818	{
1819	printf("Error: Current version doesn't support 3D data compression with point-wise relative error bound being based on pwrType=AVG\n");
1820	exit(0);
1821	return SZ_NSCS;
1822	}
1823	}
1824	int status = SZ_SCES;
1825	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1826
1827	if(dataLength <= MIN_NUM_OF_ELEMENTS)
1828	{
1829	*newByteData = SZ_skip_compress_float(oriData, dataLength, outSize);
1830	return status;
1831	}
1832
1833	float valueRangeSize = 0, medianValue = 0;
1834
1835	float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
1836	float max = min+valueRangeSize;
1837	double realPrecision = 0;
1838
1839	if(confparams_cpr->errorBoundMode==PSNR)
1840	{
1841	confparams_cpr->errorBoundMode = ABS;
1842	realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize);
1843	//printf("realPrecision=%lf\n", realPrecision);
1844	}
1845	else
1846	realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1847
1848	if(valueRangeSize <= realPrecision)
1849	{
1850	SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
1851	}
1852	else
1853	{
1854	size_t tmpOutSize = 0;
1855	unsigned char* tmpByteData;
1856
1857	if (r2==0)
1858	{
1859	if(confparams_cpr->errorBoundMode>=PW_REL)
1860	{
1861	SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
1862	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
1863	}
1864	else
1865	#ifdef HAVE_TIMECMPR
1866	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1867	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1868	else
1869	#endif
1870	SZ_compress_args_float_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1871	}
1872	else
1873	if (r3==0)
1874	{
1875	if(confparams_cpr->errorBoundMode>=PW_REL)
1876	SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
1877	else
1878	#ifdef HAVE_TIMECMPR
1879	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1880	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1881	else
1882	#endif
1883	{
1884	if(sz_with_regression == SZ_NO_REGRESSION)
1885	SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1886	else
1887	tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
1888	}
1889	}
1890	else
1891	if (r4==0)
1892	{
1893	if(confparams_cpr->errorBoundMode>=PW_REL)
1894	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
1895	else
1896	#ifdef HAVE_TIMECMPR
1897	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1898	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1899	else
1900	#endif
1901	{
1902	if(sz_with_regression == SZ_NO_REGRESSION)
1903	SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1904	else
1905	tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
1906	}
1907	}
1908	else
1909	if (r5==0)
1910	{
1911	if(confparams_cpr->errorBoundMode>=PW_REL)
1912	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
1913	//ToDO
1914	//SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr(&tmpByteData, oriData, r4, r3, r2, r1, &tmpOutSize, min, max);
1915	else
1916	#ifdef HAVE_TIMECMPR
1917	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1918	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1919	else
1920	#endif
1921	{
1922	if(sz_with_regression == SZ_NO_REGRESSION)
1923	SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1924	else
1925	tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
1926	}
1927	}
1928	else
1929	{
1930	printf("Error: doesn't support 5 dimensions for now.\n");
1931	status = SZ_DERR; //dimension error
1932	}
1933	//Call Gzip to do the further compression.
1934	if(confparams_cpr->szMode==SZ_BEST_SPEED)
1935	{
1936	*outSize = tmpOutSize;
1937	*newByteData = tmpByteData;
1938	}
1939	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION \|\| confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION)
1940	{
1941	*outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
1942	free(tmpByteData);
1943	}
1944	else
1945	{
1946	printf("Error: Wrong setting of confparams_cpr->szMode in the float compression.\n");
1947	status = SZ_MERR; //mode error
1948	}
1949	}
1950
1951	return status;
1952	}
1953
1954
/**
 * Derive the number of bits kept per stored float value.
 *
 * The length is 9 + radExpo - getPrecisionReqLength_double(realPrecision),
 * clamped to the range [9, 32].
 *
 * @param realPrecision error bound handed to getPrecisionReqLength_double
 * @param radExpo       exponent term added to the base length of 9
 * @param reqLength     out: resulting bit length
 * @param medianValue   in/out: reset to 0 when the length had to be capped at 32
 */
void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue)
{
	const short precisionExpo = getPrecisionReqLength_double(realPrecision);
	int bits = 9+radExpo - precisionExpo; //radExpo-precisionExpo == required mantissa length

	if(bits<9)
	{
		bits = 9;
	}
	else if(bits>32)
	{
		bits = 32;
		*medianValue = 0;
	}

	*reqLength = bits;
}
1967
1968	//TODO
1969	int SZ_compress_args_float_subblock(unsigned char* compressedBytes, float *oriData,
1970	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
1971	size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
1972	size_t e5, size_t e4, size_t e3, size_t e2, size_t e1,
1973	size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio)
1974	{
1975	int status = SZ_SCES;
1976	float valueRangeSize = 0, medianValue = 0;
1977	computeRangeSize_float_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1);
1978
1979	double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1980
1981	if(valueRangeSize <= realPrecision)
1982	{
1983	//TODO
1984	//SZ_compress_args_float_withinRange_subblock();
1985	}
1986	else
1987	{
1988	if (r2==0)
1989	{
1990	if(errBoundMode>=PW_REL)
1991	{
1992	//TODO
1993	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_subblock();
1994	printf ("Current subblock version does not support point-wise relative error bound.\n");
1995	}
1996	else
1997	SZ_compress_args_float_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1);
1998	}
1999	else
2000	if (r3==0)
2001	{
2002	//TODO
2003	if(errBoundMode>=PW_REL)
2004	{
2005	//TODO
2006	//SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_subblock();
2007	printf ("Current subblock version does not support point-wise relative error bound.\n");
2008	}
2009	else
2010	SZ_compress_args_float_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1);
2011	}
2012	else
2013	if (r4==0)
2014	{
2015	if(errBoundMode>=PW_REL)
2016	{
2017	//TODO
2018	//SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_subblock();
2019	printf ("Current subblock version does not support point-wise relative error bound.\n");
2020	}
2021	else
2022	SZ_compress_args_float_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1);
2023	}
2024	else
2025	if (r5==0)
2026	{
2027	if(errBoundMode>=PW_REL)
2028	{
2029	//TODO
2030	//SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr_subblock();
2031	printf ("Current subblock version does not support point-wise relative error bound.\n");
2032	}
2033	else
2034	SZ_compress_args_float_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
2035	}
2036	else
2037	{
2038	printf("Error: doesn't support 5 dimensions for now.\n");
2039	status = SZ_DERR; //dimension error
2040	}
2041	}
2042	return status;
2043	}
2044
2045	void SZ_compress_args_float_NoCkRnge_1D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2046	size_t r1, size_t s1, size_t e1)
2047	{
2048	TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r1, s1, e1);
2049
2050	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2051	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2052	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2053	{
2054	unsigned char *tmpCompBytes;
2055	size_t tmpOutSize;
2056	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2057	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2058	free(tmpCompBytes);
2059	}
2060	else
2061	{
2062	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2063	}
2064
2065	//TODO
2066	// if(outSize>dataLengthsizeof(float))
2067	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2068
2069	free_TightDataPointStorageF(tdps);
2070	}
2071
2072	void SZ_compress_args_float_NoCkRnge_2D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2073	size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1)
2074	{
2075	TightDataPointStorageF* tdps = SZ_compress_float_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r2, r1, s2, s1, e2, e1);
2076
2077	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2078	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2079	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2080	{
2081	unsigned char *tmpCompBytes;
2082	size_t tmpOutSize;
2083	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2084	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2085	free(tmpCompBytes);
2086	}
2087	else
2088	{
2089	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2090	}
2091
2092	//TODO
2093	// if(outSize>dataLengthsizeof(float))
2094	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2095
2096	free_TightDataPointStorageF(tdps);
2097	}
2098
2099	void SZ_compress_args_float_NoCkRnge_3D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2100	size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1)
2101	{
2102	TightDataPointStorageF* tdps = SZ_compress_float_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r3, r2, r1, s3, s2, s1, e3, e2, e1);
2103
2104	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2105	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2106	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2107	{
2108	unsigned char *tmpCompBytes;
2109	size_t tmpOutSize;
2110	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2111	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2112	free(tmpCompBytes);
2113	}
2114	else
2115	{
2116	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2117	}
2118
2119	//TODO
2120	// if(outSize>dataLengthsizeof(float))
2121	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2122
2123	free_TightDataPointStorageF(tdps);
2124	}
2125
2126	void SZ_compress_args_float_NoCkRnge_4D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2127	size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1)
2128	{
2129	TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
2130
2131	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2132	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2133	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2134	{
2135	unsigned char *tmpCompBytes;
2136	size_t tmpOutSize;
2137	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2138	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2139	free(tmpCompBytes);
2140	}
2141	else
2142	{
2143	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2144	}
2145
2146	//TODO
2147	// if(outSize>dataLengthsizeof(float))
2148	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2149
2150	free_TightDataPointStorageF(tdps);
2151
2152	}
2153
2154	unsigned int optimize_intervals_float_1D_subblock(float *oriData, double realPrecision, size_t r1, size_t s1, size_t e1)
2155	{
2156	size_t dataLength = e1 - s1 + 1;
2157	oriData = oriData + s1;
2158
2159	size_t i = 0;
2160	unsigned long radiusIndex;
2161	float pred_value = 0, pred_err;
2162	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2163	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2164	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
2165	for(i=2;i<dataLength;i++)
2166	{
2167	if(i%confparams_cpr->sampleDistance==0)
2168	{
2169	pred_value = 2*oriData[i-1] - oriData[i-2];
2170	//pred_value = oriData[i-1];
2171	pred_err = fabs(pred_value - oriData[i]);
2172	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2173	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2174	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2175	intervals[radiusIndex]++;
2176	}
2177	}
2178	//compute the appropriate number
2179	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2180	size_t sum = 0;
2181	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2182	{
2183	sum += intervals[i];
2184	if(sum>targetCount)
2185	break;
2186	}
2187	if(i>=confparams_cpr->maxRangeRadius)
2188	i = confparams_cpr->maxRangeRadius-1;
2189
2190	unsigned int accIntervals = 2*(i+1);
2191	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2192
2193	if(powerOf2<32)
2194	powerOf2 = 32;
2195
2196	free(intervals);
2197	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
2198	return powerOf2;
2199	}
2200
2201	unsigned int optimize_intervals_float_2D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
2202	{
2203	size_t R1 = e1 - s1 + 1;
2204	size_t R2 = e2 - s2 + 1;
2205
2206	size_t i,j, index;
2207	unsigned long radiusIndex;
2208	float pred_value = 0, pred_err;
2209	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2210	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2211	size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance;
2212	for(i=s1+1;i<=e1;i++)
2213	{
2214	for(j=s2+1;j<=e2;j++)
2215	{
2216	if((i+j)%confparams_cpr->sampleDistance==0)
2217	{
2218	index = i*r2+j;
2219	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
2220	pred_err = fabs(pred_value - oriData[index]);
2221	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2222	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2223	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2224	intervals[radiusIndex]++;
2225	}
2226	}
2227	}
2228	//compute the appropriate number
2229	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2230	size_t sum = 0;
2231	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2232	{
2233	sum += intervals[i];
2234	if(sum>targetCount)
2235	break;
2236	}
2237	if(i>=confparams_cpr->maxRangeRadius)
2238	i = confparams_cpr->maxRangeRadius-1;
2239	unsigned int accIntervals = 2*(i+1);
2240	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2241
2242	if(powerOf2<32)
2243	powerOf2 = 32;
2244
2245	free(intervals);
2246	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
2247	return powerOf2;
2248	}
2249
2250	unsigned int optimize_intervals_float_3D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
2251	{
2252	size_t R1 = e1 - s1 + 1;
2253	size_t R2 = e2 - s2 + 1;
2254	size_t R3 = e3 - s3 + 1;
2255
2256	size_t r23 = r2*r3;
2257
2258	size_t i,j,k, index;
2259	unsigned long radiusIndex;
2260	float pred_value = 0, pred_err;
2261	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2262	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2263	size_t totalSampleSize = R1R2R3/confparams_cpr->sampleDistance;
2264	for(i=s1+1;i<=e1;i++)
2265	{
2266	for(j=s2+1;j<=e2;j++)
2267	{
2268	for(k=s3+1;k<=e3;k++)
2269	{
2270	if((i+j+k)%confparams_cpr->sampleDistance==0)
2271	{
2272	index = ir23+jr3+k;
2273	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
2274	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
2275	pred_err = fabs(pred_value - oriData[index]);
2276	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2277	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2278	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2279	intervals[radiusIndex]++;
2280	}
2281	}
2282	}
2283	}
2284	//compute the appropriate number
2285	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2286	size_t sum = 0;
2287	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2288	{
2289	sum += intervals[i];
2290	if(sum>targetCount)
2291	break;
2292	}
2293	if(i>=confparams_cpr->maxRangeRadius)
2294	i = confparams_cpr->maxRangeRadius-1;
2295	unsigned int accIntervals = 2*(i+1);
2296	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2297
2298	if(powerOf2<32)
2299	powerOf2 = 32;
2300
2301	free(intervals);
2302	return powerOf2;
2303	}
2304
2305	unsigned int optimize_intervals_float_4D_subblock(float *oriData, double realPrecision,
2306	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
2307	{
2308	size_t R1 = e1 - s1 + 1;
2309	size_t R2 = e2 - s2 + 1;
2310	size_t R3 = e3 - s3 + 1;
2311	size_t R4 = e4 - s4 + 1;
2312
2313	size_t r34 = r3*r4;
2314	size_t r234 = r2r3r4;
2315
2316	size_t i,j,k,l, index;
2317	unsigned long radiusIndex;
2318	float pred_value = 0, pred_err;
2319	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2320	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2321	size_t totalSampleSize = R1R2R3*R4/confparams_cpr->sampleDistance;
2322	for(i=s1+1;i<=e1;i++)
2323	{
2324	for(j=s2+1;j<=e2;j++)
2325	{
2326	for(k=s3+1;k<=e3;k++)
2327	{
2328	for (l=s4+1;l<=e4;l++)
2329	{
2330	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
2331	{
2332	index = ir234+jr34+k*r4+l;
2333	pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34]
2334	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
2335	pred_err = fabs(pred_value - oriData[index]);
2336	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2337	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2338	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2339	intervals[radiusIndex]++;
2340	}
2341	}
2342	}
2343	}
2344	}
2345	//compute the appropriate number
2346	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2347	size_t sum = 0;
2348	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2349	{
2350	sum += intervals[i];
2351	if(sum>targetCount)
2352	break;
2353	}
2354	if(i>=confparams_cpr->maxRangeRadius)
2355	i = confparams_cpr->maxRangeRadius-1;
2356
2357	unsigned int accIntervals = 2*(i+1);
2358	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2359
2360	if(powerOf2<32)
2361	powerOf2 = 32;
2362
2363	free(intervals);
2364	return powerOf2;
2365	}
2366
2367	TightDataPointStorageF* SZ_compress_float_1D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
2368	size_t r1, size_t s1, size_t e1)
2369	{
2370	size_t dataLength = e1 - s1 + 1;
2371	unsigned int quantization_intervals;
2372	if(exe_params->optQuantMode==1)
2373	quantization_intervals = optimize_intervals_float_1D_subblock(oriData, realPrecision, r1, s1, e1);
2374	else
2375	quantization_intervals = exe_params->intvCapacity;
2376	updateQuantizationInfo(quantization_intervals);
2377
2378	size_t i;
2379	int reqLength;
2380	float medianValue = medianValue_f;
2381	short radExpo = getExponent_float(valueRangeSize/2);
2382
2383	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
2384
2385	int* type = (int) malloc(dataLengthsizeof(int));
2386
2387	float* spaceFillingValue = oriData + s1;
2388
2389	DynamicIntArray *exactLeadNumArray;
2390	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2391
2392	DynamicByteArray *exactMidByteArray;
2393	new_DBA(&exactMidByteArray, DynArrayInitLen);
2394
2395	DynamicIntArray *resiBitArray;
2396	new_DIA(&resiBitArray, DynArrayInitLen);
2397
2398	type[0] = 0;
2399
2400	unsigned char preDataBytes[4];
2401	intToBytes_bigEndian(preDataBytes, 0);
2402
2403	int reqBytesLength = reqLength/8;
2404	int resiBitsLength = reqLength%8;
2405	float last3CmprsData[3] = {0};
2406
2407	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
2408	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2409
2410	//add the first data
2411	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2412	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2413	memcpy(preDataBytes,vce->curBytes,4);
2414	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2415	listAdd_float(last3CmprsData, vce->data);
2416
2417	//add the second data
2418	type[1] = 0;
2419	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2420	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2421	memcpy(preDataBytes,vce->curBytes,4);
2422	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2423	listAdd_float(last3CmprsData, vce->data);
2424
2425	int state;
2426	double checkRadius;
2427	float curData;
2428	float pred;
2429	float predAbsErr;
2430	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
2431	double interval = 2*realPrecision;
2432
2433	for(i=2;i<dataLength;i++)
2434	{
2435	curData = spaceFillingValue[i];
2436	pred = 2*last3CmprsData[0] - last3CmprsData[1];
2437	predAbsErr = fabs(curData - pred);
2438	if(predAbsErr<=checkRadius)
2439	{
2440	state = (predAbsErr/realPrecision+1)/2;
2441	if(curData>=pred)
2442	{
2443	type[i] = exe_params->intvRadius+state;
2444	pred = pred + state*interval;
2445	}
2446	else
2447	{
2448	type[i] = exe_params->intvRadius-state;
2449	pred = pred - state*interval;
2450	}
2451
2452	listAdd_float(last3CmprsData, pred);
2453	continue;
2454	}
2455
2456	//unpredictable data processing
2457	type[i] = 0;
2458	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2459	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2460	memcpy(preDataBytes,vce->curBytes,4);
2461	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2462
2463	listAdd_float(last3CmprsData, vce->data);
2464	}
2465
2466	size_t exactDataNum = exactLeadNumArray->size;
2467
2468	TightDataPointStorageF* tdps;
2469
2470	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
2471	type, exactMidByteArray->array, exactMidByteArray->size,
2472	exactLeadNumArray->array,
2473	resiBitArray->array, resiBitArray->size,
2474	resiBitsLength,
2475	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2476
2477	//free memory
2478	free_DIA(exactLeadNumArray);
2479	free_DIA(resiBitArray);
2480	free(type);
2481	free(vce);
2482	free(lce);
2483	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2484
2485	return tdps;
2486	}
2487
2488	TightDataPointStorageF* SZ_compress_float_2D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
2489	size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
2490	{
2491	unsigned int quantization_intervals;
2492	if(exe_params->optQuantMode==1)
2493	{
2494	quantization_intervals = optimize_intervals_float_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2);
2495	updateQuantizationInfo(quantization_intervals);
2496	}
2497	else
2498	quantization_intervals = exe_params->intvCapacity;
2499
2500	size_t i,j;
2501	int reqLength;
2502	float pred1D, pred2D;
2503	float diff = 0.0;
2504	double itvNum = 0;
2505	float P0, P1;
2506
2507	size_t R1 = e1 - s1 + 1;
2508	size_t R2 = e2 - s2 + 1;
2509	size_t dataLength = R1*R2;
2510
2511	P0 = (float)malloc(R2sizeof(float));
2512	memset(P0, 0, R2*sizeof(float));
2513	P1 = (float)malloc(R2sizeof(float));
2514	memset(P1, 0, R2*sizeof(float));
2515
2516	float medianValue = medianValue_f;
2517	short radExpo = getExponent_float(valueRangeSize/2);
2518	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
2519
2520	int* type = (int) malloc(dataLengthsizeof(int));
2521
2522	float* spaceFillingValue = oriData; //
2523
2524	DynamicIntArray *exactLeadNumArray;
2525	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2526
2527	DynamicByteArray *exactMidByteArray;
2528	new_DBA(&exactMidByteArray, DynArrayInitLen);
2529
2530	DynamicIntArray *resiBitArray;
2531	new_DIA(&resiBitArray, DynArrayInitLen);
2532
2533	unsigned char preDataBytes[4];
2534	intToBytes_bigEndian(preDataBytes, 0);
2535
2536	int reqBytesLength = reqLength/8;
2537	int resiBitsLength = reqLength%8;
2538
2539	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
2540	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2541
2542	/* Process Row-s1 data s2*/
2543	size_t gIndex;
2544	size_t lIndex;
2545
2546	gIndex = s1*r2+s2;
2547	lIndex = 0;
2548
2549	type[lIndex] = 0;
2550	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2551	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2552	memcpy(preDataBytes,vce->curBytes,4);
2553	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2554	P1[0] = vce->data;
2555
2556	/* Process Row-s1 data s2+1*/
2557	gIndex = s1*r2+(s2+1);
2558	lIndex = 1;
2559
2560	pred1D = P1[0];
2561	diff = spaceFillingValue[gIndex] - pred1D;
2562
2563	itvNum = fabs(diff)/realPrecision + 1;
2564
2565	if (itvNum < exe_params->intvCapacity)
2566	{
2567	if (diff < 0) itvNum = -itvNum;
2568	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2569	P1[1] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2570	}
2571	else
2572	{
2573	type[lIndex] = 0;
2574	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2575	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2576	memcpy(preDataBytes,vce->curBytes,4);
2577	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2578	P1[1] = vce->data;
2579	}
2580
2581	/* Process Row-s1 data s2+2 --> data e2 */
2582	for (j = 2; j < R2; j++)
2583	{
2584	gIndex = s1*r2+(s2+j);
2585	lIndex = j;
2586
2587	pred1D = 2*P1[j-1] - P1[j-2];
2588	diff = spaceFillingValue[gIndex] - pred1D;
2589
2590	itvNum = fabs(diff)/realPrecision + 1;
2591
2592	if (itvNum < exe_params->intvCapacity)
2593	{
2594	if (diff < 0) itvNum = -itvNum;
2595	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2596	P1[j] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2597	}
2598	else
2599	{
2600	type[lIndex] = 0;
2601	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2602	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2603	memcpy(preDataBytes,vce->curBytes,4);
2604	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2605	P1[j] = vce->data;
2606	}
2607	}
2608
2609	/* Process Row-s1+1 --> Row-e1 */
2610	for (i = 1; i < R1; i++)
2611	{
2612	/* Process row-s1+i data s2 */
2613	gIndex = (s1+i)*r2+s2;
2614	lIndex = i*R2;
2615
2616	pred1D = P1[0];
2617	diff = spaceFillingValue[gIndex] - pred1D;
2618
2619	itvNum = fabs(diff)/realPrecision + 1;
2620
2621	if (itvNum < exe_params->intvCapacity)
2622	{
2623	if (diff < 0) itvNum = -itvNum;
2624	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2625	P0[0] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2626	}
2627	else
2628	{
2629	type[lIndex] = 0;
2630	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2631	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2632	memcpy(preDataBytes,vce->curBytes,4);
2633	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2634	P0[0] = vce->data;
2635	}
2636
2637	/* Process row-s1+i data s2+1 --> e2 */
2638	for (j = 1; j < R2; j++)
2639	{
2640	gIndex = (s1+i)*r2+(s2+j);
2641	lIndex = i*R2+j;
2642
2643	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
2644
2645	pred2D = P0[j-1] + P1[j] - P1[j-1];
2646
2647	diff = spaceFillingValue[gIndex] - pred2D;
2648
2649	itvNum = fabs(diff)/realPrecision + 1;
2650
2651	if (itvNum < exe_params->intvCapacity)
2652	{
2653	if (diff < 0) itvNum = -itvNum;
2654	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2655	P0[j] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2656	}
2657	else
2658	{
2659	type[lIndex] = 0;
2660	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2661	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2662	memcpy(preDataBytes,vce->curBytes,4);
2663	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2664	P0[j] = vce->data;
2665	}
2666	}
2667
2668	float *Pt;
2669	Pt = P1;
2670	P1 = P0;
2671	P0 = Pt;
2672	}
2673
2674	free(P0);
2675	free(P1);
2676	size_t exactDataNum = exactLeadNumArray->size;
2677
2678	TightDataPointStorageF* tdps;
2679
2680	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
2681	type, exactMidByteArray->array, exactMidByteArray->size,
2682	exactLeadNumArray->array,
2683	resiBitArray->array, resiBitArray->size,
2684	resiBitsLength,
2685	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2686
2687	//free memory
2688	free_DIA(exactLeadNumArray);
2689	free_DIA(resiBitArray);
2690	free(type);
2691	free(vce);
2692	free(lce);
2693	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2694
2695	return tdps;
2696	}
2697
2698	TightDataPointStorageF* SZ_compress_float_3D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
2699	size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
2700	{
2701	unsigned int quantization_intervals;
2702	if(exe_params->optQuantMode==1)
2703	{
2704	quantization_intervals = optimize_intervals_float_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3);
2705	updateQuantizationInfo(quantization_intervals);
2706	}
2707	else
2708	quantization_intervals = exe_params->intvCapacity;
2709
2710	size_t i,j,k;
2711	int reqLength;
2712	float pred1D, pred2D, pred3D;
2713	float diff = 0.0;
2714	double itvNum = 0;
2715	float P0, P1;
2716
2717	size_t R1 = e1 - s1 + 1;
2718	size_t R2 = e2 - s2 + 1;
2719	size_t R3 = e3 - s3 + 1;
2720	size_t dataLength = R1R2R3;
2721
2722	size_t r23 = r2*r3;
2723	size_t R23 = R2*R3;
2724
2725	P0 = (float)malloc(R23sizeof(float));
2726	P1 = (float)malloc(R23sizeof(float));
2727
2728	float medianValue = medianValue_f;
2729	short radExpo = getExponent_float(valueRangeSize/2);
2730	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
2731
2732	int* type = (int) malloc(dataLengthsizeof(int));
2733	//type[dataLength]=0;
2734
2735	float* spaceFillingValue = oriData; //
2736
2737	DynamicIntArray *exactLeadNumArray;
2738	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2739
2740	DynamicByteArray *exactMidByteArray;
2741	new_DBA(&exactMidByteArray, DynArrayInitLen);
2742
2743	DynamicIntArray *resiBitArray;
2744	new_DIA(&resiBitArray, DynArrayInitLen);
2745
2746	unsigned char preDataBytes[4];
2747	intToBytes_bigEndian(preDataBytes, 0);
2748
2749	int reqBytesLength = reqLength/8;
2750	int resiBitsLength = reqLength%8;
2751
2752	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
2753	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2754
2755
2756	/////////////////////////// Process layer-s1 ///////////////////////////
2757	/* Process Row-s2 data s3*/
2758	size_t gIndex; //global index
2759	size_t lIndex; //local index
2760	size_t index2D; //local 2D index
2761
2762	gIndex = s1r23+s2r3+s3;
2763	lIndex = 0;
2764	index2D = 0;
2765
2766	type[lIndex] = 0;
2767	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2768	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2769	memcpy(preDataBytes,vce->curBytes,4);
2770	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2771	P1[index2D] = vce->data;
2772
2773	/* Process Row-s2 data s3+1*/
2774	gIndex = s1r23+s2r3+s3+1;
2775	lIndex = 1;
2776	index2D = 1;
2777
2778	pred1D = P1[index2D-1];
2779	diff = spaceFillingValue[gIndex] - pred1D;
2780
2781	itvNum = fabs(diff)/realPrecision + 1;
2782
2783	if (itvNum < exe_params->intvCapacity)
2784	{
2785	if (diff < 0) itvNum = -itvNum;
2786	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2787	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2788	}
2789	else
2790	{
2791	type[lIndex] = 0;
2792	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2793	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2794	memcpy(preDataBytes,vce->curBytes,4);
2795	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2796	P1[index2D] = vce->data;
2797	}
2798
2799	/* Process Row-s2 data s3+2 --> data e3 */
2800	for (j = 2; j < R3; j++)
2801	{
2802	gIndex = s1r23+s2r3+s3+j;
2803	lIndex = j;
2804	index2D = j;
2805
2806	pred1D = 2*P1[index2D-1] - P1[index2D-2];
2807	diff = spaceFillingValue[gIndex] - pred1D;
2808
2809	itvNum = fabs(diff)/realPrecision + 1;
2810
2811	if (itvNum < exe_params->intvCapacity)
2812	{
2813	if (diff < 0) itvNum = -itvNum;
2814	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2815	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2816	}
2817	else
2818	{
2819	type[lIndex] = 0;
2820	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2821	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2822	memcpy(preDataBytes,vce->curBytes,4);
2823	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2824	P1[index2D] = vce->data;
2825	}
2826	}
2827
2828	/* Process Row-s2+1 --> Row-e2 */
2829	for (i = 1; i < R2; i++)
2830	{
2831	/* Process row-s2+i data s3 */
2832	gIndex = s1r23+(s2+i)r3+s3;
2833	lIndex = i*R3;
2834	index2D = i*R3;
2835
2836	pred1D = P1[index2D-R3];
2837	diff = spaceFillingValue[gIndex] - pred1D;
2838
2839	itvNum = fabs(diff)/realPrecision + 1;
2840
2841	if (itvNum < exe_params->intvCapacity)
2842	{
2843	if (diff < 0) itvNum = -itvNum;
2844	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2845	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2846	}
2847	else
2848	{
2849	type[lIndex] = 0;
2850	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2851	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2852	memcpy(preDataBytes,vce->curBytes,4);
2853	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2854	P1[index2D] = vce->data;
2855	}
2856
2857	/* Process row-s2+i data s3+1 --> data e3*/
2858	for (j = 1; j < R3; j++)
2859	{
2860	gIndex = s1r23+(s2+i)r3+s3+j;
2861	lIndex = i*R3+j;
2862	index2D = i*R3+j;
2863
2864	pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1];
2865	diff = spaceFillingValue[gIndex] - pred2D;
2866
2867	itvNum = fabs(diff)/realPrecision + 1;
2868
2869	if (itvNum < exe_params->intvCapacity)
2870	{
2871	if (diff < 0) itvNum = -itvNum;
2872	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2873	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2874	}
2875	else
2876	{
2877	type[lIndex] = 0;
2878	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2879	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2880	memcpy(preDataBytes,vce->curBytes,4);
2881	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2882	P1[index2D] = vce->data;
2883	}
2884	}
2885	}
2886
2887
2888	/////////////////////////// Process layer-s1+1 --> layer-e1 ///////////////////////////
2889
2890	for (k = 1; k < R1; k++)
2891	{
2892	/* Process Row-s2 data s3*/
2893	gIndex = (s1+k)r23+s2r3+s3;
2894	lIndex = k*R23;
2895	index2D = 0;
2896
2897	pred1D = P1[index2D];
2898	diff = spaceFillingValue[gIndex] - pred1D;
2899
2900	itvNum = fabs(diff)/realPrecision + 1;
2901
2902	if (itvNum < exe_params->intvCapacity)
2903	{
2904	if (diff < 0) itvNum = -itvNum;
2905	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2906	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2907	}
2908	else
2909	{
2910	type[lIndex] = 0;
2911	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2912	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2913	memcpy(preDataBytes,vce->curBytes,4);
2914	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2915	P0[index2D] = vce->data;
2916	}
2917
2918	/* Process Row-s2 data s3+1 --> data e3 */
2919	for (j = 1; j < R3; j++)
2920	{
2921	gIndex = (s1+k)r23+s2r3+s3+j;
2922	lIndex = k*R23+j;
2923	index2D = j;
2924
2925	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
2926	diff = spaceFillingValue[gIndex] - pred2D;
2927
2928	itvNum = fabs(diff)/realPrecision + 1;
2929
2930	if (itvNum < exe_params->intvCapacity)
2931	{
2932	if (diff < 0) itvNum = -itvNum;
2933	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2934	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2935	}
2936	else
2937	{
2938	type[lIndex] = 0;
2939	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2940	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2941	memcpy(preDataBytes,vce->curBytes,4);
2942	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2943	P0[index2D] = vce->data;
2944	}
2945	}
2946
2947	/* Process Row-s2+1 --> Row-e2 */
2948	for (i = 1; i < R2; i++)
2949	{
2950	/* Process Row-s2+i data s3 */
2951	gIndex = (s1+k)r23+(s2+i)r3+s3;
2952	lIndex = kR23+iR3;
2953	index2D = i*R3;
2954
2955	pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3];
2956	diff = spaceFillingValue[gIndex] - pred2D;
2957
2958	itvNum = fabs(diff)/realPrecision + 1;
2959
2960	if (itvNum < exe_params->intvCapacity)
2961	{
2962	if (diff < 0) itvNum = -itvNum;
2963	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2964	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2965	}
2966	else
2967	{
2968	type[lIndex] = 0;
2969	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2970	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2971	memcpy(preDataBytes,vce->curBytes,4);
2972	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2973	P0[index2D] = vce->data;
2974	}
2975
2976	/* Process Row-s2+i data s3+1 --> data e3 */
2977	for (j = 1; j < R3; j++)
2978	{
2979	gIndex = (s1+k)r23+(s2+i)r3+s3+j;
2980	lIndex = kR23+iR3+j;
2981	index2D = i*R3+j;
2982
2983	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
2984
2985	pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1];
2986	diff = spaceFillingValue[gIndex] - pred3D;
2987
2988	itvNum = fabs(diff)/realPrecision + 1;
2989
2990	if (itvNum < exe_params->intvCapacity)
2991	{
2992	if (diff < 0) itvNum = -itvNum;
2993	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2994	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2995	}
2996	else
2997	{
2998	type[lIndex] = 0;
2999	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3000	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3001	memcpy(preDataBytes,vce->curBytes,4);
3002	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3003	P0[index2D] = vce->data;
3004	}
3005	}
3006	}
3007
3008	float *Pt;
3009	Pt = P1;
3010	P1 = P0;
3011	P0 = Pt;
3012	}
3013
3014	free(P0);
3015	free(P1);
3016	size_t exactDataNum = exactLeadNumArray->size;
3017
3018	TightDataPointStorageF* tdps;
3019
3020	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
3021	type, exactMidByteArray->array, exactMidByteArray->size,
3022	exactLeadNumArray->array,
3023	resiBitArray->array, resiBitArray->size,
3024	resiBitsLength,
3025	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
3026
3027	//free memory
3028	free_DIA(exactLeadNumArray);
3029	free_DIA(resiBitArray);
3030	free(type);
3031	free(vce);
3032	free(lce);
3033	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
3034
3035	return tdps;
3036	}
3037
3038	TightDataPointStorageF* SZ_compress_float_4D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
3039	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
3040	{
3041	unsigned int quantization_intervals;
3042	if(exe_params->optQuantMode==1)
3043	{
3044	quantization_intervals = optimize_intervals_float_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4);
3045	updateQuantizationInfo(quantization_intervals);
3046	}
3047	else
3048	quantization_intervals = exe_params->intvCapacity;
3049
3050	size_t i,j,k;
3051	int reqLength;
3052	float pred1D, pred2D, pred3D;
3053	float diff = 0.0;
3054	double itvNum = 0;
3055	float P0, P1;
3056
3057	size_t R1 = e1 - s1 + 1;
3058	size_t R2 = e2 - s2 + 1;
3059	size_t R3 = e3 - s3 + 1;
3060	size_t R4 = e4 - s4 + 1;
3061
3062	size_t dataLength = R1R2R3*R4;
3063
3064	size_t r34 = r3*r4;
3065	size_t r234 = r2r3r4;
3066	size_t R34 = R3*R4;
3067	size_t R234 = R2R3R4;
3068
3069	P0 = (float)malloc(R34sizeof(float));
3070	P1 = (float)malloc(R34sizeof(float));
3071
3072	float medianValue = medianValue_f;
3073	short radExpo = getExponent_float(valueRangeSize/2);
3074	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
3075
3076	int* type = (int) malloc(dataLengthsizeof(int));
3077
3078	float* spaceFillingValue = oriData; //
3079
3080	DynamicIntArray *exactLeadNumArray;
3081	new_DIA(&exactLeadNumArray, DynArrayInitLen);
3082
3083	DynamicByteArray *exactMidByteArray;
3084	new_DBA(&exactMidByteArray, DynArrayInitLen);
3085
3086	DynamicIntArray *resiBitArray;
3087	new_DIA(&resiBitArray, DynArrayInitLen);
3088
3089	unsigned char preDataBytes[4];
3090	intToBytes_bigEndian(preDataBytes, 0);
3091
3092	int reqBytesLength = reqLength/8;
3093	int resiBitsLength = reqLength%8;
3094
3095	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
3096	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
3097
3098
3099	size_t l;
3100	for (l = 0; l < R1; l++)
3101	{
3102
3103	/////////////////////////// Process layer-s2 ///////////////////////////
3104	/* Process Row-s3 data s4*/
3105	size_t gIndex; //global index
3106	size_t lIndex; //local index
3107	size_t index2D; //local 2D index
3108
3109	gIndex = (s1+l)r234+s2r34+s3*r4+s4;
3110	lIndex = l*R234;
3111	index2D = 0;
3112
3113	type[lIndex] = 0;
3114	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3115	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3116	memcpy(preDataBytes,vce->curBytes,4);
3117	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3118	P1[index2D] = vce->data;
3119
3120	/* Process Row-s3 data s4+1*/
3121	gIndex = (s1+l)r234+s2r34+s3*r4+s4+1;
3122	lIndex = l*R234+1;
3123	index2D = 1;
3124
3125	pred1D = P1[index2D-1];
3126	diff = spaceFillingValue[gIndex] - pred1D;
3127
3128	itvNum = fabs(diff)/realPrecision + 1;
3129
3130	if (itvNum < exe_params->intvCapacity)
3131	{
3132	if (diff < 0) itvNum = -itvNum;
3133	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3134	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3135	}
3136	else
3137	{
3138	type[lIndex] = 0;
3139	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3140	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3141	memcpy(preDataBytes,vce->curBytes,4);
3142	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3143	P1[index2D] = vce->data;
3144	}
3145
3146	/* Process Row-s3 data s4+2 --> data e4 */
3147	for (j = 2; j < R4; j++)
3148	{
3149	gIndex = (s1+l)r234+s2r34+s3*r4+s4+j;
3150	lIndex = l*R234+j;
3151	index2D = j;
3152
3153	pred1D = 2*P1[index2D-1] - P1[index2D-2];
3154	diff = spaceFillingValue[gIndex] - pred1D;
3155
3156	itvNum = fabs(diff)/realPrecision + 1;
3157
3158	if (itvNum < exe_params->intvCapacity)
3159	{
3160	if (diff < 0) itvNum = -itvNum;
3161	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3162	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3163	}
3164	else
3165	{
3166	type[lIndex] = 0;
3167	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3168	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3169	memcpy(preDataBytes,vce->curBytes,4);
3170	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3171	P1[index2D] = vce->data;
3172	}
3173	}
3174
3175	/* Process Row-s3+1 --> Row-e3 */
3176	for (i = 1; i < R3; i++)
3177	{
3178	/* Process row-s2+i data s3 */
3179	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4;
3180	lIndex = lR234+iR4;
3181	index2D = i*R4;
3182
3183	pred1D = P1[index2D-R4];
3184	diff = spaceFillingValue[gIndex] - pred1D;
3185
3186	itvNum = fabs(diff)/realPrecision + 1;
3187
3188	if (itvNum < exe_params->intvCapacity)
3189	{
3190	if (diff < 0) itvNum = -itvNum;
3191	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3192	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3193	}
3194	else
3195	{
3196	type[lIndex] = 0;
3197	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3198	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3199	memcpy(preDataBytes,vce->curBytes,4);
3200	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3201	P1[index2D] = vce->data;
3202	}
3203
3204	/* Process row-s3+i data s4+1 --> data e4*/
3205	for (j = 1; j < R4; j++)
3206	{
3207	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4+j;
3208	lIndex = lR234+iR4+j;
3209	index2D = i*R4+j;
3210
3211	pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1];
3212	diff = spaceFillingValue[gIndex] - pred2D;
3213
3214	itvNum = fabs(diff)/realPrecision + 1;
3215
3216	if (itvNum < exe_params->intvCapacity)
3217	{
3218	if (diff < 0) itvNum = -itvNum;
3219	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3220	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3221	}
3222	else
3223	{
3224	type[lIndex] = 0;
3225	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3226	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3227	memcpy(preDataBytes,vce->curBytes,4);
3228	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3229	P1[index2D] = vce->data;
3230	}
3231	}
3232	}
3233
3234
3235	/////////////////////////// Process layer-s2+1 --> layer-e2 ///////////////////////////
3236
3237	for (k = 1; k < R2; k++)
3238	{
3239	/* Process Row-s3 data s4*/
3240	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4;
3241	lIndex = lR234+kR34;
3242	index2D = 0;
3243
3244	pred1D = P1[index2D];
3245	diff = spaceFillingValue[gIndex] - pred1D;
3246
3247	itvNum = fabs(diff)/realPrecision + 1;
3248
3249	if (itvNum < exe_params->intvCapacity)
3250	{
3251	if (diff < 0) itvNum = -itvNum;
3252	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3253	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3254	}
3255	else
3256	{
3257	type[lIndex] = 0;
3258	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3259	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3260	memcpy(preDataBytes,vce->curBytes,4);
3261	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3262	P0[index2D] = vce->data;
3263	}
3264
3265	/* Process Row-s3 data s4+1 --> data e4 */
3266	for (j = 1; j < R4; j++)
3267	{
3268	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4+j;
3269	lIndex = lR234+kR34+j;
3270	index2D = j;
3271
3272	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
3273	diff = spaceFillingValue[gIndex] - pred2D;
3274
3275	itvNum = fabs(diff)/realPrecision + 1;
3276
3277	if (itvNum < exe_params->intvCapacity)
3278	{
3279	if (diff < 0) itvNum = -itvNum;
3280	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3281	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3282	}
3283	else
3284	{
3285	type[lIndex] = 0;
3286	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3287	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3288	memcpy(preDataBytes,vce->curBytes,4);
3289	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3290	P0[index2D] = vce->data;
3291	}
3292	}
3293
3294	/* Process Row-s3+1 --> Row-e3 */
3295	for (i = 1; i < R3; i++)
3296	{
3297	/* Process Row-s3+i data s4 */
3298	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4;
3299	lIndex = lR234+kR34+i*R4;
3300	index2D = i*R4;
3301
3302	pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4];
3303	diff = spaceFillingValue[gIndex] - pred2D;
3304
3305	itvNum = fabs(diff)/realPrecision + 1;
3306
3307	if (itvNum < exe_params->intvCapacity)
3308	{
3309	if (diff < 0) itvNum = -itvNum;
3310	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3311	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3312	}
3313	else
3314	{
3315	type[lIndex] = 0;
3316	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3317	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3318	memcpy(preDataBytes,vce->curBytes,4);
3319	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3320	P0[index2D] = vce->data;
3321	}
3322
3323	/* Process Row-s3+i data s4+1 --> data e4 */
3324	for (j = 1; j < R4; j++)
3325	{
3326	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4+j;
3327	lIndex = lR234+kR34+i*R4+j;
3328	index2D = i*R4+j;
3329
3330	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
3331
3332	pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1];
3333	diff = spaceFillingValue[gIndex] - pred3D;
3334
3335	itvNum = fabs(diff)/realPrecision + 1;
3336
3337	if (itvNum < exe_params->intvCapacity)
3338	{
3339	if (diff < 0) itvNum = -itvNum;
3340	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3341	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3342	}
3343	else
3344	{
3345	type[lIndex] = 0;
3346	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3347	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3348	memcpy(preDataBytes,vce->curBytes,4);
3349	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3350	P0[index2D] = vce->data;
3351	}
3352	}
3353	}
3354
3355	float *Pt;
3356	Pt = P1;
3357	P1 = P0;
3358	P0 = Pt;
3359	}
3360
3361	}
3362
3363	free(P0);
3364	free(P1);
3365	size_t exactDataNum = exactLeadNumArray->size;
3366
3367	TightDataPointStorageF* tdps;
3368
3369	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
3370	type, exactMidByteArray->array, exactMidByteArray->size,
3371	exactLeadNumArray->array,
3372	resiBitArray->array, resiBitArray->size,
3373	resiBitsLength,
3374	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
3375
3376	//free memory
3377	free_DIA(exactLeadNumArray);
3378	free_DIA(resiBitArray);
3379	free(type);
3380	free(vce);
3381	free(lce);
3382	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
3383
3384	return tdps;
3385	}
3386
3387	unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
3388	{
3389	size_t i;
3390	size_t radiusIndex;
3391	size_t r23=r2*r3;
3392	float pred_value = 0, pred_err;
3393	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3394	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3395	size_t totalSampleSize = 0;
3396
3397	size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
3398	size_t offset_count_2;
3399	float * data_pos = oriData + r23 + r3 + offset_count;
3400	size_t n1_count = 1, n2_count = 1; // count i,j sum
3401	size_t len = r1 * r2 * r3;
3402	while(data_pos - oriData < len){
3403	totalSampleSize++;
3404	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
3405	pred_err = fabs(pred_value - *data_pos);
3406	radiusIndex = (pred_err/realPrecision+1)/2;
3407	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3408	{
3409	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3410	}
3411	intervals[radiusIndex]++;
3412	offset_count += confparams_cpr->sampleDistance;
3413	if(offset_count >= r3){
3414	n2_count ++;
3415	if(n2_count == r2){
3416	n1_count ++;
3417	n2_count = 1;
3418	data_pos += r3;
3419	}
3420	offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance;
3421	data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3422	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3423	if(offset_count == 0) offset_count ++;
3424	}
3425	else data_pos += confparams_cpr->sampleDistance;
3426	}
3427	//compute the appropriate number
3428	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3429	size_t sum = 0;
3430	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3431	{
3432	sum += intervals[i];
3433	if(sum>targetCount)
3434	break;
3435	}
3436	if(i>=confparams_cpr->maxRangeRadius)
3437	i = confparams_cpr->maxRangeRadius-1;
3438	unsigned int accIntervals = 2*(i+1);
3439	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3440
3441	if(powerOf2<32)
3442	powerOf2 = 32;
3443	free(intervals);
3444	return powerOf2;
3445	}
3446
3447	size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){
3448
3449	size_t dim0_offset = dim_1 * dim_2;
3450	size_t dim1_offset = dim_2;
3451
3452	// data_pos = block_ori_data;
3453	// for(size_t i=0; i<block_dim_0; i++){
3454	// for(size_t j=0; j<block_dim_1; j++){
3455	// for(size_t k=0; k<block_dim_2; k++){
3456	// sum += *data_pos;
3457	// data_pos ++;
3458	// }
3459	// data_pos += dim1_offset - block_dim_2;
3460	// }
3461	// data_pos += dim0_offset - block_dim_1 * dim1_offset;
3462	// }
3463	// size_t num_elements = block_dim_0 * block_dim_1 * block_dim_2;
3464	// if(num_elements > 0) mean[0] = sum / num_elements;
3465	// else mean[0] = 0.0;
3466	mean[0] = block_ori_data[0];
3467
3468	size_t unpredictable_count = 0;
3469	size_t r1, r2, r3;
3470	r1 = block_dim_0;
3471	r2 = block_dim_1;
3472	r3 = block_dim_2;
3473
3474	float * cur_data_pos = block_ori_data;
3475	float curData;
3476	float pred1D, pred2D, pred3D;
3477	double itvNum;
3478	double diff;
3479	size_t i, j, k;
3480	size_t r23 = r2*r3;
3481	// Process Row-0 data 0
3482	pred1D = mean[0];
3483	curData = *cur_data_pos;
3484	diff = curData - pred1D;
3485	itvNum = fabs(diff)/realPrecision + 1;
3486	if (itvNum < exe_params->intvCapacity){
3487	if (diff < 0) itvNum = -itvNum;
3488	type[0] = (int) (itvNum/2) + exe_params->intvRadius;
3489	P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
3490	//ganrantee comporession error against the case of machine-epsilon
3491	if(fabs(curData-P1[0])>realPrecision){
3492	type[0] = 0;
3493	P1[0] = curData;
3494	unpredictable_data[unpredictable_count ++] = curData;
3495	}
3496	}
3497	else{
3498	type[0] = 0;
3499	P1[0] = curData;
3500	unpredictable_data[unpredictable_count ++] = curData;
3501	}
3502
3503	/* Process Row-0 data 1*/
3504	pred1D = P1[0];
3505	curData = cur_data_pos[1];
3506	diff = curData - pred1D;
3507	itvNum = fabs(diff)/realPrecision + 1;
3508	if (itvNum < exe_params->intvCapacity){
3509	if (diff < 0) itvNum = -itvNum;
3510	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
3511	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
3512	//ganrantee comporession error against the case of machine-epsilon
3513	if(fabs(curData-P1[1])>realPrecision){
3514	type[1] = 0;
3515	P1[1] = curData;
3516	unpredictable_data[unpredictable_count ++] = curData;
3517	}
3518	}
3519	else{
3520	type[1] = 0;
3521	P1[1] = curData;
3522	unpredictable_data[unpredictable_count ++] = curData;
3523	}
3524	/* Process Row-0 data 2 --> data r3-1 */
3525	for (j = 2; j < r3; j++){
3526	pred1D = 2*P1[j-1] - P1[j-2];
3527	curData = cur_data_pos[j];
3528	diff = curData - pred1D;
3529	itvNum = fabs(diff)/realPrecision + 1;
3530	if (itvNum < exe_params->intvCapacity){
3531	if (diff < 0) itvNum = -itvNum;
3532	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
3533	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
3534	//ganrantee comporession error against the case of machine-epsilon
3535	if(fabs(curData-P1[j])>realPrecision){
3536	type[j] = 0;
3537	P1[j] = curData;
3538	unpredictable_data[unpredictable_count ++] = curData;
3539	}
3540	}
3541	else{
3542	type[j] = 0;
3543	P1[j] = curData;
3544	unpredictable_data[unpredictable_count ++] = curData;
3545	}
3546	}
3547	cur_data_pos += dim1_offset;
3548
3549	/* Process Row-1 --> Row-r2-1 */
3550	size_t index;
3551	for (i = 1; i < r2; i++)
3552	{
3553	/* Process row-i data 0 */
3554	index = i*r3;
3555	pred1D = P1[index-r3];
3556	curData = *cur_data_pos;
3557	diff = curData - pred1D;
3558
3559	itvNum = fabs(diff)/realPrecision + 1;
3560
3561	if (itvNum < exe_params->intvCapacity)
3562	{
3563	if (diff < 0) itvNum = -itvNum;
3564	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3565	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3566
3567	//ganrantee comporession error against the case of machine-epsilon
3568	if(fabs(curData-P1[index])>realPrecision)
3569	{
3570	type[index] = 0;
3571	P1[index] = curData;
3572	unpredictable_data[unpredictable_count ++] = curData;
3573	}
3574	}
3575	else
3576	{
3577	type[index] = 0;
3578	P1[index] = curData;
3579	unpredictable_data[unpredictable_count ++] = curData;
3580	}
3581
3582	/* Process row-i data 1 --> data r3-1*/
3583	for (j = 1; j < r3; j++)
3584	{
3585	index = i*r3+j;
3586	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
3587
3588	curData = cur_data_pos[j];
3589	diff = curData - pred2D;
3590
3591	itvNum = fabs(diff)/realPrecision + 1;
3592
3593	if (itvNum < exe_params->intvCapacity)
3594	{
3595	if (diff < 0) itvNum = -itvNum;
3596	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3597	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3598
3599	//ganrantee comporession error against the case of machine-epsilon
3600	if(fabs(curData-P1[index])>realPrecision)
3601	{
3602	type[index] = 0;
3603	P1[index] = curData;
3604	unpredictable_data[unpredictable_count ++] = curData;
3605	}
3606	}
3607	else
3608	{
3609	type[index] = 0;
3610	P1[index] = curData;
3611	unpredictable_data[unpredictable_count ++] = curData;
3612	}
3613	}
3614	cur_data_pos += dim1_offset;
3615	}
3616	cur_data_pos += dim0_offset - r2 * dim1_offset;
3617
3618	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
3619
3620	for (k = 1; k < r1; k++)
3621	{
3622	/* Process Row-0 data 0*/
3623	index = k*r23;
3624	pred1D = P1[0];
3625	curData = *cur_data_pos;
3626	diff = curData - pred1D;
3627	itvNum = fabs(diff)/realPrecision + 1;
3628	if (itvNum < exe_params->intvCapacity)
3629	{
3630	if (diff < 0) itvNum = -itvNum;
3631	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3632	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3633	//ganrantee comporession error against the case of machine-epsilon
3634	if(fabs(curData-P0[0])>realPrecision)
3635	{
3636	type[index] = 0;
3637	P0[0] = curData;
3638	unpredictable_data[unpredictable_count ++] = curData;
3639	}
3640	}
3641	else
3642	{
3643	type[index] = 0;
3644	P0[0] = curData;
3645	unpredictable_data[unpredictable_count ++] = curData;
3646	}
3647	/* Process Row-0 data 1 --> data r3-1 */
3648	for (j = 1; j < r3; j++)
3649	{
3650	//index = kr2r3+j;
3651	index ++;
3652	pred2D = P0[j-1] + P1[j] - P1[j-1];
3653	curData = cur_data_pos[j];
3654	diff = curData - pred2D;
3655	itvNum = fabs(diff)/realPrecision + 1;
3656	if (itvNum < exe_params->intvCapacity)
3657	{
3658	if (diff < 0) itvNum = -itvNum;
3659	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3660	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3661	//ganrantee comporession error against the case of machine-epsilon
3662	if(fabs(curData-P0[j])>realPrecision)
3663	{
3664	type[index] = 0;
3665	P0[j] = curData;
3666	unpredictable_data[unpredictable_count ++] = curData;
3667	}
3668	}
3669	else
3670	{
3671	type[index] = 0;
3672	P0[j] = curData;
3673	unpredictable_data[unpredictable_count ++] = curData;
3674	}
3675	}
3676
3677	cur_data_pos += dim1_offset;
3678	/* Process Row-1 --> Row-r2-1 */
3679	size_t index2D;
3680	for (i = 1; i < r2; i++)
3681	{
3682	/* Process Row-i data 0 */
3683	index = kr23 + ir3;
3684	index2D = i*r3;
3685	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
3686	curData = *cur_data_pos;
3687	diff = curData - pred2D;
3688
3689	itvNum = fabs(diff)/realPrecision + 1;
3690
3691	if (itvNum < exe_params->intvCapacity)
3692	{
3693	if (diff < 0) itvNum = -itvNum;
3694	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3695	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3696	//ganrantee comporession error against the case of machine-epsilon
3697	if(fabs(curData-P0[index2D])>realPrecision)
3698	{
3699	type[index] = 0;
3700	P0[index2D] = curData;
3701	unpredictable_data[unpredictable_count ++] = curData;
3702	}
3703	}
3704	else
3705	{
3706	type[index] = 0;
3707	P0[index2D] = curData;
3708	unpredictable_data[unpredictable_count ++] = curData;
3709	}
3710
3711	/* Process Row-i data 1 --> data r3-1 */
3712	for (j = 1; j < r3; j++)
3713	{
3714	//index = kr2r3 + i*r3 + j;
3715	index ++;
3716	index2D = i*r3 + j;
3717	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
3718	curData = cur_data_pos[j];
3719	diff = curData - pred3D;
3720
3721	itvNum = fabs(diff)/realPrecision + 1;
3722
3723	if (itvNum < exe_params->intvCapacity)
3724	{
3725	if (diff < 0) itvNum = -itvNum;
3726	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3727	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3728
3729	//ganrantee comporession error against the case of machine-epsilon
3730	if(fabs(curData-P0[index2D])>realPrecision)
3731	{
3732	type[index] = 0;
3733	P0[index2D] = curData;
3734	unpredictable_data[unpredictable_count ++] = curData;
3735	}
3736	}
3737	else
3738	{
3739	type[index] = 0;
3740	P0[index2D] = curData;
3741	unpredictable_data[unpredictable_count ++] = curData;
3742	}
3743	}
3744	cur_data_pos += dim1_offset;
3745	}
3746	cur_data_pos += dim0_offset - r2 * dim1_offset;
3747	float *Pt;
3748	Pt = P1;
3749	P1 = P0;
3750	P0 = Pt;
3751	}
3752
3753	return unpredictable_count;
3754	}
3755
3756	unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r2, double realPrecision)
3757	{
3758	size_t i;
3759	size_t radiusIndex;
3760	float pred_value = 0, pred_err;
3761	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3762	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3763	size_t totalSampleSize = 0;
3764
3765	size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
3766	size_t offset_count_2;
3767	float * data_pos = oriData + r2 + offset_count;
3768	size_t n1_count = 1; // count i sum
3769	size_t len = r1 * r2;
3770	while(data_pos - oriData < len){
3771	totalSampleSize++;
3772	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
3773	pred_err = fabs(pred_value - *data_pos);
3774	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3775	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3776	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3777	intervals[radiusIndex]++;
3778
3779	offset_count += confparams_cpr->sampleDistance;
3780	if(offset_count >= r2){
3781	n1_count ++;
3782	offset_count_2 = n1_count % confparams_cpr->sampleDistance;
3783	data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3784	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3785	if(offset_count == 0) offset_count ++;
3786	}
3787	else data_pos += confparams_cpr->sampleDistance;
3788	}
3789
3790	//compute the appropriate number
3791	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3792	size_t sum = 0;
3793	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3794	{
3795	sum += intervals[i];
3796	if(sum>targetCount)
3797	break;
3798	}
3799	if(i>=confparams_cpr->maxRangeRadius)
3800	i = confparams_cpr->maxRangeRadius-1;
3801	unsigned int accIntervals = 2*(i+1);
3802	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3803
3804	if(powerOf2<32)
3805	powerOf2 = 32;
3806
3807	free(intervals);
3808	return powerOf2;
3809	}
3810
3811	unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision)
3812	{
3813	size_t i = 0, radiusIndex;
3814	float pred_value = 0, pred_err;
3815	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3816	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3817	size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance;
3818
3819	float * data_pos = oriData + 2;
3820	while(data_pos - oriData < dataLength){
3821	totalSampleSize++;
3822	pred_value = data_pos[-1];
3823	pred_err = fabs(pred_value - *data_pos);
3824	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3825	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3826	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3827	intervals[radiusIndex]++;
3828
3829	data_pos += confparams_cpr->sampleDistance;
3830	}
3831	//compute the appropriate number
3832	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3833	size_t sum = 0;
3834	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3835	{
3836	sum += intervals[i];
3837	if(sum>targetCount)
3838	break;
3839	}
3840	if(i>=confparams_cpr->maxRangeRadius)
3841	i = confparams_cpr->maxRangeRadius-1;
3842
3843	unsigned int accIntervals = 2*(i+1);
3844	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3845
3846	if(powerOf2<32)
3847	powerOf2 = 32;
3848
3849	free(intervals);
3850	return powerOf2;
3851	}
3852
3853	size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data){
3854
3855	mean[0] = block_ori_data[0];
3856	unsigned short unpredictable_count = 0;
3857
3858	float curData;
3859	double itvNum;
3860	double diff;
3861	float last_over_thres = mean[0];
3862	float pred1D;
3863	size_t type_index = 0;
3864	float * data_pos = block_ori_data;
3865	for(size_t i=0; i<block_dim_0; i++){
3866	curData = *data_pos;
3867
3868	pred1D = last_over_thres;
3869	diff = curData - pred1D;
3870	itvNum = fabs(diff)/realPrecision + 1;
3871	if (itvNum < exe_params->intvCapacity){
3872	if (diff < 0) itvNum = -itvNum;
3873	type[type_index] = (int) (itvNum/2) + exe_params->intvRadius;
3874	last_over_thres = pred1D + 2 * (type[type_index] - exe_params->intvRadius) * realPrecision;
3875	if(fabs(curData-last_over_thres)>realPrecision){
3876	type[type_index] = 0;
3877	last_over_thres = curData;
3878	unpredictable_data[unpredictable_count ++] = curData;
3879	}
3880
3881	}
3882	else{
3883	type[type_index] = 0;
3884	unpredictable_data[unpredictable_count ++] = curData;
3885	last_over_thres = curData;
3886	}
3887	type_index ++;
3888	data_pos ++;
3889	}
3890	return unpredictable_count;
3891
3892	}
3893
3894	size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t block_dim_0, size_t block_dim_1, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){
3895
3896	size_t dim0_offset = dim_1;
3897	mean[0] = block_ori_data[0];
3898
3899	size_t unpredictable_count = 0;
3900	size_t r1, r2;
3901	r1 = block_dim_0;
3902	r2 = block_dim_1;
3903
3904	float * cur_data_pos = block_ori_data;
3905	float curData;
3906	float pred1D, pred2D;
3907	double itvNum;
3908	double diff;
3909	size_t i, j;
3910	/* Process Row-0 data 0*/
3911	curData = *cur_data_pos;
3912	pred1D = mean[0];
3913	diff = curData - pred1D;
3914	itvNum = fabs(diff)/realPrecision + 1;
3915	if (itvNum < exe_params->intvCapacity){
3916	if (diff < 0) itvNum = -itvNum;
3917	type[0] = (int) (itvNum/2) + exe_params->intvRadius;
3918	P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
3919	//ganrantee comporession error against the case of machine-epsilon
3920	if(fabs(curData-P1[0])>realPrecision){
3921	type[0] = 0;
3922	P1[0] = curData;
3923	unpredictable_data[unpredictable_count ++] = curData;
3924	}
3925	}
3926	else{
3927	type[0] = 0;
3928	P1[0] = curData;
3929	unpredictable_data[unpredictable_count ++] = curData;
3930	}
3931
3932	/* Process Row-0 data 1*/
3933	curData = cur_data_pos[1];
3934	pred1D = P1[0];
3935	diff = curData - pred1D;
3936	itvNum = fabs(diff)/realPrecision + 1;
3937	if (itvNum < exe_params->intvCapacity){
3938	if (diff < 0) itvNum = -itvNum;
3939	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
3940	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
3941	//ganrantee comporession error against the case of machine-epsilon
3942	if(fabs(curData-P1[1])>realPrecision){
3943	type[1] = 0;
3944	P1[1] = curData;
3945	unpredictable_data[unpredictable_count ++] = curData;
3946	}
3947	}
3948	else{
3949	type[1] = 0;
3950	P1[1] = curData;
3951	unpredictable_data[unpredictable_count ++] = curData;
3952	}
3953
3954	/* Process Row-0 data 2 --> data r2-1 */
3955	for (j = 2; j < r2; j++)
3956	{
3957	curData = cur_data_pos[j];
3958	pred1D = 2*P1[j-1] - P1[j-2];
3959	diff = curData - pred1D;
3960	itvNum = fabs(diff)/realPrecision + 1;
3961	if (itvNum < exe_params->intvCapacity){
3962	if (diff < 0) itvNum = -itvNum;
3963	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
3964	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
3965	//ganrantee comporession error against the case of machine-epsilon
3966	if(fabs(curData-P1[j])>realPrecision){
3967	type[j] = 0;
3968	P1[j] = curData;
3969	unpredictable_data[unpredictable_count ++] = curData;
3970	}
3971	}
3972	else{
3973	type[j] = 0;
3974	P1[j] = curData;
3975	unpredictable_data[unpredictable_count ++] = curData;
3976	}
3977	}
3978	cur_data_pos += dim0_offset;
3979	/* Process Row-1 --> Row-r1-1 */
3980	size_t index;
3981	for (i = 1; i < r1; i++)
3982	{
3983	/* Process row-i data 0 */
3984	index = i*r2;
3985	curData = *cur_data_pos;
3986	pred1D = P1[0];
3987	diff = curData - pred1D;
3988	itvNum = fabs(diff)/realPrecision + 1;
3989	if (itvNum < exe_params->intvCapacity){
3990	if (diff < 0) itvNum = -itvNum;
3991	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3992	P0[0] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
3993	//ganrantee comporession error against the case of machine-epsilon
3994	if(fabs(curData-P0[0])>realPrecision){
3995	type[index] = 0;
3996	P0[0] = curData;
3997	unpredictable_data[unpredictable_count ++] = curData;
3998	}
3999	}
4000	else{
4001	type[index] = 0;
4002	P0[0] = curData;
4003	unpredictable_data[unpredictable_count ++] = curData;
4004	}
4005
4006	/* Process row-i data 1 --> r2-1*/
4007	for (j = 1; j < r2; j++)
4008	{
4009	index = i*r2+j;
4010	curData = cur_data_pos[j];
4011	pred2D = P0[j-1] + P1[j] - P1[j-1];
4012	diff = curData - pred2D;
4013	itvNum = fabs(diff)/realPrecision + 1;
4014	if (itvNum < exe_params->intvCapacity)
4015	{
4016	if (diff < 0) itvNum = -itvNum;
4017	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
4018	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
4019
4020	//ganrantee comporession error against the case of machine-epsilon
4021	if(fabs(curData-P0[j])>realPrecision)
4022	{
4023	type[index] = 0;
4024	P0[j] = curData;
4025	unpredictable_data[unpredictable_count ++] = curData;
4026	}
4027	}
4028	else
4029	{
4030	type[index] = 0;
4031	P0[j] = curData;
4032	unpredictable_data[unpredictable_count ++] = curData;
4033	}
4034	}
4035	cur_data_pos += dim0_offset;
4036
4037	float *Pt;
4038	Pt = P1;
4039	P1 = P0;
4040	P0 = Pt;
4041	}
4042	return unpredictable_count;
4043	}
4044
/* The above code is for sz 1.4.13; the following code is for sz 2.0 */
4046
4047	unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float oriData, size_t r1, size_t r2, double realPrecision, float dense_pos, float * max_freq, float * mean_freq)
4048	{
4049	float mean = 0.0;
4050	size_t len = r1 * r2;
4051	size_t mean_distance = (int) (sqrt(len));
4052
4053	float * data_pos = oriData;
4054	size_t mean_count = 0;
4055	while(data_pos - oriData < len){
4056	mean += *data_pos;
4057	mean_count ++;
4058	data_pos += mean_distance;
4059	}
4060	if(mean_count > 0) mean /= mean_count;
4061	size_t range = 8192;
4062	size_t radius = 4096;
4063	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
4064	memset(freq_intervals, 0, range*sizeof(size_t));
4065
4066	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
4067	int sampleDistance = confparams_cpr->sampleDistance;
4068	float predThreshold = confparams_cpr->predThreshold;
4069
4070	size_t i;
4071	size_t radiusIndex;
4072	float pred_value = 0, pred_err;
4073	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
4074	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
4075
4076	float mean_diff;
4077	ptrdiff_t freq_index;
4078	size_t freq_count = 0;
4079	size_t n1_count = 1;
4080	size_t offset_count = sampleDistance - 1;
4081	size_t offset_count_2 = 0;
4082	size_t sample_count = 0;
4083	data_pos = oriData + r2 + offset_count;
4084	while(data_pos - oriData < len){
4085	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
4086	pred_err = fabs(pred_value - *data_pos);
4087	if(pred_err < realPrecision) freq_count ++;
4088	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
4089	if(radiusIndex>=maxRangeRadius)
4090	radiusIndex = maxRangeRadius - 1;
4091	intervals[radiusIndex]++;
4092
4093	mean_diff = *data_pos - mean;
4094	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
4095	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
4096	if(freq_index <= 0){
4097	freq_intervals[0] ++;
4098	}
4099	else if(freq_index >= range){
4100	freq_intervals[range - 1] ++;
4101	}
4102	else{
4103	freq_intervals[freq_index] ++;
4104	}
4105	offset_count += sampleDistance;
4106	if(offset_count >= r2){
4107	n1_count ++;
4108	offset_count_2 = n1_count % sampleDistance;
4109	data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
4110	offset_count = (sampleDistance - offset_count_2);
4111	if(offset_count == 0) offset_count ++;
4112	}
4113	else data_pos += sampleDistance;
4114	sample_count ++;
4115	}
4116	max_freq = freq_count 1.0/ sample_count;
4117
4118	//compute the appropriate number
4119	size_t targetCount = sample_count*predThreshold;
4120	size_t sum = 0;
4121	for(i=0;i<maxRangeRadius;i++)
4122	{
4123	sum += intervals[i];
4124	if(sum>targetCount)
4125	break;
4126	}
4127	if(i>=maxRangeRadius)
4128	i = maxRangeRadius-1;
4129	unsigned int accIntervals = 2*(i+1);
4130	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
4131
4132	if(powerOf2<32)
4133	powerOf2 = 32;
4134
4135	// collect frequency
4136	size_t max_sum = 0;
4137	size_t max_index = 0;
4138	size_t tmp_sum;
4139	size_t * freq_pos = freq_intervals + 1;
4140	for(size_t i=1; i<range-2; i++){
4141	tmp_sum = freq_pos[0] + freq_pos[1];
4142	if(tmp_sum > max_sum){
4143	max_sum = tmp_sum;
4144	max_index = i;
4145	}
4146	freq_pos ++;
4147	}
4148	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
4149	mean_freq = max_sum 1.0 / sample_count;
4150
4151	free(freq_intervals);
4152	free(intervals);
4153	return powerOf2;
4154	}
4155
4156	// 2D: modified for higher performance
/* Smaller of a and b.  Arguments and the whole expansion are parenthesized
 * so the macro can be embedded in larger expressions; each argument may
 * still be evaluated twice, so pass side-effect-free expressions only. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
4158	unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float oriData, size_t r1, size_t r2, double realPrecision, size_t comp_size){
4159
4160	unsigned int quantization_intervals;
4161	float sz_sample_correct_freq = -1;//0.5; //-1
4162	float dense_pos;
4163	float mean_flush_freq;
4164	unsigned char use_mean = 0;
4165
4166	if(exe_params->optQuantMode==1)
4167	{
4168	quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
4169	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
4170	updateQuantizationInfo(quantization_intervals);
4171	}
4172	else{
4173	quantization_intervals = exe_params->intvCapacity;
4174	}
4175
4176	// calculate block dims
4177	size_t num_x, num_y;
4178	size_t block_size = 16;
4179
4180	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
4181	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
4182
4183	size_t split_index_x, split_index_y;
4184	size_t early_blockcount_x, early_blockcount_y;
4185	size_t late_blockcount_x, late_blockcount_y;
4186	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
4187	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
4188
4189	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
4190	size_t num_blocks = num_x * num_y;
4191	size_t num_elements = r1 * r2;
4192
4193	size_t dim0_offset = r2;
4194
4195	int * result_type = (int ) malloc(num_elements sizeof(int));
4196	size_t unpred_data_max_size = max_num_block_elements;
4197	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
4198	size_t total_unpred = 0;
4199	size_t unpredictable_count;
4200	float * data_pos = oriData;
4201	int * type = result_type;
4202	size_t offset_x, offset_y;
4203	size_t current_blockcount_x, current_blockcount_y;
4204
4205	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
4206	float * reg_params_pos = reg_params;
4207	// move regression part out
4208	size_t params_offset_b = num_blocks;
4209	size_t params_offset_c = 2*num_blocks;
4210	for(size_t i=0; i<num_x; i++){
4211	for(size_t j=0; j<num_y; j++){
4212	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4213	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4214	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4215	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4216
4217	data_pos = oriData + offset_x * dim0_offset + offset_y;
4218
4219	{
4220	float * cur_data_pos = data_pos;
4221	float fx = 0.0;
4222	float fy = 0.0;
4223	float f = 0;
4224	double sum_x;
4225	float curData;
4226	for(size_t i=0; i<current_blockcount_x; i++){
4227	sum_x = 0;
4228	for(size_t j=0; j<current_blockcount_y; j++){
4229	curData = *cur_data_pos;
4230	sum_x += curData;
4231	fy += curData * j;
4232	cur_data_pos ++;
4233	}
4234	fx += sum_x * i;
4235	f += sum_x;
4236	cur_data_pos += dim0_offset - current_blockcount_y;
4237	}
4238	float coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
4239	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
4240	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
4241	reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
4242	}
4243
4244	reg_params_pos ++;
4245	}
4246	}
4247
4248	//Compress coefficient arrays
4249	double precision_a, precision_b, precision_c;
4250	float rel_param_err = 0.15/3;
4251	precision_a = rel_param_err * realPrecision / late_blockcount_x;
4252	precision_b = rel_param_err * realPrecision / late_blockcount_y;
4253	precision_c = rel_param_err * realPrecision;
4254
4255	float mean = 0;
4256	use_mean = 0;
4257	if(use_mean){
4258	// compute mean
4259	double sum = 0.0;
4260	size_t mean_count = 0;
4261	for(size_t i=0; i<num_elements; i++){
4262	if(fabs(oriData[i] - dense_pos) < realPrecision){
4263	sum += oriData[i];
4264	mean_count ++;
4265	}
4266	}
4267	if(mean_count > 0) mean = sum / mean_count;
4268	}
4269
4270
4271	double tmp_realPrecision = realPrecision;
4272
4273	// use two prediction buffers for higher performance
4274	float * unpredictable_data = result_unpredictable_data;
4275	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
4276	memset(indicator, 0, num_blocks * sizeof(unsigned char));
4277	size_t reg_count = 0;
4278	size_t strip_dim_0 = early_blockcount_x + 1;
4279	size_t strip_dim_1 = r2 + 1;
4280	size_t strip_dim0_offset = strip_dim_1;
4281	unsigned char * indicator_pos = indicator;
4282	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
4283	float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
4284	memset(prediction_buffer_1, 0, prediction_buffer_size);
4285	float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
4286	memset(prediction_buffer_2, 0, prediction_buffer_size);
4287	float * cur_pb_buf = prediction_buffer_1;
4288	float * next_pb_buf = prediction_buffer_2;
4289	float * cur_pb_buf_pos;
4290	float * next_pb_buf_pos;
4291	int intvCapacity = exe_params->intvCapacity;
4292	int intvRadius = exe_params->intvRadius;
4293	int use_reg = 0;
4294
4295	reg_params_pos = reg_params;
4296	// compress the regression coefficients on the fly
4297	float last_coeffcients[3] = {0.0};
4298	int coeff_intvCapacity_sz = 65536;
4299	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
4300	int * coeff_type[3];
4301	int * coeff_result_type = (int ) malloc(num_blocks3*sizeof(int));
4302	float * coeff_unpred_data[3];
4303	float * coeff_unpredictable_data = (float ) malloc(num_blocks3*sizeof(float));
4304	double precision[3];
4305	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
4306	for(int i=0; i<3; i++){
4307	coeff_type[i] = coeff_result_type + i * num_blocks;
4308	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
4309	}
4310	int coeff_index = 0;
4311	unsigned int coeff_unpredictable_count[3] = {0};
4312	if(use_mean){
4313	type = result_type;
4314	int intvCapacity_sz = intvCapacity - 2;
4315	for(size_t i=0; i<num_x; i++){
4316	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4317	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4318	data_pos = oriData + offset_x * dim0_offset;
4319
4320	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
4321	next_pb_buf_pos = next_pb_buf + 1;
4322	float * pb_pos = cur_pb_buf_pos;
4323	float * next_pb_pos = next_pb_buf_pos;
4324
4325	for(size_t j=0; j<num_y; j++){
4326	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4327	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4328
4329	/sampling: decide which predictor to use (regression or lorenzo)/
4330	{
4331	float * cur_data_pos;
4332	float curData;
4333	float pred_reg, pred_sz;
4334	float err_sz = 0.0, err_reg = 0.0;
4335	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
4336	// [1, 9] [3, 7] [7, 3] [9, 1]
4337	int count = 0;
4338	for(int i=1; i<current_blockcount_x; i+=2){
4339	cur_data_pos = data_pos + i * dim0_offset + i;
4340	curData = *cur_data_pos;
4341	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4342	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
4343
4344	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
4345
4346	err_reg += fabs(pred_reg - curData);
4347
4348	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
4349	curData = *cur_data_pos;
4350	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4351	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
4352	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
4353
4354	err_reg += fabs(pred_reg - curData);
4355
4356	count += 2;
4357	}
4358
4359	use_reg = (err_reg < err_sz);
4360	}
4361	if(use_reg)
4362	{
4363	{
4364	/predict coefficients in current block via previous reg_block/
4365	float cur_coeff;
4366	double diff, itvNum;
4367	for(int e=0; e<3; e++){
4368	cur_coeff = reg_params_pos[e*num_blocks];
4369	diff = cur_coeff - last_coeffcients[e];
4370	itvNum = fabs(diff)/precision[e] + 1;
4371	if (itvNum < coeff_intvCapacity_sz){
4372	if (diff < 0) itvNum = -itvNum;
4373	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4374	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4375	//ganrantee comporession error against the case of machine-epsilon
4376	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4377	coeff_type[e][coeff_index] = 0;
4378	last_coeffcients[e] = cur_coeff;
4379	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4380	}
4381	}
4382	else{
4383	coeff_type[e][coeff_index] = 0;
4384	last_coeffcients[e] = cur_coeff;
4385	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4386	}
4387	}
4388	coeff_index ++;
4389	}
4390	float curData;
4391	float pred;
4392	double itvNum;
4393	double diff;
4394	size_t index = 0;
4395	size_t block_unpredictable_count = 0;
4396	float * cur_data_pos = data_pos;
4397	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4398	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4399	curData = *cur_data_pos;
4400	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4401	diff = curData - pred;
4402	itvNum = fabs(diff)/realPrecision + 1;
4403	if (itvNum < intvCapacity){
4404	if (diff < 0) itvNum = -itvNum;
4405	type[index] = (int) (itvNum/2) + intvRadius;
4406	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4407	//ganrantee comporession error against the case of machine-epsilon
4408	if(fabs(curData - pred)>realPrecision){
4409	type[index] = 0;
4410	pred = curData;
4411	unpredictable_data[block_unpredictable_count ++] = curData;
4412	}
4413	}
4414	else{
4415	type[index] = 0;
4416	pred = curData;
4417	unpredictable_data[block_unpredictable_count ++] = curData;
4418	}
4419	index ++;
4420	cur_data_pos ++;
4421	}
4422	/dealing with the last jj (boundary)/
4423	{
4424	size_t jj = current_blockcount_y - 1;
4425	curData = *cur_data_pos;
4426	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4427	diff = curData - pred;
4428	itvNum = fabs(diff)/realPrecision + 1;
4429	if (itvNum < intvCapacity){
4430	if (diff < 0) itvNum = -itvNum;
4431	type[index] = (int) (itvNum/2) + intvRadius;
4432	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4433	//ganrantee comporession error against the case of machine-epsilon
4434	if(fabs(curData - pred)>realPrecision){
4435	type[index] = 0;
4436	pred = curData;
4437	unpredictable_data[block_unpredictable_count ++] = curData;
4438	}
4439	}
4440	else{
4441	type[index] = 0;
4442	pred = curData;
4443	unpredictable_data[block_unpredictable_count ++] = curData;
4444	}
4445
4446	// assign value to block surfaces
4447	pb_pos[ii * strip_dim0_offset + jj] = pred;
4448	index ++;
4449	cur_data_pos ++;
4450	}
4451	cur_data_pos += dim0_offset - current_blockcount_y;
4452	}
4453	/dealing with the last ii (boundary)/
4454	{
4455	size_t ii = current_blockcount_x - 1;
4456	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4457	curData = *cur_data_pos;
4458	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4459	diff = curData - pred;
4460	itvNum = fabs(diff)/realPrecision + 1;
4461	if (itvNum < intvCapacity){
4462	if (diff < 0) itvNum = -itvNum;
4463	type[index] = (int) (itvNum/2) + intvRadius;
4464	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4465	//ganrantee comporession error against the case of machine-epsilon
4466	if(fabs(curData - pred)>realPrecision){
4467	type[index] = 0;
4468	pred = curData;
4469	unpredictable_data[block_unpredictable_count ++] = curData;
4470	}
4471	}
4472	else{
4473	type[index] = 0;
4474	pred = curData;
4475	unpredictable_data[block_unpredictable_count ++] = curData;
4476	}
4477	// assign value to next prediction buffer
4478	next_pb_pos[jj] = pred;
4479	index ++;
4480	cur_data_pos ++;
4481	}
4482	/dealing with the last jj (boundary)/
4483	{
4484	size_t jj = current_blockcount_y - 1;
4485	curData = *cur_data_pos;
4486	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4487	diff = curData - pred;
4488	itvNum = fabs(diff)/realPrecision + 1;
4489	if (itvNum < intvCapacity){
4490	if (diff < 0) itvNum = -itvNum;
4491	type[index] = (int) (itvNum/2) + intvRadius;
4492	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4493	//ganrantee comporession error against the case of machine-epsilon
4494	if(fabs(curData - pred)>realPrecision){
4495	type[index] = 0;
4496	pred = curData;
4497	unpredictable_data[block_unpredictable_count ++] = curData;
4498	}
4499	}
4500	else{
4501	type[index] = 0;
4502	pred = curData;
4503	unpredictable_data[block_unpredictable_count ++] = curData;
4504	}
4505
4506	// assign value to block surfaces
4507	pb_pos[ii * strip_dim0_offset + jj] = pred;
4508	// assign value to next prediction buffer
4509	next_pb_pos[jj] = pred;
4510
4511	index ++;
4512	cur_data_pos ++;
4513	}
4514	} // end ii == -1
4515	unpredictable_count = block_unpredictable_count;
4516	total_unpred += unpredictable_count;
4517	unpredictable_data += unpredictable_count;
4518	reg_count ++;
4519	}// end use_reg
4520	else{
4521	// use SZ
4522	// SZ predication
4523	unpredictable_count = 0;
4524	float * cur_pb_pos = pb_pos;
4525	float * cur_data_pos = data_pos;
4526	float curData;
4527	float pred2D;
4528	double itvNum, diff;
4529	size_t index = 0;
4530	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4531	for(size_t jj=0; jj<current_blockcount_y; jj++){
4532	curData = *cur_data_pos;
4533	if(fabs(curData - mean) <= realPrecision){
4534	// adjust type[index] to intvRadius for coherence with freq in reg
4535	type[index] = intvRadius;
4536	*cur_pb_pos = mean;
4537	}
4538	else
4539	{
4540	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4541	diff = curData - pred2D;
4542	itvNum = fabs(diff)/realPrecision + 1;
4543	if (itvNum < intvCapacity_sz){
4544	if (diff < 0) itvNum = -itvNum;
4545	type[index] = (int) (itvNum/2) + intvRadius;
4546	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4547	if(type[index] <= intvRadius) type[index] -= 1;
4548	//ganrantee comporession error against the case of machine-epsilon
4549	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4550	type[index] = 0;
4551	*cur_pb_pos = curData;
4552	unpredictable_data[unpredictable_count ++] = curData;
4553	}
4554	}
4555	else{
4556	type[index] = 0;
4557	*cur_pb_pos = curData;
4558	unpredictable_data[unpredictable_count ++] = curData;
4559	}
4560	}
4561	index ++;
4562	cur_pb_pos ++;
4563	cur_data_pos ++;
4564	}
4565	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
4566	cur_data_pos += dim0_offset - current_blockcount_y;
4567	}
4568	/dealing with the last ii (boundary)/
4569	{
4570	// ii == current_blockcount_x - 1
4571	for(size_t jj=0; jj<current_blockcount_y; jj++){
4572	curData = *cur_data_pos;
4573	if(fabs(curData - mean) <= realPrecision){
4574	// adjust type[index] to intvRadius for coherence with freq in reg
4575	type[index] = intvRadius;
4576	*cur_pb_pos = mean;
4577	}
4578	else
4579	{
4580	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4581	diff = curData - pred2D;
4582	itvNum = fabs(diff)/realPrecision + 1;
4583	if (itvNum < intvCapacity_sz){
4584	if (diff < 0) itvNum = -itvNum;
4585	type[index] = (int) (itvNum/2) + intvRadius;
4586	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4587	if(type[index] <= intvRadius) type[index] -= 1;
4588	//ganrantee comporession error against the case of machine-epsilon
4589	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4590	type[index] = 0;
4591	*cur_pb_pos = curData;
4592	unpredictable_data[unpredictable_count ++] = curData;
4593	}
4594	}
4595	else{
4596	type[index] = 0;
4597	*cur_pb_pos = curData;
4598	unpredictable_data[unpredictable_count ++] = curData;
4599	}
4600	}
4601	next_pb_pos[jj] = *cur_pb_pos;
4602	index ++;
4603	cur_pb_pos ++;
4604	cur_data_pos ++;
4605	}
4606	}
4607	total_unpred += unpredictable_count;
4608	unpredictable_data += unpredictable_count;
4609	// change indicator
4610	indicator_pos[j] = 1;
4611	}// end SZ
4612	reg_params_pos ++;
4613	data_pos += current_blockcount_y;
4614	pb_pos += current_blockcount_y;
4615	next_pb_pos += current_blockcount_y;
4616	type += current_blockcount_x * current_blockcount_y;
4617	}// end j
4618	indicator_pos += num_y;
4619	float * tmp;
4620	tmp = cur_pb_buf;
4621	cur_pb_buf = next_pb_buf;
4622	next_pb_buf = tmp;
4623	}// end i
4624	}// end use mean
4625	else{
4626	type = result_type;
4627	int intvCapacity_sz = intvCapacity - 2;
4628	for(size_t i=0; i<num_x; i++){
4629	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4630	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4631	data_pos = oriData + offset_x * dim0_offset;
4632
4633	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
4634	next_pb_buf_pos = next_pb_buf + 1;
4635	float * pb_pos = cur_pb_buf_pos;
4636	float * next_pb_pos = next_pb_buf_pos;
4637
4638	for(size_t j=0; j<num_y; j++){
4639	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4640	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4641	/sampling/
4642	{
4643	// sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
4644	float * cur_data_pos;
4645	float curData;
4646	float pred_reg, pred_sz;
4647	float err_sz = 0.0, err_reg = 0.0;
4648	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
4649	// [1, 9] [3, 7] [7, 3] [9, 1]
4650	int count = 0;
4651	for(int i=1; i<current_blockcount_x; i+=2){
4652	cur_data_pos = data_pos + i * dim0_offset + i;
4653	curData = *cur_data_pos;
4654	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4655	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
4656	err_sz += fabs(pred_sz - curData);
4657	err_reg += fabs(pred_reg - curData);
4658
4659	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
4660	curData = *cur_data_pos;
4661	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4662	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
4663	err_sz += fabs(pred_sz - curData);
4664	err_reg += fabs(pred_reg - curData);
4665
4666	count += 2;
4667	}
4668	err_sz += realPrecision * count * 0.81;
4669	use_reg = (err_reg < err_sz);
4670
4671	}
4672	if(use_reg)
4673	{
4674	{
4675	/predict coefficients in current block via previous reg_block/
4676	float cur_coeff;
4677	double diff, itvNum;
4678	for(int e=0; e<3; e++){
4679	cur_coeff = reg_params_pos[e*num_blocks];
4680	diff = cur_coeff - last_coeffcients[e];
4681	itvNum = fabs(diff)/precision[e] + 1;
4682	if (itvNum < coeff_intvCapacity_sz){
4683	if (diff < 0) itvNum = -itvNum;
4684	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4685	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4686	//ganrantee comporession error against the case of machine-epsilon
4687	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4688	coeff_type[e][coeff_index] = 0;
4689	last_coeffcients[e] = cur_coeff;
4690	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4691	}
4692	}
4693	else{
4694	coeff_type[e][coeff_index] = 0;
4695	last_coeffcients[e] = cur_coeff;
4696	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4697	}
4698	}
4699	coeff_index ++;
4700	}
4701	float curData;
4702	float pred;
4703	double itvNum;
4704	double diff;
4705	size_t index = 0;
4706	size_t block_unpredictable_count = 0;
4707	float * cur_data_pos = data_pos;
4708	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4709	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4710	curData = *cur_data_pos;
4711	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4712	diff = curData - pred;
4713	itvNum = fabs(diff)/realPrecision + 1;
4714	if (itvNum < intvCapacity){
4715	if (diff < 0) itvNum = -itvNum;
4716	type[index] = (int) (itvNum/2) + intvRadius;
4717	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4718	//ganrantee comporession error against the case of machine-epsilon
4719	if(fabs(curData - pred)>realPrecision){
4720	type[index] = 0;
4721	pred = curData;
4722	unpredictable_data[block_unpredictable_count ++] = curData;
4723	}
4724	}
4725	else{
4726	type[index] = 0;
4727	pred = curData;
4728	unpredictable_data[block_unpredictable_count ++] = curData;
4729	}
4730	index ++;
4731	cur_data_pos ++;
4732	}
4733	/dealing with the last jj (boundary)/
4734	{
4735	// jj == current_blockcount_y - 1
4736	size_t jj = current_blockcount_y - 1;
4737	curData = *cur_data_pos;
4738	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4739	diff = curData - pred;
4740	itvNum = fabs(diff)/realPrecision + 1;
4741	if (itvNum < intvCapacity){
4742	if (diff < 0) itvNum = -itvNum;
4743	type[index] = (int) (itvNum/2) + intvRadius;
4744	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4745	//ganrantee comporession error against the case of machine-epsilon
4746	if(fabs(curData - pred)>realPrecision){
4747	type[index] = 0;
4748	pred = curData;
4749	unpredictable_data[block_unpredictable_count ++] = curData;
4750	}
4751	}
4752	else{
4753	type[index] = 0;
4754	pred = curData;
4755	unpredictable_data[block_unpredictable_count ++] = curData;
4756	}
4757
4758	// assign value to block surfaces
4759	pb_pos[ii * strip_dim0_offset + jj] = pred;
4760	index ++;
4761	cur_data_pos ++;
4762	}
4763	cur_data_pos += dim0_offset - current_blockcount_y;
4764	}
4765	/dealing with the last ii (boundary)/
4766	{
4767	size_t ii = current_blockcount_x - 1;
4768	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4769	curData = *cur_data_pos;
4770	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4771	diff = curData - pred;
4772	itvNum = fabs(diff)/realPrecision + 1;
4773	if (itvNum < intvCapacity){
4774	if (diff < 0) itvNum = -itvNum;
4775	type[index] = (int) (itvNum/2) + intvRadius;
4776	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4777	//ganrantee comporession error against the case of machine-epsilon
4778	if(fabs(curData - pred)>realPrecision){
4779	type[index] = 0;
4780	pred = curData;
4781	unpredictable_data[block_unpredictable_count ++] = curData;
4782	}
4783	}
4784	else{
4785	type[index] = 0;
4786	pred = curData;
4787	unpredictable_data[block_unpredictable_count ++] = curData;
4788	}
4789	// assign value to next prediction buffer
4790	next_pb_pos[jj] = pred;
4791	index ++;
4792	cur_data_pos ++;
4793	}
4794	/dealing with the last jj (boundary)/
4795	{
4796	// jj == current_blockcount_y - 1
4797	size_t jj = current_blockcount_y - 1;
4798	curData = *cur_data_pos;
4799	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4800	diff = curData - pred;
4801	itvNum = fabs(diff)/realPrecision + 1;
4802	if (itvNum < intvCapacity){
4803	if (diff < 0) itvNum = -itvNum;
4804	type[index] = (int) (itvNum/2) + intvRadius;
4805	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4806	//ganrantee comporession error against the case of machine-epsilon
4807	if(fabs(curData - pred)>realPrecision){
4808	type[index] = 0;
4809	pred = curData;
4810	unpredictable_data[block_unpredictable_count ++] = curData;
4811	}
4812	}
4813	else{
4814	type[index] = 0;
4815	pred = curData;
4816	unpredictable_data[block_unpredictable_count ++] = curData;
4817	}
4818
4819	// assign value to block surfaces
4820	pb_pos[ii * strip_dim0_offset + jj] = pred;
4821	// assign value to next prediction buffer
4822	next_pb_pos[jj] = pred;
4823
4824	index ++;
4825	cur_data_pos ++;
4826	}
4827	} // end ii == -1
4828	unpredictable_count = block_unpredictable_count;
4829	total_unpred += unpredictable_count;
4830	unpredictable_data += unpredictable_count;
4831	reg_count ++;
4832	}// end use_reg
4833	else{
4834	// use SZ
4835	// SZ predication
4836	unpredictable_count = 0;
4837	float * cur_pb_pos = pb_pos;
4838	float * cur_data_pos = data_pos;
4839	float curData;
4840	float pred2D;
4841	double itvNum, diff;
4842	size_t index = 0;
4843	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4844	for(size_t jj=0; jj<current_blockcount_y; jj++){
4845	curData = *cur_data_pos;
4846
4847	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4848	diff = curData - pred2D;
4849	itvNum = fabs(diff)/realPrecision + 1;
4850	if (itvNum < intvCapacity_sz){
4851	if (diff < 0) itvNum = -itvNum;
4852	type[index] = (int) (itvNum/2) + intvRadius;
4853	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4854	//ganrantee comporession error against the case of machine-epsilon
4855	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4856	type[index] = 0;
4857	*cur_pb_pos = curData;
4858	unpredictable_data[unpredictable_count ++] = curData;
4859	}
4860	}
4861	else{
4862	type[index] = 0;
4863	*cur_pb_pos = curData;
4864	unpredictable_data[unpredictable_count ++] = curData;
4865	}
4866
4867	index ++;
4868	cur_pb_pos ++;
4869	cur_data_pos ++;
4870	}
4871	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
4872	cur_data_pos += dim0_offset - current_blockcount_y;
4873	}
4874	/dealing with the last ii (boundary)/
4875	{
4876	// ii == current_blockcount_x - 1
4877	for(size_t jj=0; jj<current_blockcount_y; jj++){
4878	curData = *cur_data_pos;
4879
4880	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4881	diff = curData - pred2D;
4882	itvNum = fabs(diff)/realPrecision + 1;
4883	if (itvNum < intvCapacity_sz){
4884	if (diff < 0) itvNum = -itvNum;
4885	type[index] = (int) (itvNum/2) + intvRadius;
4886	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4887	//ganrantee comporession error against the case of machine-epsilon
4888	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4889	type[index] = 0;
4890	*cur_pb_pos = curData;
4891	unpredictable_data[unpredictable_count ++] = curData;
4892	}
4893	}
4894	else{
4895	type[index] = 0;
4896	*cur_pb_pos = curData;
4897	unpredictable_data[unpredictable_count ++] = curData;
4898	}
4899	next_pb_pos[jj] = *cur_pb_pos;
4900	index ++;
4901	cur_pb_pos ++;
4902	cur_data_pos ++;
4903	}
4904	}
4905	total_unpred += unpredictable_count;
4906	unpredictable_data += unpredictable_count;
4907	// change indicator
4908	indicator_pos[j] = 1;
4909	}// end SZ
4910	reg_params_pos ++;
4911	data_pos += current_blockcount_y;
4912	pb_pos += current_blockcount_y;
4913	next_pb_pos += current_blockcount_y;
4914	type += current_blockcount_x * current_blockcount_y;
4915	}// end j
4916	indicator_pos += num_y;
4917	float * tmp;
4918	tmp = cur_pb_buf;
4919	cur_pb_buf = next_pb_buf;
4920	next_pb_buf = tmp;
4921	}// end i
4922	}
4923	free(prediction_buffer_1);
4924	free(prediction_buffer_2);
4925
4926	int stateNum = 2*quantization_intervals;
4927	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4928
4929	size_t nodeCount = 0;
4930	size_t i = 0;
4931	init(huffmanTree, result_type, num_elements);
4932	for (i = 0; i < stateNum; i++)
4933	if (huffmanTree->code[i]) nodeCount++;
4934	nodeCount = nodeCount*2-1;
4935
4936	unsigned char *treeBytes;
4937	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4938
4939	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
4940	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
4941	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
4942	unsigned char * result_pos = result;
4943	initRandomAccessBytes(result_pos);
4944	result_pos += meta_data_offset;
4945
4946	sizeToBytes(result_pos, num_elements);
4947	result_pos += exe_params->SZ_SIZE_TYPE;
4948
4949	intToBytes_bigEndian(result_pos, block_size);
4950	result_pos += sizeof(int);
4951	doubleToBytes(result_pos, realPrecision);
4952	result_pos += sizeof(double);
4953	intToBytes_bigEndian(result_pos, quantization_intervals);
4954	result_pos += sizeof(int);
4955	intToBytes_bigEndian(result_pos, treeByteSize);
4956	result_pos += sizeof(int);
4957	intToBytes_bigEndian(result_pos, nodeCount);
4958	result_pos += sizeof(int);
4959	memcpy(result_pos, treeBytes, treeByteSize);
4960	result_pos += treeByteSize;
4961	free(treeBytes);
4962
4963	memcpy(result_pos, &use_mean, sizeof(unsigned char));
4964	result_pos += sizeof(unsigned char);
4965	memcpy(result_pos, &mean, sizeof(float));
4966	result_pos += sizeof(float);
4967
4968	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
4969	result_pos += indicator_size;
4970
4971	//convert the lead/mid/resi to byte stream
4972	if(reg_count>0){
4973	for(int e=0; e<3; e++){
4974	int stateNum = 2*coeff_intvCapacity_sz;
4975	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4976	size_t nodeCount = 0;
4977	init(huffmanTree, coeff_type[e], reg_count);
4978	size_t i = 0;
4979	for (i = 0; i < huffmanTree->stateNum; i++)
4980	if (huffmanTree->code[i]) nodeCount++;
4981	nodeCount = nodeCount*2-1;
4982	unsigned char *treeBytes;
4983	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4984	doubleToBytes(result_pos, precision[e]);
4985	result_pos += sizeof(double);
4986	intToBytes_bigEndian(result_pos, coeff_intvRadius);
4987	result_pos += sizeof(int);
4988	intToBytes_bigEndian(result_pos, treeByteSize);
4989	result_pos += sizeof(int);
4990	intToBytes_bigEndian(result_pos, nodeCount);
4991	result_pos += sizeof(int);
4992	memcpy(result_pos, treeBytes, treeByteSize);
4993	result_pos += treeByteSize;
4994	free(treeBytes);
4995	size_t typeArray_size = 0;
4996	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
4997	sizeToBytes(result_pos, typeArray_size);
4998	result_pos += sizeof(size_t) + typeArray_size;
4999	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
5000	result_pos += sizeof(int);
5001	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
5002	result_pos += coeff_unpredictable_count[e]*sizeof(float);
5003	SZ_ReleaseHuffman(huffmanTree);
5004	}
5005	}
5006	free(coeff_result_type);
5007	free(coeff_unpredictable_data);
5008
5009	//record the number of unpredictable data and also store them
5010	memcpy(result_pos, &total_unpred, sizeof(size_t));
5011	result_pos += sizeof(size_t);
5012	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
5013	result_pos += total_unpred * sizeof(float);
5014	size_t typeArray_size = 0;
5015	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
5016	result_pos += typeArray_size;
5017
5018	size_t totalEncodeSize = result_pos - result;
5019	free(indicator);
5020	free(result_unpredictable_data);
5021	free(result_type);
5022	free(reg_params);
5023
5024	SZ_ReleaseHuffman(huffmanTree);
5025	*comp_size = totalEncodeSize;
5026
5027	return result;
5028	}
5029
5030	unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float dense_pos, float * max_freq, float * mean_freq)
5031	{
5032	float mean = 0.0;
5033	size_t len = r1 * r2 * r3;
5034	size_t mean_distance = (int) (sqrt(len));
5035	float * data_pos = oriData;
5036	size_t offset_count = 0;
5037	size_t offset_count_2 = 0;
5038	size_t mean_count = 0;
5039	while(data_pos - oriData < len){
5040	mean += *data_pos;
5041	mean_count ++;
5042	data_pos += mean_distance;
5043	offset_count += mean_distance;
5044	offset_count_2 += mean_distance;
5045	if(offset_count >= r3){
5046	offset_count = 0;
5047	data_pos -= 1;
5048	}
5049	if(offset_count_2 >= r2 * r3){
5050	offset_count_2 = 0;
5051	data_pos -= 1;
5052	}
5053	}
5054	if(mean_count > 0) mean /= mean_count;
5055	size_t range = 8192;
5056	size_t radius = 4096;
5057	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
5058	memset(freq_intervals, 0, range*sizeof(size_t));
5059
5060	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
5061	int sampleDistance = confparams_cpr->sampleDistance;
5062	float predThreshold = confparams_cpr->predThreshold;
5063
5064	size_t i;
5065	size_t radiusIndex;
5066	size_t r23=r2*r3;
5067	float pred_value = 0, pred_err;
5068	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
5069	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
5070
5071	float mean_diff;
5072	ptrdiff_t freq_index;
5073	size_t freq_count = 0;
5074	size_t sample_count = 0;
5075
5076	offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
5077	data_pos = oriData + r23 + r3 + offset_count;
5078	size_t n1_count = 1, n2_count = 1; // count i,j sum
5079
5080	while(data_pos - oriData < len){
5081
5082	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
5083	pred_err = fabs(pred_value - *data_pos);
5084	if(pred_err < realPrecision) freq_count ++;
5085	radiusIndex = (pred_err/realPrecision+1)/2;
5086	if(radiusIndex>=maxRangeRadius)
5087	{
5088	radiusIndex = maxRangeRadius - 1;
5089	}
5090	intervals[radiusIndex]++;
5091
5092	mean_diff = *data_pos - mean;
5093	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
5094	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
5095	if(freq_index <= 0){
5096	freq_intervals[0] ++;
5097	}
5098	else if(freq_index >= range){
5099	freq_intervals[range - 1] ++;
5100	}
5101	else{
5102	freq_intervals[freq_index] ++;
5103	}
5104	offset_count += sampleDistance;
5105	if(offset_count >= r3){
5106	n2_count ++;
5107	if(n2_count == r2){
5108	n1_count ++;
5109	n2_count = 1;
5110	data_pos += r3;
5111	}
5112	offset_count_2 = (n1_count + n2_count) % sampleDistance;
5113	data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
5114	offset_count = (sampleDistance - offset_count_2);
5115	if(offset_count == 0) offset_count ++;
5116	}
5117	else data_pos += sampleDistance;
5118	sample_count ++;
5119	}
5120	max_freq = freq_count 1.0/ sample_count;
5121
5122	//compute the appropriate number
5123	size_t targetCount = sample_count*predThreshold;
5124	size_t sum = 0;
5125	for(i=0;i<maxRangeRadius;i++)
5126	{
5127	sum += intervals[i];
5128	if(sum>targetCount)
5129	break;
5130	}
5131	if(i>=maxRangeRadius)
5132	i = maxRangeRadius-1;
5133	unsigned int accIntervals = 2*(i+1);
5134	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
5135
5136	if(powerOf2<32)
5137	powerOf2 = 32;
5138	// collect frequency
5139	size_t max_sum = 0;
5140	size_t max_index = 0;
5141	size_t tmp_sum;
5142	size_t * freq_pos = freq_intervals + 1;
5143	for(size_t i=1; i<range-2; i++){
5144	tmp_sum = freq_pos[0] + freq_pos[1];
5145	if(tmp_sum > max_sum){
5146	max_sum = tmp_sum;
5147	max_index = i;
5148	}
5149	freq_pos ++;
5150	}
5151	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
5152	mean_freq = max_sum 1.0 / sample_count;
5153
5154	free(freq_intervals);
5155	free(intervals);
5156	return powerOf2;
5157	}
5158
5159
5160	// 3D: modified for higher performance
5161	unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
5162
5163	#ifdef HAVE_TIMECMPR
5164	float* decData = NULL;
5165	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5166	decData = (float*)(multisteps->hist_data);
5167	#endif
5168
5169	unsigned int quantization_intervals;
5170	float sz_sample_correct_freq = -1;//0.5; //-1
5171	float dense_pos;
5172	float mean_flush_freq;
5173	unsigned char use_mean = 0;
5174
5175	// calculate block dims
5176	size_t num_x, num_y, num_z;
5177	size_t block_size = 6;
5178	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
5179	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
5180	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
5181
5182	size_t split_index_x, split_index_y, split_index_z;
5183	size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
5184	size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
5185	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
5186	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
5187	SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
5188
5189	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
5190	size_t num_blocks = num_x * num_y * num_z;
5191	size_t num_elements = r1 * r2 * r3;
5192
5193	size_t dim0_offset = r2 * r3;
5194	size_t dim1_offset = r3;
5195
5196	int * result_type = (int ) malloc(num_elements sizeof(int));
5197	size_t unpred_data_max_size = max_num_block_elements;
5198	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
5199	size_t total_unpred = 0;
5200	size_t unpredictable_count;
5201	size_t max_unpred_count = 0;
5202	float * data_pos = oriData;
5203	int * type = result_type;
5204	size_t type_offset;
5205	size_t offset_x, offset_y, offset_z;
5206	size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
5207
5208	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
5209	float * reg_params_pos = reg_params;
5210	// move regression part out
5211	size_t params_offset_b = num_blocks;
5212	size_t params_offset_c = 2*num_blocks;
5213	size_t params_offset_d = 3*num_blocks;
5214	for(size_t i=0; i<num_x; i++){
5215	for(size_t j=0; j<num_y; j++){
5216	for(size_t k=0; k<num_z; k++){
5217	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
5218	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
5219	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
5220	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
5221	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
5222	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
5223
5224	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
5225	/Calculate regression coefficients/
5226	{
5227	float * cur_data_pos = data_pos;
5228	float fx = 0.0;
5229	float fy = 0.0;
5230	float fz = 0.0;
5231	float f = 0;
5232	float sum_x, sum_y;
5233	float curData;
5234	for(size_t i=0; i<current_blockcount_x; i++){
5235	sum_x = 0;
5236	for(size_t j=0; j<current_blockcount_y; j++){
5237	sum_y = 0;
5238	for(size_t k=0; k<current_blockcount_z; k++){
5239	curData = *cur_data_pos;
5240	// f += curData;
5241	// fx += curData * i;
5242	// fy += curData * j;
5243	// fz += curData * k;
5244	sum_y += curData;
5245	fz += curData * k;
5246	cur_data_pos ++;
5247	}
5248	fy += sum_y * j;
5249	sum_x += sum_y;
5250	cur_data_pos += dim1_offset - current_blockcount_z;
5251	}
5252	fx += sum_x * i;
5253	f += sum_x;
5254	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5255	}
5256	float coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
5257	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
5258	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
5259	reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
5260	reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
5261	}
5262	reg_params_pos ++;
5263	}
5264	}
5265	}
5266
5267	//Compress coefficient arrays
5268	double precision_a, precision_b, precision_c, precision_d;
5269	float rel_param_err = 0.025;
5270	precision_a = rel_param_err * realPrecision / late_blockcount_x;
5271	precision_b = rel_param_err * realPrecision / late_blockcount_y;
5272	precision_c = rel_param_err * realPrecision / late_blockcount_z;
5273	precision_d = rel_param_err * realPrecision;
5274
5275	if(exe_params->optQuantMode==1)
5276	{
5277	quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
5278	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
5279	updateQuantizationInfo(quantization_intervals);
5280	}
5281	else{
5282	quantization_intervals = exe_params->intvCapacity;
5283	}
5284
5285	float mean = 0;
5286	if(use_mean){
5287	// compute mean
5288	double sum = 0.0;
5289	size_t mean_count = 0;
5290	for(size_t i=0; i<num_elements; i++){
5291	if(fabs(oriData[i] - dense_pos) < realPrecision){
5292	sum += oriData[i];
5293	mean_count ++;
5294	}
5295	}
5296	if(mean_count > 0) mean = sum / mean_count;
5297	}
5298
5299	double tmp_realPrecision = realPrecision;
5300
5301	// use two prediction buffers for higher performance
5302	float * unpredictable_data = result_unpredictable_data;
5303	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
5304	memset(indicator, 0, num_blocks * sizeof(unsigned char));
5305	size_t reg_count = 0;
5306	size_t strip_dim_0 = early_blockcount_x + 1;
5307	size_t strip_dim_1 = r2 + 1;
5308	size_t strip_dim_2 = r3 + 1;
5309	size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
5310	size_t strip_dim1_offset = strip_dim_2;
5311	unsigned char * indicator_pos = indicator;
5312
5313	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
5314	float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
5315	memset(prediction_buffer_1, 0, prediction_buffer_size);
5316	float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
5317	memset(prediction_buffer_2, 0, prediction_buffer_size);
5318	float * cur_pb_buf = prediction_buffer_1;
5319	float * next_pb_buf = prediction_buffer_2;
5320	float * cur_pb_buf_pos;
5321	float * next_pb_buf_pos;
5322	int intvCapacity = exe_params->intvCapacity;
5323	int intvRadius = exe_params->intvRadius;
5324	int use_reg = 0;
5325	float noise = realPrecision * 1.22;
5326
5327	reg_params_pos = reg_params;
5328	// compress the regression coefficients on the fly
5329	float last_coeffcients[4] = {0.0};
5330	int coeff_intvCapacity_sz = 65536;
5331	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
5332	int * coeff_type[4];
5333	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
5334	float * coeff_unpred_data[4];
5335	float * coeff_unpredictable_data = (float ) malloc(num_blocks4*sizeof(float));
5336	double precision[4];
5337	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
5338	for(int i=0; i<4; i++){
5339	coeff_type[i] = coeff_result_type + i * num_blocks;
5340	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
5341	}
5342	int coeff_index = 0;
5343	unsigned int coeff_unpredictable_count[4] = {0};
5344
5345	if(use_mean){
5346	int intvCapacity_sz = intvCapacity - 2;
5347	for(size_t i=0; i<num_x; i++){
5348	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
5349	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
5350	for(size_t j=0; j<num_y; j++){
5351	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
5352	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
5353	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
5354	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
5355	type = result_type + type_offset;
5356
5357	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
5358	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
5359	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
5360
5361	size_t current_blockcount_z;
5362	float * pb_pos = cur_pb_buf_pos;
5363	float * next_pb_pos = next_pb_buf_pos;
5364	size_t strip_unpredictable_count = 0;
5365	for(size_t k=0; k<num_z; k++){
5366	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
5367	#ifdef HAVE_TIMECMPR
5368	size_t offset_z = 0;
5369	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
5370	size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
5371	#endif
5372	/sampling and decide which predictor/
5373	{
5374	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
5375	float * cur_data_pos;
5376	float curData;
5377	float pred_reg, pred_sz;
5378	float err_sz = 0.0, err_reg = 0.0;
5379	int bmi = 0;
5380	if(i>0 && j>0 && k>0){
5381	for(int i=0; i<block_size; i++){
5382	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5383	curData = *cur_data_pos;
5384	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5385	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5386	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5387	err_reg += fabs(pred_reg - curData);
5388
5389	bmi = block_size - i;
5390	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5391	curData = *cur_data_pos;
5392	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5393	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5394	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5395	err_reg += fabs(pred_reg - curData);
5396
5397	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5398	curData = *cur_data_pos;
5399	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5400	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5401	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5402	err_reg += fabs(pred_reg - curData);
5403
5404	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5405	curData = *cur_data_pos;
5406	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5407	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5408	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5409	err_reg += fabs(pred_reg - curData);
5410	}
5411	}
5412	else{
5413	for(int i=1; i<block_size; i++){
5414	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5415	curData = *cur_data_pos;
5416	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5417	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5418	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5419	err_reg += fabs(pred_reg - curData);
5420
5421	bmi = block_size - i;
5422	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5423	curData = *cur_data_pos;
5424	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5425	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5426	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5427	err_reg += fabs(pred_reg - curData);
5428
5429	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5430	curData = *cur_data_pos;
5431	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5432	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5433	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5434	err_reg += fabs(pred_reg - curData);
5435
5436	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5437	curData = *cur_data_pos;
5438	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5439	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5440	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5441	err_reg += fabs(pred_reg - curData);
5442
5443	}
5444	}
5445	use_reg = (err_reg < err_sz);
5446	}
5447	if(use_reg){
5448	{
5449	/*predict coefficients in current block via previous reg_block*/
5450	float cur_coeff;
5451	double diff, itvNum;
5452	for(int e=0; e<4; e++){
5453	cur_coeff = reg_params_pos[e*num_blocks];
5454	diff = cur_coeff - last_coeffcients[e];
5455	itvNum = fabs(diff)/precision[e] + 1;
5456	if (itvNum < coeff_intvCapacity_sz){
5457	if (diff < 0) itvNum = -itvNum;
5458	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
5459	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
5460	//guarantee compression error against the case of machine-epsilon
5461	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
5462	coeff_type[e][coeff_index] = 0;
5463	last_coeffcients[e] = cur_coeff;
5464	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5465	}
5466	}
5467	else{
5468	coeff_type[e][coeff_index] = 0;
5469	last_coeffcients[e] = cur_coeff;
5470	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5471	}
5472	}
5473	coeff_index ++;
5474	}
5475	float curData;
5476	float pred;
5477	double itvNum;
5478	double diff;
5479	size_t index = 0;
5480	size_t block_unpredictable_count = 0;
5481	float * cur_data_pos = data_pos;
5482	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5483	for(size_t jj=0; jj<current_blockcount_y; jj++){
5484	for(size_t kk=0; kk<current_blockcount_z; kk++){
5485	curData = *cur_data_pos;
5486	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5487	diff = curData - pred;
5488	itvNum = fabs(diff)/tmp_realPrecision + 1;
5489	if (itvNum < intvCapacity){
5490	if (diff < 0) itvNum = -itvNum;
5491	type[index] = (int) (itvNum/2) + intvRadius;
5492	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5493	//guarantee compression error against the case of machine-epsilon
5494	if(fabs(curData - pred)>tmp_realPrecision){
5495	type[index] = 0;
5496	pred = curData;
5497	unpredictable_data[block_unpredictable_count ++] = curData;
5498	}
5499	}
5500	else{
5501	type[index] = 0;
5502	pred = curData;
5503	unpredictable_data[block_unpredictable_count ++] = curData;
5504	}
5505
5506	#ifdef HAVE_TIMECMPR
5507	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5508	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5509	decData[block_offset + point_offset] = pred;
5510	#endif
5511
5512	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5513	// assign value to block surfaces
5514	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5515	}
5516	index ++;
5517	cur_data_pos ++;
5518	}
5519	cur_data_pos += dim1_offset - current_blockcount_z;
5520	}
5521	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5522	}
5523	/*dealing with the last ii (boundary)*/
5524	{
5525	// ii == current_blockcount_x - 1
5526	size_t ii = current_blockcount_x - 1;
5527	for(size_t jj=0; jj<current_blockcount_y; jj++){
5528	for(size_t kk=0; kk<current_blockcount_z; kk++){
5529	curData = *cur_data_pos;
5530	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5531	diff = curData - pred;
5532	itvNum = fabs(diff)/tmp_realPrecision + 1;
5533	if (itvNum < intvCapacity){
5534	if (diff < 0) itvNum = -itvNum;
5535	type[index] = (int) (itvNum/2) + intvRadius;
5536	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5537	//guarantee compression error against the case of machine-epsilon
5538	if(fabs(curData - pred)>tmp_realPrecision){
5539	type[index] = 0;
5540	pred = curData;
5541	unpredictable_data[block_unpredictable_count ++] = curData;
5542	}
5543	}
5544	else{
5545	type[index] = 0;
5546	pred = curData;
5547	unpredictable_data[block_unpredictable_count ++] = curData;
5548	}
5549
5550	#ifdef HAVE_TIMECMPR
5551	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5552	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5553	decData[block_offset + point_offset] = pred;
5554	#endif
5555
5556	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5557	// assign value to block surfaces
5558	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5559	}
5560	// assign value to next prediction buffer
5561	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
5562	index ++;
5563	cur_data_pos ++;
5564	}
5565	cur_data_pos += dim1_offset - current_blockcount_z;
5566	}
5567	}
5568	unpredictable_count = block_unpredictable_count;
5569	strip_unpredictable_count += unpredictable_count;
5570	unpredictable_data += unpredictable_count;
5571
5572	reg_count ++;
5573	}
5574	else{
5575	// use SZ
5576	// SZ prediction
5577	unpredictable_count = 0;
5578	float * cur_pb_pos = pb_pos;
5579	float * cur_data_pos = data_pos;
5580	float curData;
5581	float pred3D;
5582	double itvNum, diff;
5583	size_t index = 0;
5584	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5585	for(size_t jj=0; jj<current_blockcount_y; jj++){
5586	for(size_t kk=0; kk<current_blockcount_z; kk++){
5587
5588	curData = *cur_data_pos;
5589	if(fabs(curData - mean) <= realPrecision){
5590	// adjust type[index] to intvRadius for coherence with freq in reg
5591	type[index] = intvRadius;
5592	*cur_pb_pos = mean;
5593	}
5594	else
5595	{
5596	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5597	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5598	diff = curData - pred3D;
5599	itvNum = fabs(diff)/realPrecision + 1;
5600	if (itvNum < intvCapacity_sz){
5601	if (diff < 0) itvNum = -itvNum;
5602	type[index] = (int) (itvNum/2) + intvRadius;
5603	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5604	if(type[index] <= intvRadius) type[index] -= 1;
5605	//guarantee compression error against the case of machine-epsilon
5606	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5607	type[index] = 0;
5608	*cur_pb_pos = curData;
5609	unpredictable_data[unpredictable_count ++] = curData;
5610	}
5611	}
5612	else{
5613	type[index] = 0;
5614	*cur_pb_pos = curData;
5615	unpredictable_data[unpredictable_count ++] = curData;
5616	}
5617	}
5618	#ifdef HAVE_TIMECMPR
5619	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5620	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5621	decData[block_offset + point_offset] = *cur_pb_pos;
5622	#endif
5623
5624	index ++;
5625	cur_pb_pos ++;
5626	cur_data_pos ++;
5627	}
5628	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5629	cur_data_pos += dim1_offset - current_blockcount_z;
5630	}
5631	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
5632	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5633	}
5634	/*dealing with the last ii (boundary)*/
5635	{
5636	// ii == current_blockcount_x - 1
5637	for(size_t jj=0; jj<current_blockcount_y; jj++){
5638	for(size_t kk=0; kk<current_blockcount_z; kk++){
5639
5640	curData = *cur_data_pos;
5641	if(fabs(curData - mean) <= realPrecision){
5642	// adjust type[index] to intvRadius for coherence with freq in reg
5643	type[index] = intvRadius;
5644	*cur_pb_pos = mean;
5645	}
5646	else
5647	{
5648	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5649	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5650	diff = curData - pred3D;
5651	itvNum = fabs(diff)/realPrecision + 1;
5652	if (itvNum < intvCapacity_sz){
5653	if (diff < 0) itvNum = -itvNum;
5654	type[index] = (int) (itvNum/2) + intvRadius;
5655	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5656	if(type[index] <= intvRadius) type[index] -= 1;
5657	//guarantee compression error against the case of machine-epsilon
5658	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5659	type[index] = 0;
5660	*cur_pb_pos = curData;
5661	unpredictable_data[unpredictable_count ++] = curData;
5662	}
5663	}
5664	else{
5665	type[index] = 0;
5666	*cur_pb_pos = curData;
5667	unpredictable_data[unpredictable_count ++] = curData;
5668	}
5669	}
5670	#ifdef HAVE_TIMECMPR
5671	size_t ii = current_blockcount_x - 1;
5672	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5673	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5674	decData[block_offset + point_offset] = *cur_pb_pos;
5675	#endif
5676
5677	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
5678	index ++;
5679	cur_pb_pos ++;
5680	cur_data_pos ++;
5681	}
5682	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5683	cur_data_pos += dim1_offset - current_blockcount_z;
5684	}
5685	}
5686	strip_unpredictable_count += unpredictable_count;
5687	unpredictable_data += unpredictable_count;
5688	// change indicator
5689	indicator_pos[k] = 1;
5690	}// end SZ
5691
5692	reg_params_pos ++;
5693	data_pos += current_blockcount_z;
5694	pb_pos += current_blockcount_z;
5695	next_pb_pos += current_blockcount_z;
5696	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
5697
5698	} // end k
5699
5700	if(strip_unpredictable_count > max_unpred_count){
5701	max_unpred_count = strip_unpredictable_count;
5702	}
5703	total_unpred += strip_unpredictable_count;
5704	indicator_pos += num_z;
5705	}// end j
5706	float * tmp;
5707	tmp = cur_pb_buf;
5708	cur_pb_buf = next_pb_buf;
5709	next_pb_buf = tmp;
5710	}// end i
5711	}
5712	else{
5713	int intvCapacity_sz = intvCapacity - 2;
5714	for(size_t i=0; i<num_x; i++){
5715	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
5716	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
5717
5718	for(size_t j=0; j<num_y; j++){
5719	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
5720	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
5721	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
5722	// copy bottom plane from plane buffer
5723	// memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(float));
5724	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
5725	type = result_type + type_offset;
5726
5727	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
5728	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
5729	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
5730
5731	size_t current_blockcount_z;
5732	float * pb_pos = cur_pb_buf_pos;
5733	float * next_pb_pos = next_pb_buf_pos;
5734	size_t strip_unpredictable_count = 0;
5735	for(size_t k=0; k<num_z; k++){
5736	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
5737	#ifdef HAVE_TIMECMPR
5738	size_t offset_z = 0;
5739	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
5740	size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
5741	#endif
5742	/*sampling*/
5743	{
5744	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
5745	float * cur_data_pos;
5746	float curData;
5747	float pred_reg, pred_sz;
5748	float err_sz = 0.0, err_reg = 0.0;
5749	int bmi;
5750	if(i>0 && j>0 && k>0){
5751	for(int i=0; i<block_size; i++){
5752	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5753	curData = *cur_data_pos;
5754	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5755	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5756	err_sz += fabs(pred_sz - curData) + noise;
5757	err_reg += fabs(pred_reg - curData);
5758
5759	bmi = block_size - i;
5760	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5761	curData = *cur_data_pos;
5762	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5763	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5764	err_sz += fabs(pred_sz - curData) + noise;
5765	err_reg += fabs(pred_reg - curData);
5766
5767	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5768	curData = *cur_data_pos;
5769	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5770	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5771	err_sz += fabs(pred_sz - curData) + noise;
5772	err_reg += fabs(pred_reg - curData);
5773
5774	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5775	curData = *cur_data_pos;
5776	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5777	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5778	err_sz += fabs(pred_sz - curData) + noise;
5779	err_reg += fabs(pred_reg - curData);
5780	}
5781	}
5782	else{
5783	for(int i=1; i<block_size; i++){
5784	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5785	curData = *cur_data_pos;
5786	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5787	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5788	err_sz += fabs(pred_sz - curData) + noise;
5789	err_reg += fabs(pred_reg - curData);
5790
5791	bmi = block_size - i;
5792	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5793	curData = *cur_data_pos;
5794	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5795	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5796	err_sz += fabs(pred_sz - curData) + noise;
5797	err_reg += fabs(pred_reg - curData);
5798
5799	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5800	curData = *cur_data_pos;
5801	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5802	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5803	err_sz += fabs(pred_sz - curData) + noise;
5804	err_reg += fabs(pred_reg - curData);
5805
5806	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5807	curData = *cur_data_pos;
5808	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5809	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5810	err_sz += fabs(pred_sz - curData) + noise;
5811	err_reg += fabs(pred_reg - curData);
5812	}
5813	}
5814	use_reg = (err_reg < err_sz);
5815
5816	}
5817	if(use_reg)
5818	{
5819	{
5820	/*predict coefficients in current block via previous reg_block*/
5821	float cur_coeff;
5822	double diff, itvNum;
5823	for(int e=0; e<4; e++){
5824	cur_coeff = reg_params_pos[e*num_blocks];
5825	diff = cur_coeff - last_coeffcients[e];
5826	itvNum = fabs(diff)/precision[e] + 1;
5827	if (itvNum < coeff_intvCapacity_sz){
5828	if (diff < 0) itvNum = -itvNum;
5829	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
5830	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
5831	//guarantee compression error against the case of machine-epsilon
5832	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
5833	coeff_type[e][coeff_index] = 0;
5834	last_coeffcients[e] = cur_coeff;
5835	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5836	}
5837	}
5838	else{
5839	coeff_type[e][coeff_index] = 0;
5840	last_coeffcients[e] = cur_coeff;
5841	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5842	}
5843	}
5844	coeff_index ++;
5845	}
5846	float curData;
5847	float pred;
5848	double itvNum;
5849	double diff;
5850	size_t index = 0;
5851	size_t block_unpredictable_count = 0;
5852	float * cur_data_pos = data_pos;
5853	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5854	for(size_t jj=0; jj<current_blockcount_y; jj++){
5855	for(size_t kk=0; kk<current_blockcount_z; kk++){
5856
5857	curData = *cur_data_pos;
5858	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5859	diff = curData - pred;
5860	itvNum = fabs(diff)/tmp_realPrecision + 1;
5861	if (itvNum < intvCapacity){
5862	if (diff < 0) itvNum = -itvNum;
5863	type[index] = (int) (itvNum/2) + intvRadius;
5864	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5865	//guarantee compression error against the case of machine-epsilon
5866	if(fabs(curData - pred)>tmp_realPrecision){
5867	type[index] = 0;
5868	pred = curData;
5869	unpredictable_data[block_unpredictable_count ++] = curData;
5870	}
5871	}
5872	else{
5873	type[index] = 0;
5874	pred = curData;
5875	unpredictable_data[block_unpredictable_count ++] = curData;
5876	}
5877
5878	#ifdef HAVE_TIMECMPR
5879	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5880	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5881	decData[block_offset + point_offset] = pred;
5882	#endif
5883
5884
5885	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5886	// assign value to block surfaces
5887	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5888	}
5889	index ++;
5890	cur_data_pos ++;
5891	}
5892	cur_data_pos += dim1_offset - current_blockcount_z;
5893	}
5894	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5895	}
5896	/*dealing with the last ii (boundary)*/
5897	{
5898	// ii == current_blockcount_x - 1
5899	size_t ii = current_blockcount_x - 1;
5900	for(size_t jj=0; jj<current_blockcount_y; jj++){
5901	for(size_t kk=0; kk<current_blockcount_z; kk++){
5902	curData = *cur_data_pos;
5903	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5904	diff = curData - pred;
5905	itvNum = fabs(diff)/tmp_realPrecision + 1;
5906	if (itvNum < intvCapacity){
5907	if (diff < 0) itvNum = -itvNum;
5908	type[index] = (int) (itvNum/2) + intvRadius;
5909	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5910	//guarantee compression error against the case of machine-epsilon
5911	if(fabs(curData - pred)>tmp_realPrecision){
5912	type[index] = 0;
5913	pred = curData;
5914	unpredictable_data[block_unpredictable_count ++] = curData;
5915	}
5916	}
5917	else{
5918	type[index] = 0;
5919	pred = curData;
5920	unpredictable_data[block_unpredictable_count ++] = curData;
5921	}
5922
5923	#ifdef HAVE_TIMECMPR
5924	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5925	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5926	decData[block_offset + point_offset] = pred;
5927	#endif
5928
5929	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5930	// assign value to block surfaces
5931	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5932	}
5933	// assign value to next prediction buffer
5934	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
5935	index ++;
5936	cur_data_pos ++;
5937	}
5938	cur_data_pos += dim1_offset - current_blockcount_z;
5939	}
5940	}
5941	unpredictable_count = block_unpredictable_count;
5942	strip_unpredictable_count += unpredictable_count;
5943	unpredictable_data += unpredictable_count;
5944	reg_count ++;
5945	}
5946	else{
5947	// use SZ
5948	// SZ prediction
5949	unpredictable_count = 0;
5950	float * cur_pb_pos = pb_pos;
5951	float * cur_data_pos = data_pos;
5952	float curData;
5953	float pred3D;
5954	double itvNum, diff;
5955	size_t index = 0;
5956	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5957	for(size_t jj=0; jj<current_blockcount_y; jj++){
5958	for(size_t kk=0; kk<current_blockcount_z; kk++){
5959
5960	curData = *cur_data_pos;
5961	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5962	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5963	diff = curData - pred3D;
5964	itvNum = fabs(diff)/realPrecision + 1;
5965	if (itvNum < intvCapacity_sz){
5966	if (diff < 0) itvNum = -itvNum;
5967	type[index] = (int) (itvNum/2) + intvRadius;
5968	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5969	//guarantee compression error against the case of machine-epsilon
5970	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5971	type[index] = 0;
5972	*cur_pb_pos = curData;
5973	unpredictable_data[unpredictable_count ++] = curData;
5974	}
5975	}
5976	else{
5977	type[index] = 0;
5978	*cur_pb_pos = curData;
5979	unpredictable_data[unpredictable_count ++] = curData;
5980	}
5981
5982	#ifdef HAVE_TIMECMPR
5983	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5984	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5985	decData[block_offset + point_offset] = *cur_pb_pos;
5986	#endif
5987	index ++;
5988	cur_pb_pos ++;
5989	cur_data_pos ++;
5990	}
5991	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5992	cur_data_pos += dim1_offset - current_blockcount_z;
5993	}
5994	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
5995	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5996	}
5997	/*dealing with the last ii (boundary)*/
5998	{
5999	// ii == current_blockcount_x - 1
6000	for(size_t jj=0; jj<current_blockcount_y; jj++){
6001	for(size_t kk=0; kk<current_blockcount_z; kk++){
6002
6003	curData = *cur_data_pos;
6004	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
6005	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6006	diff = curData - pred3D;
6007	itvNum = fabs(diff)/realPrecision + 1;
6008	if (itvNum < intvCapacity_sz){
6009	if (diff < 0) itvNum = -itvNum;
6010	type[index] = (int) (itvNum/2) + intvRadius;
6011	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
6012	//guarantee compression error against the case of machine-epsilon
6013	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
6014	type[index] = 0;
6015	*cur_pb_pos = curData;
6016	unpredictable_data[unpredictable_count ++] = curData;
6017	}
6018	}
6019	else{
6020	type[index] = 0;
6021	*cur_pb_pos = curData;
6022	unpredictable_data[unpredictable_count ++] = curData;
6023	}
6024
6025	#ifdef HAVE_TIMECMPR
6026	size_t ii = current_blockcount_x - 1;
6027	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
6028	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
6029	decData[block_offset + point_offset] = *cur_pb_pos;
6030	#endif
6031
6032	// assign value to next prediction buffer
6033	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
6034	index ++;
6035	cur_pb_pos ++;
6036	cur_data_pos ++;
6037	}
6038	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
6039	cur_data_pos += dim1_offset - current_blockcount_z;
6040	}
6041	}
6042	strip_unpredictable_count += unpredictable_count;
6043	unpredictable_data += unpredictable_count;
6044	// change indicator
6045	indicator_pos[k] = 1;
6046	}// end SZ
6047
6048	reg_params_pos ++;
6049	data_pos += current_blockcount_z;
6050	pb_pos += current_blockcount_z;
6051	next_pb_pos += current_blockcount_z;
6052	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
6053
6054	}
6055
6056	if(strip_unpredictable_count > max_unpred_count){
6057	max_unpred_count = strip_unpredictable_count;
6058	}
6059	total_unpred += strip_unpredictable_count;
6060	indicator_pos += num_z;
6061	}
6062	float * tmp;
6063	tmp = cur_pb_buf;
6064	cur_pb_buf = next_pb_buf;
6065	next_pb_buf = tmp;
6066	}
6067	}
6068
6069	free(prediction_buffer_1);
6070	free(prediction_buffer_2);
6071
6072	int stateNum = 2*quantization_intervals;
6073	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6074
6075	size_t nodeCount = 0;
6076	init(huffmanTree, result_type, num_elements);
6077	size_t i = 0;
6078	for (i = 0; i < huffmanTree->stateNum; i++)
6079	if (huffmanTree->code[i]) nodeCount++;
6080	nodeCount = nodeCount*2-1;
6081
6082	unsigned char *treeBytes;
6083	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6084
6085	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
6086	// total size metadata # elements real precision intervals nodeCount huffman block index unpredictable count mean unpred size elements
6087	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
6088	unsigned char * result_pos = result;
6089	initRandomAccessBytes(result_pos);
6090
6091	result_pos += meta_data_offset;
6092
6093	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
6094	result_pos += exe_params->SZ_SIZE_TYPE;
6095
6096	intToBytes_bigEndian(result_pos, block_size);
6097	result_pos += sizeof(int);
6098	doubleToBytes(result_pos, realPrecision);
6099	result_pos += sizeof(double);
6100	intToBytes_bigEndian(result_pos, quantization_intervals);
6101	result_pos += sizeof(int);
6102	intToBytes_bigEndian(result_pos, treeByteSize);
6103	result_pos += sizeof(int);
6104	intToBytes_bigEndian(result_pos, nodeCount);
6105	result_pos += sizeof(int);
6106	memcpy(result_pos, treeBytes, treeByteSize);
6107	result_pos += treeByteSize;
6108	free(treeBytes);
6109
6110	memcpy(result_pos, &use_mean, sizeof(unsigned char));
6111	result_pos += sizeof(unsigned char);
6112	memcpy(result_pos, &mean, sizeof(float));
6113	result_pos += sizeof(float);
6114	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
6115	result_pos += indicator_size;
6116
6117	//convert the lead/mid/resi to byte stream
6118	if(reg_count > 0){
6119	for(int e=0; e<4; e++){
6120	int stateNum = 2*coeff_intvCapacity_sz;
6121	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6122	size_t nodeCount = 0;
6123	init(huffmanTree, coeff_type[e], reg_count);
6124	size_t i = 0;
6125	for (i = 0; i < huffmanTree->stateNum; i++)
6126	if (huffmanTree->code[i]) nodeCount++;
6127	nodeCount = nodeCount*2-1;
6128	unsigned char *treeBytes;
6129	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6130	doubleToBytes(result_pos, precision[e]);
6131	result_pos += sizeof(double);
6132	intToBytes_bigEndian(result_pos, coeff_intvRadius);
6133	result_pos += sizeof(int);
6134	intToBytes_bigEndian(result_pos, treeByteSize);
6135	result_pos += sizeof(int);
6136	intToBytes_bigEndian(result_pos, nodeCount);
6137	result_pos += sizeof(int);
6138	memcpy(result_pos, treeBytes, treeByteSize);
6139	result_pos += treeByteSize;
6140	free(treeBytes);
6141	size_t typeArray_size = 0;
6142	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
6143	sizeToBytes(result_pos, typeArray_size);
6144	result_pos += sizeof(size_t) + typeArray_size;
6145	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
6146	result_pos += sizeof(int);
6147	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
6148	result_pos += coeff_unpredictable_count[e]*sizeof(float);
6149	SZ_ReleaseHuffman(huffmanTree);
6150	}
6151	}
6152	free(coeff_result_type);
6153	free(coeff_unpredictable_data);
6154
6155	//record the number of unpredictable data and also store them
6156	memcpy(result_pos, &total_unpred, sizeof(size_t));
6157	result_pos += sizeof(size_t);
6158	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
6159	result_pos += total_unpred * sizeof(float);
6160	size_t typeArray_size = 0;
6161	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
6162	result_pos += typeArray_size;
6163	size_t totalEncodeSize = result_pos - result;
6164	free(indicator);
6165	free(result_unpredictable_data);
6166	free(result_type);
6167	free(reg_params);
6168
6169
6170	SZ_ReleaseHuffman(huffmanTree);
6171	*comp_size = totalEncodeSize;
6172	return result;
6173	}
6174
6175	unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
6176
6177	unsigned int quantization_intervals;
6178	float sz_sample_correct_freq = -1;//0.5; //-1
6179	float dense_pos;
6180	float mean_flush_freq;
6181	unsigned char use_mean = 0;
6182
6183	// calculate block dims
6184	size_t num_x, num_y, num_z;
6185	size_t block_size = 6;
6186	num_x = (r1 - 1) / block_size + 1;
6187	num_y = (r2 - 1) / block_size + 1;
6188	num_z = (r3 - 1) / block_size + 1;
6189
6190	size_t max_num_block_elements = block_size * block_size * block_size;
6191	size_t num_blocks = num_x * num_y * num_z;
6192	size_t num_elements = r1 * r2 * r3;
6193
6194	size_t dim0_offset = r2 * r3;
6195	size_t dim1_offset = r3;
6196
6197	int * result_type = (int ) malloc(num_blocksmax_num_block_elements * sizeof(int));
6198	size_t unpred_data_max_size = max_num_block_elements;
6199	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
6200	size_t total_unpred = 0;
6201	size_t unpredictable_count;
6202	float * data_pos = oriData;
6203	int * type = result_type;
6204	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
6205	float * reg_params_pos = reg_params;
6206	// move regression part out
6207	size_t params_offset_b = num_blocks;
6208	size_t params_offset_c = 2*num_blocks;
6209	size_t params_offset_d = 3*num_blocks;
6210	float * pred_buffer = (float ) malloc((block_size+1)(block_size+1)(block_size+1)sizeof(float));
6211	float * pred_buffer_pos = NULL;
6212	float * block_data_pos_x = NULL;
6213	float * block_data_pos_y = NULL;
6214	float * block_data_pos_z = NULL;
6215	for(size_t i=0; i<num_x; i++){
6216	for(size_t j=0; j<num_y; j++){
6217	for(size_t k=0; k<num_z; k++){
6218	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
6219	pred_buffer_pos = pred_buffer;
6220	block_data_pos_x = data_pos;
6221	// use the buffer as block_sizeblock_sizeblock_size
6222	for(int ii=0; ii<block_size; ii++){
6223	block_data_pos_y = block_data_pos_x;
6224	for(int jj=0; jj<block_size; jj++){
6225	block_data_pos_z = block_data_pos_y;
6226	for(int kk=0; kk<block_size; kk++){
6227	pred_buffer_pos = block_data_pos_z;
6228	if(k*block_size + kk + 1 < r3) block_data_pos_z ++;
6229	pred_buffer_pos ++;
6230	}
6231	if(j*block_size + jj + 1 < r2) block_data_pos_y += dim1_offset;
6232	}
6233	if(i*block_size + ii + 1 < r1) block_data_pos_x += dim0_offset;
6234	}
6235	/Calculate regression coefficients/
6236	{
6237	float * cur_data_pos = pred_buffer;
6238	float fx = 0.0;
6239	float fy = 0.0;
6240	float fz = 0.0;
6241	float f = 0;
6242	float sum_x, sum_y;
6243	float curData;
6244	for(size_t i=0; i<block_size; i++){
6245	sum_x = 0;
6246	for(size_t j=0; j<block_size; j++){
6247	sum_y = 0;
6248	for(size_t k=0; k<block_size; k++){
6249	curData = *cur_data_pos;
6250	sum_y += curData;
6251	fz += curData * k;
6252	cur_data_pos ++;
6253	}
6254	fy += sum_y * j;
6255	sum_x += sum_y;
6256	}
6257	fx += sum_x * i;
6258	f += sum_x;
6259	}
6260	float coeff = 1.0 / (block_size * block_size * block_size);
6261	reg_params_pos[0] = (2 * fx / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
6262	reg_params_pos[params_offset_b] = (2 * fy / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
6263	reg_params_pos[params_offset_c] = (2 * fz / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
6264	reg_params_pos[params_offset_d] = f * coeff - ((block_size - 1) * reg_params_pos[0] / 2 + (block_size - 1) * reg_params_pos[params_offset_b] / 2 + (block_size - 1) * reg_params_pos[params_offset_c] / 2);
6265	}
6266	reg_params_pos ++;
6267	}
6268	}
6269	}
6270
6271	//Compress coefficient arrays
6272	double precision_a, precision_b, precision_c, precision_d;
6273	float rel_param_err = 0.025;
6274	precision_a = rel_param_err * realPrecision / block_size;
6275	precision_b = rel_param_err * realPrecision / block_size;
6276	precision_c = rel_param_err * realPrecision / block_size;
6277	precision_d = rel_param_err * realPrecision;
6278
6279	if(exe_params->optQuantMode==1)
6280	{
6281	quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
6282	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
6283	updateQuantizationInfo(quantization_intervals);
6284	}
6285	else{
6286	quantization_intervals = exe_params->intvCapacity;
6287	}
6288
6289	float mean = 0;
6290	if(use_mean){
6291	// compute mean
6292	double sum = 0.0;
6293	size_t mean_count = 0;
6294	for(size_t i=0; i<num_elements; i++){
6295	if(fabs(oriData[i] - dense_pos) < realPrecision){
6296	sum += oriData[i];
6297	mean_count ++;
6298	}
6299	}
6300	if(mean_count > 0) mean = sum / mean_count;
6301	}
6302
6303	double tmp_realPrecision = realPrecision;
6304
6305	// use two prediction buffers for higher performance
6306	float * unpredictable_data = result_unpredictable_data;
6307	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
6308	memset(indicator, 0, num_blocks * sizeof(unsigned char));
6309	size_t reg_count = 0;
6310	unsigned char * indicator_pos = indicator;
6311
6312	int intvCapacity = exe_params->intvCapacity;
6313	int intvRadius = exe_params->intvRadius;
6314	int use_reg = 0;
6315	float noise = realPrecision * 1.22;
6316
6317	reg_params_pos = reg_params;
6318	// compress the regression coefficients on the fly
6319	float last_coeffcients[4] = {0.0};
6320	int coeff_intvCapacity_sz = 65536;
6321	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
6322	int * coeff_type[4];
6323	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
6324	float * coeff_unpred_data[4];
6325	float * coeff_unpredictable_data = (float ) malloc(num_blocks4*sizeof(float));
6326	double precision[4];
6327	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
6328	for(int i=0; i<4; i++){
6329	coeff_type[i] = coeff_result_type + i * num_blocks;
6330	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
6331	}
6332	int coeff_index = 0;
6333	unsigned int coeff_unpredictable_count[4] = {0};
6334
6335	memset(pred_buffer, 0, (block_size+1)(block_size+1)(block_size+1)*sizeof(float));
6336	int pred_buffer_block_size = block_size + 1;
6337	int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size;
6338	int strip_dim1_offset = pred_buffer_block_size;
6339
6340	if(use_mean){
6341	int intvCapacity_sz = intvCapacity - 2;
6342	type = result_type;
6343	for(size_t i=0; i<num_x; i++){
6344	for(size_t j=0; j<num_y; j++){
6345	for(size_t k=0; k<num_z; k++){
6346	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
6347	// add 1 in x, y, z offset
6348	pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6349	block_data_pos_x = data_pos;
6350	for(int ii=0; ii<block_size; ii++){
6351	block_data_pos_y = block_data_pos_x;
6352	for(int jj=0; jj<block_size; jj++){
6353	block_data_pos_z = block_data_pos_y;
6354	for(int kk=0; kk<block_size; kk++){
6355	pred_buffer_pos = block_data_pos_z;
6356	if(k*block_size + kk + 1< r3) block_data_pos_z ++;
6357	pred_buffer_pos ++;
6358	}
6359	// add 1 in z offset
6360	pred_buffer_pos ++;
6361	if(j*block_size + jj + 1< r2) block_data_pos_y += dim1_offset;
6362	}
6363	// add 1 in y offset
6364	pred_buffer_pos += pred_buffer_block_size;
6365	if(i*block_size + ii + 1< r1) block_data_pos_x += dim0_offset;
6366	}
6367	/sampling and decide which predictor/
6368	{
6369	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
6370	float * cur_data_pos;
6371	float curData;
6372	float pred_reg, pred_sz;
6373	float err_sz = 0.0, err_reg = 0.0;
6374	int bmi = 0;
6375	for(int i=2; i<=block_size; i++){
6376	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + i;
6377	curData = *cur_data_pos;
6378	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6379	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6380	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6381	err_reg += fabs(pred_reg - curData);
6382
6383	bmi = block_size - i;
6384	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + bmi;
6385	curData = *cur_data_pos;
6386	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6387	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6388	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6389	err_reg += fabs(pred_reg - curData);
6390
6391	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + i;
6392	curData = *cur_data_pos;
6393	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6394	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6395	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6396	err_reg += fabs(pred_reg - curData);
6397
6398	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
6399	curData = *cur_data_pos;
6400	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6401	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6402	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6403	err_reg += fabs(pred_reg - curData);
6404	}
6405
6406	use_reg = (err_reg < err_sz);
6407	}
6408	if(use_reg){
6409	{
6410	/predict coefficients in current block via previous reg_block/
6411	float cur_coeff;
6412	double diff, itvNum;
6413	for(int e=0; e<4; e++){
6414	cur_coeff = reg_params_pos[e*num_blocks];
6415	diff = cur_coeff - last_coeffcients[e];
6416	itvNum = fabs(diff)/precision[e] + 1;
6417	if (itvNum < coeff_intvCapacity_sz){
6418	if (diff < 0) itvNum = -itvNum;
6419	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
6420	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
6421	//ganrantee comporession error against the case of machine-epsilon
6422	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
6423	coeff_type[e][coeff_index] = 0;
6424	last_coeffcients[e] = cur_coeff;
6425	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6426	}
6427	}
6428	else{
6429	coeff_type[e][coeff_index] = 0;
6430	last_coeffcients[e] = cur_coeff;
6431	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6432	}
6433	}
6434	coeff_index ++;
6435	}
6436	float curData;
6437	float pred;
6438	double itvNum;
6439	double diff;
6440	size_t index = 0;
6441	size_t block_unpredictable_count = 0;
6442	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6443	for(size_t ii=0; ii<block_size; ii++){
6444	for(size_t jj=0; jj<block_size; jj++){
6445	for(size_t kk=0; kk<block_size; kk++){
6446	curData = *cur_data_pos;
6447	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
6448	diff = curData - pred;
6449	itvNum = fabs(diff)/tmp_realPrecision + 1;
6450	if (itvNum < intvCapacity){
6451	if (diff < 0) itvNum = -itvNum;
6452	type[index] = (int) (itvNum/2) + intvRadius;
6453	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
6454	//ganrantee comporession error against the case of machine-epsilon
6455	if(fabs(curData - pred)>tmp_realPrecision){
6456	type[index] = 0;
6457	pred = curData;
6458	unpredictable_data[block_unpredictable_count ++] = curData;
6459	}
6460	}
6461	else{
6462	type[index] = 0;
6463	pred = curData;
6464	unpredictable_data[block_unpredictable_count ++] = curData;
6465	}
6466	index ++;
6467	cur_data_pos ++;
6468	}
6469	cur_data_pos ++;
6470	}
6471	cur_data_pos += pred_buffer_block_size;
6472	}
6473
6474	total_unpred += block_unpredictable_count;
6475	unpredictable_data += block_unpredictable_count;
6476	reg_count ++;
6477	}
6478	else{
6479	// use SZ
6480	// SZ predication
6481	unpredictable_count = 0;
6482	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6483	float curData;
6484	float pred3D;
6485	double itvNum, diff;
6486	size_t index = 0;
6487	for(size_t ii=0; ii<block_size; ii++){
6488	for(size_t jj=0; jj<block_size; jj++){
6489	for(size_t kk=0; kk<block_size; kk++){
6490
6491	curData = *cur_data_pos;
6492	if(fabs(curData - mean) <= realPrecision){
6493	type[index] = 1;
6494	*cur_data_pos = mean;
6495	}
6496	else
6497	{
6498	pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
6499	- cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6500	diff = curData - pred3D;
6501	itvNum = fabs(diff)/realPrecision + 1;
6502	if (itvNum < intvCapacity_sz){
6503	if (diff < 0) itvNum = -itvNum;
6504	type[index] = (int) (itvNum/2) + intvRadius;
6505	cur_data_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
6506	//ganrantee comporession error against the case of machine-epsilon
6507	if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
6508	type[index] = 0;
6509	*cur_data_pos = curData;
6510	unpredictable_data[unpredictable_count ++] = curData;
6511	}
6512	}
6513	else{
6514	type[index] = 0;
6515	*cur_data_pos = curData;
6516	unpredictable_data[unpredictable_count ++] = curData;
6517	}
6518	}
6519	index ++;
6520	cur_data_pos ++;
6521	}
6522	cur_data_pos ++;
6523	}
6524	cur_data_pos += pred_buffer_block_size;
6525	}
6526	total_unpred += unpredictable_count;
6527	unpredictable_data += unpredictable_count;
6528	// change indicator
6529	indicator_pos[k] = 1;
6530	}// end SZ
6531	reg_params_pos ++;
6532	type += block_size * block_size * block_size;
6533	} // end k
6534	indicator_pos += num_z;
6535	}// end j
6536	}// end i
6537	}
6538	else{
6539	int intvCapacity_sz = intvCapacity - 2;
6540	type = result_type;
6541	for(size_t i=0; i<num_x; i++){
6542	for(size_t j=0; j<num_y; j++){
6543	for(size_t k=0; k<num_z; k++){
6544	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
6545	// add 1 in x, y, z offset
6546	pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6547	block_data_pos_x = data_pos;
6548	for(int ii=0; ii<block_size; ii++){
6549	block_data_pos_y = block_data_pos_x;
6550	for(int jj=0; jj<block_size; jj++){
6551	block_data_pos_z = block_data_pos_y;
6552	for(int kk=0; kk<block_size; kk++){
6553	pred_buffer_pos = block_data_pos_z;
6554	if(k*block_size + kk < r3) block_data_pos_z ++;
6555	pred_buffer_pos ++;
6556	}
6557	// add 1 in z offset
6558	pred_buffer_pos ++;
6559	if(j*block_size + jj < r2) block_data_pos_y += dim1_offset;
6560	}
6561	// add 1 in y offset
6562	pred_buffer_pos += pred_buffer_block_size;
6563	if(i*block_size + ii < r1) block_data_pos_x += dim0_offset;
6564	}
6565	/sampling/
6566	{
6567	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
6568	float * cur_data_pos;
6569	float curData;
6570	float pred_reg, pred_sz;
6571	float err_sz = 0.0, err_reg = 0.0;
6572	int bmi;
6573	for(int i=2; i<=block_size; i++){
6574	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + i;
6575	curData = *cur_data_pos;
6576	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6577	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6578	err_sz += fabs(pred_sz - curData) + noise;
6579	err_reg += fabs(pred_reg - curData);
6580
6581	bmi = block_size - i;
6582	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + bmi;
6583	curData = *cur_data_pos;
6584	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6585	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6586	err_sz += fabs(pred_sz - curData) + noise;
6587	err_reg += fabs(pred_reg - curData);
6588
6589	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + i;
6590	curData = *cur_data_pos;
6591	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6592	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6593	err_sz += fabs(pred_sz - curData) + noise;
6594	err_reg += fabs(pred_reg - curData);
6595
6596	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
6597	curData = *cur_data_pos;
6598	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6599	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6600	err_sz += fabs(pred_sz - curData) + noise;
6601	err_reg += fabs(pred_reg - curData);
6602	}
6603
6604	use_reg = (err_reg < err_sz);
6605
6606	}
6607	if(use_reg)
6608	{
6609	{
6610	/predict coefficients in current block via previous reg_block/
6611	float cur_coeff;
6612	double diff, itvNum;
6613	for(int e=0; e<4; e++){
6614	cur_coeff = reg_params_pos[e*num_blocks];
6615	diff = cur_coeff - last_coeffcients[e];
6616	itvNum = fabs(diff)/precision[e] + 1;
6617	if (itvNum < coeff_intvCapacity_sz){
6618	if (diff < 0) itvNum = -itvNum;
6619	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
6620	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
6621	//ganrantee comporession error against the case of machine-epsilon
6622	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
6623	coeff_type[e][coeff_index] = 0;
6624	last_coeffcients[e] = cur_coeff;
6625	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6626	}
6627	}
6628	else{
6629	coeff_type[e][coeff_index] = 0;
6630	last_coeffcients[e] = cur_coeff;
6631	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6632	}
6633	}
6634	coeff_index ++;
6635	}
6636	float curData;
6637	float pred;
6638	double itvNum;
6639	double diff;
6640	size_t index = 0;
6641	size_t block_unpredictable_count = 0;
6642	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6643	for(size_t ii=0; ii<block_size; ii++){
6644	for(size_t jj=0; jj<block_size; jj++){
6645	for(size_t kk=0; kk<block_size; kk++){
6646	curData = *cur_data_pos;
6647	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
6648	diff = curData - pred;
6649	itvNum = fabs(diff)/tmp_realPrecision + 1;
6650	if (itvNum < intvCapacity){
6651	if (diff < 0) itvNum = -itvNum;
6652	type[index] = (int) (itvNum/2) + intvRadius;
6653	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
6654	//ganrantee comporession error against the case of machine-epsilon
6655	if(fabs(curData - pred)>tmp_realPrecision){
6656	type[index] = 0;
6657	pred = curData;
6658	unpredictable_data[block_unpredictable_count ++] = curData;
6659	}
6660	}
6661	else{
6662	type[index] = 0;
6663	pred = curData;
6664	unpredictable_data[block_unpredictable_count ++] = curData;
6665	}
6666	index ++;
6667	cur_data_pos ++;
6668	}
6669	cur_data_pos ++;
6670	}
6671	cur_data_pos += pred_buffer_block_size;
6672	}
6673	total_unpred += block_unpredictable_count;
6674	unpredictable_data += block_unpredictable_count;
6675	reg_count ++;
6676	}
6677	else{
6678	// use SZ
6679	// SZ predication
6680	unpredictable_count = 0;
6681	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6682	float curData;
6683	float pred3D;
6684	double itvNum, diff;
6685	size_t index = 0;
6686	for(size_t ii=0; ii<block_size; ii++){
6687	for(size_t jj=0; jj<block_size; jj++){
6688	for(size_t kk=0; kk<block_size; kk++){
6689	curData = *cur_data_pos;
6690	pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
6691	- cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6692	diff = curData - pred3D;
6693	itvNum = fabs(diff)/realPrecision + 1;
6694	if (itvNum < intvCapacity_sz){
6695	if (diff < 0) itvNum = -itvNum;
6696	type[index] = (int) (itvNum/2) + intvRadius;
6697	cur_data_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
6698	//ganrantee comporession error against the case of machine-epsilon
6699	if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
6700	type[index] = 0;
6701	*cur_data_pos = curData;
6702	unpredictable_data[unpredictable_count ++] = curData;
6703	}
6704	}
6705	else{
6706	type[index] = 0;
6707	*cur_data_pos = curData;
6708	unpredictable_data[unpredictable_count ++] = curData;
6709	}
6710	index ++;
6711	cur_data_pos ++;
6712	}
6713	cur_data_pos ++;
6714	}
6715	cur_data_pos += pred_buffer_block_size;
6716	}
6717	total_unpred += unpredictable_count;
6718	unpredictable_data += unpredictable_count;
6719	// change indicator
6720	indicator_pos[k] = 1;
6721	}// end SZ
6722	reg_params_pos ++;
6723	type += block_size * block_size * block_size;
6724	}
6725	indicator_pos += num_z;
6726	}
6727	}
6728	}
6729	free(pred_buffer);
6730	int stateNum = 2*quantization_intervals;
6731	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6732
6733	size_t nodeCount = 0;
6734	init(huffmanTree, result_type, num_blocks*max_num_block_elements);
6735	size_t i = 0;
6736	for (i = 0; i < huffmanTree->stateNum; i++)
6737	if (huffmanTree->code[i]) nodeCount++;
6738	nodeCount = nodeCount*2-1;
6739
6740	unsigned char *treeBytes;
6741	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6742
6743	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
6744	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
6745	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
6746	unsigned char * result_pos = result;
6747	initRandomAccessBytes(result_pos);
6748
6749	result_pos += meta_data_offset;
6750
6751	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
6752	result_pos += exe_params->SZ_SIZE_TYPE;
6753
6754	intToBytes_bigEndian(result_pos, block_size);
6755	result_pos += sizeof(int);
6756	doubleToBytes(result_pos, realPrecision);
6757	result_pos += sizeof(double);
6758	intToBytes_bigEndian(result_pos, quantization_intervals);
6759	result_pos += sizeof(int);
6760	intToBytes_bigEndian(result_pos, treeByteSize);
6761	result_pos += sizeof(int);
6762	intToBytes_bigEndian(result_pos, nodeCount);
6763	result_pos += sizeof(int);
6764	memcpy(result_pos, treeBytes, treeByteSize);
6765	result_pos += treeByteSize;
6766	free(treeBytes);
6767
6768	memcpy(result_pos, &use_mean, sizeof(unsigned char));
6769	result_pos += sizeof(unsigned char);
6770	memcpy(result_pos, &mean, sizeof(float));
6771	result_pos += sizeof(float);
6772	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
6773	result_pos += indicator_size;
6774
6775	//convert the lead/mid/resi to byte stream
6776	if(reg_count > 0){
6777	for(int e=0; e<4; e++){
6778	int stateNum = 2*coeff_intvCapacity_sz;
6779	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6780	size_t nodeCount = 0;
6781	init(huffmanTree, coeff_type[e], reg_count);
6782	size_t i = 0;
6783	for (i = 0; i < huffmanTree->stateNum; i++)
6784	if (huffmanTree->code[i]) nodeCount++;
6785	nodeCount = nodeCount*2-1;
6786	unsigned char *treeBytes;
6787	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6788	doubleToBytes(result_pos, precision[e]);
6789	result_pos += sizeof(double);
6790	intToBytes_bigEndian(result_pos, coeff_intvRadius);
6791	result_pos += sizeof(int);
6792	intToBytes_bigEndian(result_pos, treeByteSize);
6793	result_pos += sizeof(int);
6794	intToBytes_bigEndian(result_pos, nodeCount);
6795	result_pos += sizeof(int);
6796	memcpy(result_pos, treeBytes, treeByteSize);
6797	result_pos += treeByteSize;
6798	free(treeBytes);
6799	size_t typeArray_size = 0;
6800	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
6801	sizeToBytes(result_pos, typeArray_size);
6802	result_pos += sizeof(size_t) + typeArray_size;
6803	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
6804	result_pos += sizeof(int);
6805	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
6806	result_pos += coeff_unpredictable_count[e]*sizeof(float);
6807	SZ_ReleaseHuffman(huffmanTree);
6808	}
6809	}
6810	free(coeff_result_type);
6811	free(coeff_unpredictable_data);
6812
6813	//record the number of unpredictable data and also store them
6814	memcpy(result_pos, &total_unpred, sizeof(size_t));
6815	result_pos += sizeof(size_t);
6816	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
6817	result_pos += total_unpred * sizeof(float);
6818	size_t typeArray_size = 0;
6819	encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size);
6820	result_pos += typeArray_size;
6821	size_t totalEncodeSize = result_pos - result;
6822	free(indicator);
6823	free(result_unpredictable_data);
6824	free(result_type);
6825	free(reg_params);
6826
6827
6828	SZ_ReleaseHuffman(huffmanTree);
6829	*comp_size = totalEncodeSize;
6830	return result;
6831	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: