Context Navigation

source: thirdparty/SZ/sz/src/sz_double.c @ e6aa0eb

Revision e6aa0eb, 186.1 KB checked in by Hal Finkel <hfinkel@…>, 6 years ago (diff)
add stddef.h for ptrdiff_t
Property mode set to `100644`

Line
1	/**
2	* @file sz_double.c
3	* @author Sheng Di and Dingwen Tao
4	* @date Aug, 2016
5	* @brief SZ_Init, Compression and Decompression functions
6	* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
7	* See COPYRIGHT in top-level directory.
8	*/
9
10
11	#include <stdio.h>
12	#include <stdlib.h>
13	#include <stddef.h>
14	#include <string.h>
15	#include <unistd.h>
16	#include <math.h>
17	#include "sz.h"
18	#include "CompressElement.h"
19	#include "DynamicByteArray.h"
20	#include "DynamicIntArray.h"
21	#include "TightDataPointStorageD.h"
22	#include "sz_double.h"
23	#include "sz_double_pwr.h"
24	#include "szd_double.h"
25	#include "szd_double_pwr.h"
26	#include "zlib.h"
27	#include "rw.h"
28	#include "sz_double_ts.h"
29	#include "utility.h"
30
31	unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize)
32	{
33	outSize = dataLengthsizeof(double);
34	unsigned char* out = (unsigned char)malloc(dataLengthsizeof(double));
35	memcpy(out, data, dataLength*sizeof(double));
36	return out;
37	}
38
39	void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue)
40	{
41	short reqExpo = getPrecisionReqLength_double(realPrecision);
42	*reqLength = 12+radExpo - reqExpo; //radExpo-reqExpo == reqMantiLength
43	if(*reqLength<12)
44	*reqLength = 12;
45	if(*reqLength>64)
46	{
47	*reqLength = 64;
48	*medianValue = 0;
49	}
50	}
51
52	unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision)
53	{
54	size_t i = 0, radiusIndex;
55	double pred_value = 0, pred_err;
56	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
57	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
58	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
59	for(i=2;i<dataLength;i++)
60	{
61	if(i%confparams_cpr->sampleDistance==0)
62	{
63	//pred_value = 2*oriData[i-1] - oriData[i-2];
64	pred_value = oriData[i-1];
65	pred_err = fabs(pred_value - oriData[i]);
66	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
67	if(radiusIndex>=confparams_cpr->maxRangeRadius)
68	radiusIndex = confparams_cpr->maxRangeRadius - 1;
69	intervals[radiusIndex]++;
70	}
71	}
72	//compute the appropriate number
73	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
74	size_t sum = 0;
75	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
76	{
77	sum += intervals[i];
78	if(sum>targetCount)
79	break;
80	}
81
82	if(i>=confparams_cpr->maxRangeRadius)
83	i = confparams_cpr->maxRangeRadius-1;
84	unsigned int accIntervals = 2*(i+1);
85	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
86
87	if(powerOf2<32)
88	powerOf2 = 32;
89
90	free(intervals);
91	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
92	return powerOf2;
93	}
94
95	unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision)
96	{
97	size_t i,j, index;
98	size_t radiusIndex;
99	double pred_value = 0, pred_err;
100	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
101	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
102	size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance;
103	for(i=1;i<r1;i++)
104	{
105	for(j=1;j<r2;j++)
106	{
107	if((i+j)%confparams_cpr->sampleDistance==0)
108	{
109	index = i*r2+j;
110	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
111	pred_err = fabs(pred_value - oriData[index]);
112	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
113	if(radiusIndex>=confparams_cpr->maxRangeRadius)
114	radiusIndex = confparams_cpr->maxRangeRadius - 1;
115	intervals[radiusIndex]++;
116	}
117	}
118	}
119	//compute the appropriate number
120	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
121	size_t sum = 0;
122	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
123	{
124	sum += intervals[i];
125	if(sum>targetCount)
126	break;
127	}
128	if(i>=confparams_cpr->maxRangeRadius)
129	i = confparams_cpr->maxRangeRadius-1;
130	unsigned int accIntervals = 2*(i+1);
131	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
132	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
133
134	if(powerOf2<32)
135	powerOf2 = 32;
136
137	free(intervals);
138	return powerOf2;
139	}
140
141	unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
142	{
143	size_t i,j,k, index;
144	size_t radiusIndex;
145	size_t r23=r2*r3;
146	double pred_value = 0, pred_err;
147	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
148	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
149	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)/confparams_cpr->sampleDistance;
150	for(i=1;i<r1;i++)
151	{
152	for(j=1;j<r2;j++)
153	{
154	for(k=1;k<r3;k++)
155	{
156	if((i+j+k)%confparams_cpr->sampleDistance==0)
157	{
158	index = ir23+jr3+k;
159	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
160	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
161	pred_err = fabs(pred_value - oriData[index]);
162	radiusIndex = (pred_err/realPrecision+1)/2;
163	if(radiusIndex>=confparams_cpr->maxRangeRadius)
164	radiusIndex = confparams_cpr->maxRangeRadius - 1;
165	intervals[radiusIndex]++;
166	}
167	}
168
169	}
170	}
171	//compute the appropriate number
172	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
173	size_t sum = 0;
174	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
175	{
176	sum += intervals[i];
177	if(sum>targetCount)
178	break;
179	}
180	if(i>=confparams_cpr->maxRangeRadius)
181	i = confparams_cpr->maxRangeRadius-1;
182
183	unsigned int accIntervals = 2*(i+1);
184	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
185
186	if(powerOf2<32)
187	powerOf2 = 32;
188
189	free(intervals);
190	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
191	return powerOf2;
192	}
193
194	unsigned int optimize_intervals_double_4D(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision)
195	{
196	size_t i,j,k,l, index;
197	size_t radiusIndex;
198	size_t r234=r2r3r4;
199	size_t r34=r3*r4;
200	double pred_value = 0, pred_err;
201	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
202	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
203	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)*(r4-1)/confparams_cpr->sampleDistance;
204	for(i=1;i<r1;i++)
205	{
206	for(j=1;j<r2;j++)
207	{
208	for(k=1;k<r3;k++)
209	{
210	for (l=1;l<r4;l++)
211	{
212	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
213	{
214	index = ir234+jr34+k*r4+l;
215	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34]
216	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
217	pred_err = fabs(pred_value - oriData[index]);
218	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
219	if(radiusIndex>=confparams_cpr->maxRangeRadius)
220	radiusIndex = confparams_cpr->maxRangeRadius - 1;
221	intervals[radiusIndex]++;
222	}
223	}
224	}
225	}
226	}
227	//compute the appropriate number
228	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
229	size_t sum = 0;
230	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
231	{
232	sum += intervals[i];
233	if(sum>targetCount)
234	break;
235	}
236	if(i>=confparams_cpr->maxRangeRadius)
237	i = confparams_cpr->maxRangeRadius-1;
238
239	unsigned int accIntervals = 2*(i+1);
240	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
241
242	if(powerOf2<32)
243	powerOf2 = 32;
244
245	free(intervals);
246	return powerOf2;
247	}
248
249	TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData,
250	size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d)
251	{
252	#ifdef HAVE_TIMECMPR
253	double* decData = NULL;
254	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
255	decData = (double*)(multisteps->hist_data);
256	#endif
257
258	unsigned int quantization_intervals;
259	if(exe_params->optQuantMode==1)
260	quantization_intervals = optimize_intervals_double_1D_opt(oriData, dataLength, realPrecision);
261	else
262	quantization_intervals = exe_params->intvCapacity;
263	updateQuantizationInfo(quantization_intervals);
264
265	size_t i;
266	int reqLength;
267	double medianValue = medianValue_d;
268	short radExpo = getExponent_double(valueRangeSize/2);
269
270	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
271
272	int* type = (int) malloc(dataLengthsizeof(int));
273
274	double* spaceFillingValue = oriData; //
275
276	DynamicIntArray *exactLeadNumArray;
277	new_DIA(&exactLeadNumArray, DynArrayInitLen);
278
279	DynamicByteArray *exactMidByteArray;
280	new_DBA(&exactMidByteArray, DynArrayInitLen);
281
282	DynamicIntArray *resiBitArray;
283	new_DIA(&resiBitArray, DynArrayInitLen);
284
285	unsigned char preDataBytes[8];
286	longToBytes_bigEndian(preDataBytes, 0);
287
288	int reqBytesLength = reqLength/8;
289	int resiBitsLength = reqLength%8;
290	double last3CmprsData[3] = {0};
291
292	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
293	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
294
295	//add the first data
296	type[0] = 0;
297	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
298	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
299	memcpy(preDataBytes,vce->curBytes,8);
300	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
301	listAdd_double(last3CmprsData, vce->data);
302	#ifdef HAVE_TIMECMPR
303	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
304	decData[0] = vce->data;
305	#endif
306
307	//add the second data
308	type[1] = 0;
309	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
310	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
311	memcpy(preDataBytes,vce->curBytes,8);
312	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
313	listAdd_double(last3CmprsData, vce->data);
314	#ifdef HAVE_TIMECMPR
315	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
316	decData[1] = vce->data;
317	#endif
318	int state;
319	double checkRadius;
320	double curData;
321	double pred;
322	double predAbsErr;
323	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
324	double interval = 2*realPrecision;
325
326	for(i=2;i<dataLength;i++)
327	{
328	//printf("%.30G\n",last3CmprsData[0]);
329	curData = spaceFillingValue[i];
330	//pred = 2*last3CmprsData[0] - last3CmprsData[1];
331	pred = last3CmprsData[0];
332	predAbsErr = fabs(curData - pred);
333	if(predAbsErr<checkRadius)
334	{
335	state = (predAbsErr/realPrecision+1)/2;
336	if(curData>=pred)
337	{
338	type[i] = exe_params->intvRadius+state;
339	pred = pred + state*interval;
340	}
341	else //curData<pred
342	{
343	type[i] = exe_params->intvRadius-state;
344	pred = pred - state*interval;
345	}
346	listAdd_double(last3CmprsData, pred);
347	#ifdef HAVE_TIMECMPR
348	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
349	decData[i] = pred;
350	#endif
351	continue;
352	}
353
354	//unpredictable data processing
355	type[i] = 0;
356	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
357	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
358	memcpy(preDataBytes,vce->curBytes,8);
359	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
360
361	listAdd_double(last3CmprsData, vce->data);
362	#ifdef HAVE_TIMECMPR
363	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
364	decData[i] = vce->data;
365	#endif
366
367	}//end of for
368
369	int exactDataNum = exactLeadNumArray->size;
370
371	TightDataPointStorageD* tdps;
372
373	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
374	type, exactMidByteArray->array, exactMidByteArray->size,
375	exactLeadNumArray->array,
376	resiBitArray->array, resiBitArray->size,
377	resiBitsLength,
378	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
379
380	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
381	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
382
383	//free memory
384	free_DIA(exactLeadNumArray);
385	free_DIA(resiBitArray);
386	free(type);
387	free(vce);
388	free(lce);
389	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
390
391	return tdps;
392	}
393
394	void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, TightDataPointStorageD* tdps,
395	unsigned char** newByteData, size_t *outSize)
396	{
397	int doubleSize = sizeof(double);
398	size_t k = 0, i;
399	tdps->isLossless = 1;
400	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength;
401	newByteData = (unsigned char)malloc(totalByteLength);
402
403	unsigned char dsLengthBytes[8];
404	for (i = 0; i < 3; i++)//3
405	(*newByteData)[k++] = versionNumber[i];
406
407	if(exe_params->SZ_SIZE_TYPE==4)//1
408	(*newByteData)[k++] = 16; //00010000
409	else
410	(*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8
411
412	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
413	k = k + MetaDataByteLength;
414
415	sizeToBytes(dsLengthBytes,dataLength);
416	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//ST: 4 or 8
417	(*newByteData)[k++] = dsLengthBytes[i];
418
419	if(sysEndianType==BIG_ENDIAN_SYSTEM)
420	memcpy((newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLengthdoubleSize);
421	else
422	{
423	unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
424	for(i=0;i<dataLength;i++,p+=doubleSize)
425	doubleToBytes(p, oriData[i]);
426	}
427	*outSize = totalByteLength;
428	}
429
430
431	char SZ_compress_args_double_NoCkRngeNoGzip_1D(unsigned char** newByteData, double *oriData,
432	size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d)
433	{
434	char compressionType = 0;
435	TightDataPointStorageD* tdps = NULL;
436	#ifdef HAVE_TIMECMPR
437	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
438	{
439	int timestep = sz_tsc->currentStep;
440	if(timestep % confparams_cpr->snapshotCmprStep != 0)
441	{
442	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
443	compressionType = 1; //time-series based compression
444	}
445	else
446	{
447	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
448	compressionType = 0; //snapshot-based compression
449	multisteps->lastSnapshotStep = timestep;
450	}
451	}
452	else
453	#endif
454	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
455
456	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
457
458	if(outSize>dataLengthsizeof(double))
459	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
460
461	free_TightDataPointStorageD(tdps);
462	return compressionType;
463	}
464
465	TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_d)
466	{
467	#ifdef HAVE_TIMECMPR
468	double* decData = NULL;
469	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
470	decData = (double*)(multisteps->hist_data);
471	#endif
472
473	unsigned int quantization_intervals;
474	if(exe_params->optQuantMode==1)
475	{
476	quantization_intervals = optimize_intervals_double_2D_opt(oriData, r1, r2, realPrecision);
477	updateQuantizationInfo(quantization_intervals);
478	}
479	else
480	quantization_intervals = exe_params->intvCapacity;
481	size_t i,j;
482	int reqLength;
483	double pred1D, pred2D;
484	double diff = 0.0;
485	double itvNum = 0;
486	double P0, P1;
487
488	size_t dataLength = r1*r2;
489
490	P0 = (double)malloc(r2sizeof(double));
491	memset(P0, 0, r2*sizeof(double));
492	P1 = (double)malloc(r2sizeof(double));
493	memset(P1, 0, r2*sizeof(double));
494
495	double medianValue = medianValue_d;
496	short radExpo = getExponent_double(valueRangeSize/2);
497	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
498
499	int* type = (int) malloc(dataLengthsizeof(int));
500	//type[dataLength]=0;
501
502	double* spaceFillingValue = oriData; //
503
504	DynamicIntArray *exactLeadNumArray;
505	new_DIA(&exactLeadNumArray, DynArrayInitLen);
506
507	DynamicByteArray *exactMidByteArray;
508	new_DBA(&exactMidByteArray, DynArrayInitLen);
509
510	DynamicIntArray *resiBitArray;
511	new_DIA(&resiBitArray, DynArrayInitLen);
512
513	type[0] = 0;
514
515	unsigned char preDataBytes[8];
516	longToBytes_bigEndian(preDataBytes, 0);
517
518	int reqBytesLength = reqLength/8;
519	int resiBitsLength = reqLength%8;
520
521	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
522	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
523
524	/* Process Row-0 data 0*/
525	type[0] = 0;
526	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
527	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
528	memcpy(preDataBytes,vce->curBytes,8);
529	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
530	P1[0] = vce->data;
531	#ifdef HAVE_TIMECMPR
532	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
533	decData[0] = vce->data;
534	#endif
535
536	/* Process Row-0 data 1*/
537	pred1D = P1[0];
538	diff = spaceFillingValue[1] - pred1D;
539
540	itvNum = fabs(diff)/realPrecision + 1;
541
542	if (itvNum < exe_params->intvCapacity)
543	{
544	if (diff < 0) itvNum = -itvNum;
545	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
546	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
547	}
548	else
549	{
550	type[1] = 0;
551	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
552	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
553	memcpy(preDataBytes,vce->curBytes,8);
554	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
555	P1[1] = vce->data;
556	}
557	#ifdef HAVE_TIMECMPR
558	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
559	decData[1] = P1[1];
560	#endif
561
562	/* Process Row-0 data 2 --> data r2-1 */
563	for (j = 2; j < r2; j++)
564	{
565	pred1D = 2*P1[j-1] - P1[j-2];
566	diff = spaceFillingValue[j] - pred1D;
567
568	itvNum = fabs(diff)/realPrecision + 1;
569
570	if (itvNum < exe_params->intvCapacity)
571	{
572	if (diff < 0) itvNum = -itvNum;
573	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
574	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
575	}
576	else
577	{
578	type[j] = 0;
579	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
580	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
581	memcpy(preDataBytes,vce->curBytes,8);
582	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
583	P1[j] = vce->data;
584	}
585	#ifdef HAVE_TIMECMPR
586	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
587	decData[j] = P1[j];
588	#endif
589	}
590
591	/* Process Row-1 --> Row-r1-1 */
592	size_t index;
593	for (i = 1; i < r1; i++)
594	{
595	/* Process row-i data 0 */
596	index = i*r2;
597	pred1D = P1[0];
598	diff = spaceFillingValue[index] - pred1D;
599
600	itvNum = fabs(diff)/realPrecision + 1;
601
602	if (itvNum < exe_params->intvCapacity)
603	{
604	if (diff < 0) itvNum = -itvNum;
605	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
606	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
607	}
608	else
609	{
610	type[index] = 0;
611	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
612	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
613	memcpy(preDataBytes,vce->curBytes,8);
614	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
615	P0[0] = vce->data;
616	}
617	#ifdef HAVE_TIMECMPR
618	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
619	decData[index] = P0[0];
620	#endif
621
622	/* Process row-i data 1 --> r2-1*/
623	for (j = 1; j < r2; j++)
624	{
625	index = i*r2+j;
626	pred2D = P0[j-1] + P1[j] - P1[j-1];
627
628	diff = spaceFillingValue[index] - pred2D;
629
630	itvNum = fabs(diff)/realPrecision + 1;
631
632	if (itvNum < exe_params->intvCapacity)
633	{
634	if (diff < 0) itvNum = -itvNum;
635	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
636	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
637	}
638	else
639	{
640	type[index] = 0;
641	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
642	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
643	memcpy(preDataBytes,vce->curBytes,8);
644	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
645	P0[j] = vce->data;
646	}
647	#ifdef HAVE_TIMECMPR
648	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
649	decData[index] = P0[j];
650	#endif
651	}
652
653	double *Pt;
654	Pt = P1;
655	P1 = P0;
656	P0 = Pt;
657	}
658
659	if(r2!=1)
660	free(P0);
661	free(P1);
662	size_t exactDataNum = exactLeadNumArray->size;
663
664	TightDataPointStorageD* tdps;
665
666	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
667	type, exactMidByteArray->array, exactMidByteArray->size,
668	exactLeadNumArray->array,
669	resiBitArray->array, resiBitArray->size,
670	resiBitsLength,
671	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
672
673	/* int sum =0;
674	for(i=0;i<dataLength;i++)
675	if(type[i]==0) sum++;
676	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);
677
678	for(i=0;i<dataLength;i++)
679	printf("%d ", type[i]);
680	printf("\n");*/
681
682	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
683	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
684
685	// for(i = 3800;i<3844;i++)
686	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
687
688	//free memory
689	free_DIA(exactLeadNumArray);
690	free_DIA(resiBitArray);
691	free(type);
692	free(vce);
693	free(lce);
694	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
695
696	return tdps;
697	}
698
699	/**
700	*
701	* Note: @r1 is high dimension
702	* @r2 is low dimension
703	* */
704	char SZ_compress_args_double_NoCkRngeNoGzip_2D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
705	{
706	size_t dataLength = r1*r2;
707	char compressionType = 0;
708	TightDataPointStorageD* tdps = NULL;
709	#ifdef HAVE_TIMECMPR
710	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
711	{
712	int timestep = sz_tsc->currentStep;
713	if(timestep % confparams_cpr->snapshotCmprStep != 0)
714	{
715	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
716	compressionType = 1; //time-series based compression
717	}
718	else
719	{
720	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
721	compressionType = 0; //snapshot-based compression
722	multisteps->lastSnapshotStep = timestep;
723	}
724	}
725	else
726	#endif
727	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
728
729	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
730
731	if(outSize>dataLengthsizeof(double))
732	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
733
734	free_TightDataPointStorageD(tdps);
735	return compressionType;
736	}
737
738	TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_d)
739	{
740	#ifdef HAVE_TIMECMPR
741	double* decData = NULL;
742	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
743	decData = (double*)(multisteps->hist_data);
744	#endif
745
746	unsigned int quantization_intervals;
747	if(exe_params->optQuantMode==1)
748	{
749	quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision);
750	updateQuantizationInfo(quantization_intervals);
751	}
752	else
753	quantization_intervals = exe_params->intvCapacity;
754	size_t i,j,k;
755	int reqLength;
756	double pred1D, pred2D, pred3D;
757	double diff = 0.0;
758	double itvNum = 0;
759	double P0, P1;
760
761	size_t dataLength = r1r2r3;
762
763	size_t r23 = r2*r3;
764
765	P0 = (double)malloc(r23sizeof(double));
766	P1 = (double)malloc(r23sizeof(double));
767
768	double medianValue = medianValue_d;
769	short radExpo = getExponent_double(valueRangeSize/2);
770	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
771
772	int* type = (int) malloc(dataLengthsizeof(int));
773	//type[dataLength]=0;
774
775	double* spaceFillingValue = oriData; //
776
777	DynamicIntArray *exactLeadNumArray;
778	new_DIA(&exactLeadNumArray, DynArrayInitLen);
779
780	DynamicByteArray *exactMidByteArray;
781	new_DBA(&exactMidByteArray, DynArrayInitLen);
782
783	DynamicIntArray *resiBitArray;
784	new_DIA(&resiBitArray, DynArrayInitLen);
785
786	type[0] = 0;
787
788	unsigned char preDataBytes[8];
789	longToBytes_bigEndian(preDataBytes, 0);
790
791	int reqBytesLength = reqLength/8;
792	int resiBitsLength = reqLength%8;
793
794	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
795	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
796
797
798	/////////////////////////// Process layer-0 ///////////////////////////
799	/* Process Row-0 data 0*/
800	type[0] = 0;
801	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
802	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
803	memcpy(preDataBytes,vce->curBytes,8);
804	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
805	P1[0] = vce->data;
806	#ifdef HAVE_TIMECMPR
807	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
808	decData[0] = P1[0];
809	#endif
810
811	/* Process Row-0 data 1*/
812	pred1D = P1[0];
813	diff = spaceFillingValue[1] - pred1D;
814
815	itvNum = fabs(diff)/realPrecision + 1;
816
817	if (itvNum < exe_params->intvCapacity)
818	{
819	if (diff < 0) itvNum = -itvNum;
820	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
821	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
822	}
823	else
824	{
825	type[1] = 0;
826	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
827	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
828	memcpy(preDataBytes,vce->curBytes,8);
829	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
830	P1[1] = vce->data;
831	}
832	#ifdef HAVE_TIMECMPR
833	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
834	decData[1] = P1[1];
835	#endif
836
837	/* Process Row-0 data 2 --> data r3-1 */
838	for (j = 2; j < r3; j++)
839	{
840	pred1D = 2*P1[j-1] - P1[j-2];
841	diff = spaceFillingValue[j] - pred1D;
842
843	itvNum = fabs(diff)/realPrecision + 1;
844
845	if (itvNum < exe_params->intvCapacity)
846	{
847	if (diff < 0) itvNum = -itvNum;
848	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
849	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
850	}
851	else
852	{
853	type[j] = 0;
854	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
855	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
856	memcpy(preDataBytes,vce->curBytes,8);
857	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
858	P1[j] = vce->data;
859	}
860	#ifdef HAVE_TIMECMPR
861	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
862	decData[j] = P1[j];
863	#endif
864	}
865
866	/* Process Row-1 --> Row-r2-1 */
867	size_t index;
868	for (i = 1; i < r2; i++)
869	{
870	/* Process row-i data 0 */
871	index = i*r3;
872	pred1D = P1[index-r3];
873	diff = spaceFillingValue[index] - pred1D;
874
875	itvNum = fabs(diff)/realPrecision + 1;
876
877	if (itvNum < exe_params->intvCapacity)
878	{
879	if (diff < 0) itvNum = -itvNum;
880	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
881	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
882	}
883	else
884	{
885	type[index] = 0;
886	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
887	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
888	memcpy(preDataBytes,vce->curBytes,8);
889	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
890	P1[index] = vce->data;
891	}
892	#ifdef HAVE_TIMECMPR
893	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
894	decData[index] = P1[index];
895	#endif
896
897	/* Process row-i data 1 --> data r3-1*/
898	for (j = 1; j < r3; j++)
899	{
900	index = i*r3+j;
901	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
902
903	diff = spaceFillingValue[index] - pred2D;
904
905	itvNum = fabs(diff)/realPrecision + 1;
906
907	if (itvNum < exe_params->intvCapacity)
908	{
909	if (diff < 0) itvNum = -itvNum;
910	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
911	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
912	}
913	else
914	{
915	type[index] = 0;
916	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
917	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
918	memcpy(preDataBytes,vce->curBytes,8);
919	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
920	P1[index] = vce->data;
921	}
922	#ifdef HAVE_TIMECMPR
923	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
924	decData[index] = P1[index];
925	#endif
926	}
927	}
928
929
930	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
931
932	for (k = 1; k < r1; k++)
933	{
934	/* Process Row-0 data 0*/
935	index = k*r23;
936	pred1D = P1[0];
937	diff = spaceFillingValue[index] - pred1D;
938
939	itvNum = fabs(diff)/realPrecision + 1;
940
941	if (itvNum < exe_params->intvCapacity)
942	{
943	if (diff < 0) itvNum = -itvNum;
944	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
945	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
946	}
947	else
948	{
949	type[index] = 0;
950	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
951	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
952	memcpy(preDataBytes,vce->curBytes,8);
953	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
954	P0[0] = vce->data;
955	}
956	#ifdef HAVE_TIMECMPR
957	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
958	decData[index] = P0[0];
959	#endif
960
961	/* Process Row-0 data 1 --> data r3-1 */
962	for (j = 1; j < r3; j++)
963	{
964	//index = kr2r3+j;
965	index ++;
966	pred2D = P0[j-1] + P1[j] - P1[j-1];
967	diff = spaceFillingValue[index] - pred2D;
968
969	itvNum = fabs(diff)/realPrecision + 1;
970
971	if (itvNum < exe_params->intvCapacity)
972	{
973	if (diff < 0) itvNum = -itvNum;
974	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
975	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
976	}
977	else
978	{
979	type[index] = 0;
980	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
981	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
982	memcpy(preDataBytes,vce->curBytes,8);
983	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
984	P0[j] = vce->data;
985	}
986	#ifdef HAVE_TIMECMPR
987	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
988	decData[index] = P0[j];
989	#endif
990	}
991
992	/* Process Row-1 --> Row-r2-1 */
993	size_t index2D;
994	for (i = 1; i < r2; i++)
995	{
996	/* Process Row-i data 0 */
997	index = kr23 + ir3;
998	index2D = i*r3;
999	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
1000	diff = spaceFillingValue[index] - pred2D;
1001
1002	itvNum = fabs(diff)/realPrecision + 1;
1003
1004	if (itvNum < exe_params->intvCapacity)
1005	{
1006	if (diff < 0) itvNum = -itvNum;
1007	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1008	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1009	}
1010	else
1011	{
1012	type[index] = 0;
1013	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1014	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1015	memcpy(preDataBytes,vce->curBytes,8);
1016	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1017	P0[index2D] = vce->data;
1018	}
1019	#ifdef HAVE_TIMECMPR
1020	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1021	decData[index] = P0[index2D];
1022	#endif
1023
1024	/* Process Row-i data 1 --> data r3-1 */
1025	for (j = 1; j < r3; j++)
1026	{
1027	//index = kr2r3 + i*r3 + j;
1028	index ++;
1029	index2D = i*r3 + j;
1030	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
1031	diff = spaceFillingValue[index] - pred3D;
1032
1033	itvNum = fabs(diff)/realPrecision + 1;
1034
1035	if (itvNum < exe_params->intvCapacity)
1036	{
1037	if (diff < 0) itvNum = -itvNum;
1038	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1039	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1040	}
1041	else
1042	{
1043	type[index] = 0;
1044	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1045	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1046	memcpy(preDataBytes,vce->curBytes,8);
1047	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1048	P0[index2D] = vce->data;
1049	}
1050	#ifdef HAVE_TIMECMPR
1051	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1052	decData[index] = P0[index2D];
1053	#endif
1054	}
1055	}
1056
1057	double *Pt;
1058	Pt = P1;
1059	P1 = P0;
1060	P0 = Pt;
1061	}
1062	if(r23!=1)
1063	free(P0);
1064	free(P1);
1065	size_t exactDataNum = exactLeadNumArray->size;
1066
1067	TightDataPointStorageD* tdps;
1068
1069	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
1070	type, exactMidByteArray->array, exactMidByteArray->size,
1071	exactLeadNumArray->array,
1072	resiBitArray->array, resiBitArray->size,
1073	resiBitsLength,
1074	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1075
1076	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
1077	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
1078
1079	// for(i = 3800;i<3844;i++)
1080	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
1081
1082	//free memory
1083	free_DIA(exactLeadNumArray);
1084	free_DIA(resiBitArray);
1085	free(type);
1086	free(vce);
1087	free(lce);
1088	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1089
1090	return tdps;
1091	}
1092
1093
1094	char SZ_compress_args_double_NoCkRngeNoGzip_3D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
1095	{
1096	size_t dataLength = r1r2r3;
1097	char compressionType = 0;
1098	TightDataPointStorageD* tdps = NULL;
1099	#ifdef HAVE_TIMECMPR
1100	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1101	{
1102	int timestep = sz_tsc->currentStep;
1103	if(timestep % confparams_cpr->snapshotCmprStep != 0)
1104	{
1105	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
1106	compressionType = 1; //time-series based compression
1107	}
1108	else
1109	{
1110	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
1111	compressionType = 0; //snapshot-based compression
1112	multisteps->lastSnapshotStep = timestep;
1113	}
1114	}
1115	else
1116	#endif
1117	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
1118
1119	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
1120
1121	if(outSize>dataLengthsizeof(double))
1122	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1123
1124	free_TightDataPointStorageD(tdps);
1125	return compressionType;
1126	}
1127
1128	TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, double valueRangeSize, double medianValue_d)
1129	{
1130	unsigned int quantization_intervals;
1131	if(exe_params->optQuantMode==1)
1132	{
1133	quantization_intervals = optimize_intervals_double_4D(oriData, r1, r2, r3, r4, realPrecision);
1134	updateQuantizationInfo(quantization_intervals);
1135	}
1136	else
1137	quantization_intervals = exe_params->intvCapacity;
1138
1139	size_t i,j,k;
1140	int reqLength;
1141	double pred1D, pred2D, pred3D;
1142	double diff = 0.0;
1143	double itvNum = 0;
1144	double P0, P1;
1145
1146	size_t dataLength = r1r2r3*r4;
1147
1148	size_t r234 = r2r3r4;
1149	size_t r34 = r3*r4;
1150
1151	P0 = (double)malloc(r34sizeof(double));
1152	P1 = (double)malloc(r34sizeof(double));
1153
1154	double medianValue = medianValue_d;
1155	short radExpo = getExponent_double(valueRangeSize/2);
1156	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
1157
1158	int* type = (int) malloc(dataLengthsizeof(int));
1159
1160	double* spaceFillingValue = oriData; //
1161
1162	DynamicIntArray *exactLeadNumArray;
1163	new_DIA(&exactLeadNumArray, DynArrayInitLen);
1164
1165	DynamicByteArray *exactMidByteArray;
1166	new_DBA(&exactMidByteArray, DynArrayInitLen);
1167
1168	DynamicIntArray *resiBitArray;
1169	new_DIA(&resiBitArray, DynArrayInitLen);
1170
1171	unsigned char preDataBytes[8];
1172	longToBytes_bigEndian(preDataBytes, 0);
1173
1174	int reqBytesLength = reqLength/8;
1175	int resiBitsLength = reqLength%8;
1176
1177	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
1178	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
1179
1180
1181	size_t l;
1182	for (l = 0; l < r1; l++)
1183	{
1184
1185	/////////////////////////// Process layer-0 ///////////////////////////
1186	/* Process Row-0 data 0*/
1187	size_t index = l*r234;
1188	size_t index2D = 0;
1189
1190	type[index] = 0;
1191	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1192	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1193	memcpy(preDataBytes,vce->curBytes,8);
1194	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1195	P1[index2D] = vce->data;
1196
1197	/* Process Row-0 data 1*/
1198	index = l*r234+1;
1199	index2D = 1;
1200
1201	pred1D = P1[index2D-1];
1202	diff = spaceFillingValue[index] - pred1D;
1203
1204	itvNum = fabs(diff)/realPrecision + 1;
1205
1206	if (itvNum < exe_params->intvCapacity)
1207	{
1208	if (diff < 0) itvNum = -itvNum;
1209	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1210	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1211	}
1212	else
1213	{
1214	type[index] = 0;
1215	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1216	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1217	memcpy(preDataBytes,vce->curBytes,8);
1218	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1219	P1[index2D] = vce->data;
1220	}
1221
1222	/* Process Row-0 data 2 --> data r4-1 */
1223	for (j = 2; j < r4; j++)
1224	{
1225	index = l*r234+j;
1226	index2D = j;
1227
1228	pred1D = 2*P1[index2D-1] - P1[index2D-2];
1229	diff = spaceFillingValue[index] - pred1D;
1230
1231	itvNum = fabs(diff)/realPrecision + 1;
1232
1233	if (itvNum < exe_params->intvCapacity)
1234	{
1235	if (diff < 0) itvNum = -itvNum;
1236	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1237	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1238	}
1239	else
1240	{
1241	type[index] = 0;
1242	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1243	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1244	memcpy(preDataBytes,vce->curBytes,8);
1245	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1246	P1[index2D] = vce->data;
1247	}
1248	}
1249
1250	/* Process Row-1 --> Row-r3-1 */
1251	for (i = 1; i < r3; i++)
1252	{
1253	/* Process row-i data 0 */
1254	index = lr234+ir4;
1255	index2D = i*r4;
1256
1257	pred1D = P1[index2D-r4];
1258	diff = spaceFillingValue[index] - pred1D;
1259
1260	itvNum = fabs(diff)/realPrecision + 1;
1261
1262	if (itvNum < exe_params->intvCapacity)
1263	{
1264	if (diff < 0) itvNum = -itvNum;
1265	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1266	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1267	}
1268	else
1269	{
1270	type[index] = 0;
1271	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1272	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1273	memcpy(preDataBytes,vce->curBytes,8);
1274	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1275	P1[index2D] = vce->data;
1276	}
1277
1278	/* Process row-i data 1 --> data r4-1*/
1279	for (j = 1; j < r4; j++)
1280	{
1281	index = lr234+ir4+j;
1282	index2D = i*r4+j;
1283
1284	pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1];
1285
1286	diff = spaceFillingValue[index] - pred2D;
1287
1288	itvNum = fabs(diff)/realPrecision + 1;
1289
1290	if (itvNum < exe_params->intvCapacity)
1291	{
1292	if (diff < 0) itvNum = -itvNum;
1293	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1294	P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1295	}
1296	else
1297	{
1298	type[index] = 0;
1299	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1300	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1301	memcpy(preDataBytes,vce->curBytes,8);
1302	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1303	P1[index2D] = vce->data;
1304	}
1305	}
1306	}
1307
1308
1309	/////////////////////////// Process layer-1 --> layer-r2-1 ///////////////////////////
1310
1311	for (k = 1; k < r2; k++)
1312	{
1313	/* Process Row-0 data 0*/
1314	index = lr234+kr34;
1315	index2D = 0;
1316
1317	pred1D = P1[index2D];
1318	diff = spaceFillingValue[index] - pred1D;
1319
1320	itvNum = fabs(diff)/realPrecision + 1;
1321
1322	if (itvNum < exe_params->intvCapacity)
1323	{
1324	if (diff < 0) itvNum = -itvNum;
1325	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1326	P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1327	}
1328	else
1329	{
1330	type[index] = 0;
1331	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1332	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1333	memcpy(preDataBytes,vce->curBytes,8);
1334	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1335	P0[index2D] = vce->data;
1336	}
1337
1338
1339	/* Process Row-0 data 1 --> data r4-1 */
1340	for (j = 1; j < r4; j++)
1341	{
1342	index = lr234+kr34+j;
1343	index2D = j;
1344
1345	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
1346	diff = spaceFillingValue[index] - pred2D;
1347
1348	itvNum = fabs(diff)/realPrecision + 1;
1349
1350	if (itvNum < exe_params->intvCapacity)
1351	{
1352	if (diff < 0) itvNum = -itvNum;
1353	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1354	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1355	}
1356	else
1357	{
1358	type[index] = 0;
1359	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1360	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1361	memcpy(preDataBytes,vce->curBytes,8);
1362	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1363	P0[index2D] = vce->data;
1364	}
1365	}
1366
1367	/* Process Row-1 --> Row-r3-1 */
1368	for (i = 1; i < r3; i++)
1369	{
1370	/* Process Row-i data 0 */
1371	index = lr234+kr34+i*r4;
1372	index2D = i*r4;
1373
1374	pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4];
1375	diff = spaceFillingValue[index] - pred2D;
1376
1377	itvNum = fabs(diff)/realPrecision + 1;
1378
1379	if (itvNum < exe_params->intvCapacity)
1380	{
1381	if (diff < 0) itvNum = -itvNum;
1382	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1383	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1384	}
1385	else
1386	{
1387	type[index] = 0;
1388	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1389	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1390	memcpy(preDataBytes,vce->curBytes,8);
1391	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1392	P0[index2D] = vce->data;
1393	}
1394
1395	/* Process Row-i data 1 --> data r4-1 */
1396	for (j = 1; j < r4; j++)
1397	{
1398	index = lr234+kr34+i*r4+j;
1399	index2D = i*r4+j;
1400
1401	pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1];
1402	diff = spaceFillingValue[index] - pred3D;
1403
1404
1405	itvNum = fabs(diff)/realPrecision + 1;
1406
1407	if (itvNum < exe_params->intvCapacity)
1408	{
1409	if (diff < 0) itvNum = -itvNum;
1410	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1411	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1412	}
1413	else
1414	{
1415	type[index] = 0;
1416	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1417	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1418	memcpy(preDataBytes,vce->curBytes,8);
1419	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1420	P0[index2D] = vce->data;
1421	}
1422	}
1423	}
1424
1425	double *Pt;
1426	Pt = P1;
1427	P1 = P0;
1428	P0 = Pt;
1429	}
1430	}
1431
1432	free(P0);
1433	free(P1);
1434	size_t exactDataNum = exactLeadNumArray->size;
1435
1436	TightDataPointStorageD* tdps;
1437
1438	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
1439	type, exactMidByteArray->array, exactMidByteArray->size,
1440	exactLeadNumArray->array,
1441	resiBitArray->array, resiBitArray->size,
1442	resiBitsLength,
1443	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1444
1445	//free memory
1446	free_DIA(exactLeadNumArray);
1447	free_DIA(resiBitArray);
1448	free(type);
1449	free(vce);
1450	free(lce);
1451	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1452
1453	return tdps;
1454	}
1455
1456
1457	char SZ_compress_args_double_NoCkRngeNoGzip_4D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
1458	{
1459	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_d);
1460
1461	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
1462
1463	size_t dataLength = r1r2r3*r4;
1464	if(outSize>dataLengthsizeof(double))
1465	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1466
1467	free_TightDataPointStorageD(tdps);
1468	return 0;
1469	}
1470
1471	void SZ_compress_args_double_withinRange(unsigned char** newByteData, double oriData, size_t dataLength, size_t outSize)
1472	{
1473	TightDataPointStorageD* tdps = (TightDataPointStorageD*) malloc(sizeof(TightDataPointStorageD));
1474	tdps->rtypeArray = NULL;
1475	tdps->typeArray = NULL;
1476	tdps->leadNumArray = NULL;
1477	tdps->residualMidBits = NULL;
1478
1479	tdps->allSameData = 1;
1480	tdps->dataSeriesLength = dataLength;
1481	tdps->exactMidBytes = (unsigned char)malloc(sizeof(unsigned char)8);
1482	tdps->pwrErrBoundBytes = NULL;
1483	tdps->isLossless = 0;
1484	double value = oriData[0];
1485	doubleToBytes(tdps->exactMidBytes, value);
1486	tdps->exactMidBytes_size = 8;
1487
1488	size_t tmpOutSize;
1489	//unsigned char *tmpByteData;
1490	convertTDPStoFlatBytes_double(tdps, newByteData, &tmpOutSize);
1491	//convertTDPStoFlatBytes_double(tdps, &tmpByteData, &tmpOutSize);
1492
1493	//newByteData = (unsigned char)malloc(sizeof(unsigned char)*16); //for floating-point data (1+3+4+4)
1494	//memcpy(*newByteData, tmpByteData, 16);
1495	*outSize = tmpOutSize;//12==3+1+8(double_size)+MetaDataByteLength
1496	free_TightDataPointStorageD(tdps);
1497	}
1498
1499	int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData,
1500	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1501	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
1502	{
1503	int status = SZ_SCES;
1504	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1505	double valueRangeSize = 0, medianValue = 0;
1506
1507	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
1508	double max = min+valueRangeSize;
1509	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1510
1511	if(valueRangeSize <= realPrecision)
1512	{
1513	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
1514	}
1515	else
1516	{
1517	if(r5==0&&r4==0&&r3==0&&r2==0)
1518	{
1519	if(errBoundMode>=PW_REL)
1520	{
1521	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
1522	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
1523	}
1524	else
1525	SZ_compress_args_double_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
1526	}
1527	else if(r5==0&&r4==0&&r3==0)
1528	{
1529	if(errBoundMode>=PW_REL)
1530	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(newByteData, oriData, realPrecision, r2, r1, outSize, min, max);
1531	else
1532	SZ_compress_args_double_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1533	}
1534	else if(r5==0&&r4==0)
1535	{
1536	if(errBoundMode>=PW_REL)
1537	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max);
1538	else
1539	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1540	}
1541	else if(r5==0)
1542	{
1543	if(errBoundMode>=PW_REL)
1544	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max);
1545	else
1546	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1547	}
1548	}
1549	return status;
1550	}
1551
1552	int SZ_compress_args_double(unsigned char** newByteData, double *oriData,
1553	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1554	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
1555	{
1556	confparams_cpr->errorBoundMode = errBoundMode;
1557	if(errBoundMode==PW_REL)
1558	{
1559	confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
1560	//confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
1561	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE && r3 != 0 )
1562	{
1563	printf("Error: Current version doesn't support 3D data compression with point-wise relative error bound being based on pwrType=AVG\n");
1564	exit(0);
1565	return SZ_NSCS;
1566	}
1567	}
1568
1569	int status = SZ_SCES;
1570	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1571
1572	if(dataLength <= MIN_NUM_OF_ELEMENTS)
1573	{
1574	*newByteData = SZ_skip_compress_double(oriData, dataLength, outSize);
1575	return status;
1576	}
1577
1578	double valueRangeSize = 0, medianValue = 0;
1579
1580	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
1581	double max = min+valueRangeSize;
1582
1583	double realPrecision = 0;
1584
1585	if(confparams_cpr->errorBoundMode==PSNR)
1586	{
1587	confparams_cpr->errorBoundMode = ABS;
1588	realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, valueRangeSize);
1589	}
1590	else
1591	realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1592
1593	if(valueRangeSize <= realPrecision)
1594	{
1595	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
1596	}
1597	else
1598	{
1599	size_t tmpOutSize = 0;
1600	unsigned char* tmpByteData;
1601	if (r2==0)
1602	{
1603	if(confparams_cpr->errorBoundMode>=PW_REL)
1604	{
1605	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
1606	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
1607	}
1608	else
1609	#ifdef HAVE_TIMECMPR
1610	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1611	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1612	else
1613	#endif
1614	SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1615	}
1616	else
1617	if (r3==0)
1618	{
1619	if(confparams_cpr->errorBoundMode>=PW_REL)
1620	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
1621	else
1622	#ifdef HAVE_TIMECMPR
1623	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1624	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1625	else
1626	#endif
1627	{
1628	if(sz_with_regression == SZ_NO_REGRESSION)
1629	SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1630	else
1631	tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
1632	}
1633	}
1634	else
1635	if (r4==0)
1636	{
1637	if(confparams_cpr->errorBoundMode>=PW_REL)
1638	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
1639	else
1640	#ifdef HAVE_TIMECMPR
1641	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1642	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1643	else
1644	#endif
1645	{
1646	if(sz_with_regression == SZ_NO_REGRESSION)
1647	SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1648	else
1649	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
1650	}
1651
1652
1653	}
1654	else
1655	if (r5==0)
1656	{
1657	if(confparams_cpr->errorBoundMode>=PW_REL)
1658	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
1659	else
1660	#ifdef HAVE_TIMECMPR
1661	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1662	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1663	else
1664	#endif
1665	{
1666	if(sz_with_regression == SZ_NO_REGRESSION)
1667	SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1668	else
1669	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
1670	}
1671
1672	}
1673	else
1674	{
1675	printf("Error: doesn't support 5 dimensions for now.\n");
1676	status = SZ_DERR;
1677	}
1678
1679	//Call Gzip to do the further compression.
1680	if(confparams_cpr->szMode==SZ_BEST_SPEED)
1681	{
1682	*outSize = tmpOutSize;
1683	*newByteData = tmpByteData;
1684	}
1685	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1686	{
1687	*outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
1688	free(tmpByteData);
1689	}
1690	else
1691	{
1692	printf("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1693	status = SZ_MERR;
1694	}
1695	}
1696
1697	return status;
1698	}
1699
1700	//TODO
1701	int SZ_compress_args_double_subblock(unsigned char* compressedBytes, double *oriData,
1702	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
1703	size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
1704	size_t e5, size_t e4, size_t e3, size_t e2, size_t e1,
1705	size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio)
1706	{
1707	int status = SZ_SCES;
1708	double valueRangeSize = 0, medianValue = 0;
1709	computeRangeSize_double_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1);
1710
1711	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1712
1713	if(valueRangeSize <= realPrecision)
1714	{
1715	//TODO
1716	//SZ_compress_args_double_withinRange_subblock();
1717	}
1718	else
1719	{
1720	if (r2==0)
1721	{
1722	//TODO
1723	if(errBoundMode==PW_REL)
1724	{
1725	//TODO
1726	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_subblock();
1727	printf ("Current subblock version does not support point-wise relative error bound.\n");
1728	}
1729	else
1730	SZ_compress_args_double_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1);
1731	}
1732	else
1733	if (r3==0)
1734	{
1735	if(errBoundMode==PW_REL)
1736	{
1737	//TODO
1738	//SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_subblock();
1739	printf ("Current subblock version does not support point-wise relative error bound.\n");
1740	}
1741	else
1742	SZ_compress_args_double_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1);
1743	}
1744	else
1745	if (r4==0)
1746	{
1747	if(errBoundMode==PW_REL)
1748	{
1749	//TODO
1750	//SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_subblock();
1751	printf ("Current subblock version does not support point-wise relative error bound.\n");
1752	}
1753	else
1754	SZ_compress_args_double_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1);
1755	}
1756	else
1757	if (r5==0)
1758	{
1759	if(errBoundMode==PW_REL)
1760	{
1761	//TODO
1762	//SZ_compress_args_double_NoCkRngeNoGzip_4D_pwr_subblock();
1763	printf ("Current subblock version does not support point-wise relative error bound.\n");
1764	}
1765	else
1766	SZ_compress_args_double_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
1767	}
1768	else
1769	{
1770	printf("Error: doesn't support 5 dimensions for now.\n");
1771	status = SZ_DERR; //dimension error
1772	}
1773	}
1774	return status;
1775	}
1776
1777	void SZ_compress_args_double_NoCkRnge_1D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1778	size_t r1, size_t s1, size_t e1)
1779	{
1780	TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r1, s1, e1);
1781
1782	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1783	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1784	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1785	{
1786	unsigned char *tmpCompBytes;
1787	size_t tmpOutSize;
1788	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1789	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1790	free(tmpCompBytes);
1791	}
1792	else
1793	{
1794	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1795	}
1796
1797	//TODO
1798	// if(outSize>dataLengthsizeof(double))
1799	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1800
1801	free_TightDataPointStorageD(tdps);
1802	}
1803
1804	void SZ_compress_args_double_NoCkRnge_2D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1805	size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1)
1806	{
1807	TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r2, r1, s2, s1, e2, e1);
1808
1809	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1810	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1811	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1812	{
1813	unsigned char *tmpCompBytes;
1814	size_t tmpOutSize;
1815	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1816	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1817	free(tmpCompBytes);
1818	}
1819	else
1820	{
1821	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1822	}
1823
1824	//TODO
1825	// if(outSize>dataLengthsizeof(double))
1826	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1827
1828	free_TightDataPointStorageD(tdps);
1829	}
1830
1831	void SZ_compress_args_double_NoCkRnge_3D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1832	size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1)
1833	{
1834	TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r3, r2, r1, s3, s2, s1, e3, e2, e1);
1835
1836	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1837	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1838	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1839	{
1840	unsigned char *tmpCompBytes;
1841	size_t tmpOutSize;
1842	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1843	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1844	free(tmpCompBytes);
1845	}
1846	else
1847	{
1848	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1849	}
1850
1851	//TODO
1852	// if(outSize>dataLengthsizeof(double))
1853	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1854
1855	free_TightDataPointStorageD(tdps);
1856	}
1857
1858	void SZ_compress_args_double_NoCkRnge_4D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1859	size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1)
1860	{
1861	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
1862
1863	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1864	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1865	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1866	{
1867	unsigned char *tmpCompBytes;
1868	size_t tmpOutSize;
1869	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1870	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1871	free(tmpCompBytes);
1872	}
1873	else
1874	{
1875	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1876	}
1877
1878	//TODO
1879	// if(outSize>dataLengthsizeof(double))
1880	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1881
1882	free_TightDataPointStorageD(tdps);
1883	}
1884
1885
1886	unsigned int optimize_intervals_double_1D_subblock(double *oriData, double realPrecision, size_t r1, size_t s1, size_t e1)
1887	{
1888	size_t dataLength = e1 - s1 + 1;
1889	oriData = oriData + s1;
1890
1891	size_t i = 0;
1892	unsigned long radiusIndex;
1893	double pred_value = 0, pred_err;
1894	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
1895	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
1896	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
1897	for(i=2;i<dataLength;i++)
1898	{
1899	if(i%confparams_cpr->sampleDistance==0)
1900	{
1901	pred_value = 2*oriData[i-1] - oriData[i-2];
1902	//pred_value = oriData[i-1];
1903	pred_err = fabs(pred_value - oriData[i]);
1904	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
1905	if(radiusIndex>=confparams_cpr->maxRangeRadius)
1906	radiusIndex = confparams_cpr->maxRangeRadius - 1;
1907	intervals[radiusIndex]++;
1908	}
1909	}
1910	//compute the appropriate number
1911	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
1912	size_t sum = 0;
1913	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
1914	{
1915	sum += intervals[i];
1916	if(sum>targetCount)
1917	break;
1918	}
1919
1920	if(i>=confparams_cpr->maxRangeRadius)
1921	i = confparams_cpr->maxRangeRadius-1;
1922	unsigned int accIntervals = 2*(i+1);
1923	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
1924
1925	if(powerOf2<32)
1926	powerOf2 = 32;
1927
1928	free(intervals);
1929	return powerOf2;
1930	}
1931
1932	unsigned int optimize_intervals_double_2D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
1933	{
1934	size_t R1 = e1 - s1 + 1;
1935	size_t R2 = e2 - s2 + 1;
1936
1937	size_t i,j, index;
1938	unsigned long radiusIndex;
1939	double pred_value = 0, pred_err;
1940	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
1941	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
1942	size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance;
1943	for(i=s1+1;i<=e1;i++)
1944	{
1945	for(j=s2+1;j<=e2;j++)
1946	{
1947	if((i+j)%confparams_cpr->sampleDistance==0)
1948	{
1949	index = i*r2+j;
1950	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
1951	pred_err = fabs(pred_value - oriData[index]);
1952	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
1953	if(radiusIndex>=confparams_cpr->maxRangeRadius)
1954	radiusIndex = confparams_cpr->maxRangeRadius - 1;
1955	intervals[radiusIndex]++;
1956	}
1957	}
1958	}
1959	//compute the appropriate number
1960	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
1961	size_t sum = 0;
1962	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
1963	{
1964	sum += intervals[i];
1965	if(sum>targetCount)
1966	break;
1967	}
1968	if(i>=confparams_cpr->maxRangeRadius)
1969	i = confparams_cpr->maxRangeRadius-1;
1970	unsigned int accIntervals = 2*(i+1);
1971	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
1972
1973	if(powerOf2<32)
1974	powerOf2 = 32;
1975
1976	free(intervals);
1977	return powerOf2;
1978	}
1979
1980	unsigned int optimize_intervals_double_3D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
1981	{
1982	size_t R1 = e1 - s1 + 1;
1983	size_t R2 = e2 - s2 + 1;
1984	size_t R3 = e3 - s3 + 1;
1985
1986	size_t r23 = r2*r3;
1987
1988	size_t i,j,k, index;
1989	unsigned long radiusIndex;
1990	double pred_value = 0, pred_err;
1991	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
1992	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
1993	size_t totalSampleSize = R1R2R3/confparams_cpr->sampleDistance;
1994	for(i=s1+1;i<=e1;i++)
1995	{
1996	for(j=s2+1;j<=e2;j++)
1997	{
1998	for(k=s3+1;k<=e3;k++)
1999	{
2000	if((i+j+k)%confparams_cpr->sampleDistance==0)
2001	{
2002	index = ir23+jr3+k;
2003	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
2004	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
2005	pred_err = fabs(pred_value - oriData[index]);
2006	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2007	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2008	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2009	intervals[radiusIndex]++;
2010	}
2011	}
2012
2013	}
2014	}
2015	//compute the appropriate number
2016	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2017	size_t sum = 0;
2018	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2019	{
2020	sum += intervals[i];
2021	if(sum>targetCount)
2022	break;
2023	}
2024	if(i>=confparams_cpr->maxRangeRadius)
2025	i = confparams_cpr->maxRangeRadius-1;
2026
2027	unsigned int accIntervals = 2*(i+1);
2028	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2029
2030	if(powerOf2<32)
2031	powerOf2 = 32;
2032
2033	free(intervals);
2034	return powerOf2;
2035	}
2036
2037	unsigned int optimize_intervals_double_4D_subblock(double *oriData, double realPrecision,
2038	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
2039	{
2040	size_t R1 = e1 - s1 + 1;
2041	size_t R2 = e2 - s2 + 1;
2042	size_t R3 = e3 - s3 + 1;
2043	size_t R4 = e4 - s4 + 1;
2044
2045	size_t r34 = r3*r4;
2046	size_t r234 = r2r3r4;
2047
2048	size_t i,j,k,l, index;
2049	unsigned long radiusIndex;
2050	double pred_value = 0, pred_err;
2051	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2052	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2053	size_t totalSampleSize = R1R2R3*R4/confparams_cpr->sampleDistance;
2054	for(i=s1+1;i<=e1;i++)
2055	{
2056	for(j=s2+1;j<=e2;j++)
2057	{
2058	for(k=s3+1;k<=e3;k++)
2059	{
2060	for(l=s4+1;l<=e4;l++)
2061	{
2062	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
2063	{
2064	index = ir234+jr34+k*r4+l;
2065	pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34]
2066	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
2067	pred_err = fabs(pred_value - oriData[index]);
2068	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2069	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2070	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2071	intervals[radiusIndex]++;
2072	}
2073	}
2074	}
2075
2076	}
2077	}
2078	//compute the appropriate number
2079	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2080	size_t sum = 0;
2081	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2082	{
2083	sum += intervals[i];
2084	if(sum>targetCount)
2085	break;
2086	}
2087	if(i>=confparams_cpr->maxRangeRadius)
2088	i = confparams_cpr->maxRangeRadius-1;
2089
2090	unsigned int accIntervals = 2*(i+1);
2091	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2092
2093	if(powerOf2<32)
2094	powerOf2 = 32;
2095
2096	free(intervals);
2097	return powerOf2;
2098	}
2099
2100	TightDataPointStorageD* SZ_compress_double_1D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2101	size_t r1, size_t s1, size_t e1)
2102	{
2103	size_t dataLength = e1 - s1 + 1;
2104
2105	unsigned int quantization_intervals;
2106	if(exe_params->optQuantMode==1)
2107	quantization_intervals = optimize_intervals_double_1D_subblock(oriData, realPrecision, r1, s1, e1);
2108	else
2109	quantization_intervals = exe_params->intvCapacity;
2110	updateQuantizationInfo(quantization_intervals);
2111
2112	size_t i;
2113	int reqLength;
2114	double medianValue = medianValue_d;
2115	short radExpo = getExponent_double(valueRangeSize/2);
2116
2117	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2118
2119	int* type = (int) malloc(dataLengthsizeof(int));
2120
2121	double* spaceFillingValue = oriData + s1; //
2122
2123	DynamicIntArray *exactLeadNumArray;
2124	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2125
2126	DynamicByteArray *exactMidByteArray;
2127	new_DBA(&exactMidByteArray, DynArrayInitLen);
2128
2129	DynamicIntArray *resiBitArray;
2130	new_DIA(&resiBitArray, DynArrayInitLen);
2131
2132	type[0] = 0;
2133
2134	unsigned char preDataBytes[8];
2135	longToBytes_bigEndian(preDataBytes, 0);
2136
2137	int reqBytesLength = reqLength/8;
2138	int resiBitsLength = reqLength%8;
2139	double last3CmprsData[3] = {0};
2140
2141	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2142	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2143
2144	//add the first data
2145	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2146	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2147	memcpy(preDataBytes,vce->curBytes,8);
2148	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2149	listAdd_double(last3CmprsData, vce->data);
2150
2151	//add the second data
2152	type[1] = 0;
2153	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2154	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2155	memcpy(preDataBytes,vce->curBytes,8);
2156	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2157	listAdd_double(last3CmprsData, vce->data);
2158
2159	int state;
2160	double checkRadius;
2161	double curData;
2162	double pred;
2163	double predAbsErr;
2164	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
2165	double interval = 2*realPrecision;
2166
2167	for(i=2;i<dataLength;i++)
2168	{
2169	//printf("%.30G\n",last3CmprsData[0]);
2170	curData = spaceFillingValue[i];
2171	pred = 2*last3CmprsData[0] - last3CmprsData[1];
2172	//pred = last3CmprsData[0];
2173	predAbsErr = fabs(curData - pred);
2174	if(predAbsErr<=checkRadius)
2175	{
2176	state = (predAbsErr/realPrecision+1)/2;
2177	if(curData>=pred)
2178	{
2179	type[i] = exe_params->intvRadius+state;
2180	pred = pred + state*interval;
2181	}
2182	else //curData<pred
2183	{
2184	type[i] = exe_params->intvRadius-state;
2185	pred = pred - state*interval;
2186	}
2187	listAdd_double(last3CmprsData, pred);
2188	continue;
2189	}
2190
2191	//unpredictable data processing
2192	type[i] = 0;
2193	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2194	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2195	memcpy(preDataBytes,vce->curBytes,8);
2196	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2197
2198	listAdd_double(last3CmprsData, vce->data);
2199	}//end of for
2200
2201	size_t exactDataNum = exactLeadNumArray->size;
2202
2203	TightDataPointStorageD* tdps;
2204
2205	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
2206	type, exactMidByteArray->array, exactMidByteArray->size,
2207	exactLeadNumArray->array,
2208	resiBitArray->array, resiBitArray->size,
2209	resiBitsLength,
2210	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2211
2212	//free memory
2213	free_DIA(exactLeadNumArray);
2214	free_DIA(resiBitArray);
2215	free(type);
2216	free(vce);
2217	free(lce);
2218	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2219
2220	return tdps;
2221	}
2222
2223
2224	TightDataPointStorageD* SZ_compress_double_2D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2225	size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
2226	{
2227	unsigned int quantization_intervals;
2228	if(exe_params->optQuantMode==1)
2229	{
2230	quantization_intervals = optimize_intervals_double_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2);
2231	updateQuantizationInfo(quantization_intervals);
2232	}
2233	else
2234	quantization_intervals = exe_params->intvCapacity;
2235
2236	size_t i,j;
2237	int reqLength;
2238	double pred1D, pred2D;
2239	double diff = 0.0;
2240	double itvNum = 0;
2241	double P0, P1;
2242
2243	size_t R1 = e1 - s1 + 1;
2244	size_t R2 = e2 - s2 + 1;
2245	size_t dataLength = R1*R2;
2246
2247	P0 = (double)malloc(R2sizeof(double));
2248	memset(P0, 0, R2*sizeof(double));
2249	P1 = (double)malloc(R2sizeof(double));
2250	memset(P1, 0, R2*sizeof(double));
2251
2252	double medianValue = medianValue_d;
2253	short radExpo = getExponent_double(valueRangeSize/2);
2254	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2255
2256	int* type = (int) malloc(dataLengthsizeof(int));
2257
2258	double* spaceFillingValue = oriData; //
2259
2260	DynamicIntArray *exactLeadNumArray;
2261	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2262
2263	DynamicByteArray *exactMidByteArray;
2264	new_DBA(&exactMidByteArray, DynArrayInitLen);
2265
2266	DynamicIntArray *resiBitArray;
2267	new_DIA(&resiBitArray, DynArrayInitLen);
2268
2269	unsigned char preDataBytes[8];
2270	longToBytes_bigEndian(preDataBytes, 0);
2271
2272	int reqBytesLength = reqLength/8;
2273	int resiBitsLength = reqLength%8;
2274
2275	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2276	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2277
2278	/* Process Row-s1 data s2*/
2279	size_t gIndex;
2280	size_t lIndex;
2281
2282	gIndex = s1*r2+s2;
2283	lIndex = 0;
2284
2285	type[lIndex] = 0;
2286	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2287	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2288	memcpy(preDataBytes,vce->curBytes,8);
2289	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2290	P1[0] = vce->data;
2291
2292	/* Process Row-s1 data s2+1*/
2293	gIndex = s1*r2+(s2+1);
2294	lIndex = 1;
2295
2296	pred1D = P1[0];
2297	diff = spaceFillingValue[gIndex] - pred1D;
2298
2299	itvNum = fabs(diff)/realPrecision + 1;
2300
2301	if (itvNum < exe_params->intvCapacity)
2302	{
2303	if (diff < 0) itvNum = -itvNum;
2304	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2305	P1[1] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2306	}
2307	else
2308	{
2309	type[lIndex] = 0;
2310	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2311	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2312	memcpy(preDataBytes,vce->curBytes,8);
2313	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2314	P1[1] = vce->data;
2315	}
2316
2317	/* Process Row-s1 data s2+2 --> data e2 */
2318	for (j = 2; j < R2; j++)
2319	{
2320	gIndex = s1*r2+(s2+j);
2321	lIndex = j;
2322
2323	pred1D = 2*P1[j-1] - P1[j-2];
2324	diff = spaceFillingValue[gIndex] - pred1D;
2325
2326	itvNum = fabs(diff)/realPrecision + 1;
2327
2328	if (itvNum < exe_params->intvCapacity)
2329	{
2330	if (diff < 0) itvNum = -itvNum;
2331	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2332	P1[j] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2333	}
2334	else
2335	{
2336	type[lIndex] = 0;
2337	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2338	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2339	memcpy(preDataBytes,vce->curBytes,8);
2340	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2341	P1[j] = vce->data;
2342	}
2343	}
2344
2345	/* Process Row-s1+1 --> Row-e1 */
2346	for (i = 1; i < R1; i++)
2347	{
2348	/* Process row-s1+i data s2 */
2349	gIndex = (s1+i)*r2+s2;
2350	lIndex = i*R2;
2351
2352	pred1D = P1[0];
2353	diff = spaceFillingValue[gIndex] - pred1D;
2354
2355	itvNum = fabs(diff)/realPrecision + 1;
2356
2357	if (itvNum < exe_params->intvCapacity)
2358	{
2359	if (diff < 0) itvNum = -itvNum;
2360	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2361	P0[0] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2362	}
2363	else
2364	{
2365	type[lIndex] = 0;
2366	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2367	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2368	memcpy(preDataBytes,vce->curBytes,8);
2369	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2370	P0[0] = vce->data;
2371	}
2372
2373	/* Process row-s1+i data s2+1 --> e2 */
2374	for (j = 1; j < R2; j++)
2375	{
2376	gIndex = (s1+i)*r2+(s2+j);
2377	lIndex = i*R2+j;
2378
2379	pred2D = P0[j-1] + P1[j] - P1[j-1];
2380	diff = spaceFillingValue[gIndex] - pred2D;
2381
2382	itvNum = fabs(diff)/realPrecision + 1;
2383
2384	if (itvNum < exe_params->intvCapacity)
2385	{
2386	if (diff < 0) itvNum = -itvNum;
2387	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2388	P0[j] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2389	}
2390	else
2391	{
2392	type[lIndex] = 0;
2393	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2394	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2395	memcpy(preDataBytes,vce->curBytes,8);
2396	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2397	P0[j] = vce->data;
2398	}
2399	}
2400
2401	double *Pt;
2402	Pt = P1;
2403	P1 = P0;
2404	P0 = Pt;
2405	}
2406
2407	free(P0);
2408	free(P1);
2409	size_t exactDataNum = exactLeadNumArray->size;
2410
2411	TightDataPointStorageD* tdps;
2412
2413	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
2414	type, exactMidByteArray->array, exactMidByteArray->size,
2415	exactLeadNumArray->array,
2416	resiBitArray->array, resiBitArray->size,
2417	resiBitsLength,
2418	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2419
2420	//free memory
2421	free_DIA(exactLeadNumArray);
2422	free_DIA(resiBitArray);
2423	free(type);
2424	free(vce);
2425	free(lce);
2426	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2427
2428	return tdps;
2429	}
2430
2431	TightDataPointStorageD* SZ_compress_double_3D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2432	size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
2433	{
2434	unsigned int quantization_intervals;
2435	if(exe_params->optQuantMode==1)
2436	{
2437	quantization_intervals = optimize_intervals_double_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3);
2438	updateQuantizationInfo(quantization_intervals);
2439	}
2440	else
2441	quantization_intervals = exe_params->intvCapacity;
2442
2443	size_t i,j,k;
2444	int reqLength;
2445	double pred1D, pred2D, pred3D;
2446	double diff = 0.0;
2447	double itvNum = 0;
2448	double P0, P1;
2449
2450	size_t R1 = e1 - s1 + 1;
2451	size_t R2 = e2 - s2 + 1;
2452	size_t R3 = e3 - s3 + 1;
2453	size_t dataLength = R1R2R3;
2454
2455	size_t r23 = r2*r3;
2456	size_t R23 = R2*R3;
2457
2458	P0 = (double)malloc(R23sizeof(double));
2459	P1 = (double)malloc(R23sizeof(double));
2460
2461	double medianValue = medianValue_d;
2462	short radExpo = getExponent_double(valueRangeSize/2);
2463	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2464
2465	int* type = (int) malloc(dataLengthsizeof(int));
2466
2467	double* spaceFillingValue = oriData; //
2468
2469	DynamicIntArray *exactLeadNumArray;
2470	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2471
2472	DynamicByteArray *exactMidByteArray;
2473	new_DBA(&exactMidByteArray, DynArrayInitLen);
2474
2475	DynamicIntArray *resiBitArray;
2476	new_DIA(&resiBitArray, DynArrayInitLen);
2477
2478	unsigned char preDataBytes[8];
2479	longToBytes_bigEndian(preDataBytes, 0);
2480
2481	int reqBytesLength = reqLength/8;
2482	int resiBitsLength = reqLength%8;
2483
2484	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2485	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2486
2487
2488	/////////////////////////// Process layer-s1 ///////////////////////////
2489	/* Process Row-s2 data s3*/
2490	size_t gIndex; //global index
2491	size_t lIndex; //local index
2492	size_t index2D; //local 2D index
2493
2494	gIndex = s1r23+s2r3+s3;
2495	lIndex = 0;
2496	index2D = 0;
2497
2498	type[lIndex] = 0;
2499	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2500	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2501	memcpy(preDataBytes,vce->curBytes,8);
2502	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2503	P1[index2D] = vce->data;
2504
2505	/* Process Row-s2 data s3+1*/
2506	gIndex = s1r23+s2r3+s3+1;
2507	lIndex = 1;
2508	index2D = 1;
2509
2510	pred1D = P1[index2D-1];
2511	diff = spaceFillingValue[gIndex] - pred1D;
2512
2513	itvNum = fabs(diff)/realPrecision + 1;
2514
2515	if (itvNum < exe_params->intvCapacity)
2516	{
2517	if (diff < 0) itvNum = -itvNum;
2518	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2519	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2520	}
2521	else
2522	{
2523	type[lIndex] = 0;
2524	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2525	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2526	memcpy(preDataBytes,vce->curBytes,8);
2527	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2528	P1[index2D] = vce->data;
2529	}
2530
2531	/* Process Row-s2 data s3+2 --> data e3 */
2532	for (j = 2; j < R3; j++)
2533	{
2534	gIndex = s1r23+s2r3+s3+j;
2535	lIndex = j;
2536	index2D = j;
2537
2538	pred1D = 2*P1[index2D-1] - P1[index2D-2];
2539	diff = spaceFillingValue[gIndex] - pred1D;
2540
2541	itvNum = fabs(diff)/realPrecision + 1;
2542
2543	if (itvNum < exe_params->intvCapacity)
2544	{
2545	if (diff < 0) itvNum = -itvNum;
2546	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2547	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2548	}
2549	else
2550	{
2551	type[lIndex] = 0;
2552	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2553	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2554	memcpy(preDataBytes,vce->curBytes,8);
2555	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2556	P1[index2D] = vce->data;
2557	}
2558	}
2559
2560	/* Process Row-s2+1 --> Row-e2 */
2561	for (i = 1; i < R2; i++)
2562	{
2563	/* Process row-s2+i data s3 */
2564	gIndex = s1r23+(s2+i)r3+s3;
2565	lIndex = i*R3;
2566	index2D = i*R3;
2567
2568	pred1D = P1[index2D-R3];
2569	diff = spaceFillingValue[gIndex] - pred1D;
2570
2571	itvNum = fabs(diff)/realPrecision + 1;
2572
2573	if (itvNum < exe_params->intvCapacity)
2574	{
2575	if (diff < 0) itvNum = -itvNum;
2576	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2577	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2578	}
2579	else
2580	{
2581	type[lIndex] = 0;
2582	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2583	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2584	memcpy(preDataBytes,vce->curBytes,8);
2585	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2586	P1[index2D] = vce->data;
2587	}
2588
2589	/* Process row-s2+i data s3+1 --> data e3*/
2590	for (j = 1; j < R3; j++)
2591	{
2592	gIndex = s1r23+(s2+i)r3+s3+j;
2593	lIndex = i*R3+j;
2594	index2D = i*R3+j;
2595
2596	pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1];
2597	diff = spaceFillingValue[gIndex] - pred2D;
2598
2599	itvNum = fabs(diff)/realPrecision + 1;
2600
2601	if (itvNum < exe_params->intvCapacity)
2602	{
2603	if (diff < 0) itvNum = -itvNum;
2604	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2605	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2606	}
2607	else
2608	{
2609	type[lIndex] = 0;
2610	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2611	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2612	memcpy(preDataBytes,vce->curBytes,8);
2613	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2614	P1[index2D] = vce->data;
2615	}
2616	}
2617	}
2618
2619
2620	/////////////////////////// Process layer-s1+1 --> layer-e1 ///////////////////////////
2621
2622	for (k = 1; k < R1; k++)
2623	{
2624	/* Process Row-s2 data s3*/
2625	gIndex = (s1+k)r23+s2r3+s3;
2626	lIndex = k*R23;
2627	index2D = 0;
2628
2629	pred1D = P1[index2D];
2630	diff = spaceFillingValue[gIndex] - pred1D;
2631
2632	itvNum = fabs(diff)/realPrecision + 1;
2633
2634	if (itvNum < exe_params->intvCapacity)
2635	{
2636	if (diff < 0) itvNum = -itvNum;
2637	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2638	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2639	}
2640	else
2641	{
2642	type[lIndex] = 0;
2643	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2644	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2645	memcpy(preDataBytes,vce->curBytes,8);
2646	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2647	P0[index2D] = vce->data;
2648	}
2649
2650
2651	/* Process Row-s2 data s3+1 --> data e3 */
2652	for (j = 1; j < R3; j++)
2653	{
2654	gIndex = (s1+k)r23+s2r3+s3+j;
2655	lIndex = k*R23+j;
2656	index2D = j;
2657
2658	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
2659	diff = spaceFillingValue[gIndex] - pred2D;
2660
2661	itvNum = fabs(diff)/realPrecision + 1;
2662
2663	if (itvNum < exe_params->intvCapacity)
2664	{
2665	if (diff < 0) itvNum = -itvNum;
2666	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2667	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2668	}
2669	else
2670	{
2671	type[lIndex] = 0;
2672	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2673	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2674	memcpy(preDataBytes,vce->curBytes,8);
2675	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2676	P0[index2D] = vce->data;
2677	}
2678	}
2679
2680	/* Process Row-s2+1 --> Row-e2 */
2681	for (i = 1; i < R2; i++)
2682	{
2683	/* Process Row-s2+i data s3 */
2684	gIndex = (s1+k)r23+(s2+i)r3+s3;
2685	lIndex = kR23+iR3;
2686	index2D = i*R3;
2687
2688	pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3];
2689	diff = spaceFillingValue[gIndex] - pred2D;
2690
2691	itvNum = fabs(diff)/realPrecision + 1;
2692
2693	if (itvNum < exe_params->intvCapacity)
2694	{
2695	if (diff < 0) itvNum = -itvNum;
2696	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2697	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2698	}
2699	else
2700	{
2701	type[lIndex] = 0;
2702	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2703	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2704	memcpy(preDataBytes,vce->curBytes,8);
2705	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2706	P0[index2D] = vce->data;
2707	}
2708
2709	/* Process Row-s2+i data s3+1 --> data e3 */
2710	for (j = 1; j < R3; j++)
2711	{
2712	gIndex = (s1+k)r23+(s2+i)r3+s3+j;
2713	lIndex = kR23+iR3+j;
2714	index2D = i*R3+j;
2715
2716	pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1];
2717	diff = spaceFillingValue[gIndex] - pred3D;
2718
2719	itvNum = fabs(diff)/realPrecision + 1;
2720
2721	if (itvNum < exe_params->intvCapacity)
2722	{
2723	if (diff < 0) itvNum = -itvNum;
2724	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2725	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2726	}
2727	else
2728	{
2729	type[lIndex] = 0;
2730	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2731	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2732	memcpy(preDataBytes,vce->curBytes,8);
2733	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2734	P0[index2D] = vce->data;
2735	}
2736	}
2737	}
2738
2739	double *Pt;
2740	Pt = P1;
2741	P1 = P0;
2742	P0 = Pt;
2743	}
2744
2745	free(P0);
2746	free(P1);
2747	size_t exactDataNum = exactLeadNumArray->size;
2748
2749	TightDataPointStorageD* tdps;
2750
2751	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
2752	type, exactMidByteArray->array, exactMidByteArray->size,
2753	exactLeadNumArray->array,
2754	resiBitArray->array, resiBitArray->size,
2755	resiBitsLength,
2756	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2757
2758	//free memory
2759	free_DIA(exactLeadNumArray);
2760	free_DIA(resiBitArray);
2761	free(type);
2762	free(vce);
2763	free(lce);
2764	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2765
2766	return tdps;
2767	}
2768
2769	TightDataPointStorageD* SZ_compress_double_4D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2770	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
2771	{
2772	unsigned int quantization_intervals;
2773	if(exe_params->optQuantMode==1)
2774	{
2775	quantization_intervals = optimize_intervals_double_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4);
2776	updateQuantizationInfo(quantization_intervals);
2777	}
2778	else
2779	quantization_intervals = exe_params->intvCapacity;
2780
2781	size_t i,j,k;
2782	int reqLength;
2783	double pred1D, pred2D, pred3D;
2784	double diff = 0.0;
2785	double itvNum = 0;
2786	double P0, P1;
2787
2788	size_t R1 = e1 - s1 + 1;
2789	size_t R2 = e2 - s2 + 1;
2790	size_t R3 = e3 - s3 + 1;
2791	size_t R4 = e4 - s4 + 1;
2792
2793	size_t dataLength = R1R2R3*R4;
2794
2795	size_t r34 = r3*r4;
2796	size_t r234 = r2r3r4;
2797	size_t R34 = R3*R4;
2798	size_t R234 = R2R3R4;
2799
2800	P0 = (double)malloc(R34sizeof(double));
2801	P1 = (double)malloc(R34sizeof(double));
2802
2803	double medianValue = medianValue_d;
2804	short radExpo = getExponent_double(valueRangeSize/2);
2805	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2806
2807	int* type = (int) malloc(dataLengthsizeof(int));
2808
2809	double* spaceFillingValue = oriData; //
2810
2811	DynamicIntArray *exactLeadNumArray;
2812	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2813
2814	DynamicByteArray *exactMidByteArray;
2815	new_DBA(&exactMidByteArray, DynArrayInitLen);
2816
2817	DynamicIntArray *resiBitArray;
2818	new_DIA(&resiBitArray, DynArrayInitLen);
2819
2820	unsigned char preDataBytes[8];
2821	longToBytes_bigEndian(preDataBytes, 0);
2822
2823	int reqBytesLength = reqLength/8;
2824	int resiBitsLength = reqLength%8;
2825
2826	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2827	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2828
2829	size_t l;
2830	for (l = 0; l < R1; l++)
2831	{
2832
2833	/////////////////////////// Process layer-s2 ///////////////////////////
2834	/* Process Row-s3 data s4*/
2835	size_t gIndex; //global index
2836	size_t lIndex; //local index
2837	size_t index2D; //local 2D index
2838
2839	gIndex = (s1+l)r234+s2r34+s3*r4+s4;
2840	lIndex = l*R234;
2841	index2D = 0;
2842
2843	type[lIndex] = 0;
2844	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2845	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2846	memcpy(preDataBytes,vce->curBytes,8);
2847	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2848	P1[index2D] = vce->data;
2849
2850	/* Process Row-s3 data s4+1*/
2851	gIndex = (s1+l)r234+s2r34+s3*r4+s4+1;
2852	lIndex = l*R234+1;
2853	index2D = 1;
2854
2855	pred1D = P1[index2D-1];
2856	diff = spaceFillingValue[gIndex] - pred1D;
2857
2858	itvNum = fabs(diff)/realPrecision + 1;
2859
2860	if (itvNum < exe_params->intvCapacity)
2861	{
2862	if (diff < 0) itvNum = -itvNum;
2863	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2864	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2865	}
2866	else
2867	{
2868	type[lIndex] = 0;
2869	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2870	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2871	memcpy(preDataBytes,vce->curBytes,8);
2872	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2873	P1[index2D] = vce->data;
2874	}
2875
2876	/* Process Row-s3 data s4+2 --> data e4 */
2877	for (j = 2; j < R4; j++)
2878	{
2879	gIndex = (s1+l)r234+s2r34+s3*r4+s4+j;
2880	lIndex = l*R234+j;
2881	index2D = j;
2882
2883	pred1D = 2*P1[index2D-1] - P1[index2D-2];
2884	diff = spaceFillingValue[gIndex] - pred1D;
2885
2886	itvNum = fabs(diff)/realPrecision + 1;
2887
2888	if (itvNum < exe_params->intvCapacity)
2889	{
2890	if (diff < 0) itvNum = -itvNum;
2891	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2892	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2893	}
2894	else
2895	{
2896	type[lIndex] = 0;
2897	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2898	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2899	memcpy(preDataBytes,vce->curBytes,8);
2900	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2901	P1[index2D] = vce->data;
2902	}
2903	}
2904
2905	/* Process Row-s3+1 --> Row-e3 */
2906	for (i = 1; i < R3; i++)
2907	{
2908	/* Process row-s2+i data s3 */
2909	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4;
2910	lIndex = lR234+iR4;
2911	index2D = i*R4;
2912
2913	pred1D = P1[index2D-R4];
2914	diff = spaceFillingValue[gIndex] - pred1D;
2915
2916	itvNum = fabs(diff)/realPrecision + 1;
2917
2918	if (itvNum < exe_params->intvCapacity)
2919	{
2920	if (diff < 0) itvNum = -itvNum;
2921	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2922	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2923	}
2924	else
2925	{
2926	type[lIndex] = 0;
2927	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2928	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2929	memcpy(preDataBytes,vce->curBytes,8);
2930	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2931	P1[index2D] = vce->data;
2932	}
2933
2934	/* Process row-s3+i data s4+1 --> data e4*/
2935	for (j = 1; j < R4; j++)
2936	{
2937	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4+j;
2938	lIndex = lR234+iR4+j;
2939	index2D = i*R4+j;
2940
2941	pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1];
2942	diff = spaceFillingValue[gIndex] - pred2D;
2943
2944	itvNum = fabs(diff)/realPrecision + 1;
2945
2946	if (itvNum < exe_params->intvCapacity)
2947	{
2948	if (diff < 0) itvNum = -itvNum;
2949	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2950	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2951	}
2952	else
2953	{
2954	type[lIndex] = 0;
2955	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2956	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2957	memcpy(preDataBytes,vce->curBytes,8);
2958	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2959	P1[index2D] = vce->data;
2960	}
2961	}
2962	}
2963
2964
2965	/////////////////////////// Process layer-s2+1 --> layer-e2 ///////////////////////////
2966
2967	for (k = 1; k < R2; k++)
2968	{
2969	/* Process Row-s3 data s4*/
2970	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4;
2971	lIndex = lR234+kR34;
2972	index2D = 0;
2973
2974	pred1D = P1[index2D];
2975	diff = spaceFillingValue[gIndex] - pred1D;
2976
2977	itvNum = fabs(diff)/realPrecision + 1;
2978
2979	if (itvNum < exe_params->intvCapacity)
2980	{
2981	if (diff < 0) itvNum = -itvNum;
2982	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2983	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2984	}
2985	else
2986	{
2987	type[lIndex] = 0;
2988	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2989	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2990	memcpy(preDataBytes,vce->curBytes,8);
2991	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2992	P0[index2D] = vce->data;
2993	}
2994
2995
2996	/* Process Row-s3 data s4+1 --> data e4 */
2997	for (j = 1; j < R4; j++)
2998	{
2999	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4+j;
3000	lIndex = lR234+kR34+j;
3001	index2D = j;
3002
3003	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
3004	diff = spaceFillingValue[gIndex] - pred2D;
3005
3006	itvNum = fabs(diff)/realPrecision + 1;
3007
3008	if (itvNum < exe_params->intvCapacity)
3009	{
3010	if (diff < 0) itvNum = -itvNum;
3011	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3012	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3013	}
3014	else
3015	{
3016	type[lIndex] = 0;
3017	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3018	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3019	memcpy(preDataBytes,vce->curBytes,8);
3020	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3021	P0[index2D] = vce->data;
3022	}
3023	}
3024
3025	/* Process Row-s3+1 --> Row-e3 */
3026	for (i = 1; i < R3; i++)
3027	{
3028	/* Process Row-s3+i data s4 */
3029	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4;
3030	lIndex = lR234+kR34+i*R4;
3031	index2D = i*R4;
3032
3033	pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4];
3034	diff = spaceFillingValue[gIndex] - pred2D;
3035
3036	itvNum = fabs(diff)/realPrecision + 1;
3037
3038	if (itvNum < exe_params->intvCapacity)
3039	{
3040	if (diff < 0) itvNum = -itvNum;
3041	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3042	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3043	}
3044	else
3045	{
3046	type[lIndex] = 0;
3047	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3048	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3049	memcpy(preDataBytes,vce->curBytes,8);
3050	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3051	P0[index2D] = vce->data;
3052	}
3053
3054	/* Process Row-s3+i data s4+1 --> data e4 */
3055	for (j = 1; j < R4; j++)
3056	{
3057	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4+j;
3058	lIndex = lR234+kR34+i*R4+j;
3059	index2D = i*R4+j;
3060
3061	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
3062
3063	pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1];
3064	diff = spaceFillingValue[gIndex] - pred3D;
3065
3066	itvNum = fabs(diff)/realPrecision + 1;
3067
3068	if (itvNum < exe_params->intvCapacity)
3069	{
3070	if (diff < 0) itvNum = -itvNum;
3071	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3072	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3073	}
3074	else
3075	{
3076	type[lIndex] = 0;
3077	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3078	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3079	memcpy(preDataBytes,vce->curBytes,8);
3080	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3081	P0[index2D] = vce->data;
3082	}
3083	}
3084	}
3085
3086	double *Pt;
3087	Pt = P1;
3088	P1 = P0;
3089	P0 = Pt;
3090	}
3091	}
3092
3093	free(P0);
3094	free(P1);
3095	size_t exactDataNum = exactLeadNumArray->size;
3096
3097	TightDataPointStorageD* tdps;
3098
3099	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
3100	type, exactMidByteArray->array, exactMidByteArray->size,
3101	exactLeadNumArray->array,
3102	resiBitArray->array, resiBitArray->size,
3103	resiBitsLength,
3104	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
3105
3106	//free memory
3107	free_DIA(exactLeadNumArray);
3108	free_DIA(resiBitArray);
3109	free(type);
3110	free(vce);
3111	free(lce);
3112	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
3113
3114	return tdps;
3115	}
3116
3117	/**
3118	*
3119	* This is a fast implementation for optimize_intervals_double_3D()
3120	* */
3121	unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision){
3122	size_t i;
3123	size_t radiusIndex;
3124	size_t r23=r2*r3;
3125	double pred_value = 0, pred_err;
3126	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3127	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3128	size_t totalSampleSize = 0;
3129
3130	size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
3131	size_t offset_count_2;
3132	double * data_pos = oriData + r23 + r3 + offset_count;
3133	size_t n1_count = 1, n2_count = 1; // count i,j sum
3134	size_t len = r1 * r2 * r3;
3135	while(data_pos - oriData < len){
3136	totalSampleSize++;
3137	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
3138	pred_err = fabs(pred_value - *data_pos);
3139	radiusIndex = (pred_err/realPrecision+1)/2;
3140	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3141	{
3142	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3143	}
3144	intervals[radiusIndex]++;
3145	offset_count += confparams_cpr->sampleDistance;
3146	if(offset_count >= r3){
3147	n2_count ++;
3148	if(n2_count == r2){
3149	n1_count ++;
3150	n2_count = 1;
3151	data_pos += r3;
3152	}
3153	offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance;
3154	data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3155	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3156	if(offset_count == 0) offset_count ++;
3157	}
3158	else data_pos += confparams_cpr->sampleDistance;
3159	}
3160	//compute the appropriate number
3161	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3162	size_t sum = 0;
3163	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3164	{
3165	sum += intervals[i];
3166	if(sum>targetCount)
3167	break;
3168	}
3169	if(i>=confparams_cpr->maxRangeRadius)
3170	i = confparams_cpr->maxRangeRadius-1;
3171	unsigned int accIntervals = 2*(i+1);
3172	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3173
3174	if(powerOf2<32)
3175	powerOf2 = 32;
3176	free(intervals);
3177	return powerOf2;
3178	}
3179
3180	unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision)
3181	{
3182	size_t i;
3183	size_t radiusIndex;
3184	double pred_value = 0, pred_err;
3185	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3186	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3187	size_t totalSampleSize = 0;
3188
3189	size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
3190	size_t offset_count_2;
3191	double * data_pos = oriData + r2 + offset_count;
3192	size_t n1_count = 1; // count i sum
3193	size_t len = r1 * r2;
3194	while(data_pos - oriData < len){
3195	totalSampleSize++;
3196	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
3197	pred_err = fabs(pred_value - *data_pos);
3198	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3199	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3200	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3201	intervals[radiusIndex]++;
3202
3203	offset_count += confparams_cpr->sampleDistance;
3204	if(offset_count >= r2){
3205	n1_count ++;
3206	offset_count_2 = n1_count % confparams_cpr->sampleDistance;
3207	data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3208	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3209	if(offset_count == 0) offset_count ++;
3210	}
3211	else data_pos += confparams_cpr->sampleDistance;
3212	}
3213
3214	//compute the appropriate number
3215	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3216	size_t sum = 0;
3217	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3218	{
3219	sum += intervals[i];
3220	if(sum>targetCount)
3221	break;
3222	}
3223	if(i>=confparams_cpr->maxRangeRadius)
3224	i = confparams_cpr->maxRangeRadius-1;
3225	unsigned int accIntervals = 2*(i+1);
3226	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3227
3228	if(powerOf2<32)
3229	powerOf2 = 32;
3230
3231	free(intervals);
3232	return powerOf2;
3233	}
3234
3235	unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision)
3236	{
3237	size_t i = 0, radiusIndex;
3238	double pred_value = 0, pred_err;
3239	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3240	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3241	size_t totalSampleSize = 0;
3242
3243	double * data_pos = oriData + 2;
3244	while(data_pos - oriData < dataLength){
3245	totalSampleSize++;
3246	pred_value = data_pos[-1];
3247	pred_err = fabs(pred_value - *data_pos);
3248	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3249	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3250	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3251	intervals[radiusIndex]++;
3252
3253	data_pos += confparams_cpr->sampleDistance;
3254	}
3255	//compute the appropriate number
3256	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3257	size_t sum = 0;
3258	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3259	{
3260	sum += intervals[i];
3261	if(sum>targetCount)
3262	break;
3263	}
3264	if(i>=confparams_cpr->maxRangeRadius)
3265	i = confparams_cpr->maxRangeRadius-1;
3266
3267	unsigned int accIntervals = 2*(i+1);
3268	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3269
3270	if(powerOf2<32)
3271	powerOf2 = 32;
3272
3273	free(intervals);
3274	return powerOf2;
3275	}
3276
3277	/The above code is for sz 1.4.13; the following code is for sz 2.0/
3278	unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
3279	{
3280	double mean = 0.0;
3281	size_t len = r1 * r2;
3282	size_t mean_distance = (int) (sqrt(len));
3283
3284	double * data_pos = oriData;
3285	size_t mean_count = 0;
3286	while(data_pos - oriData < len){
3287	mean += *data_pos;
3288	mean_count ++;
3289	data_pos += mean_distance;
3290	}
3291	if(mean_count > 0) mean /= mean_count;
3292	size_t range = 8192;
3293	size_t radius = 4096;
3294	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
3295	memset(freq_intervals, 0, range*sizeof(size_t));
3296
3297	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
3298	int sampleDistance = confparams_cpr->sampleDistance;
3299	double predThreshold = confparams_cpr->predThreshold;
3300
3301	size_t i;
3302	size_t radiusIndex;
3303	double pred_value = 0, pred_err;
3304	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
3305	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
3306
3307	double mean_diff;
3308	ptrdiff_t freq_index;
3309	size_t freq_count = 0;
3310	size_t n1_count = 1;
3311	size_t offset_count = sampleDistance - 1;
3312	size_t offset_count_2 = 0;
3313	size_t sample_count = 0;
3314	data_pos = oriData + r2 + offset_count;
3315	while(data_pos - oriData < len){
3316	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
3317	pred_err = fabs(pred_value - *data_pos);
3318	if(pred_err < realPrecision) freq_count ++;
3319	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3320	if(radiusIndex>=maxRangeRadius)
3321	radiusIndex = maxRangeRadius - 1;
3322	intervals[radiusIndex]++;
3323
3324	mean_diff = *data_pos - mean;
3325	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
3326	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
3327	if(freq_index <= 0){
3328	freq_intervals[0] ++;
3329	}
3330	else if(freq_index >= range){
3331	freq_intervals[range - 1] ++;
3332	}
3333	else{
3334	freq_intervals[freq_index] ++;
3335	}
3336	offset_count += sampleDistance;
3337	if(offset_count >= r2){
3338	n1_count ++;
3339	offset_count_2 = n1_count % sampleDistance;
3340	data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
3341	offset_count = (sampleDistance - offset_count_2);
3342	if(offset_count == 0) offset_count ++;
3343	}
3344	else data_pos += sampleDistance;
3345	sample_count ++;
3346	}
3347	max_freq = freq_count 1.0/ sample_count;
3348
3349	//compute the appropriate number
3350	size_t targetCount = sample_count*predThreshold;
3351	size_t sum = 0;
3352	for(i=0;i<maxRangeRadius;i++)
3353	{
3354	sum += intervals[i];
3355	if(sum>targetCount)
3356	break;
3357	}
3358	if(i>=maxRangeRadius)
3359	i = maxRangeRadius-1;
3360	unsigned int accIntervals = 2*(i+1);
3361	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3362
3363	if(powerOf2<32)
3364	powerOf2 = 32;
3365
3366	// collect frequency
3367	size_t max_sum = 0;
3368	size_t max_index = 0;
3369	size_t tmp_sum;
3370	size_t * freq_pos = freq_intervals + 1;
3371	for(size_t i=1; i<range-2; i++){
3372	tmp_sum = freq_pos[0] + freq_pos[1];
3373	if(tmp_sum > max_sum){
3374	max_sum = tmp_sum;
3375	max_index = i;
3376	}
3377	freq_pos ++;
3378	}
3379	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
3380	mean_freq = max_sum 1.0 / sample_count;
3381
3382	free(freq_intervals);
3383	free(intervals);
3384	return powerOf2;
3385	}
3386
3387	unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
3388	{
3389	double mean = 0.0;
3390	size_t len = r1 * r2 * r3;
3391	size_t mean_distance = (int) (sqrt(len));
3392	double * data_pos = oriData;
3393	size_t offset_count = 0;
3394	size_t offset_count_2 = 0;
3395	size_t mean_count = 0;
3396	while(data_pos - oriData < len){
3397	mean += *data_pos;
3398	mean_count ++;
3399	data_pos += mean_distance;
3400	offset_count += mean_distance;
3401	offset_count_2 += mean_distance;
3402	if(offset_count >= r3){
3403	offset_count = 0;
3404	data_pos -= 1;
3405	}
3406	if(offset_count_2 >= r2 * r3){
3407	offset_count_2 = 0;
3408	data_pos -= 1;
3409	}
3410	}
3411	if(mean_count > 0) mean /= mean_count;
3412	size_t range = 8192;
3413	size_t radius = 4096;
3414	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
3415	memset(freq_intervals, 0, range*sizeof(size_t));
3416
3417	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
3418	int sampleDistance = confparams_cpr->sampleDistance;
3419	double predThreshold = confparams_cpr->predThreshold;
3420
3421	size_t i;
3422	size_t radiusIndex;
3423	size_t r23=r2*r3;
3424	double pred_value = 0, pred_err;
3425	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
3426	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
3427
3428	double mean_diff;
3429	ptrdiff_t freq_index;
3430	size_t freq_count = 0;
3431	size_t sample_count = 0;
3432
3433	offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
3434	data_pos = oriData + r23 + r3 + offset_count;
3435	size_t n1_count = 1, n2_count = 1; // count i,j sum
3436
3437	while(data_pos - oriData < len){
3438
3439	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
3440	pred_err = fabs(pred_value - *data_pos);
3441	if(pred_err < realPrecision) freq_count ++;
3442	radiusIndex = (pred_err/realPrecision+1)/2;
3443	if(radiusIndex>=maxRangeRadius)
3444	{
3445	radiusIndex = maxRangeRadius - 1;
3446	}
3447	intervals[radiusIndex]++;
3448
3449	mean_diff = *data_pos - mean;
3450	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
3451	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
3452	if(freq_index <= 0){
3453	freq_intervals[0] ++;
3454	}
3455	else if(freq_index >= range){
3456	freq_intervals[range - 1] ++;
3457	}
3458	else{
3459	freq_intervals[freq_index] ++;
3460	}
3461	offset_count += sampleDistance;
3462	if(offset_count >= r3){
3463	n2_count ++;
3464	if(n2_count == r2){
3465	n1_count ++;
3466	n2_count = 1;
3467	data_pos += r3;
3468	}
3469	offset_count_2 = (n1_count + n2_count) % sampleDistance;
3470	data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
3471	offset_count = (sampleDistance - offset_count_2);
3472	if(offset_count == 0) offset_count ++;
3473	}
3474	else data_pos += sampleDistance;
3475	sample_count ++;
3476	}
3477	max_freq = freq_count 1.0/ sample_count;
3478
3479	//compute the appropriate number
3480	size_t targetCount = sample_count*predThreshold;
3481	size_t sum = 0;
3482	for(i=0;i<maxRangeRadius;i++)
3483	{
3484	sum += intervals[i];
3485	if(sum>targetCount)
3486	break;
3487	}
3488	if(i>=maxRangeRadius)
3489	i = maxRangeRadius-1;
3490	unsigned int accIntervals = 2*(i+1);
3491	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3492
3493	if(powerOf2<32)
3494	powerOf2 = 32;
3495	// collect frequency
3496	size_t max_sum = 0;
3497	size_t max_index = 0;
3498	size_t tmp_sum;
3499	size_t * freq_pos = freq_intervals + 1;
3500	for(size_t i=1; i<range-2; i++){
3501	tmp_sum = freq_pos[0] + freq_pos[1];
3502	if(tmp_sum > max_sum){
3503	max_sum = tmp_sum;
3504	max_index = i;
3505	}
3506	freq_pos ++;
3507	}
3508	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
3509	mean_freq = max_sum 1.0 / sample_count;
3510
3511	free(freq_intervals);
3512	free(intervals);
3513	return powerOf2;
3514	}
3515
3516	#define MIN(a, b) a<b? a : b
3517	unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, double realPrecision, size_t comp_size){
3518
3519	unsigned int quantization_intervals;
3520	double sz_sample_correct_freq = -1;//0.5; //-1
3521	double dense_pos;
3522	double mean_flush_freq;
3523	unsigned char use_mean = 0;
3524
3525	if(exe_params->optQuantMode==1)
3526	{
3527	quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
3528	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
3529	updateQuantizationInfo(quantization_intervals);
3530	}
3531	else{
3532	quantization_intervals = exe_params->intvCapacity;
3533	}
3534
3535	// calculate block dims
3536	size_t num_x, num_y;
3537	size_t block_size = 16;
3538
3539	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
3540	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
3541
3542	size_t split_index_x, split_index_y;
3543	size_t early_blockcount_x, early_blockcount_y;
3544	size_t late_blockcount_x, late_blockcount_y;
3545	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
3546	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
3547
3548	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
3549	size_t num_blocks = num_x * num_y;
3550	size_t num_elements = r1 * r2;
3551
3552	size_t dim0_offset = r2;
3553
3554	int * result_type = (int ) malloc(num_elements sizeof(int));
3555	size_t unpred_data_max_size = max_num_block_elements;
3556	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
3557	size_t total_unpred = 0;
3558	size_t unpredictable_count;
3559	double * data_pos = oriData;
3560	int * type = result_type;
3561	size_t offset_x, offset_y;
3562	size_t current_blockcount_x, current_blockcount_y;
3563
3564	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
3565	double * reg_params_pos = reg_params;
3566	// move regression part out
3567	size_t params_offset_b = num_blocks;
3568	size_t params_offset_c = 2*num_blocks;
3569	for(size_t i=0; i<num_x; i++){
3570	for(size_t j=0; j<num_y; j++){
3571	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
3572	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
3573	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
3574	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
3575
3576	data_pos = oriData + offset_x * dim0_offset + offset_y;
3577
3578	{
3579	double * cur_data_pos = data_pos;
3580	double fx = 0.0;
3581	double fy = 0.0;
3582	double f = 0;
3583	double sum_x;
3584	double curData;
3585	for(size_t i=0; i<current_blockcount_x; i++){
3586	sum_x = 0;
3587	for(size_t j=0; j<current_blockcount_y; j++){
3588	curData = *cur_data_pos;
3589	sum_x += curData;
3590	fy += curData * j;
3591	cur_data_pos ++;
3592	}
3593	fx += sum_x * i;
3594	f += sum_x;
3595	cur_data_pos += dim0_offset - current_blockcount_y;
3596	}
3597	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
3598	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
3599	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
3600	reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
3601	}
3602
3603	reg_params_pos ++;
3604	}
3605	}
3606
3607	//Compress coefficient arrays
3608	double precision_a, precision_b, precision_c;
3609	double rel_param_err = 0.15/3;
3610	precision_a = rel_param_err * realPrecision / late_blockcount_x;
3611	precision_b = rel_param_err * realPrecision / late_blockcount_y;
3612	precision_c = rel_param_err * realPrecision;
3613
3614	double mean = 0;
3615	use_mean = 0;
3616	if(use_mean){
3617	// compute mean
3618	double sum = 0.0;
3619	size_t mean_count = 0;
3620	for(size_t i=0; i<num_elements; i++){
3621	if(fabs(oriData[i] - dense_pos) < realPrecision){
3622	sum += oriData[i];
3623	mean_count ++;
3624	}
3625	}
3626	if(mean_count > 0) mean = sum / mean_count;
3627	}
3628
3629
3630	double tmp_realPrecision = realPrecision;
3631
3632	// use two prediction buffers for higher performance
3633	double * unpredictable_data = result_unpredictable_data;
3634	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
3635	memset(indicator, 0, num_blocks * sizeof(unsigned char));
3636	size_t reg_count = 0;
3637	size_t strip_dim_0 = early_blockcount_x + 1;
3638	size_t strip_dim_1 = r2 + 1;
3639	size_t strip_dim0_offset = strip_dim_1;
3640	unsigned char * indicator_pos = indicator;
3641	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
3642	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
3643	memset(prediction_buffer_1, 0, prediction_buffer_size);
3644	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
3645	memset(prediction_buffer_2, 0, prediction_buffer_size);
3646	double * cur_pb_buf = prediction_buffer_1;
3647	double * next_pb_buf = prediction_buffer_2;
3648	double * cur_pb_buf_pos;
3649	double * next_pb_buf_pos;
3650	int intvCapacity = exe_params->intvCapacity;
3651	int intvRadius = exe_params->intvRadius;
3652	int use_reg = 0;
3653
3654	reg_params_pos = reg_params;
3655	// compress the regression coefficients on the fly
3656	double last_coeffcients[3] = {0.0};
3657	int coeff_intvCapacity_sz = 65536;
3658	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
3659	int * coeff_type[3];
3660	int * coeff_result_type = (int ) malloc(num_blocks3*sizeof(int));
3661	double * coeff_unpred_data[3];
3662	double * coeff_unpredictable_data = (double ) malloc(num_blocks3*sizeof(double));
3663	double precision[3];
3664	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
3665	for(int i=0; i<3; i++){
3666	coeff_type[i] = coeff_result_type + i * num_blocks;
3667	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
3668	}
3669	int coeff_index = 0;
3670	unsigned int coeff_unpredictable_count[3] = {0};
3671	if(use_mean){
3672	type = result_type;
3673	int intvCapacity_sz = intvCapacity - 2;
3674	for(size_t i=0; i<num_x; i++){
3675	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
3676	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
3677	data_pos = oriData + offset_x * dim0_offset;
3678
3679	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
3680	next_pb_buf_pos = next_pb_buf + 1;
3681	double * pb_pos = cur_pb_buf_pos;
3682	double * next_pb_pos = next_pb_buf_pos;
3683
3684	for(size_t j=0; j<num_y; j++){
3685	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
3686	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
3687
3688	/sampling: decide which predictor to use (regression or lorenzo)/
3689	{
3690	double * cur_data_pos;
3691	double curData;
3692	double pred_reg, pred_sz;
3693	double err_sz = 0.0, err_reg = 0.0;
3694	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
3695	// [1, 9] [3, 7] [7, 3] [9, 1]
3696	int count = 0;
3697	for(int i=1; i<current_blockcount_x; i+=2){
3698	cur_data_pos = data_pos + i * dim0_offset + i;
3699	curData = *cur_data_pos;
3700	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
3701	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
3702
3703	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
3704
3705	err_reg += fabs(pred_reg - curData);
3706
3707	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
3708	curData = *cur_data_pos;
3709	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
3710	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
3711	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
3712
3713	err_reg += fabs(pred_reg - curData);
3714
3715	count += 2;
3716	}
3717
3718	use_reg = (err_reg < err_sz);
3719	}
3720	if(use_reg)
3721	{
3722	{
3723	/predict coefficients in current block via previous reg_block/
3724	double cur_coeff;
3725	double diff, itvNum;
3726	for(int e=0; e<3; e++){
3727	cur_coeff = reg_params_pos[e*num_blocks];
3728	diff = cur_coeff - last_coeffcients[e];
3729	itvNum = fabs(diff)/precision[e] + 1;
3730	if (itvNum < coeff_intvCapacity_sz){
3731	if (diff < 0) itvNum = -itvNum;
3732	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
3733	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
3734	//ganrantee comporession error against the case of machine-epsilon
3735	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
3736	coeff_type[e][coeff_index] = 0;
3737	last_coeffcients[e] = cur_coeff;
3738	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
3739	}
3740	}
3741	else{
3742	coeff_type[e][coeff_index] = 0;
3743	last_coeffcients[e] = cur_coeff;
3744	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
3745	}
3746	}
3747	coeff_index ++;
3748	}
3749	double curData;
3750	double pred;
3751	double itvNum;
3752	double diff;
3753	size_t index = 0;
3754	size_t block_unpredictable_count = 0;
3755	double * cur_data_pos = data_pos;
3756	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
3757	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
3758	curData = *cur_data_pos;
3759	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3760	diff = curData - pred;
3761	itvNum = fabs(diff)/realPrecision + 1;
3762	if (itvNum < intvCapacity){
3763	if (diff < 0) itvNum = -itvNum;
3764	type[index] = (int) (itvNum/2) + intvRadius;
3765	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3766	//ganrantee comporession error against the case of machine-epsilon
3767	if(fabs(curData - pred)>realPrecision){
3768	type[index] = 0;
3769	pred = curData;
3770	unpredictable_data[block_unpredictable_count ++] = curData;
3771	}
3772	}
3773	else{
3774	type[index] = 0;
3775	pred = curData;
3776	unpredictable_data[block_unpredictable_count ++] = curData;
3777	}
3778	index ++;
3779	cur_data_pos ++;
3780	}
3781	/dealing with the last jj (boundary)/
3782	{
3783	size_t jj = current_blockcount_y - 1;
3784	curData = *cur_data_pos;
3785	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3786	diff = curData - pred;
3787	itvNum = fabs(diff)/realPrecision + 1;
3788	if (itvNum < intvCapacity){
3789	if (diff < 0) itvNum = -itvNum;
3790	type[index] = (int) (itvNum/2) + intvRadius;
3791	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3792	//ganrantee comporession error against the case of machine-epsilon
3793	if(fabs(curData - pred)>realPrecision){
3794	type[index] = 0;
3795	pred = curData;
3796	unpredictable_data[block_unpredictable_count ++] = curData;
3797	}
3798	}
3799	else{
3800	type[index] = 0;
3801	pred = curData;
3802	unpredictable_data[block_unpredictable_count ++] = curData;
3803	}
3804
3805	// assign value to block surfaces
3806	pb_pos[ii * strip_dim0_offset + jj] = pred;
3807	index ++;
3808	cur_data_pos ++;
3809	}
3810	cur_data_pos += dim0_offset - current_blockcount_y;
3811	}
3812	/dealing with the last ii (boundary)/
3813	{
3814	size_t ii = current_blockcount_x - 1;
3815	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
3816	curData = *cur_data_pos;
3817	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3818	diff = curData - pred;
3819	itvNum = fabs(diff)/realPrecision + 1;
3820	if (itvNum < intvCapacity){
3821	if (diff < 0) itvNum = -itvNum;
3822	type[index] = (int) (itvNum/2) + intvRadius;
3823	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3824	//ganrantee comporession error against the case of machine-epsilon
3825	if(fabs(curData - pred)>realPrecision){
3826	type[index] = 0;
3827	pred = curData;
3828	unpredictable_data[block_unpredictable_count ++] = curData;
3829	}
3830	}
3831	else{
3832	type[index] = 0;
3833	pred = curData;
3834	unpredictable_data[block_unpredictable_count ++] = curData;
3835	}
3836	// assign value to next prediction buffer
3837	next_pb_pos[jj] = pred;
3838	index ++;
3839	cur_data_pos ++;
3840	}
3841	/dealing with the last jj (boundary)/
3842	{
3843	size_t jj = current_blockcount_y - 1;
3844	curData = *cur_data_pos;
3845	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3846	diff = curData - pred;
3847	itvNum = fabs(diff)/realPrecision + 1;
3848	if (itvNum < intvCapacity){
3849	if (diff < 0) itvNum = -itvNum;
3850	type[index] = (int) (itvNum/2) + intvRadius;
3851	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3852	//ganrantee comporession error against the case of machine-epsilon
3853	if(fabs(curData - pred)>realPrecision){
3854	type[index] = 0;
3855	pred = curData;
3856	unpredictable_data[block_unpredictable_count ++] = curData;
3857	}
3858	}
3859	else{
3860	type[index] = 0;
3861	pred = curData;
3862	unpredictable_data[block_unpredictable_count ++] = curData;
3863	}
3864
3865	// assign value to block surfaces
3866	pb_pos[ii * strip_dim0_offset + jj] = pred;
3867	// assign value to next prediction buffer
3868	next_pb_pos[jj] = pred;
3869
3870	index ++;
3871	cur_data_pos ++;
3872	}
3873	} // end ii == -1
3874	unpredictable_count = block_unpredictable_count;
3875	total_unpred += unpredictable_count;
3876	unpredictable_data += unpredictable_count;
3877	reg_count ++;
3878	}// end use_reg
3879	else{
3880	// use SZ
3881	// SZ predication
3882	unpredictable_count = 0;
3883	double * cur_pb_pos = pb_pos;
3884	double * cur_data_pos = data_pos;
3885	double curData;
3886	double pred2D;
3887	double itvNum, diff;
3888	size_t index = 0;
3889	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
3890	for(size_t jj=0; jj<current_blockcount_y; jj++){
3891	curData = *cur_data_pos;
3892	if(fabs(curData - mean) <= realPrecision){
3893	// adjust type[index] to intvRadius for coherence with freq in reg
3894	type[index] = intvRadius;
3895	*cur_pb_pos = mean;
3896	}
3897	else
3898	{
3899	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
3900	diff = curData - pred2D;
3901	itvNum = fabs(diff)/realPrecision + 1;
3902	if (itvNum < intvCapacity_sz){
3903	if (diff < 0) itvNum = -itvNum;
3904	type[index] = (int) (itvNum/2) + intvRadius;
3905	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
3906	if(type[index] <= intvRadius) type[index] -= 1;
3907	//ganrantee comporession error against the case of machine-epsilon
3908	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
3909	type[index] = 0;
3910	*cur_pb_pos = curData;
3911	unpredictable_data[unpredictable_count ++] = curData;
3912	}
3913	}
3914	else{
3915	type[index] = 0;
3916	*cur_pb_pos = curData;
3917	unpredictable_data[unpredictable_count ++] = curData;
3918	}
3919	}
3920	index ++;
3921	cur_pb_pos ++;
3922	cur_data_pos ++;
3923	}
3924	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
3925	cur_data_pos += dim0_offset - current_blockcount_y;
3926	}
3927	/dealing with the last ii (boundary)/
3928	{
3929	// ii == current_blockcount_x - 1
3930	for(size_t jj=0; jj<current_blockcount_y; jj++){
3931	curData = *cur_data_pos;
3932	if(fabs(curData - mean) <= realPrecision){
3933	// adjust type[index] to intvRadius for coherence with freq in reg
3934	type[index] = intvRadius;
3935	*cur_pb_pos = mean;
3936	}
3937	else
3938	{
3939	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
3940	diff = curData - pred2D;
3941	itvNum = fabs(diff)/realPrecision + 1;
3942	if (itvNum < intvCapacity_sz){
3943	if (diff < 0) itvNum = -itvNum;
3944	type[index] = (int) (itvNum/2) + intvRadius;
3945	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
3946	if(type[index] <= intvRadius) type[index] -= 1;
3947	//ganrantee comporession error against the case of machine-epsilon
3948	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
3949	type[index] = 0;
3950	*cur_pb_pos = curData;
3951	unpredictable_data[unpredictable_count ++] = curData;
3952	}
3953	}
3954	else{
3955	type[index] = 0;
3956	*cur_pb_pos = curData;
3957	unpredictable_data[unpredictable_count ++] = curData;
3958	}
3959	}
3960	next_pb_pos[jj] = *cur_pb_pos;
3961	index ++;
3962	cur_pb_pos ++;
3963	cur_data_pos ++;
3964	}
3965	}
3966	total_unpred += unpredictable_count;
3967	unpredictable_data += unpredictable_count;
3968	// change indicator
3969	indicator_pos[j] = 1;
3970	}// end SZ
3971	reg_params_pos ++;
3972	data_pos += current_blockcount_y;
3973	pb_pos += current_blockcount_y;
3974	next_pb_pos += current_blockcount_y;
3975	type += current_blockcount_x * current_blockcount_y;
3976	}// end j
3977	indicator_pos += num_y;
3978	double * tmp;
3979	tmp = cur_pb_buf;
3980	cur_pb_buf = next_pb_buf;
3981	next_pb_buf = tmp;
3982	}// end i
3983	}// end use mean
3984	else{
3985	type = result_type;
3986	int intvCapacity_sz = intvCapacity - 2;
3987	for(size_t i=0; i<num_x; i++){
3988	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
3989	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
3990	data_pos = oriData + offset_x * dim0_offset;
3991
3992	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
3993	next_pb_buf_pos = next_pb_buf + 1;
3994	double * pb_pos = cur_pb_buf_pos;
3995	double * next_pb_pos = next_pb_buf_pos;
3996
3997	for(size_t j=0; j<num_y; j++){
3998	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
3999	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4000	/sampling/
4001	{
4002	// sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
4003	double * cur_data_pos;
4004	double curData;
4005	double pred_reg, pred_sz;
4006	double err_sz = 0.0, err_reg = 0.0;
4007	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
4008	// [1, 9] [3, 7] [7, 3] [9, 1]
4009	int count = 0;
4010	for(int i=1; i<current_blockcount_x; i+=2){
4011	cur_data_pos = data_pos + i * dim0_offset + i;
4012	curData = *cur_data_pos;
4013	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4014	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
4015	err_sz += fabs(pred_sz - curData);
4016	err_reg += fabs(pred_reg - curData);
4017
4018	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
4019	curData = *cur_data_pos;
4020	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4021	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
4022	err_sz += fabs(pred_sz - curData);
4023	err_reg += fabs(pred_reg - curData);
4024
4025	count += 2;
4026	}
4027	err_sz += realPrecision * count * 0.81;
4028	use_reg = (err_reg < err_sz);
4029
4030	}
4031	if(use_reg)
4032	{
4033	{
4034	/predict coefficients in current block via previous reg_block/
4035	double cur_coeff;
4036	double diff, itvNum;
4037	for(int e=0; e<3; e++){
4038	cur_coeff = reg_params_pos[e*num_blocks];
4039	diff = cur_coeff - last_coeffcients[e];
4040	itvNum = fabs(diff)/precision[e] + 1;
4041	if (itvNum < coeff_intvCapacity_sz){
4042	if (diff < 0) itvNum = -itvNum;
4043	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4044	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4045	//ganrantee comporession error against the case of machine-epsilon
4046	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4047	coeff_type[e][coeff_index] = 0;
4048	last_coeffcients[e] = cur_coeff;
4049	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4050	}
4051	}
4052	else{
4053	coeff_type[e][coeff_index] = 0;
4054	last_coeffcients[e] = cur_coeff;
4055	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4056	}
4057	}
4058	coeff_index ++;
4059	}
4060	double curData;
4061	double pred;
4062	double itvNum;
4063	double diff;
4064	size_t index = 0;
4065	size_t block_unpredictable_count = 0;
4066	double * cur_data_pos = data_pos;
4067	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4068	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4069	curData = *cur_data_pos;
4070	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4071	diff = curData - pred;
4072	itvNum = fabs(diff)/realPrecision + 1;
4073	if (itvNum < intvCapacity){
4074	if (diff < 0) itvNum = -itvNum;
4075	type[index] = (int) (itvNum/2) + intvRadius;
4076	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4077	//ganrantee comporession error against the case of machine-epsilon
4078	if(fabs(curData - pred)>realPrecision){
4079	type[index] = 0;
4080	pred = curData;
4081	unpredictable_data[block_unpredictable_count ++] = curData;
4082	}
4083	}
4084	else{
4085	type[index] = 0;
4086	pred = curData;
4087	unpredictable_data[block_unpredictable_count ++] = curData;
4088	}
4089	index ++;
4090	cur_data_pos ++;
4091	}
4092	/dealing with the last jj (boundary)/
4093	{
4094	// jj == current_blockcount_y - 1
4095	size_t jj = current_blockcount_y - 1;
4096	curData = *cur_data_pos;
4097	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4098	diff = curData - pred;
4099	itvNum = fabs(diff)/realPrecision + 1;
4100	if (itvNum < intvCapacity){
4101	if (diff < 0) itvNum = -itvNum;
4102	type[index] = (int) (itvNum/2) + intvRadius;
4103	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4104	//ganrantee comporession error against the case of machine-epsilon
4105	if(fabs(curData - pred)>realPrecision){
4106	type[index] = 0;
4107	pred = curData;
4108	unpredictable_data[block_unpredictable_count ++] = curData;
4109	}
4110	}
4111	else{
4112	type[index] = 0;
4113	pred = curData;
4114	unpredictable_data[block_unpredictable_count ++] = curData;
4115	}
4116
4117	// assign value to block surfaces
4118	pb_pos[ii * strip_dim0_offset + jj] = pred;
4119	index ++;
4120	cur_data_pos ++;
4121	}
4122	cur_data_pos += dim0_offset - current_blockcount_y;
4123	}
4124	/dealing with the last ii (boundary)/
4125	{
4126	size_t ii = current_blockcount_x - 1;
4127	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4128	curData = *cur_data_pos;
4129	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4130	diff = curData - pred;
4131	itvNum = fabs(diff)/realPrecision + 1;
4132	if (itvNum < intvCapacity){
4133	if (diff < 0) itvNum = -itvNum;
4134	type[index] = (int) (itvNum/2) + intvRadius;
4135	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4136	//ganrantee comporession error against the case of machine-epsilon
4137	if(fabs(curData - pred)>realPrecision){
4138	type[index] = 0;
4139	pred = curData;
4140	unpredictable_data[block_unpredictable_count ++] = curData;
4141	}
4142	}
4143	else{
4144	type[index] = 0;
4145	pred = curData;
4146	unpredictable_data[block_unpredictable_count ++] = curData;
4147	}
4148	// assign value to next prediction buffer
4149	next_pb_pos[jj] = pred;
4150	index ++;
4151	cur_data_pos ++;
4152	}
4153	/dealing with the last jj (boundary)/
4154	{
4155	// jj == current_blockcount_y - 1
4156	size_t jj = current_blockcount_y - 1;
4157	curData = *cur_data_pos;
4158	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4159	diff = curData - pred;
4160	itvNum = fabs(diff)/realPrecision + 1;
4161	if (itvNum < intvCapacity){
4162	if (diff < 0) itvNum = -itvNum;
4163	type[index] = (int) (itvNum/2) + intvRadius;
4164	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4165	//ganrantee comporession error against the case of machine-epsilon
4166	if(fabs(curData - pred)>realPrecision){
4167	type[index] = 0;
4168	pred = curData;
4169	unpredictable_data[block_unpredictable_count ++] = curData;
4170	}
4171	}
4172	else{
4173	type[index] = 0;
4174	pred = curData;
4175	unpredictable_data[block_unpredictable_count ++] = curData;
4176	}
4177
4178	// assign value to block surfaces
4179	pb_pos[ii * strip_dim0_offset + jj] = pred;
4180	// assign value to next prediction buffer
4181	next_pb_pos[jj] = pred;
4182
4183	index ++;
4184	cur_data_pos ++;
4185	}
4186	} // end ii == -1
4187	unpredictable_count = block_unpredictable_count;
4188	total_unpred += unpredictable_count;
4189	unpredictable_data += unpredictable_count;
4190	reg_count ++;
4191	}// end use_reg
4192	else{
4193	// use SZ
4194	// SZ predication
4195	unpredictable_count = 0;
4196	double * cur_pb_pos = pb_pos;
4197	double * cur_data_pos = data_pos;
4198	double curData;
4199	double pred2D;
4200	double itvNum, diff;
4201	size_t index = 0;
4202	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4203	for(size_t jj=0; jj<current_blockcount_y; jj++){
4204	curData = *cur_data_pos;
4205
4206	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4207	diff = curData - pred2D;
4208	itvNum = fabs(diff)/realPrecision + 1;
4209	if (itvNum < intvCapacity_sz){
4210	if (diff < 0) itvNum = -itvNum;
4211	type[index] = (int) (itvNum/2) + intvRadius;
4212	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4213	//ganrantee comporession error against the case of machine-epsilon
4214	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4215	type[index] = 0;
4216	*cur_pb_pos = curData;
4217	unpredictable_data[unpredictable_count ++] = curData;
4218	}
4219	}
4220	else{
4221	type[index] = 0;
4222	*cur_pb_pos = curData;
4223	unpredictable_data[unpredictable_count ++] = curData;
4224	}
4225
4226	index ++;
4227	cur_pb_pos ++;
4228	cur_data_pos ++;
4229	}
4230	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
4231	cur_data_pos += dim0_offset - current_blockcount_y;
4232	}
4233	/dealing with the last ii (boundary)/
4234	{
4235	// ii == current_blockcount_x - 1
4236	for(size_t jj=0; jj<current_blockcount_y; jj++){
4237	curData = *cur_data_pos;
4238
4239	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4240	diff = curData - pred2D;
4241	itvNum = fabs(diff)/realPrecision + 1;
4242	if (itvNum < intvCapacity_sz){
4243	if (diff < 0) itvNum = -itvNum;
4244	type[index] = (int) (itvNum/2) + intvRadius;
4245	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4246	//ganrantee comporession error against the case of machine-epsilon
4247	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4248	type[index] = 0;
4249	*cur_pb_pos = curData;
4250	unpredictable_data[unpredictable_count ++] = curData;
4251	}
4252	}
4253	else{
4254	type[index] = 0;
4255	*cur_pb_pos = curData;
4256	unpredictable_data[unpredictable_count ++] = curData;
4257	}
4258	next_pb_pos[jj] = *cur_pb_pos;
4259	index ++;
4260	cur_pb_pos ++;
4261	cur_data_pos ++;
4262	}
4263	}
4264	total_unpred += unpredictable_count;
4265	unpredictable_data += unpredictable_count;
4266	// change indicator
4267	indicator_pos[j] = 1;
4268	}// end SZ
4269	reg_params_pos ++;
4270	data_pos += current_blockcount_y;
4271	pb_pos += current_blockcount_y;
4272	next_pb_pos += current_blockcount_y;
4273	type += current_blockcount_x * current_blockcount_y;
4274	}// end j
4275	indicator_pos += num_y;
4276	double * tmp;
4277	tmp = cur_pb_buf;
4278	cur_pb_buf = next_pb_buf;
4279	next_pb_buf = tmp;
4280	}// end i
4281	}
4282	free(prediction_buffer_1);
4283	free(prediction_buffer_2);
4284
4285	int stateNum = 2*quantization_intervals;
4286	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4287
4288	size_t nodeCount = 0;
4289	size_t i = 0;
4290	init(huffmanTree, result_type, num_elements);
4291	for (i = 0; i < stateNum; i++)
4292	if (huffmanTree->code[i]) nodeCount++;
4293	nodeCount = nodeCount*2-1;
4294
4295	unsigned char *treeBytes;
4296	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4297
4298	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
4299	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
4300	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
4301	unsigned char * result_pos = result;
4302	initRandomAccessBytes(result_pos);
4303	result_pos += meta_data_offset;
4304
4305	sizeToBytes(result_pos, num_elements);
4306	result_pos += exe_params->SZ_SIZE_TYPE;
4307
4308	intToBytes_bigEndian(result_pos, block_size);
4309	result_pos += sizeof(int);
4310	doubleToBytes(result_pos, realPrecision);
4311	result_pos += sizeof(double);
4312	intToBytes_bigEndian(result_pos, quantization_intervals);
4313	result_pos += sizeof(int);
4314	intToBytes_bigEndian(result_pos, treeByteSize);
4315	result_pos += sizeof(int);
4316	intToBytes_bigEndian(result_pos, nodeCount);
4317	result_pos += sizeof(int);
4318	memcpy(result_pos, treeBytes, treeByteSize);
4319	result_pos += treeByteSize;
4320	free(treeBytes);
4321
4322	memcpy(result_pos, &use_mean, sizeof(unsigned char));
4323	result_pos += sizeof(unsigned char);
4324	memcpy(result_pos, &mean, sizeof(double));
4325	result_pos += sizeof(double);
4326
4327	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
4328	result_pos += indicator_size;
4329
4330	//convert the lead/mid/resi to byte stream
4331	if(reg_count>0){
4332	for(int e=0; e<3; e++){
4333	int stateNum = 2*coeff_intvCapacity_sz;
4334	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4335	size_t nodeCount = 0;
4336	init(huffmanTree, coeff_type[e], reg_count);
4337	size_t i = 0;
4338	for (i = 0; i < huffmanTree->stateNum; i++)
4339	if (huffmanTree->code[i]) nodeCount++;
4340	nodeCount = nodeCount*2-1;
4341	unsigned char *treeBytes;
4342	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4343	doubleToBytes(result_pos, precision[e]);
4344	result_pos += sizeof(double);
4345	intToBytes_bigEndian(result_pos, coeff_intvRadius);
4346	result_pos += sizeof(int);
4347	intToBytes_bigEndian(result_pos, treeByteSize);
4348	result_pos += sizeof(int);
4349	intToBytes_bigEndian(result_pos, nodeCount);
4350	result_pos += sizeof(int);
4351	memcpy(result_pos, treeBytes, treeByteSize);
4352	result_pos += treeByteSize;
4353	free(treeBytes);
4354	size_t typeArray_size = 0;
4355	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
4356	sizeToBytes(result_pos, typeArray_size);
4357	result_pos += sizeof(size_t) + typeArray_size;
4358	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
4359	result_pos += sizeof(int);
4360	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
4361	result_pos += coeff_unpredictable_count[e]*sizeof(double);
4362	SZ_ReleaseHuffman(huffmanTree);
4363	}
4364	}
4365	free(coeff_result_type);
4366	free(coeff_unpredictable_data);
4367
4368	//record the number of unpredictable data and also store them
4369	memcpy(result_pos, &total_unpred, sizeof(size_t));
4370	result_pos += sizeof(size_t);
4371	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
4372	result_pos += total_unpred * sizeof(double);
4373	size_t typeArray_size = 0;
4374	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
4375	result_pos += typeArray_size;
4376
4377	size_t totalEncodeSize = result_pos - result;
4378	free(indicator);
4379	free(result_unpredictable_data);
4380	free(result_type);
4381	free(reg_params);
4382
4383	SZ_ReleaseHuffman(huffmanTree);
4384	*comp_size = totalEncodeSize;
4385
4386	return result;
4387	}
4388
4389	unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
4390
4391	unsigned int quantization_intervals;
4392	double sz_sample_correct_freq = -1;//0.5; //-1
4393	double dense_pos;
4394	double mean_flush_freq;
4395	unsigned char use_mean = 0;
4396
4397	// calculate block dims
4398	size_t num_x, num_y, num_z;
4399	size_t block_size = 6;
4400	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
4401	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
4402	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
4403
4404	size_t split_index_x, split_index_y, split_index_z;
4405	size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
4406	size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
4407	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
4408	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
4409	SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
4410
4411	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
4412	size_t num_blocks = num_x * num_y * num_z;
4413	size_t num_elements = r1 * r2 * r3;
4414
4415	size_t dim0_offset = r2 * r3;
4416	size_t dim1_offset = r3;
4417
4418	int * result_type = (int ) malloc(num_elements sizeof(int));
4419	size_t unpred_data_max_size = max_num_block_elements;
4420	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
4421	size_t total_unpred = 0;
4422	size_t unpredictable_count;
4423	size_t max_unpred_count = 0;
4424	double * data_pos = oriData;
4425	int * type = result_type;
4426	size_t type_offset;
4427	size_t offset_x, offset_y, offset_z;
4428	size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
4429
4430	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
4431	double * reg_params_pos = reg_params;
4432	// move regression part out
4433	size_t params_offset_b = num_blocks;
4434	size_t params_offset_c = 2*num_blocks;
4435	size_t params_offset_d = 3*num_blocks;
4436	for(size_t i=0; i<num_x; i++){
4437	for(size_t j=0; j<num_y; j++){
4438	for(size_t k=0; k<num_z; k++){
4439	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4440	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4441	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
4442	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4443	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4444	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
4445
4446	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
4447	/Calculate regression coefficients/
4448	{
4449	double * cur_data_pos = data_pos;
4450	double fx = 0.0;
4451	double fy = 0.0;
4452	double fz = 0.0;
4453	double f = 0;
4454	double sum_x, sum_y;
4455	double curData;
4456	for(size_t i=0; i<current_blockcount_x; i++){
4457	sum_x = 0;
4458	for(size_t j=0; j<current_blockcount_y; j++){
4459	sum_y = 0;
4460	for(size_t k=0; k<current_blockcount_z; k++){
4461	curData = *cur_data_pos;
4462	// f += curData;
4463	// fx += curData * i;
4464	// fy += curData * j;
4465	// fz += curData * k;
4466	sum_y += curData;
4467	fz += curData * k;
4468	cur_data_pos ++;
4469	}
4470	fy += sum_y * j;
4471	sum_x += sum_y;
4472	cur_data_pos += dim1_offset - current_blockcount_z;
4473	}
4474	fx += sum_x * i;
4475	f += sum_x;
4476	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
4477	}
4478	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
4479	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
4480	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
4481	reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
4482	reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
4483	}
4484	reg_params_pos ++;
4485	}
4486	}
4487	}
4488
4489	//Compress coefficient arrays
4490	double precision_a, precision_b, precision_c, precision_d;
4491	double rel_param_err = 0.025;
4492	precision_a = rel_param_err * realPrecision / late_blockcount_x;
4493	precision_b = rel_param_err * realPrecision / late_blockcount_y;
4494	precision_c = rel_param_err * realPrecision / late_blockcount_z;
4495	precision_d = rel_param_err * realPrecision;
4496
4497	if(exe_params->optQuantMode==1)
4498	{
4499	quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
4500	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
4501	updateQuantizationInfo(quantization_intervals);
4502	}
4503	else{
4504	quantization_intervals = exe_params->intvCapacity;
4505	}
4506
4507	double mean = 0;
4508	if(use_mean){
4509	// compute mean
4510	double sum = 0.0;
4511	size_t mean_count = 0;
4512	for(size_t i=0; i<num_elements; i++){
4513	if(fabs(oriData[i] - dense_pos) < realPrecision){
4514	sum += oriData[i];
4515	mean_count ++;
4516	}
4517	}
4518	if(mean_count > 0) mean = sum / mean_count;
4519	}
4520
4521	double tmp_realPrecision = realPrecision;
4522
4523	// use two prediction buffers for higher performance
4524	double * unpredictable_data = result_unpredictable_data;
4525	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
4526	memset(indicator, 0, num_blocks * sizeof(unsigned char));
4527	size_t reg_count = 0;
4528	size_t strip_dim_0 = early_blockcount_x + 1;
4529	size_t strip_dim_1 = r2 + 1;
4530	size_t strip_dim_2 = r3 + 1;
4531	size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
4532	size_t strip_dim1_offset = strip_dim_2;
4533	unsigned char * indicator_pos = indicator;
4534
4535	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
4536	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
4537	memset(prediction_buffer_1, 0, prediction_buffer_size);
4538	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
4539	memset(prediction_buffer_2, 0, prediction_buffer_size);
4540	double * cur_pb_buf = prediction_buffer_1;
4541	double * next_pb_buf = prediction_buffer_2;
4542	double * cur_pb_buf_pos;
4543	double * next_pb_buf_pos;
4544	int intvCapacity = exe_params->intvCapacity;
4545	int intvRadius = exe_params->intvRadius;
4546	int use_reg = 0;
4547	double noise = realPrecision * 1.22;
4548
4549	reg_params_pos = reg_params;
4550	// compress the regression coefficients on the fly
4551	double last_coeffcients[4] = {0.0};
4552	int coeff_intvCapacity_sz = 65536;
4553	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
4554	int * coeff_type[4];
4555	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
4556	double * coeff_unpred_data[4];
4557	double * coeff_unpredictable_data = (double ) malloc(num_blocks4*sizeof(double));
4558	double precision[4];
4559	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
4560	for(int i=0; i<4; i++){
4561	coeff_type[i] = coeff_result_type + i * num_blocks;
4562	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
4563	}
4564	int coeff_index = 0;
4565	unsigned int coeff_unpredictable_count[4] = {0};
4566
4567	if(use_mean){
4568	int intvCapacity_sz = intvCapacity - 2;
4569	for(size_t i=0; i<num_x; i++){
4570	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4571	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4572	for(size_t j=0; j<num_y; j++){
4573	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4574	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4575	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
4576	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
4577	type = result_type + type_offset;
4578
4579	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
4580	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
4581	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
4582
4583	size_t current_blockcount_z;
4584	double * pb_pos = cur_pb_buf_pos;
4585	double * next_pb_pos = next_pb_buf_pos;
4586	size_t strip_unpredictable_count = 0;
4587	for(size_t k=0; k<num_z; k++){
4588	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
4589
4590	/sampling and decide which predictor/
4591	{
4592	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
4593	double * cur_data_pos;
4594	double curData;
4595	double pred_reg, pred_sz;
4596	double err_sz = 0.0, err_reg = 0.0;
4597	int bmi = 0;
4598	if(i>0 && j>0 && k>0){
4599	for(int i=0; i<block_size; i++){
4600	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4601	curData = *cur_data_pos;
4602	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4603	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4604	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4605	err_reg += fabs(pred_reg - curData);
4606
4607	bmi = block_size - i;
4608	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4609	curData = *cur_data_pos;
4610	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4611	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4612	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4613	err_reg += fabs(pred_reg - curData);
4614
4615	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4616	curData = *cur_data_pos;
4617	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4618	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4619	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4620	err_reg += fabs(pred_reg - curData);
4621
4622	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4623	curData = *cur_data_pos;
4624	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4625	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4626	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4627	err_reg += fabs(pred_reg - curData);
4628	}
4629	}
4630	else{
4631	for(int i=1; i<block_size; i++){
4632	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4633	curData = *cur_data_pos;
4634	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4635	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4636	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4637	err_reg += fabs(pred_reg - curData);
4638
4639	bmi = block_size - i;
4640	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4641	curData = *cur_data_pos;
4642	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4643	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4644	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4645	err_reg += fabs(pred_reg - curData);
4646
4647	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4648	curData = *cur_data_pos;
4649	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4650	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4651	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4652	err_reg += fabs(pred_reg - curData);
4653
4654	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4655	curData = *cur_data_pos;
4656	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4657	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4658	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4659	err_reg += fabs(pred_reg - curData);
4660
4661	}
4662	}
4663	use_reg = (err_reg < err_sz);
4664	}
4665	if(use_reg){
4666	{
4667	/predict coefficients in current block via previous reg_block/
4668	double cur_coeff;
4669	double diff, itvNum;
4670	for(int e=0; e<4; e++){
4671	cur_coeff = reg_params_pos[e*num_blocks];
4672	diff = cur_coeff - last_coeffcients[e];
4673	itvNum = fabs(diff)/precision[e] + 1;
4674	if (itvNum < coeff_intvCapacity_sz){
4675	if (diff < 0) itvNum = -itvNum;
4676	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4677	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4678	//ganrantee comporession error against the case of machine-epsilon
4679	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4680	coeff_type[e][coeff_index] = 0;
4681	last_coeffcients[e] = cur_coeff;
4682	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4683	}
4684	}
4685	else{
4686	coeff_type[e][coeff_index] = 0;
4687	last_coeffcients[e] = cur_coeff;
4688	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4689	}
4690	}
4691	coeff_index ++;
4692	}
4693	double curData;
4694	double pred;
4695	double itvNum;
4696	double diff;
4697	size_t index = 0;
4698	size_t block_unpredictable_count = 0;
4699	double * cur_data_pos = data_pos;
4700	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4701	for(size_t jj=0; jj<current_blockcount_y; jj++){
4702	for(size_t kk=0; kk<current_blockcount_z; kk++){
4703	curData = *cur_data_pos;
4704	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
4705	diff = curData - pred;
4706	itvNum = fabs(diff)/tmp_realPrecision + 1;
4707	if (itvNum < intvCapacity){
4708	if (diff < 0) itvNum = -itvNum;
4709	type[index] = (int) (itvNum/2) + intvRadius;
4710	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
4711	//ganrantee comporession error against the case of machine-epsilon
4712	if(fabs(curData - pred)>tmp_realPrecision){
4713	type[index] = 0;
4714	pred = curData;
4715	unpredictable_data[block_unpredictable_count ++] = curData;
4716	}
4717	}
4718	else{
4719	type[index] = 0;
4720	pred = curData;
4721	unpredictable_data[block_unpredictable_count ++] = curData;
4722	}
4723	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
4724	// assign value to block surfaces
4725	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
4726	}
4727	index ++;
4728	cur_data_pos ++;
4729	}
4730	cur_data_pos += dim1_offset - current_blockcount_z;
4731	}
4732	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
4733	}
4734	/dealing with the last ii (boundary)/
4735	{
4736	// ii == current_blockcount_x - 1
4737	size_t ii = current_blockcount_x - 1;
4738	for(size_t jj=0; jj<current_blockcount_y; jj++){
4739	for(size_t kk=0; kk<current_blockcount_z; kk++){
4740	curData = *cur_data_pos;
4741	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
4742	diff = curData - pred;
4743	itvNum = fabs(diff)/tmp_realPrecision + 1;
4744	if (itvNum < intvCapacity){
4745	if (diff < 0) itvNum = -itvNum;
4746	type[index] = (int) (itvNum/2) + intvRadius;
4747	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
4748	//ganrantee comporession error against the case of machine-epsilon
4749	if(fabs(curData - pred)>tmp_realPrecision){
4750	type[index] = 0;
4751	pred = curData;
4752	unpredictable_data[block_unpredictable_count ++] = curData;
4753	}
4754	}
4755	else{
4756	type[index] = 0;
4757	pred = curData;
4758	unpredictable_data[block_unpredictable_count ++] = curData;
4759	}
4760
4761	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
4762	// assign value to block surfaces
4763	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
4764	}
4765	// assign value to next prediction buffer
4766	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
4767	index ++;
4768	cur_data_pos ++;
4769	}
4770	cur_data_pos += dim1_offset - current_blockcount_z;
4771	}
4772	}
4773	unpredictable_count = block_unpredictable_count;
4774	strip_unpredictable_count += unpredictable_count;
4775	unpredictable_data += unpredictable_count;
4776
4777	reg_count ++;
4778	}
4779	else{
4780	// use SZ
4781	// SZ predication
4782	unpredictable_count = 0;
4783	double * cur_pb_pos = pb_pos;
4784	double * cur_data_pos = data_pos;
4785	double curData;
4786	double pred3D;
4787	double itvNum, diff;
4788	size_t index = 0;
4789	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4790	for(size_t jj=0; jj<current_blockcount_y; jj++){
4791	for(size_t kk=0; kk<current_blockcount_z; kk++){
4792
4793	curData = *cur_data_pos;
4794	if(fabs(curData - mean) <= realPrecision){
4795	// adjust type[index] to intvRadius for coherence with freq in reg
4796	type[index] = intvRadius;
4797	*cur_pb_pos = mean;
4798	}
4799	else
4800	{
4801	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
4802	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
4803	diff = curData - pred3D;
4804	itvNum = fabs(diff)/realPrecision + 1;
4805	if (itvNum < intvCapacity_sz){
4806	if (diff < 0) itvNum = -itvNum;
4807	type[index] = (int) (itvNum/2) + intvRadius;
4808	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4809	if(type[index] <= intvRadius) type[index] -= 1;
4810	//ganrantee comporession error against the case of machine-epsilon
4811	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4812	type[index] = 0;
4813	*cur_pb_pos = curData;
4814	unpredictable_data[unpredictable_count ++] = curData;
4815	}
4816	}
4817	else{
4818	type[index] = 0;
4819	*cur_pb_pos = curData;
4820	unpredictable_data[unpredictable_count ++] = curData;
4821	}
4822	}
4823	index ++;
4824	cur_pb_pos ++;
4825	cur_data_pos ++;
4826	}
4827	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
4828	cur_data_pos += dim1_offset - current_blockcount_z;
4829	}
4830	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
4831	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
4832	}
4833	/dealing with the last ii (boundary)/
4834	{
4835	// ii == current_blockcount_x - 1
4836	for(size_t jj=0; jj<current_blockcount_y; jj++){
4837	for(size_t kk=0; kk<current_blockcount_z; kk++){
4838
4839	curData = *cur_data_pos;
4840	if(fabs(curData - mean) <= realPrecision){
4841	// adjust type[index] to intvRadius for coherence with freq in reg
4842	type[index] = intvRadius;
4843	*cur_pb_pos = mean;
4844	}
4845	else
4846	{
4847	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
4848	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
4849	diff = curData - pred3D;
4850	itvNum = fabs(diff)/realPrecision + 1;
4851	if (itvNum < intvCapacity_sz){
4852	if (diff < 0) itvNum = -itvNum;
4853	type[index] = (int) (itvNum/2) + intvRadius;
4854	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4855	if(type[index] <= intvRadius) type[index] -= 1;
4856	//ganrantee comporession error against the case of machine-epsilon
4857	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4858	type[index] = 0;
4859	*cur_pb_pos = curData;
4860	unpredictable_data[unpredictable_count ++] = curData;
4861	}
4862	}
4863	else{
4864	type[index] = 0;
4865	*cur_pb_pos = curData;
4866	unpredictable_data[unpredictable_count ++] = curData;
4867	}
4868	}
4869	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
4870	index ++;
4871	cur_pb_pos ++;
4872	cur_data_pos ++;
4873	}
4874	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
4875	cur_data_pos += dim1_offset - current_blockcount_z;
4876	}
4877	}
4878	strip_unpredictable_count += unpredictable_count;
4879	unpredictable_data += unpredictable_count;
4880	// change indicator
4881	indicator_pos[k] = 1;
4882	}// end SZ
4883
4884	reg_params_pos ++;
4885	data_pos += current_blockcount_z;
4886	pb_pos += current_blockcount_z;
4887	next_pb_pos += current_blockcount_z;
4888	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
4889
4890	} // end k
4891
4892	if(strip_unpredictable_count > max_unpred_count){
4893	max_unpred_count = strip_unpredictable_count;
4894	}
4895	total_unpred += strip_unpredictable_count;
4896	indicator_pos += num_z;
4897	}// end j
4898	double * tmp;
4899	tmp = cur_pb_buf;
4900	cur_pb_buf = next_pb_buf;
4901	next_pb_buf = tmp;
4902	}// end i
4903	}
4904	else{
4905	int intvCapacity_sz = intvCapacity - 2;
4906	for(size_t i=0; i<num_x; i++){
4907	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4908	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4909
4910	for(size_t j=0; j<num_y; j++){
4911	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4912	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4913	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
4914	// copy bottom plane from plane buffer
4915	// memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double));
4916	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
4917	type = result_type + type_offset;
4918
4919	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
4920	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
4921	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
4922
4923	size_t current_blockcount_z;
4924	double * pb_pos = cur_pb_buf_pos;
4925	double * next_pb_pos = next_pb_buf_pos;
4926	size_t strip_unpredictable_count = 0;
4927	for(size_t k=0; k<num_z; k++){
4928	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
4929	/sampling/
4930	{
4931	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
4932	double * cur_data_pos;
4933	double curData;
4934	double pred_reg, pred_sz;
4935	double err_sz = 0.0, err_reg = 0.0;
4936	int bmi;
4937	if(i>0 && j>0 && k>0){
4938	for(int i=0; i<block_size; i++){
4939	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4940	curData = *cur_data_pos;
4941	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4942	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4943	err_sz += fabs(pred_sz - curData) + noise;
4944	err_reg += fabs(pred_reg - curData);
4945
4946	bmi = block_size - i;
4947	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4948	curData = *cur_data_pos;
4949	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4950	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4951	err_sz += fabs(pred_sz - curData) + noise;
4952	err_reg += fabs(pred_reg - curData);
4953
4954	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4955	curData = *cur_data_pos;
4956	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4957	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4958	err_sz += fabs(pred_sz - curData) + noise;
4959	err_reg += fabs(pred_reg - curData);
4960
4961	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4962	curData = *cur_data_pos;
4963	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4964	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4965	err_sz += fabs(pred_sz - curData) + noise;
4966	err_reg += fabs(pred_reg - curData);
4967	}
4968	}
4969	else{
4970	for(int i=1; i<block_size; i++){
4971	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4972	curData = *cur_data_pos;
4973	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4974	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4975	err_sz += fabs(pred_sz - curData) + noise;
4976	err_reg += fabs(pred_reg - curData);
4977
4978	bmi = block_size - i;
4979	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4980	curData = *cur_data_pos;
4981	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4982	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4983	err_sz += fabs(pred_sz - curData) + noise;
4984	err_reg += fabs(pred_reg - curData);
4985
4986	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4987	curData = *cur_data_pos;
4988	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4989	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4990	err_sz += fabs(pred_sz - curData) + noise;
4991	err_reg += fabs(pred_reg - curData);
4992
4993	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4994	curData = *cur_data_pos;
4995	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4996	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4997	err_sz += fabs(pred_sz - curData) + noise;
4998	err_reg += fabs(pred_reg - curData);
4999	}
5000	}
5001	use_reg = (err_reg < err_sz);
5002
5003	}
5004	if(use_reg)
5005	{
5006	{
5007	/predict coefficients in current block via previous reg_block/
5008	double cur_coeff;
5009	double diff, itvNum;
5010	for(int e=0; e<4; e++){
5011	cur_coeff = reg_params_pos[e*num_blocks];
5012	diff = cur_coeff - last_coeffcients[e];
5013	itvNum = fabs(diff)/precision[e] + 1;
5014	if (itvNum < coeff_intvCapacity_sz){
5015	if (diff < 0) itvNum = -itvNum;
5016	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
5017	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
5018	//ganrantee comporession error against the case of machine-epsilon
5019	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
5020	coeff_type[e][coeff_index] = 0;
5021	last_coeffcients[e] = cur_coeff;
5022	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5023	}
5024	}
5025	else{
5026	coeff_type[e][coeff_index] = 0;
5027	last_coeffcients[e] = cur_coeff;
5028	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5029	}
5030	}
5031	coeff_index ++;
5032	}
5033	double curData;
5034	double pred;
5035	double itvNum;
5036	double diff;
5037	size_t index = 0;
5038	size_t block_unpredictable_count = 0;
5039	double * cur_data_pos = data_pos;
5040	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5041	for(size_t jj=0; jj<current_blockcount_y; jj++){
5042	for(size_t kk=0; kk<current_blockcount_z; kk++){
5043
5044	curData = *cur_data_pos;
5045	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5046	diff = curData - pred;
5047	itvNum = fabs(diff)/tmp_realPrecision + 1;
5048	if (itvNum < intvCapacity){
5049	if (diff < 0) itvNum = -itvNum;
5050	type[index] = (int) (itvNum/2) + intvRadius;
5051	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5052	//ganrantee comporession error against the case of machine-epsilon
5053	if(fabs(curData - pred)>tmp_realPrecision){
5054	type[index] = 0;
5055	pred = curData;
5056	unpredictable_data[block_unpredictable_count ++] = curData;
5057	}
5058	}
5059	else{
5060	type[index] = 0;
5061	pred = curData;
5062	unpredictable_data[block_unpredictable_count ++] = curData;
5063	}
5064
5065	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5066	// assign value to block surfaces
5067	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5068	}
5069	index ++;
5070	cur_data_pos ++;
5071	}
5072	cur_data_pos += dim1_offset - current_blockcount_z;
5073	}
5074	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5075	}
5076	/dealing with the last ii (boundary)/
5077	{
5078	// ii == current_blockcount_x - 1
5079	size_t ii = current_blockcount_x - 1;
5080	for(size_t jj=0; jj<current_blockcount_y; jj++){
5081	for(size_t kk=0; kk<current_blockcount_z; kk++){
5082	curData = *cur_data_pos;
5083	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5084	diff = curData - pred;
5085	itvNum = fabs(diff)/tmp_realPrecision + 1;
5086	if (itvNum < intvCapacity){
5087	if (diff < 0) itvNum = -itvNum;
5088	type[index] = (int) (itvNum/2) + intvRadius;
5089	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5090	//ganrantee comporession error against the case of machine-epsilon
5091	if(fabs(curData - pred)>tmp_realPrecision){
5092	type[index] = 0;
5093	pred = curData;
5094	unpredictable_data[block_unpredictable_count ++] = curData;
5095	}
5096	}
5097	else{
5098	type[index] = 0;
5099	pred = curData;
5100	unpredictable_data[block_unpredictable_count ++] = curData;
5101	}
5102
5103	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5104	// assign value to block surfaces
5105	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5106	}
5107	// assign value to next prediction buffer
5108	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
5109	index ++;
5110	cur_data_pos ++;
5111	}
5112	cur_data_pos += dim1_offset - current_blockcount_z;
5113	}
5114	}
5115	unpredictable_count = block_unpredictable_count;
5116	strip_unpredictable_count += unpredictable_count;
5117	unpredictable_data += unpredictable_count;
5118	reg_count ++;
5119	}
5120	else{
5121	// use SZ
5122	// SZ predication
5123	unpredictable_count = 0;
5124	double * cur_pb_pos = pb_pos;
5125	double * cur_data_pos = data_pos;
5126	double curData;
5127	double pred3D;
5128	double itvNum, diff;
5129	size_t index = 0;
5130	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5131	for(size_t jj=0; jj<current_blockcount_y; jj++){
5132	for(size_t kk=0; kk<current_blockcount_z; kk++){
5133
5134	curData = *cur_data_pos;
5135	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5136	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5137	diff = curData - pred3D;
5138	itvNum = fabs(diff)/realPrecision + 1;
5139	if (itvNum < intvCapacity_sz){
5140	if (diff < 0) itvNum = -itvNum;
5141	type[index] = (int) (itvNum/2) + intvRadius;
5142	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5143	//ganrantee comporession error against the case of machine-epsilon
5144	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5145	type[index] = 0;
5146	*cur_pb_pos = curData;
5147	unpredictable_data[unpredictable_count ++] = curData;
5148	}
5149	}
5150	else{
5151	type[index] = 0;
5152	*cur_pb_pos = curData;
5153	unpredictable_data[unpredictable_count ++] = curData;
5154	}
5155	index ++;
5156	cur_pb_pos ++;
5157	cur_data_pos ++;
5158	}
5159	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5160	cur_data_pos += dim1_offset - current_blockcount_z;
5161	}
5162	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
5163	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5164	}
5165	/dealing with the last ii (boundary)/
5166	{
5167	// ii == current_blockcount_x - 1
5168	for(size_t jj=0; jj<current_blockcount_y; jj++){
5169	for(size_t kk=0; kk<current_blockcount_z; kk++){
5170
5171	curData = *cur_data_pos;
5172	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5173	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5174	diff = curData - pred3D;
5175	itvNum = fabs(diff)/realPrecision + 1;
5176	if (itvNum < intvCapacity_sz){
5177	if (diff < 0) itvNum = -itvNum;
5178	type[index] = (int) (itvNum/2) + intvRadius;
5179	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5180	//ganrantee comporession error against the case of machine-epsilon
5181	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5182	type[index] = 0;
5183	*cur_pb_pos = curData;
5184	unpredictable_data[unpredictable_count ++] = curData;
5185	}
5186	}
5187	else{
5188	type[index] = 0;
5189	*cur_pb_pos = curData;
5190	unpredictable_data[unpredictable_count ++] = curData;
5191	}
5192	// assign value to next prediction buffer
5193	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
5194	index ++;
5195	cur_pb_pos ++;
5196	cur_data_pos ++;
5197	}
5198	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5199	cur_data_pos += dim1_offset - current_blockcount_z;
5200	}
5201	}
5202	strip_unpredictable_count += unpredictable_count;
5203	unpredictable_data += unpredictable_count;
5204	// change indicator
5205	indicator_pos[k] = 1;
5206	}// end SZ
5207
5208	reg_params_pos ++;
5209	data_pos += current_blockcount_z;
5210	pb_pos += current_blockcount_z;
5211	next_pb_pos += current_blockcount_z;
5212	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
5213
5214	}
5215
5216	if(strip_unpredictable_count > max_unpred_count){
5217	max_unpred_count = strip_unpredictable_count;
5218	}
5219	total_unpred += strip_unpredictable_count;
5220	indicator_pos += num_z;
5221	}
5222	double * tmp;
5223	tmp = cur_pb_buf;
5224	cur_pb_buf = next_pb_buf;
5225	next_pb_buf = tmp;
5226	}
5227	}
5228
5229	free(prediction_buffer_1);
5230	free(prediction_buffer_2);
5231
5232	int stateNum = 2*quantization_intervals;
5233	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
5234
5235	size_t nodeCount = 0;
5236	init(huffmanTree, result_type, num_elements);
5237	size_t i = 0;
5238	for (i = 0; i < huffmanTree->stateNum; i++)
5239	if (huffmanTree->code[i]) nodeCount++;
5240	nodeCount = nodeCount*2-1;
5241
5242	unsigned char *treeBytes;
5243	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
5244
5245	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
5246	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
5247	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
5248	unsigned char * result_pos = result;
5249	initRandomAccessBytes(result_pos);
5250
5251	result_pos += meta_data_offset;
5252
5253	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
5254	result_pos += exe_params->SZ_SIZE_TYPE;
5255
5256	intToBytes_bigEndian(result_pos, block_size);
5257	result_pos += sizeof(int);
5258	doubleToBytes(result_pos, realPrecision);
5259	result_pos += sizeof(double);
5260	intToBytes_bigEndian(result_pos, quantization_intervals);
5261	result_pos += sizeof(int);
5262	intToBytes_bigEndian(result_pos, treeByteSize);
5263	result_pos += sizeof(int);
5264	intToBytes_bigEndian(result_pos, nodeCount);
5265	result_pos += sizeof(int);
5266	memcpy(result_pos, treeBytes, treeByteSize);
5267	result_pos += treeByteSize;
5268	free(treeBytes);
5269
5270	memcpy(result_pos, &use_mean, sizeof(unsigned char));
5271	result_pos += sizeof(unsigned char);
5272	memcpy(result_pos, &mean, sizeof(double));
5273	result_pos += sizeof(double);
5274	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
5275	result_pos += indicator_size;
5276
5277	//convert the lead/mid/resi to byte stream
5278	if(reg_count > 0){
5279	for(int e=0; e<4; e++){
5280	int stateNum = 2*coeff_intvCapacity_sz;
5281	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
5282	size_t nodeCount = 0;
5283	init(huffmanTree, coeff_type[e], reg_count);
5284	size_t i = 0;
5285	for (i = 0; i < huffmanTree->stateNum; i++)
5286	if (huffmanTree->code[i]) nodeCount++;
5287	nodeCount = nodeCount*2-1;
5288	unsigned char *treeBytes;
5289	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
5290	doubleToBytes(result_pos, precision[e]);
5291	result_pos += sizeof(double);
5292	intToBytes_bigEndian(result_pos, coeff_intvRadius);
5293	result_pos += sizeof(int);
5294	intToBytes_bigEndian(result_pos, treeByteSize);
5295	result_pos += sizeof(int);
5296	intToBytes_bigEndian(result_pos, nodeCount);
5297	result_pos += sizeof(int);
5298	memcpy(result_pos, treeBytes, treeByteSize);
5299	result_pos += treeByteSize;
5300	free(treeBytes);
5301	size_t typeArray_size = 0;
5302	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
5303	sizeToBytes(result_pos, typeArray_size);
5304	result_pos += sizeof(size_t) + typeArray_size;
5305	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
5306	result_pos += sizeof(int);
5307	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
5308	result_pos += coeff_unpredictable_count[e]*sizeof(double);
5309	SZ_ReleaseHuffman(huffmanTree);
5310	}
5311	}
5312	free(coeff_result_type);
5313	free(coeff_unpredictable_data);
5314
5315	//record the number of unpredictable data and also store them
5316	memcpy(result_pos, &total_unpred, sizeof(size_t));
5317	result_pos += sizeof(size_t);
5318	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
5319	result_pos += total_unpred * sizeof(double);
5320	size_t typeArray_size = 0;
5321	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
5322	result_pos += typeArray_size;
5323	size_t totalEncodeSize = result_pos - result;
5324	free(indicator);
5325	free(result_unpredictable_data);
5326	free(result_type);
5327	free(reg_params);
5328
5329
5330	SZ_ReleaseHuffman(huffmanTree);
5331	*comp_size = totalEncodeSize;
5332	return result;
5333	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: