Context Navigation

source: thirdparty/SZ/sz/src/sz_double.c @ 9ee2ce3

Revision 9ee2ce3, 186.1 KB checked in by Hal Finkel <hfinkel@…>, 6 years ago (diff)
importing new SZ files
Property mode set to `100644`

Line
1	/**
2	* @file sz_double.c
3	* @author Sheng Di and Dingwen Tao
4	* @date Aug, 2016
5	* @brief SZ_Init, Compression and Decompression functions
6	* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
7	* See COPYRIGHT in top-level directory.
8	*/
9
10
11	#include <stdio.h>
12	#include <stdlib.h>
13	#include <string.h>
14	#include <unistd.h>
15	#include <math.h>
16	#include "sz.h"
17	#include "CompressElement.h"
18	#include "DynamicByteArray.h"
19	#include "DynamicIntArray.h"
20	#include "TightDataPointStorageD.h"
21	#include "sz_double.h"
22	#include "sz_double_pwr.h"
23	#include "szd_double.h"
24	#include "szd_double_pwr.h"
25	#include "zlib.h"
26	#include "rw.h"
27	#include "sz_double_ts.h"
28	#include "utility.h"
29
30	unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize)
31	{
32	outSize = dataLengthsizeof(double);
33	unsigned char* out = (unsigned char)malloc(dataLengthsizeof(double));
34	memcpy(out, data, dataLength*sizeof(double));
35	return out;
36	}
37
38	void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue)
39	{
40	short reqExpo = getPrecisionReqLength_double(realPrecision);
41	*reqLength = 12+radExpo - reqExpo; //radExpo-reqExpo == reqMantiLength
42	if(*reqLength<12)
43	*reqLength = 12;
44	if(*reqLength>64)
45	{
46	*reqLength = 64;
47	*medianValue = 0;
48	}
49	}
50
51	unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision)
52	{
53	size_t i = 0, radiusIndex;
54	double pred_value = 0, pred_err;
55	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
56	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
57	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
58	for(i=2;i<dataLength;i++)
59	{
60	if(i%confparams_cpr->sampleDistance==0)
61	{
62	//pred_value = 2*oriData[i-1] - oriData[i-2];
63	pred_value = oriData[i-1];
64	pred_err = fabs(pred_value - oriData[i]);
65	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
66	if(radiusIndex>=confparams_cpr->maxRangeRadius)
67	radiusIndex = confparams_cpr->maxRangeRadius - 1;
68	intervals[radiusIndex]++;
69	}
70	}
71	//compute the appropriate number
72	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
73	size_t sum = 0;
74	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
75	{
76	sum += intervals[i];
77	if(sum>targetCount)
78	break;
79	}
80
81	if(i>=confparams_cpr->maxRangeRadius)
82	i = confparams_cpr->maxRangeRadius-1;
83	unsigned int accIntervals = 2*(i+1);
84	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
85
86	if(powerOf2<32)
87	powerOf2 = 32;
88
89	free(intervals);
90	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
91	return powerOf2;
92	}
93
94	unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision)
95	{
96	size_t i,j, index;
97	size_t radiusIndex;
98	double pred_value = 0, pred_err;
99	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
100	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
101	size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance;
102	for(i=1;i<r1;i++)
103	{
104	for(j=1;j<r2;j++)
105	{
106	if((i+j)%confparams_cpr->sampleDistance==0)
107	{
108	index = i*r2+j;
109	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
110	pred_err = fabs(pred_value - oriData[index]);
111	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
112	if(radiusIndex>=confparams_cpr->maxRangeRadius)
113	radiusIndex = confparams_cpr->maxRangeRadius - 1;
114	intervals[radiusIndex]++;
115	}
116	}
117	}
118	//compute the appropriate number
119	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
120	size_t sum = 0;
121	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
122	{
123	sum += intervals[i];
124	if(sum>targetCount)
125	break;
126	}
127	if(i>=confparams_cpr->maxRangeRadius)
128	i = confparams_cpr->maxRangeRadius-1;
129	unsigned int accIntervals = 2*(i+1);
130	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
131	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
132
133	if(powerOf2<32)
134	powerOf2 = 32;
135
136	free(intervals);
137	return powerOf2;
138	}
139
140	unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
141	{
142	size_t i,j,k, index;
143	size_t radiusIndex;
144	size_t r23=r2*r3;
145	double pred_value = 0, pred_err;
146	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
147	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
148	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)/confparams_cpr->sampleDistance;
149	for(i=1;i<r1;i++)
150	{
151	for(j=1;j<r2;j++)
152	{
153	for(k=1;k<r3;k++)
154	{
155	if((i+j+k)%confparams_cpr->sampleDistance==0)
156	{
157	index = ir23+jr3+k;
158	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
159	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
160	pred_err = fabs(pred_value - oriData[index]);
161	radiusIndex = (pred_err/realPrecision+1)/2;
162	if(radiusIndex>=confparams_cpr->maxRangeRadius)
163	radiusIndex = confparams_cpr->maxRangeRadius - 1;
164	intervals[radiusIndex]++;
165	}
166	}
167
168	}
169	}
170	//compute the appropriate number
171	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
172	size_t sum = 0;
173	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
174	{
175	sum += intervals[i];
176	if(sum>targetCount)
177	break;
178	}
179	if(i>=confparams_cpr->maxRangeRadius)
180	i = confparams_cpr->maxRangeRadius-1;
181
182	unsigned int accIntervals = 2*(i+1);
183	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
184
185	if(powerOf2<32)
186	powerOf2 = 32;
187
188	free(intervals);
189	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
190	return powerOf2;
191	}
192
193	unsigned int optimize_intervals_double_4D(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision)
194	{
195	size_t i,j,k,l, index;
196	size_t radiusIndex;
197	size_t r234=r2r3r4;
198	size_t r34=r3*r4;
199	double pred_value = 0, pred_err;
200	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
201	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
202	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)*(r4-1)/confparams_cpr->sampleDistance;
203	for(i=1;i<r1;i++)
204	{
205	for(j=1;j<r2;j++)
206	{
207	for(k=1;k<r3;k++)
208	{
209	for (l=1;l<r4;l++)
210	{
211	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
212	{
213	index = ir234+jr34+k*r4+l;
214	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34]
215	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
216	pred_err = fabs(pred_value - oriData[index]);
217	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
218	if(radiusIndex>=confparams_cpr->maxRangeRadius)
219	radiusIndex = confparams_cpr->maxRangeRadius - 1;
220	intervals[radiusIndex]++;
221	}
222	}
223	}
224	}
225	}
226	//compute the appropriate number
227	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
228	size_t sum = 0;
229	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
230	{
231	sum += intervals[i];
232	if(sum>targetCount)
233	break;
234	}
235	if(i>=confparams_cpr->maxRangeRadius)
236	i = confparams_cpr->maxRangeRadius-1;
237
238	unsigned int accIntervals = 2*(i+1);
239	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
240
241	if(powerOf2<32)
242	powerOf2 = 32;
243
244	free(intervals);
245	return powerOf2;
246	}
247
248	TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData,
249	size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d)
250	{
251	#ifdef HAVE_TIMECMPR
252	double* decData = NULL;
253	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
254	decData = (double*)(multisteps->hist_data);
255	#endif
256
257	unsigned int quantization_intervals;
258	if(exe_params->optQuantMode==1)
259	quantization_intervals = optimize_intervals_double_1D_opt(oriData, dataLength, realPrecision);
260	else
261	quantization_intervals = exe_params->intvCapacity;
262	updateQuantizationInfo(quantization_intervals);
263
264	size_t i;
265	int reqLength;
266	double medianValue = medianValue_d;
267	short radExpo = getExponent_double(valueRangeSize/2);
268
269	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
270
271	int* type = (int) malloc(dataLengthsizeof(int));
272
273	double* spaceFillingValue = oriData; //
274
275	DynamicIntArray *exactLeadNumArray;
276	new_DIA(&exactLeadNumArray, DynArrayInitLen);
277
278	DynamicByteArray *exactMidByteArray;
279	new_DBA(&exactMidByteArray, DynArrayInitLen);
280
281	DynamicIntArray *resiBitArray;
282	new_DIA(&resiBitArray, DynArrayInitLen);
283
284	unsigned char preDataBytes[8];
285	longToBytes_bigEndian(preDataBytes, 0);
286
287	int reqBytesLength = reqLength/8;
288	int resiBitsLength = reqLength%8;
289	double last3CmprsData[3] = {0};
290
291	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
292	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
293
294	//add the first data
295	type[0] = 0;
296	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
297	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
298	memcpy(preDataBytes,vce->curBytes,8);
299	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
300	listAdd_double(last3CmprsData, vce->data);
301	#ifdef HAVE_TIMECMPR
302	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
303	decData[0] = vce->data;
304	#endif
305
306	//add the second data
307	type[1] = 0;
308	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
309	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
310	memcpy(preDataBytes,vce->curBytes,8);
311	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
312	listAdd_double(last3CmprsData, vce->data);
313	#ifdef HAVE_TIMECMPR
314	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
315	decData[1] = vce->data;
316	#endif
317	int state;
318	double checkRadius;
319	double curData;
320	double pred;
321	double predAbsErr;
322	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
323	double interval = 2*realPrecision;
324
325	for(i=2;i<dataLength;i++)
326	{
327	//printf("%.30G\n",last3CmprsData[0]);
328	curData = spaceFillingValue[i];
329	//pred = 2*last3CmprsData[0] - last3CmprsData[1];
330	pred = last3CmprsData[0];
331	predAbsErr = fabs(curData - pred);
332	if(predAbsErr<checkRadius)
333	{
334	state = (predAbsErr/realPrecision+1)/2;
335	if(curData>=pred)
336	{
337	type[i] = exe_params->intvRadius+state;
338	pred = pred + state*interval;
339	}
340	else //curData<pred
341	{
342	type[i] = exe_params->intvRadius-state;
343	pred = pred - state*interval;
344	}
345	listAdd_double(last3CmprsData, pred);
346	#ifdef HAVE_TIMECMPR
347	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
348	decData[i] = pred;
349	#endif
350	continue;
351	}
352
353	//unpredictable data processing
354	type[i] = 0;
355	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
356	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
357	memcpy(preDataBytes,vce->curBytes,8);
358	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
359
360	listAdd_double(last3CmprsData, vce->data);
361	#ifdef HAVE_TIMECMPR
362	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
363	decData[i] = vce->data;
364	#endif
365
366	}//end of for
367
368	int exactDataNum = exactLeadNumArray->size;
369
370	TightDataPointStorageD* tdps;
371
372	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
373	type, exactMidByteArray->array, exactMidByteArray->size,
374	exactLeadNumArray->array,
375	resiBitArray->array, resiBitArray->size,
376	resiBitsLength,
377	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
378
379	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
380	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
381
382	//free memory
383	free_DIA(exactLeadNumArray);
384	free_DIA(resiBitArray);
385	free(type);
386	free(vce);
387	free(lce);
388	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
389
390	return tdps;
391	}
392
393	void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, TightDataPointStorageD* tdps,
394	unsigned char** newByteData, size_t *outSize)
395	{
396	int doubleSize = sizeof(double);
397	size_t k = 0, i;
398	tdps->isLossless = 1;
399	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength;
400	newByteData = (unsigned char)malloc(totalByteLength);
401
402	unsigned char dsLengthBytes[8];
403	for (i = 0; i < 3; i++)//3
404	(*newByteData)[k++] = versionNumber[i];
405
406	if(exe_params->SZ_SIZE_TYPE==4)//1
407	(*newByteData)[k++] = 16; //00010000
408	else
409	(*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8
410
411	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
412	k = k + MetaDataByteLength;
413
414	sizeToBytes(dsLengthBytes,dataLength);
415	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//ST: 4 or 8
416	(*newByteData)[k++] = dsLengthBytes[i];
417
418	if(sysEndianType==BIG_ENDIAN_SYSTEM)
419	memcpy((newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLengthdoubleSize);
420	else
421	{
422	unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
423	for(i=0;i<dataLength;i++,p+=doubleSize)
424	doubleToBytes(p, oriData[i]);
425	}
426	*outSize = totalByteLength;
427	}
428
429
430	char SZ_compress_args_double_NoCkRngeNoGzip_1D(unsigned char** newByteData, double *oriData,
431	size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d)
432	{
433	char compressionType = 0;
434	TightDataPointStorageD* tdps = NULL;
435	#ifdef HAVE_TIMECMPR
436	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
437	{
438	int timestep = sz_tsc->currentStep;
439	if(timestep % confparams_cpr->snapshotCmprStep != 0)
440	{
441	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
442	compressionType = 1; //time-series based compression
443	}
444	else
445	{
446	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
447	compressionType = 0; //snapshot-based compression
448	multisteps->lastSnapshotStep = timestep;
449	}
450	}
451	else
452	#endif
453	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
454
455	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
456
457	if(outSize>dataLengthsizeof(double))
458	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
459
460	free_TightDataPointStorageD(tdps);
461	return compressionType;
462	}
463
464	TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_d)
465	{
466	#ifdef HAVE_TIMECMPR
467	double* decData = NULL;
468	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
469	decData = (double*)(multisteps->hist_data);
470	#endif
471
472	unsigned int quantization_intervals;
473	if(exe_params->optQuantMode==1)
474	{
475	quantization_intervals = optimize_intervals_double_2D_opt(oriData, r1, r2, realPrecision);
476	updateQuantizationInfo(quantization_intervals);
477	}
478	else
479	quantization_intervals = exe_params->intvCapacity;
480	size_t i,j;
481	int reqLength;
482	double pred1D, pred2D;
483	double diff = 0.0;
484	double itvNum = 0;
485	double P0, P1;
486
487	size_t dataLength = r1*r2;
488
489	P0 = (double)malloc(r2sizeof(double));
490	memset(P0, 0, r2*sizeof(double));
491	P1 = (double)malloc(r2sizeof(double));
492	memset(P1, 0, r2*sizeof(double));
493
494	double medianValue = medianValue_d;
495	short radExpo = getExponent_double(valueRangeSize/2);
496	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
497
498	int* type = (int) malloc(dataLengthsizeof(int));
499	//type[dataLength]=0;
500
501	double* spaceFillingValue = oriData; //
502
503	DynamicIntArray *exactLeadNumArray;
504	new_DIA(&exactLeadNumArray, DynArrayInitLen);
505
506	DynamicByteArray *exactMidByteArray;
507	new_DBA(&exactMidByteArray, DynArrayInitLen);
508
509	DynamicIntArray *resiBitArray;
510	new_DIA(&resiBitArray, DynArrayInitLen);
511
512	type[0] = 0;
513
514	unsigned char preDataBytes[8];
515	longToBytes_bigEndian(preDataBytes, 0);
516
517	int reqBytesLength = reqLength/8;
518	int resiBitsLength = reqLength%8;
519
520	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
521	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
522
523	/* Process Row-0 data 0*/
524	type[0] = 0;
525	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
526	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
527	memcpy(preDataBytes,vce->curBytes,8);
528	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
529	P1[0] = vce->data;
530	#ifdef HAVE_TIMECMPR
531	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
532	decData[0] = vce->data;
533	#endif
534
535	/* Process Row-0 data 1*/
536	pred1D = P1[0];
537	diff = spaceFillingValue[1] - pred1D;
538
539	itvNum = fabs(diff)/realPrecision + 1;
540
541	if (itvNum < exe_params->intvCapacity)
542	{
543	if (diff < 0) itvNum = -itvNum;
544	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
545	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
546	}
547	else
548	{
549	type[1] = 0;
550	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
551	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
552	memcpy(preDataBytes,vce->curBytes,8);
553	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
554	P1[1] = vce->data;
555	}
556	#ifdef HAVE_TIMECMPR
557	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
558	decData[1] = P1[1];
559	#endif
560
561	/* Process Row-0 data 2 --> data r2-1 */
562	for (j = 2; j < r2; j++)
563	{
564	pred1D = 2*P1[j-1] - P1[j-2];
565	diff = spaceFillingValue[j] - pred1D;
566
567	itvNum = fabs(diff)/realPrecision + 1;
568
569	if (itvNum < exe_params->intvCapacity)
570	{
571	if (diff < 0) itvNum = -itvNum;
572	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
573	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
574	}
575	else
576	{
577	type[j] = 0;
578	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
579	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
580	memcpy(preDataBytes,vce->curBytes,8);
581	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
582	P1[j] = vce->data;
583	}
584	#ifdef HAVE_TIMECMPR
585	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
586	decData[j] = P1[j];
587	#endif
588	}
589
590	/* Process Row-1 --> Row-r1-1 */
591	size_t index;
592	for (i = 1; i < r1; i++)
593	{
594	/* Process row-i data 0 */
595	index = i*r2;
596	pred1D = P1[0];
597	diff = spaceFillingValue[index] - pred1D;
598
599	itvNum = fabs(diff)/realPrecision + 1;
600
601	if (itvNum < exe_params->intvCapacity)
602	{
603	if (diff < 0) itvNum = -itvNum;
604	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
605	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
606	}
607	else
608	{
609	type[index] = 0;
610	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
611	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
612	memcpy(preDataBytes,vce->curBytes,8);
613	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
614	P0[0] = vce->data;
615	}
616	#ifdef HAVE_TIMECMPR
617	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
618	decData[index] = P0[0];
619	#endif
620
621	/* Process row-i data 1 --> r2-1*/
622	for (j = 1; j < r2; j++)
623	{
624	index = i*r2+j;
625	pred2D = P0[j-1] + P1[j] - P1[j-1];
626
627	diff = spaceFillingValue[index] - pred2D;
628
629	itvNum = fabs(diff)/realPrecision + 1;
630
631	if (itvNum < exe_params->intvCapacity)
632	{
633	if (diff < 0) itvNum = -itvNum;
634	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
635	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
636	}
637	else
638	{
639	type[index] = 0;
640	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
641	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
642	memcpy(preDataBytes,vce->curBytes,8);
643	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
644	P0[j] = vce->data;
645	}
646	#ifdef HAVE_TIMECMPR
647	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
648	decData[index] = P0[j];
649	#endif
650	}
651
652	double *Pt;
653	Pt = P1;
654	P1 = P0;
655	P0 = Pt;
656	}
657
658	if(r2!=1)
659	free(P0);
660	free(P1);
661	size_t exactDataNum = exactLeadNumArray->size;
662
663	TightDataPointStorageD* tdps;
664
665	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
666	type, exactMidByteArray->array, exactMidByteArray->size,
667	exactLeadNumArray->array,
668	resiBitArray->array, resiBitArray->size,
669	resiBitsLength,
670	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
671
672	/* int sum =0;
673	for(i=0;i<dataLength;i++)
674	if(type[i]==0) sum++;
675	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);
676
677	for(i=0;i<dataLength;i++)
678	printf("%d ", type[i]);
679	printf("\n");*/
680
681	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
682	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
683
684	// for(i = 3800;i<3844;i++)
685	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
686
687	//free memory
688	free_DIA(exactLeadNumArray);
689	free_DIA(resiBitArray);
690	free(type);
691	free(vce);
692	free(lce);
693	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
694
695	return tdps;
696	}
697
698	/**
699	*
700	* Note: @r1 is high dimension
701	* @r2 is low dimension
702	* */
703	char SZ_compress_args_double_NoCkRngeNoGzip_2D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
704	{
705	size_t dataLength = r1*r2;
706	char compressionType = 0;
707	TightDataPointStorageD* tdps = NULL;
708	#ifdef HAVE_TIMECMPR
709	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
710	{
711	int timestep = sz_tsc->currentStep;
712	if(timestep % confparams_cpr->snapshotCmprStep != 0)
713	{
714	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
715	compressionType = 1; //time-series based compression
716	}
717	else
718	{
719	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
720	compressionType = 0; //snapshot-based compression
721	multisteps->lastSnapshotStep = timestep;
722	}
723	}
724	else
725	#endif
726	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
727
728	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
729
730	if(outSize>dataLengthsizeof(double))
731	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
732
733	free_TightDataPointStorageD(tdps);
734	return compressionType;
735	}
736
737	TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_d)
738	{
739	#ifdef HAVE_TIMECMPR
740	double* decData = NULL;
741	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
742	decData = (double*)(multisteps->hist_data);
743	#endif
744
745	unsigned int quantization_intervals;
746	if(exe_params->optQuantMode==1)
747	{
748	quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision);
749	updateQuantizationInfo(quantization_intervals);
750	}
751	else
752	quantization_intervals = exe_params->intvCapacity;
753	size_t i,j,k;
754	int reqLength;
755	double pred1D, pred2D, pred3D;
756	double diff = 0.0;
757	double itvNum = 0;
758	double P0, P1;
759
760	size_t dataLength = r1r2r3;
761
762	size_t r23 = r2*r3;
763
764	P0 = (double)malloc(r23sizeof(double));
765	P1 = (double)malloc(r23sizeof(double));
766
767	double medianValue = medianValue_d;
768	short radExpo = getExponent_double(valueRangeSize/2);
769	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
770
771	int* type = (int) malloc(dataLengthsizeof(int));
772	//type[dataLength]=0;
773
774	double* spaceFillingValue = oriData; //
775
776	DynamicIntArray *exactLeadNumArray;
777	new_DIA(&exactLeadNumArray, DynArrayInitLen);
778
779	DynamicByteArray *exactMidByteArray;
780	new_DBA(&exactMidByteArray, DynArrayInitLen);
781
782	DynamicIntArray *resiBitArray;
783	new_DIA(&resiBitArray, DynArrayInitLen);
784
785	type[0] = 0;
786
787	unsigned char preDataBytes[8];
788	longToBytes_bigEndian(preDataBytes, 0);
789
790	int reqBytesLength = reqLength/8;
791	int resiBitsLength = reqLength%8;
792
793	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
794	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
795
796
797	/////////////////////////// Process layer-0 ///////////////////////////
798	/* Process Row-0 data 0*/
799	type[0] = 0;
800	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
801	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
802	memcpy(preDataBytes,vce->curBytes,8);
803	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
804	P1[0] = vce->data;
805	#ifdef HAVE_TIMECMPR
806	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
807	decData[0] = P1[0];
808	#endif
809
810	/* Process Row-0 data 1*/
811	pred1D = P1[0];
812	diff = spaceFillingValue[1] - pred1D;
813
814	itvNum = fabs(diff)/realPrecision + 1;
815
816	if (itvNum < exe_params->intvCapacity)
817	{
818	if (diff < 0) itvNum = -itvNum;
819	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
820	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
821	}
822	else
823	{
824	type[1] = 0;
825	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
826	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
827	memcpy(preDataBytes,vce->curBytes,8);
828	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
829	P1[1] = vce->data;
830	}
831	#ifdef HAVE_TIMECMPR
832	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
833	decData[1] = P1[1];
834	#endif
835
836	/* Process Row-0 data 2 --> data r3-1 */
837	for (j = 2; j < r3; j++)
838	{
839	pred1D = 2*P1[j-1] - P1[j-2];
840	diff = spaceFillingValue[j] - pred1D;
841
842	itvNum = fabs(diff)/realPrecision + 1;
843
844	if (itvNum < exe_params->intvCapacity)
845	{
846	if (diff < 0) itvNum = -itvNum;
847	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
848	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
849	}
850	else
851	{
852	type[j] = 0;
853	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
854	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
855	memcpy(preDataBytes,vce->curBytes,8);
856	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
857	P1[j] = vce->data;
858	}
859	#ifdef HAVE_TIMECMPR
860	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
861	decData[j] = P1[j];
862	#endif
863	}
864
865	/* Process Row-1 --> Row-r2-1 */
866	size_t index;
867	for (i = 1; i < r2; i++)
868	{
869	/* Process row-i data 0 */
870	index = i*r3;
871	pred1D = P1[index-r3];
872	diff = spaceFillingValue[index] - pred1D;
873
874	itvNum = fabs(diff)/realPrecision + 1;
875
876	if (itvNum < exe_params->intvCapacity)
877	{
878	if (diff < 0) itvNum = -itvNum;
879	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
880	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
881	}
882	else
883	{
884	type[index] = 0;
885	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
886	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
887	memcpy(preDataBytes,vce->curBytes,8);
888	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
889	P1[index] = vce->data;
890	}
891	#ifdef HAVE_TIMECMPR
892	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
893	decData[index] = P1[index];
894	#endif
895
896	/* Process row-i data 1 --> data r3-1*/
897	for (j = 1; j < r3; j++)
898	{
899	index = i*r3+j;
900	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
901
902	diff = spaceFillingValue[index] - pred2D;
903
904	itvNum = fabs(diff)/realPrecision + 1;
905
906	if (itvNum < exe_params->intvCapacity)
907	{
908	if (diff < 0) itvNum = -itvNum;
909	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
910	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
911	}
912	else
913	{
914	type[index] = 0;
915	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
916	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
917	memcpy(preDataBytes,vce->curBytes,8);
918	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
919	P1[index] = vce->data;
920	}
921	#ifdef HAVE_TIMECMPR
922	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
923	decData[index] = P1[index];
924	#endif
925	}
926	}
927
928
929	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
930
931	for (k = 1; k < r1; k++)
932	{
933	/* Process Row-0 data 0*/
934	index = k*r23;
935	pred1D = P1[0];
936	diff = spaceFillingValue[index] - pred1D;
937
938	itvNum = fabs(diff)/realPrecision + 1;
939
940	if (itvNum < exe_params->intvCapacity)
941	{
942	if (diff < 0) itvNum = -itvNum;
943	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
944	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
945	}
946	else
947	{
948	type[index] = 0;
949	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
950	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
951	memcpy(preDataBytes,vce->curBytes,8);
952	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
953	P0[0] = vce->data;
954	}
955	#ifdef HAVE_TIMECMPR
956	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
957	decData[index] = P0[0];
958	#endif
959
960	/* Process Row-0 data 1 --> data r3-1 */
961	for (j = 1; j < r3; j++)
962	{
963	//index = kr2r3+j;
964	index ++;
965	pred2D = P0[j-1] + P1[j] - P1[j-1];
966	diff = spaceFillingValue[index] - pred2D;
967
968	itvNum = fabs(diff)/realPrecision + 1;
969
970	if (itvNum < exe_params->intvCapacity)
971	{
972	if (diff < 0) itvNum = -itvNum;
973	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
974	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
975	}
976	else
977	{
978	type[index] = 0;
979	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
980	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
981	memcpy(preDataBytes,vce->curBytes,8);
982	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
983	P0[j] = vce->data;
984	}
985	#ifdef HAVE_TIMECMPR
986	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
987	decData[index] = P0[j];
988	#endif
989	}
990
991	/* Process Row-1 --> Row-r2-1 */
992	size_t index2D;
993	for (i = 1; i < r2; i++)
994	{
995	/* Process Row-i data 0 */
996	index = kr23 + ir3;
997	index2D = i*r3;
998	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
999	diff = spaceFillingValue[index] - pred2D;
1000
1001	itvNum = fabs(diff)/realPrecision + 1;
1002
1003	if (itvNum < exe_params->intvCapacity)
1004	{
1005	if (diff < 0) itvNum = -itvNum;
1006	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1007	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1008	}
1009	else
1010	{
1011	type[index] = 0;
1012	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1013	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1014	memcpy(preDataBytes,vce->curBytes,8);
1015	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1016	P0[index2D] = vce->data;
1017	}
1018	#ifdef HAVE_TIMECMPR
1019	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1020	decData[index] = P0[index2D];
1021	#endif
1022
1023	/* Process Row-i data 1 --> data r3-1 */
1024	for (j = 1; j < r3; j++)
1025	{
1026	//index = kr2r3 + i*r3 + j;
1027	index ++;
1028	index2D = i*r3 + j;
1029	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
1030	diff = spaceFillingValue[index] - pred3D;
1031
1032	itvNum = fabs(diff)/realPrecision + 1;
1033
1034	if (itvNum < exe_params->intvCapacity)
1035	{
1036	if (diff < 0) itvNum = -itvNum;
1037	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1038	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1039	}
1040	else
1041	{
1042	type[index] = 0;
1043	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1044	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1045	memcpy(preDataBytes,vce->curBytes,8);
1046	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1047	P0[index2D] = vce->data;
1048	}
1049	#ifdef HAVE_TIMECMPR
1050	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1051	decData[index] = P0[index2D];
1052	#endif
1053	}
1054	}
1055
1056	double *Pt;
1057	Pt = P1;
1058	P1 = P0;
1059	P0 = Pt;
1060	}
1061	if(r23!=1)
1062	free(P0);
1063	free(P1);
1064	size_t exactDataNum = exactLeadNumArray->size;
1065
1066	TightDataPointStorageD* tdps;
1067
1068	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
1069	type, exactMidByteArray->array, exactMidByteArray->size,
1070	exactLeadNumArray->array,
1071	resiBitArray->array, resiBitArray->size,
1072	resiBitsLength,
1073	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1074
1075	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
1076	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
1077
1078	// for(i = 3800;i<3844;i++)
1079	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
1080
1081	//free memory
1082	free_DIA(exactLeadNumArray);
1083	free_DIA(resiBitArray);
1084	free(type);
1085	free(vce);
1086	free(lce);
1087	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1088
1089	return tdps;
1090	}
1091
1092
1093	char SZ_compress_args_double_NoCkRngeNoGzip_3D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
1094	{
1095	size_t dataLength = r1r2r3;
1096	char compressionType = 0;
1097	TightDataPointStorageD* tdps = NULL;
1098	#ifdef HAVE_TIMECMPR
1099	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1100	{
1101	int timestep = sz_tsc->currentStep;
1102	if(timestep % confparams_cpr->snapshotCmprStep != 0)
1103	{
1104	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
1105	compressionType = 1; //time-series based compression
1106	}
1107	else
1108	{
1109	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
1110	compressionType = 0; //snapshot-based compression
1111	multisteps->lastSnapshotStep = timestep;
1112	}
1113	}
1114	else
1115	#endif
1116	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
1117
1118	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
1119
1120	if(outSize>dataLengthsizeof(double))
1121	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1122
1123	free_TightDataPointStorageD(tdps);
1124	return compressionType;
1125	}
1126
1127	TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, double valueRangeSize, double medianValue_d)
1128	{
1129	unsigned int quantization_intervals;
1130	if(exe_params->optQuantMode==1)
1131	{
1132	quantization_intervals = optimize_intervals_double_4D(oriData, r1, r2, r3, r4, realPrecision);
1133	updateQuantizationInfo(quantization_intervals);
1134	}
1135	else
1136	quantization_intervals = exe_params->intvCapacity;
1137
1138	size_t i,j,k;
1139	int reqLength;
1140	double pred1D, pred2D, pred3D;
1141	double diff = 0.0;
1142	double itvNum = 0;
1143	double P0, P1;
1144
1145	size_t dataLength = r1r2r3*r4;
1146
1147	size_t r234 = r2r3r4;
1148	size_t r34 = r3*r4;
1149
1150	P0 = (double)malloc(r34sizeof(double));
1151	P1 = (double)malloc(r34sizeof(double));
1152
1153	double medianValue = medianValue_d;
1154	short radExpo = getExponent_double(valueRangeSize/2);
1155	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
1156
1157	int* type = (int) malloc(dataLengthsizeof(int));
1158
1159	double* spaceFillingValue = oriData; //
1160
1161	DynamicIntArray *exactLeadNumArray;
1162	new_DIA(&exactLeadNumArray, DynArrayInitLen);
1163
1164	DynamicByteArray *exactMidByteArray;
1165	new_DBA(&exactMidByteArray, DynArrayInitLen);
1166
1167	DynamicIntArray *resiBitArray;
1168	new_DIA(&resiBitArray, DynArrayInitLen);
1169
1170	unsigned char preDataBytes[8];
1171	longToBytes_bigEndian(preDataBytes, 0);
1172
1173	int reqBytesLength = reqLength/8;
1174	int resiBitsLength = reqLength%8;
1175
1176	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
1177	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
1178
1179
1180	size_t l;
1181	for (l = 0; l < r1; l++)
1182	{
1183
1184	/////////////////////////// Process layer-0 ///////////////////////////
1185	/* Process Row-0 data 0*/
1186	size_t index = l*r234;
1187	size_t index2D = 0;
1188
1189	type[index] = 0;
1190	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1191	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1192	memcpy(preDataBytes,vce->curBytes,8);
1193	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1194	P1[index2D] = vce->data;
1195
1196	/* Process Row-0 data 1*/
1197	index = l*r234+1;
1198	index2D = 1;
1199
1200	pred1D = P1[index2D-1];
1201	diff = spaceFillingValue[index] - pred1D;
1202
1203	itvNum = fabs(diff)/realPrecision + 1;
1204
1205	if (itvNum < exe_params->intvCapacity)
1206	{
1207	if (diff < 0) itvNum = -itvNum;
1208	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1209	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1210	}
1211	else
1212	{
1213	type[index] = 0;
1214	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1215	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1216	memcpy(preDataBytes,vce->curBytes,8);
1217	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1218	P1[index2D] = vce->data;
1219	}
1220
1221	/* Process Row-0 data 2 --> data r4-1 */
1222	for (j = 2; j < r4; j++)
1223	{
1224	index = l*r234+j;
1225	index2D = j;
1226
1227	pred1D = 2*P1[index2D-1] - P1[index2D-2];
1228	diff = spaceFillingValue[index] - pred1D;
1229
1230	itvNum = fabs(diff)/realPrecision + 1;
1231
1232	if (itvNum < exe_params->intvCapacity)
1233	{
1234	if (diff < 0) itvNum = -itvNum;
1235	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1236	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1237	}
1238	else
1239	{
1240	type[index] = 0;
1241	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1242	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1243	memcpy(preDataBytes,vce->curBytes,8);
1244	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1245	P1[index2D] = vce->data;
1246	}
1247	}
1248
1249	/* Process Row-1 --> Row-r3-1 */
1250	for (i = 1; i < r3; i++)
1251	{
1252	/* Process row-i data 0 */
1253	index = lr234+ir4;
1254	index2D = i*r4;
1255
1256	pred1D = P1[index2D-r4];
1257	diff = spaceFillingValue[index] - pred1D;
1258
1259	itvNum = fabs(diff)/realPrecision + 1;
1260
1261	if (itvNum < exe_params->intvCapacity)
1262	{
1263	if (diff < 0) itvNum = -itvNum;
1264	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1265	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1266	}
1267	else
1268	{
1269	type[index] = 0;
1270	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1271	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1272	memcpy(preDataBytes,vce->curBytes,8);
1273	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1274	P1[index2D] = vce->data;
1275	}
1276
1277	/* Process row-i data 1 --> data r4-1*/
1278	for (j = 1; j < r4; j++)
1279	{
1280	index = lr234+ir4+j;
1281	index2D = i*r4+j;
1282
1283	pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1];
1284
1285	diff = spaceFillingValue[index] - pred2D;
1286
1287	itvNum = fabs(diff)/realPrecision + 1;
1288
1289	if (itvNum < exe_params->intvCapacity)
1290	{
1291	if (diff < 0) itvNum = -itvNum;
1292	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1293	P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1294	}
1295	else
1296	{
1297	type[index] = 0;
1298	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1299	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1300	memcpy(preDataBytes,vce->curBytes,8);
1301	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1302	P1[index2D] = vce->data;
1303	}
1304	}
1305	}
1306
1307
1308	/////////////////////////// Process layer-1 --> layer-r2-1 ///////////////////////////
1309
1310	for (k = 1; k < r2; k++)
1311	{
1312	/* Process Row-0 data 0*/
1313	index = lr234+kr34;
1314	index2D = 0;
1315
1316	pred1D = P1[index2D];
1317	diff = spaceFillingValue[index] - pred1D;
1318
1319	itvNum = fabs(diff)/realPrecision + 1;
1320
1321	if (itvNum < exe_params->intvCapacity)
1322	{
1323	if (diff < 0) itvNum = -itvNum;
1324	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1325	P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1326	}
1327	else
1328	{
1329	type[index] = 0;
1330	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1331	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1332	memcpy(preDataBytes,vce->curBytes,8);
1333	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1334	P0[index2D] = vce->data;
1335	}
1336
1337
1338	/* Process Row-0 data 1 --> data r4-1 */
1339	for (j = 1; j < r4; j++)
1340	{
1341	index = lr234+kr34+j;
1342	index2D = j;
1343
1344	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
1345	diff = spaceFillingValue[index] - pred2D;
1346
1347	itvNum = fabs(diff)/realPrecision + 1;
1348
1349	if (itvNum < exe_params->intvCapacity)
1350	{
1351	if (diff < 0) itvNum = -itvNum;
1352	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1353	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1354	}
1355	else
1356	{
1357	type[index] = 0;
1358	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1359	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1360	memcpy(preDataBytes,vce->curBytes,8);
1361	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1362	P0[index2D] = vce->data;
1363	}
1364	}
1365
1366	/* Process Row-1 --> Row-r3-1 */
1367	for (i = 1; i < r3; i++)
1368	{
1369	/* Process Row-i data 0 */
1370	index = lr234+kr34+i*r4;
1371	index2D = i*r4;
1372
1373	pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4];
1374	diff = spaceFillingValue[index] - pred2D;
1375
1376	itvNum = fabs(diff)/realPrecision + 1;
1377
1378	if (itvNum < exe_params->intvCapacity)
1379	{
1380	if (diff < 0) itvNum = -itvNum;
1381	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1382	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1383	}
1384	else
1385	{
1386	type[index] = 0;
1387	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1388	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1389	memcpy(preDataBytes,vce->curBytes,8);
1390	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1391	P0[index2D] = vce->data;
1392	}
1393
1394	/* Process Row-i data 1 --> data r4-1 */
1395	for (j = 1; j < r4; j++)
1396	{
1397	index = lr234+kr34+i*r4+j;
1398	index2D = i*r4+j;
1399
1400	pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1];
1401	diff = spaceFillingValue[index] - pred3D;
1402
1403
1404	itvNum = fabs(diff)/realPrecision + 1;
1405
1406	if (itvNum < exe_params->intvCapacity)
1407	{
1408	if (diff < 0) itvNum = -itvNum;
1409	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1410	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1411	}
1412	else
1413	{
1414	type[index] = 0;
1415	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1416	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1417	memcpy(preDataBytes,vce->curBytes,8);
1418	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1419	P0[index2D] = vce->data;
1420	}
1421	}
1422	}
1423
1424	double *Pt;
1425	Pt = P1;
1426	P1 = P0;
1427	P0 = Pt;
1428	}
1429	}
1430
1431	free(P0);
1432	free(P1);
1433	size_t exactDataNum = exactLeadNumArray->size;
1434
1435	TightDataPointStorageD* tdps;
1436
1437	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
1438	type, exactMidByteArray->array, exactMidByteArray->size,
1439	exactLeadNumArray->array,
1440	resiBitArray->array, resiBitArray->size,
1441	resiBitsLength,
1442	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1443
1444	//free memory
1445	free_DIA(exactLeadNumArray);
1446	free_DIA(resiBitArray);
1447	free(type);
1448	free(vce);
1449	free(lce);
1450	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1451
1452	return tdps;
1453	}
1454
1455
1456	char SZ_compress_args_double_NoCkRngeNoGzip_4D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
1457	{
1458	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_d);
1459
1460	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
1461
1462	size_t dataLength = r1r2r3*r4;
1463	if(outSize>dataLengthsizeof(double))
1464	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1465
1466	free_TightDataPointStorageD(tdps);
1467	return 0;
1468	}
1469
1470	void SZ_compress_args_double_withinRange(unsigned char** newByteData, double oriData, size_t dataLength, size_t outSize)
1471	{
1472	TightDataPointStorageD* tdps = (TightDataPointStorageD*) malloc(sizeof(TightDataPointStorageD));
1473	tdps->rtypeArray = NULL;
1474	tdps->typeArray = NULL;
1475	tdps->leadNumArray = NULL;
1476	tdps->residualMidBits = NULL;
1477
1478	tdps->allSameData = 1;
1479	tdps->dataSeriesLength = dataLength;
1480	tdps->exactMidBytes = (unsigned char)malloc(sizeof(unsigned char)8);
1481	tdps->pwrErrBoundBytes = NULL;
1482	tdps->isLossless = 0;
1483	double value = oriData[0];
1484	doubleToBytes(tdps->exactMidBytes, value);
1485	tdps->exactMidBytes_size = 8;
1486
1487	size_t tmpOutSize;
1488	//unsigned char *tmpByteData;
1489	convertTDPStoFlatBytes_double(tdps, newByteData, &tmpOutSize);
1490	//convertTDPStoFlatBytes_double(tdps, &tmpByteData, &tmpOutSize);
1491
1492	//newByteData = (unsigned char)malloc(sizeof(unsigned char)*16); //for floating-point data (1+3+4+4)
1493	//memcpy(*newByteData, tmpByteData, 16);
1494	*outSize = tmpOutSize;//12==3+1+8(double_size)+MetaDataByteLength
1495	free_TightDataPointStorageD(tdps);
1496	}
1497
1498	int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData,
1499	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1500	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
1501	{
1502	int status = SZ_SCES;
1503	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1504	double valueRangeSize = 0, medianValue = 0;
1505
1506	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
1507	double max = min+valueRangeSize;
1508	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1509
1510	if(valueRangeSize <= realPrecision)
1511	{
1512	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
1513	}
1514	else
1515	{
1516	if(r5==0&&r4==0&&r3==0&&r2==0)
1517	{
1518	if(errBoundMode>=PW_REL)
1519	{
1520	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
1521	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
1522	}
1523	else
1524	SZ_compress_args_double_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
1525	}
1526	else if(r5==0&&r4==0&&r3==0)
1527	{
1528	if(errBoundMode>=PW_REL)
1529	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(newByteData, oriData, realPrecision, r2, r1, outSize, min, max);
1530	else
1531	SZ_compress_args_double_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1532	}
1533	else if(r5==0&&r4==0)
1534	{
1535	if(errBoundMode>=PW_REL)
1536	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max);
1537	else
1538	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1539	}
1540	else if(r5==0)
1541	{
1542	if(errBoundMode>=PW_REL)
1543	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max);
1544	else
1545	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1546	}
1547	}
1548	return status;
1549	}
1550
1551	int SZ_compress_args_double(unsigned char** newByteData, double *oriData,
1552	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1553	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
1554	{
1555	confparams_cpr->errorBoundMode = errBoundMode;
1556	if(errBoundMode==PW_REL)
1557	{
1558	confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
1559	//confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
1560	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE && r3 != 0 )
1561	{
1562	printf("Error: Current version doesn't support 3D data compression with point-wise relative error bound being based on pwrType=AVG\n");
1563	exit(0);
1564	return SZ_NSCS;
1565	}
1566	}
1567
1568	int status = SZ_SCES;
1569	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1570
1571	if(dataLength <= MIN_NUM_OF_ELEMENTS)
1572	{
1573	*newByteData = SZ_skip_compress_double(oriData, dataLength, outSize);
1574	return status;
1575	}
1576
1577	double valueRangeSize = 0, medianValue = 0;
1578
1579	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
1580	double max = min+valueRangeSize;
1581
1582	double realPrecision = 0;
1583
1584	if(confparams_cpr->errorBoundMode==PSNR)
1585	{
1586	confparams_cpr->errorBoundMode = ABS;
1587	realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, valueRangeSize);
1588	}
1589	else
1590	realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1591
1592	if(valueRangeSize <= realPrecision)
1593	{
1594	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
1595	}
1596	else
1597	{
1598	size_t tmpOutSize = 0;
1599	unsigned char* tmpByteData;
1600	if (r2==0)
1601	{
1602	if(confparams_cpr->errorBoundMode>=PW_REL)
1603	{
1604	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
1605	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
1606	}
1607	else
1608	#ifdef HAVE_TIMECMPR
1609	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1610	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1611	else
1612	#endif
1613	SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1614	}
1615	else
1616	if (r3==0)
1617	{
1618	if(confparams_cpr->errorBoundMode>=PW_REL)
1619	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
1620	else
1621	#ifdef HAVE_TIMECMPR
1622	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1623	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1624	else
1625	#endif
1626	{
1627	if(sz_with_regression == SZ_NO_REGRESSION)
1628	SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1629	else
1630	tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
1631	}
1632	}
1633	else
1634	if (r4==0)
1635	{
1636	if(confparams_cpr->errorBoundMode>=PW_REL)
1637	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
1638	else
1639	#ifdef HAVE_TIMECMPR
1640	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1641	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1642	else
1643	#endif
1644	{
1645	if(sz_with_regression == SZ_NO_REGRESSION)
1646	SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1647	else
1648	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
1649	}
1650
1651
1652	}
1653	else
1654	if (r5==0)
1655	{
1656	if(confparams_cpr->errorBoundMode>=PW_REL)
1657	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
1658	else
1659	#ifdef HAVE_TIMECMPR
1660	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1661	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1662	else
1663	#endif
1664	{
1665	if(sz_with_regression == SZ_NO_REGRESSION)
1666	SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1667	else
1668	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
1669	}
1670
1671	}
1672	else
1673	{
1674	printf("Error: doesn't support 5 dimensions for now.\n");
1675	status = SZ_DERR;
1676	}
1677
1678	//Call Gzip to do the further compression.
1679	if(confparams_cpr->szMode==SZ_BEST_SPEED)
1680	{
1681	*outSize = tmpOutSize;
1682	*newByteData = tmpByteData;
1683	}
1684	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1685	{
1686	*outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
1687	free(tmpByteData);
1688	}
1689	else
1690	{
1691	printf("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1692	status = SZ_MERR;
1693	}
1694	}
1695
1696	return status;
1697	}
1698
1699	//TODO
1700	int SZ_compress_args_double_subblock(unsigned char* compressedBytes, double *oriData,
1701	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
1702	size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
1703	size_t e5, size_t e4, size_t e3, size_t e2, size_t e1,
1704	size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio)
1705	{
1706	int status = SZ_SCES;
1707	double valueRangeSize = 0, medianValue = 0;
1708	computeRangeSize_double_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1);
1709
1710	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1711
1712	if(valueRangeSize <= realPrecision)
1713	{
1714	//TODO
1715	//SZ_compress_args_double_withinRange_subblock();
1716	}
1717	else
1718	{
1719	if (r2==0)
1720	{
1721	//TODO
1722	if(errBoundMode==PW_REL)
1723	{
1724	//TODO
1725	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_subblock();
1726	printf ("Current subblock version does not support point-wise relative error bound.\n");
1727	}
1728	else
1729	SZ_compress_args_double_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1);
1730	}
1731	else
1732	if (r3==0)
1733	{
1734	if(errBoundMode==PW_REL)
1735	{
1736	//TODO
1737	//SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_subblock();
1738	printf ("Current subblock version does not support point-wise relative error bound.\n");
1739	}
1740	else
1741	SZ_compress_args_double_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1);
1742	}
1743	else
1744	if (r4==0)
1745	{
1746	if(errBoundMode==PW_REL)
1747	{
1748	//TODO
1749	//SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_subblock();
1750	printf ("Current subblock version does not support point-wise relative error bound.\n");
1751	}
1752	else
1753	SZ_compress_args_double_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1);
1754	}
1755	else
1756	if (r5==0)
1757	{
1758	if(errBoundMode==PW_REL)
1759	{
1760	//TODO
1761	//SZ_compress_args_double_NoCkRngeNoGzip_4D_pwr_subblock();
1762	printf ("Current subblock version does not support point-wise relative error bound.\n");
1763	}
1764	else
1765	SZ_compress_args_double_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
1766	}
1767	else
1768	{
1769	printf("Error: doesn't support 5 dimensions for now.\n");
1770	status = SZ_DERR; //dimension error
1771	}
1772	}
1773	return status;
1774	}
1775
1776	void SZ_compress_args_double_NoCkRnge_1D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1777	size_t r1, size_t s1, size_t e1)
1778	{
1779	TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r1, s1, e1);
1780
1781	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1782	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1783	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1784	{
1785	unsigned char *tmpCompBytes;
1786	size_t tmpOutSize;
1787	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1788	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1789	free(tmpCompBytes);
1790	}
1791	else
1792	{
1793	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1794	}
1795
1796	//TODO
1797	// if(outSize>dataLengthsizeof(double))
1798	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1799
1800	free_TightDataPointStorageD(tdps);
1801	}
1802
1803	void SZ_compress_args_double_NoCkRnge_2D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1804	size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1)
1805	{
1806	TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r2, r1, s2, s1, e2, e1);
1807
1808	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1809	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1810	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1811	{
1812	unsigned char *tmpCompBytes;
1813	size_t tmpOutSize;
1814	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1815	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1816	free(tmpCompBytes);
1817	}
1818	else
1819	{
1820	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1821	}
1822
1823	//TODO
1824	// if(outSize>dataLengthsizeof(double))
1825	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1826
1827	free_TightDataPointStorageD(tdps);
1828	}
1829
1830	void SZ_compress_args_double_NoCkRnge_3D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1831	size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1)
1832	{
1833	TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r3, r2, r1, s3, s2, s1, e3, e2, e1);
1834
1835	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1836	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1837	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1838	{
1839	unsigned char *tmpCompBytes;
1840	size_t tmpOutSize;
1841	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1842	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1843	free(tmpCompBytes);
1844	}
1845	else
1846	{
1847	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1848	}
1849
1850	//TODO
1851	// if(outSize>dataLengthsizeof(double))
1852	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1853
1854	free_TightDataPointStorageD(tdps);
1855	}
1856
1857	void SZ_compress_args_double_NoCkRnge_4D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
1858	size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1)
1859	{
1860	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
1861
1862	if (confparams_cpr->szMode==SZ_BEST_SPEED)
1863	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
1864	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
1865	{
1866	unsigned char *tmpCompBytes;
1867	size_t tmpOutSize;
1868	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
1869	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
1870	free(tmpCompBytes);
1871	}
1872	else
1873	{
1874	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
1875	}
1876
1877	//TODO
1878	// if(outSize>dataLengthsizeof(double))
1879	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1880
1881	free_TightDataPointStorageD(tdps);
1882	}
1883
1884
1885	unsigned int optimize_intervals_double_1D_subblock(double *oriData, double realPrecision, size_t r1, size_t s1, size_t e1)
1886	{
1887	size_t dataLength = e1 - s1 + 1;
1888	oriData = oriData + s1;
1889
1890	size_t i = 0;
1891	unsigned long radiusIndex;
1892	double pred_value = 0, pred_err;
1893	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
1894	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
1895	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
1896	for(i=2;i<dataLength;i++)
1897	{
1898	if(i%confparams_cpr->sampleDistance==0)
1899	{
1900	pred_value = 2*oriData[i-1] - oriData[i-2];
1901	//pred_value = oriData[i-1];
1902	pred_err = fabs(pred_value - oriData[i]);
1903	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
1904	if(radiusIndex>=confparams_cpr->maxRangeRadius)
1905	radiusIndex = confparams_cpr->maxRangeRadius - 1;
1906	intervals[radiusIndex]++;
1907	}
1908	}
1909	//compute the appropriate number
1910	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
1911	size_t sum = 0;
1912	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
1913	{
1914	sum += intervals[i];
1915	if(sum>targetCount)
1916	break;
1917	}
1918
1919	if(i>=confparams_cpr->maxRangeRadius)
1920	i = confparams_cpr->maxRangeRadius-1;
1921	unsigned int accIntervals = 2*(i+1);
1922	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
1923
1924	if(powerOf2<32)
1925	powerOf2 = 32;
1926
1927	free(intervals);
1928	return powerOf2;
1929	}
1930
1931	unsigned int optimize_intervals_double_2D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
1932	{
1933	size_t R1 = e1 - s1 + 1;
1934	size_t R2 = e2 - s2 + 1;
1935
1936	size_t i,j, index;
1937	unsigned long radiusIndex;
1938	double pred_value = 0, pred_err;
1939	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
1940	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
1941	size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance;
1942	for(i=s1+1;i<=e1;i++)
1943	{
1944	for(j=s2+1;j<=e2;j++)
1945	{
1946	if((i+j)%confparams_cpr->sampleDistance==0)
1947	{
1948	index = i*r2+j;
1949	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
1950	pred_err = fabs(pred_value - oriData[index]);
1951	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
1952	if(radiusIndex>=confparams_cpr->maxRangeRadius)
1953	radiusIndex = confparams_cpr->maxRangeRadius - 1;
1954	intervals[radiusIndex]++;
1955	}
1956	}
1957	}
1958	//compute the appropriate number
1959	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
1960	size_t sum = 0;
1961	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
1962	{
1963	sum += intervals[i];
1964	if(sum>targetCount)
1965	break;
1966	}
1967	if(i>=confparams_cpr->maxRangeRadius)
1968	i = confparams_cpr->maxRangeRadius-1;
1969	unsigned int accIntervals = 2*(i+1);
1970	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
1971
1972	if(powerOf2<32)
1973	powerOf2 = 32;
1974
1975	free(intervals);
1976	return powerOf2;
1977	}
1978
1979	unsigned int optimize_intervals_double_3D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
1980	{
1981	size_t R1 = e1 - s1 + 1;
1982	size_t R2 = e2 - s2 + 1;
1983	size_t R3 = e3 - s3 + 1;
1984
1985	size_t r23 = r2*r3;
1986
1987	size_t i,j,k, index;
1988	unsigned long radiusIndex;
1989	double pred_value = 0, pred_err;
1990	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
1991	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
1992	size_t totalSampleSize = R1R2R3/confparams_cpr->sampleDistance;
1993	for(i=s1+1;i<=e1;i++)
1994	{
1995	for(j=s2+1;j<=e2;j++)
1996	{
1997	for(k=s3+1;k<=e3;k++)
1998	{
1999	if((i+j+k)%confparams_cpr->sampleDistance==0)
2000	{
2001	index = ir23+jr3+k;
2002	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
2003	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
2004	pred_err = fabs(pred_value - oriData[index]);
2005	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2006	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2007	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2008	intervals[radiusIndex]++;
2009	}
2010	}
2011
2012	}
2013	}
2014	//compute the appropriate number
2015	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2016	size_t sum = 0;
2017	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2018	{
2019	sum += intervals[i];
2020	if(sum>targetCount)
2021	break;
2022	}
2023	if(i>=confparams_cpr->maxRangeRadius)
2024	i = confparams_cpr->maxRangeRadius-1;
2025
2026	unsigned int accIntervals = 2*(i+1);
2027	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2028
2029	if(powerOf2<32)
2030	powerOf2 = 32;
2031
2032	free(intervals);
2033	return powerOf2;
2034	}
2035
2036	unsigned int optimize_intervals_double_4D_subblock(double *oriData, double realPrecision,
2037	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
2038	{
2039	size_t R1 = e1 - s1 + 1;
2040	size_t R2 = e2 - s2 + 1;
2041	size_t R3 = e3 - s3 + 1;
2042	size_t R4 = e4 - s4 + 1;
2043
2044	size_t r34 = r3*r4;
2045	size_t r234 = r2r3r4;
2046
2047	size_t i,j,k,l, index;
2048	unsigned long radiusIndex;
2049	double pred_value = 0, pred_err;
2050	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2051	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2052	size_t totalSampleSize = R1R2R3*R4/confparams_cpr->sampleDistance;
2053	for(i=s1+1;i<=e1;i++)
2054	{
2055	for(j=s2+1;j<=e2;j++)
2056	{
2057	for(k=s3+1;k<=e3;k++)
2058	{
2059	for(l=s4+1;l<=e4;l++)
2060	{
2061	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
2062	{
2063	index = ir234+jr34+k*r4+l;
2064	pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34]
2065	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
2066	pred_err = fabs(pred_value - oriData[index]);
2067	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2068	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2069	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2070	intervals[radiusIndex]++;
2071	}
2072	}
2073	}
2074
2075	}
2076	}
2077	//compute the appropriate number
2078	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2079	size_t sum = 0;
2080	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2081	{
2082	sum += intervals[i];
2083	if(sum>targetCount)
2084	break;
2085	}
2086	if(i>=confparams_cpr->maxRangeRadius)
2087	i = confparams_cpr->maxRangeRadius-1;
2088
2089	unsigned int accIntervals = 2*(i+1);
2090	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2091
2092	if(powerOf2<32)
2093	powerOf2 = 32;
2094
2095	free(intervals);
2096	return powerOf2;
2097	}
2098
2099	TightDataPointStorageD* SZ_compress_double_1D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2100	size_t r1, size_t s1, size_t e1)
2101	{
2102	size_t dataLength = e1 - s1 + 1;
2103
2104	unsigned int quantization_intervals;
2105	if(exe_params->optQuantMode==1)
2106	quantization_intervals = optimize_intervals_double_1D_subblock(oriData, realPrecision, r1, s1, e1);
2107	else
2108	quantization_intervals = exe_params->intvCapacity;
2109	updateQuantizationInfo(quantization_intervals);
2110
2111	size_t i;
2112	int reqLength;
2113	double medianValue = medianValue_d;
2114	short radExpo = getExponent_double(valueRangeSize/2);
2115
2116	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2117
2118	int* type = (int) malloc(dataLengthsizeof(int));
2119
2120	double* spaceFillingValue = oriData + s1; //
2121
2122	DynamicIntArray *exactLeadNumArray;
2123	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2124
2125	DynamicByteArray *exactMidByteArray;
2126	new_DBA(&exactMidByteArray, DynArrayInitLen);
2127
2128	DynamicIntArray *resiBitArray;
2129	new_DIA(&resiBitArray, DynArrayInitLen);
2130
2131	type[0] = 0;
2132
2133	unsigned char preDataBytes[8];
2134	longToBytes_bigEndian(preDataBytes, 0);
2135
2136	int reqBytesLength = reqLength/8;
2137	int resiBitsLength = reqLength%8;
2138	double last3CmprsData[3] = {0};
2139
2140	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2141	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2142
2143	//add the first data
2144	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2145	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2146	memcpy(preDataBytes,vce->curBytes,8);
2147	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2148	listAdd_double(last3CmprsData, vce->data);
2149
2150	//add the second data
2151	type[1] = 0;
2152	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2153	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2154	memcpy(preDataBytes,vce->curBytes,8);
2155	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2156	listAdd_double(last3CmprsData, vce->data);
2157
2158	int state;
2159	double checkRadius;
2160	double curData;
2161	double pred;
2162	double predAbsErr;
2163	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
2164	double interval = 2*realPrecision;
2165
2166	for(i=2;i<dataLength;i++)
2167	{
2168	//printf("%.30G\n",last3CmprsData[0]);
2169	curData = spaceFillingValue[i];
2170	pred = 2*last3CmprsData[0] - last3CmprsData[1];
2171	//pred = last3CmprsData[0];
2172	predAbsErr = fabs(curData - pred);
2173	if(predAbsErr<=checkRadius)
2174	{
2175	state = (predAbsErr/realPrecision+1)/2;
2176	if(curData>=pred)
2177	{
2178	type[i] = exe_params->intvRadius+state;
2179	pred = pred + state*interval;
2180	}
2181	else //curData<pred
2182	{
2183	type[i] = exe_params->intvRadius-state;
2184	pred = pred - state*interval;
2185	}
2186	listAdd_double(last3CmprsData, pred);
2187	continue;
2188	}
2189
2190	//unpredictable data processing
2191	type[i] = 0;
2192	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2193	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2194	memcpy(preDataBytes,vce->curBytes,8);
2195	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2196
2197	listAdd_double(last3CmprsData, vce->data);
2198	}//end of for
2199
2200	size_t exactDataNum = exactLeadNumArray->size;
2201
2202	TightDataPointStorageD* tdps;
2203
2204	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
2205	type, exactMidByteArray->array, exactMidByteArray->size,
2206	exactLeadNumArray->array,
2207	resiBitArray->array, resiBitArray->size,
2208	resiBitsLength,
2209	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2210
2211	//free memory
2212	free_DIA(exactLeadNumArray);
2213	free_DIA(resiBitArray);
2214	free(type);
2215	free(vce);
2216	free(lce);
2217	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2218
2219	return tdps;
2220	}
2221
2222
2223	TightDataPointStorageD* SZ_compress_double_2D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2224	size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
2225	{
2226	unsigned int quantization_intervals;
2227	if(exe_params->optQuantMode==1)
2228	{
2229	quantization_intervals = optimize_intervals_double_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2);
2230	updateQuantizationInfo(quantization_intervals);
2231	}
2232	else
2233	quantization_intervals = exe_params->intvCapacity;
2234
2235	size_t i,j;
2236	int reqLength;
2237	double pred1D, pred2D;
2238	double diff = 0.0;
2239	double itvNum = 0;
2240	double P0, P1;
2241
2242	size_t R1 = e1 - s1 + 1;
2243	size_t R2 = e2 - s2 + 1;
2244	size_t dataLength = R1*R2;
2245
2246	P0 = (double)malloc(R2sizeof(double));
2247	memset(P0, 0, R2*sizeof(double));
2248	P1 = (double)malloc(R2sizeof(double));
2249	memset(P1, 0, R2*sizeof(double));
2250
2251	double medianValue = medianValue_d;
2252	short radExpo = getExponent_double(valueRangeSize/2);
2253	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2254
2255	int* type = (int) malloc(dataLengthsizeof(int));
2256
2257	double* spaceFillingValue = oriData; //
2258
2259	DynamicIntArray *exactLeadNumArray;
2260	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2261
2262	DynamicByteArray *exactMidByteArray;
2263	new_DBA(&exactMidByteArray, DynArrayInitLen);
2264
2265	DynamicIntArray *resiBitArray;
2266	new_DIA(&resiBitArray, DynArrayInitLen);
2267
2268	unsigned char preDataBytes[8];
2269	longToBytes_bigEndian(preDataBytes, 0);
2270
2271	int reqBytesLength = reqLength/8;
2272	int resiBitsLength = reqLength%8;
2273
2274	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2275	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2276
2277	/* Process Row-s1 data s2*/
2278	size_t gIndex;
2279	size_t lIndex;
2280
2281	gIndex = s1*r2+s2;
2282	lIndex = 0;
2283
2284	type[lIndex] = 0;
2285	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2286	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2287	memcpy(preDataBytes,vce->curBytes,8);
2288	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2289	P1[0] = vce->data;
2290
2291	/* Process Row-s1 data s2+1*/
2292	gIndex = s1*r2+(s2+1);
2293	lIndex = 1;
2294
2295	pred1D = P1[0];
2296	diff = spaceFillingValue[gIndex] - pred1D;
2297
2298	itvNum = fabs(diff)/realPrecision + 1;
2299
2300	if (itvNum < exe_params->intvCapacity)
2301	{
2302	if (diff < 0) itvNum = -itvNum;
2303	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2304	P1[1] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2305	}
2306	else
2307	{
2308	type[lIndex] = 0;
2309	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2310	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2311	memcpy(preDataBytes,vce->curBytes,8);
2312	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2313	P1[1] = vce->data;
2314	}
2315
2316	/* Process Row-s1 data s2+2 --> data e2 */
2317	for (j = 2; j < R2; j++)
2318	{
2319	gIndex = s1*r2+(s2+j);
2320	lIndex = j;
2321
2322	pred1D = 2*P1[j-1] - P1[j-2];
2323	diff = spaceFillingValue[gIndex] - pred1D;
2324
2325	itvNum = fabs(diff)/realPrecision + 1;
2326
2327	if (itvNum < exe_params->intvCapacity)
2328	{
2329	if (diff < 0) itvNum = -itvNum;
2330	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2331	P1[j] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2332	}
2333	else
2334	{
2335	type[lIndex] = 0;
2336	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2337	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2338	memcpy(preDataBytes,vce->curBytes,8);
2339	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2340	P1[j] = vce->data;
2341	}
2342	}
2343
2344	/* Process Row-s1+1 --> Row-e1 */
2345	for (i = 1; i < R1; i++)
2346	{
2347	/* Process row-s1+i data s2 */
2348	gIndex = (s1+i)*r2+s2;
2349	lIndex = i*R2;
2350
2351	pred1D = P1[0];
2352	diff = spaceFillingValue[gIndex] - pred1D;
2353
2354	itvNum = fabs(diff)/realPrecision + 1;
2355
2356	if (itvNum < exe_params->intvCapacity)
2357	{
2358	if (diff < 0) itvNum = -itvNum;
2359	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2360	P0[0] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2361	}
2362	else
2363	{
2364	type[lIndex] = 0;
2365	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2366	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2367	memcpy(preDataBytes,vce->curBytes,8);
2368	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2369	P0[0] = vce->data;
2370	}
2371
2372	/* Process row-s1+i data s2+1 --> e2 */
2373	for (j = 1; j < R2; j++)
2374	{
2375	gIndex = (s1+i)*r2+(s2+j);
2376	lIndex = i*R2+j;
2377
2378	pred2D = P0[j-1] + P1[j] - P1[j-1];
2379	diff = spaceFillingValue[gIndex] - pred2D;
2380
2381	itvNum = fabs(diff)/realPrecision + 1;
2382
2383	if (itvNum < exe_params->intvCapacity)
2384	{
2385	if (diff < 0) itvNum = -itvNum;
2386	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2387	P0[j] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2388	}
2389	else
2390	{
2391	type[lIndex] = 0;
2392	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2393	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2394	memcpy(preDataBytes,vce->curBytes,8);
2395	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2396	P0[j] = vce->data;
2397	}
2398	}
2399
2400	double *Pt;
2401	Pt = P1;
2402	P1 = P0;
2403	P0 = Pt;
2404	}
2405
2406	free(P0);
2407	free(P1);
2408	size_t exactDataNum = exactLeadNumArray->size;
2409
2410	TightDataPointStorageD* tdps;
2411
2412	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
2413	type, exactMidByteArray->array, exactMidByteArray->size,
2414	exactLeadNumArray->array,
2415	resiBitArray->array, resiBitArray->size,
2416	resiBitsLength,
2417	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2418
2419	//free memory
2420	free_DIA(exactLeadNumArray);
2421	free_DIA(resiBitArray);
2422	free(type);
2423	free(vce);
2424	free(lce);
2425	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2426
2427	return tdps;
2428	}
2429
2430	TightDataPointStorageD* SZ_compress_double_3D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2431	size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
2432	{
2433	unsigned int quantization_intervals;
2434	if(exe_params->optQuantMode==1)
2435	{
2436	quantization_intervals = optimize_intervals_double_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3);
2437	updateQuantizationInfo(quantization_intervals);
2438	}
2439	else
2440	quantization_intervals = exe_params->intvCapacity;
2441
2442	size_t i,j,k;
2443	int reqLength;
2444	double pred1D, pred2D, pred3D;
2445	double diff = 0.0;
2446	double itvNum = 0;
2447	double P0, P1;
2448
2449	size_t R1 = e1 - s1 + 1;
2450	size_t R2 = e2 - s2 + 1;
2451	size_t R3 = e3 - s3 + 1;
2452	size_t dataLength = R1R2R3;
2453
2454	size_t r23 = r2*r3;
2455	size_t R23 = R2*R3;
2456
2457	P0 = (double)malloc(R23sizeof(double));
2458	P1 = (double)malloc(R23sizeof(double));
2459
2460	double medianValue = medianValue_d;
2461	short radExpo = getExponent_double(valueRangeSize/2);
2462	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2463
2464	int* type = (int) malloc(dataLengthsizeof(int));
2465
2466	double* spaceFillingValue = oriData; //
2467
2468	DynamicIntArray *exactLeadNumArray;
2469	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2470
2471	DynamicByteArray *exactMidByteArray;
2472	new_DBA(&exactMidByteArray, DynArrayInitLen);
2473
2474	DynamicIntArray *resiBitArray;
2475	new_DIA(&resiBitArray, DynArrayInitLen);
2476
2477	unsigned char preDataBytes[8];
2478	longToBytes_bigEndian(preDataBytes, 0);
2479
2480	int reqBytesLength = reqLength/8;
2481	int resiBitsLength = reqLength%8;
2482
2483	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2484	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2485
2486
2487	/////////////////////////// Process layer-s1 ///////////////////////////
2488	/* Process Row-s2 data s3*/
2489	size_t gIndex; //global index
2490	size_t lIndex; //local index
2491	size_t index2D; //local 2D index
2492
2493	gIndex = s1r23+s2r3+s3;
2494	lIndex = 0;
2495	index2D = 0;
2496
2497	type[lIndex] = 0;
2498	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2499	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2500	memcpy(preDataBytes,vce->curBytes,8);
2501	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2502	P1[index2D] = vce->data;
2503
2504	/* Process Row-s2 data s3+1*/
2505	gIndex = s1r23+s2r3+s3+1;
2506	lIndex = 1;
2507	index2D = 1;
2508
2509	pred1D = P1[index2D-1];
2510	diff = spaceFillingValue[gIndex] - pred1D;
2511
2512	itvNum = fabs(diff)/realPrecision + 1;
2513
2514	if (itvNum < exe_params->intvCapacity)
2515	{
2516	if (diff < 0) itvNum = -itvNum;
2517	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2518	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2519	}
2520	else
2521	{
2522	type[lIndex] = 0;
2523	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2524	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2525	memcpy(preDataBytes,vce->curBytes,8);
2526	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2527	P1[index2D] = vce->data;
2528	}
2529
2530	/* Process Row-s2 data s3+2 --> data e3 */
2531	for (j = 2; j < R3; j++)
2532	{
2533	gIndex = s1r23+s2r3+s3+j;
2534	lIndex = j;
2535	index2D = j;
2536
2537	pred1D = 2*P1[index2D-1] - P1[index2D-2];
2538	diff = spaceFillingValue[gIndex] - pred1D;
2539
2540	itvNum = fabs(diff)/realPrecision + 1;
2541
2542	if (itvNum < exe_params->intvCapacity)
2543	{
2544	if (diff < 0) itvNum = -itvNum;
2545	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2546	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2547	}
2548	else
2549	{
2550	type[lIndex] = 0;
2551	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2552	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2553	memcpy(preDataBytes,vce->curBytes,8);
2554	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2555	P1[index2D] = vce->data;
2556	}
2557	}
2558
2559	/* Process Row-s2+1 --> Row-e2 */
2560	for (i = 1; i < R2; i++)
2561	{
2562	/* Process row-s2+i data s3 */
2563	gIndex = s1r23+(s2+i)r3+s3;
2564	lIndex = i*R3;
2565	index2D = i*R3;
2566
2567	pred1D = P1[index2D-R3];
2568	diff = spaceFillingValue[gIndex] - pred1D;
2569
2570	itvNum = fabs(diff)/realPrecision + 1;
2571
2572	if (itvNum < exe_params->intvCapacity)
2573	{
2574	if (diff < 0) itvNum = -itvNum;
2575	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2576	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2577	}
2578	else
2579	{
2580	type[lIndex] = 0;
2581	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2582	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2583	memcpy(preDataBytes,vce->curBytes,8);
2584	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2585	P1[index2D] = vce->data;
2586	}
2587
2588	/* Process row-s2+i data s3+1 --> data e3*/
2589	for (j = 1; j < R3; j++)
2590	{
2591	gIndex = s1r23+(s2+i)r3+s3+j;
2592	lIndex = i*R3+j;
2593	index2D = i*R3+j;
2594
2595	pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1];
2596	diff = spaceFillingValue[gIndex] - pred2D;
2597
2598	itvNum = fabs(diff)/realPrecision + 1;
2599
2600	if (itvNum < exe_params->intvCapacity)
2601	{
2602	if (diff < 0) itvNum = -itvNum;
2603	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2604	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2605	}
2606	else
2607	{
2608	type[lIndex] = 0;
2609	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2610	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2611	memcpy(preDataBytes,vce->curBytes,8);
2612	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2613	P1[index2D] = vce->data;
2614	}
2615	}
2616	}
2617
2618
2619	/////////////////////////// Process layer-s1+1 --> layer-e1 ///////////////////////////
2620
2621	for (k = 1; k < R1; k++)
2622	{
2623	/* Process Row-s2 data s3*/
2624	gIndex = (s1+k)r23+s2r3+s3;
2625	lIndex = k*R23;
2626	index2D = 0;
2627
2628	pred1D = P1[index2D];
2629	diff = spaceFillingValue[gIndex] - pred1D;
2630
2631	itvNum = fabs(diff)/realPrecision + 1;
2632
2633	if (itvNum < exe_params->intvCapacity)
2634	{
2635	if (diff < 0) itvNum = -itvNum;
2636	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2637	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2638	}
2639	else
2640	{
2641	type[lIndex] = 0;
2642	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2643	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2644	memcpy(preDataBytes,vce->curBytes,8);
2645	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2646	P0[index2D] = vce->data;
2647	}
2648
2649
2650	/* Process Row-s2 data s3+1 --> data e3 */
2651	for (j = 1; j < R3; j++)
2652	{
2653	gIndex = (s1+k)r23+s2r3+s3+j;
2654	lIndex = k*R23+j;
2655	index2D = j;
2656
2657	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
2658	diff = spaceFillingValue[gIndex] - pred2D;
2659
2660	itvNum = fabs(diff)/realPrecision + 1;
2661
2662	if (itvNum < exe_params->intvCapacity)
2663	{
2664	if (diff < 0) itvNum = -itvNum;
2665	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2666	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2667	}
2668	else
2669	{
2670	type[lIndex] = 0;
2671	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2672	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2673	memcpy(preDataBytes,vce->curBytes,8);
2674	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2675	P0[index2D] = vce->data;
2676	}
2677	}
2678
2679	/* Process Row-s2+1 --> Row-e2 */
2680	for (i = 1; i < R2; i++)
2681	{
2682	/* Process Row-s2+i data s3 */
2683	gIndex = (s1+k)r23+(s2+i)r3+s3;
2684	lIndex = kR23+iR3;
2685	index2D = i*R3;
2686
2687	pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3];
2688	diff = spaceFillingValue[gIndex] - pred2D;
2689
2690	itvNum = fabs(diff)/realPrecision + 1;
2691
2692	if (itvNum < exe_params->intvCapacity)
2693	{
2694	if (diff < 0) itvNum = -itvNum;
2695	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2696	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2697	}
2698	else
2699	{
2700	type[lIndex] = 0;
2701	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2702	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2703	memcpy(preDataBytes,vce->curBytes,8);
2704	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2705	P0[index2D] = vce->data;
2706	}
2707
2708	/* Process Row-s2+i data s3+1 --> data e3 */
2709	for (j = 1; j < R3; j++)
2710	{
2711	gIndex = (s1+k)r23+(s2+i)r3+s3+j;
2712	lIndex = kR23+iR3+j;
2713	index2D = i*R3+j;
2714
2715	pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1];
2716	diff = spaceFillingValue[gIndex] - pred3D;
2717
2718	itvNum = fabs(diff)/realPrecision + 1;
2719
2720	if (itvNum < exe_params->intvCapacity)
2721	{
2722	if (diff < 0) itvNum = -itvNum;
2723	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2724	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2725	}
2726	else
2727	{
2728	type[lIndex] = 0;
2729	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2730	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2731	memcpy(preDataBytes,vce->curBytes,8);
2732	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2733	P0[index2D] = vce->data;
2734	}
2735	}
2736	}
2737
2738	double *Pt;
2739	Pt = P1;
2740	P1 = P0;
2741	P0 = Pt;
2742	}
2743
2744	free(P0);
2745	free(P1);
2746	size_t exactDataNum = exactLeadNumArray->size;
2747
2748	TightDataPointStorageD* tdps;
2749
2750	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
2751	type, exactMidByteArray->array, exactMidByteArray->size,
2752	exactLeadNumArray->array,
2753	resiBitArray->array, resiBitArray->size,
2754	resiBitsLength,
2755	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2756
2757	//free memory
2758	free_DIA(exactLeadNumArray);
2759	free_DIA(resiBitArray);
2760	free(type);
2761	free(vce);
2762	free(lce);
2763	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2764
2765	return tdps;
2766	}
2767
2768	TightDataPointStorageD* SZ_compress_double_4D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
2769	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
2770	{
2771	unsigned int quantization_intervals;
2772	if(exe_params->optQuantMode==1)
2773	{
2774	quantization_intervals = optimize_intervals_double_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4);
2775	updateQuantizationInfo(quantization_intervals);
2776	}
2777	else
2778	quantization_intervals = exe_params->intvCapacity;
2779
2780	size_t i,j,k;
2781	int reqLength;
2782	double pred1D, pred2D, pred3D;
2783	double diff = 0.0;
2784	double itvNum = 0;
2785	double P0, P1;
2786
2787	size_t R1 = e1 - s1 + 1;
2788	size_t R2 = e2 - s2 + 1;
2789	size_t R3 = e3 - s3 + 1;
2790	size_t R4 = e4 - s4 + 1;
2791
2792	size_t dataLength = R1R2R3*R4;
2793
2794	size_t r34 = r3*r4;
2795	size_t r234 = r2r3r4;
2796	size_t R34 = R3*R4;
2797	size_t R234 = R2R3R4;
2798
2799	P0 = (double)malloc(R34sizeof(double));
2800	P1 = (double)malloc(R34sizeof(double));
2801
2802	double medianValue = medianValue_d;
2803	short radExpo = getExponent_double(valueRangeSize/2);
2804	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
2805
2806	int* type = (int) malloc(dataLengthsizeof(int));
2807
2808	double* spaceFillingValue = oriData; //
2809
2810	DynamicIntArray *exactLeadNumArray;
2811	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2812
2813	DynamicByteArray *exactMidByteArray;
2814	new_DBA(&exactMidByteArray, DynArrayInitLen);
2815
2816	DynamicIntArray *resiBitArray;
2817	new_DIA(&resiBitArray, DynArrayInitLen);
2818
2819	unsigned char preDataBytes[8];
2820	longToBytes_bigEndian(preDataBytes, 0);
2821
2822	int reqBytesLength = reqLength/8;
2823	int resiBitsLength = reqLength%8;
2824
2825	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
2826	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2827
2828	size_t l;
2829	for (l = 0; l < R1; l++)
2830	{
2831
2832	/////////////////////////// Process layer-s2 ///////////////////////////
2833	/* Process Row-s3 data s4*/
2834	size_t gIndex; //global index
2835	size_t lIndex; //local index
2836	size_t index2D; //local 2D index
2837
2838	gIndex = (s1+l)r234+s2r34+s3*r4+s4;
2839	lIndex = l*R234;
2840	index2D = 0;
2841
2842	type[lIndex] = 0;
2843	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2844	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2845	memcpy(preDataBytes,vce->curBytes,8);
2846	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2847	P1[index2D] = vce->data;
2848
2849	/* Process Row-s3 data s4+1*/
2850	gIndex = (s1+l)r234+s2r34+s3*r4+s4+1;
2851	lIndex = l*R234+1;
2852	index2D = 1;
2853
2854	pred1D = P1[index2D-1];
2855	diff = spaceFillingValue[gIndex] - pred1D;
2856
2857	itvNum = fabs(diff)/realPrecision + 1;
2858
2859	if (itvNum < exe_params->intvCapacity)
2860	{
2861	if (diff < 0) itvNum = -itvNum;
2862	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2863	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2864	}
2865	else
2866	{
2867	type[lIndex] = 0;
2868	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2869	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2870	memcpy(preDataBytes,vce->curBytes,8);
2871	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2872	P1[index2D] = vce->data;
2873	}
2874
2875	/* Process Row-s3 data s4+2 --> data e4 */
2876	for (j = 2; j < R4; j++)
2877	{
2878	gIndex = (s1+l)r234+s2r34+s3*r4+s4+j;
2879	lIndex = l*R234+j;
2880	index2D = j;
2881
2882	pred1D = 2*P1[index2D-1] - P1[index2D-2];
2883	diff = spaceFillingValue[gIndex] - pred1D;
2884
2885	itvNum = fabs(diff)/realPrecision + 1;
2886
2887	if (itvNum < exe_params->intvCapacity)
2888	{
2889	if (diff < 0) itvNum = -itvNum;
2890	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2891	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2892	}
2893	else
2894	{
2895	type[lIndex] = 0;
2896	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2897	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2898	memcpy(preDataBytes,vce->curBytes,8);
2899	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2900	P1[index2D] = vce->data;
2901	}
2902	}
2903
2904	/* Process Row-s3+1 --> Row-e3 */
2905	for (i = 1; i < R3; i++)
2906	{
2907	/* Process row-s2+i data s3 */
2908	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4;
2909	lIndex = lR234+iR4;
2910	index2D = i*R4;
2911
2912	pred1D = P1[index2D-R4];
2913	diff = spaceFillingValue[gIndex] - pred1D;
2914
2915	itvNum = fabs(diff)/realPrecision + 1;
2916
2917	if (itvNum < exe_params->intvCapacity)
2918	{
2919	if (diff < 0) itvNum = -itvNum;
2920	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2921	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2922	}
2923	else
2924	{
2925	type[lIndex] = 0;
2926	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2927	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2928	memcpy(preDataBytes,vce->curBytes,8);
2929	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2930	P1[index2D] = vce->data;
2931	}
2932
2933	/* Process row-s3+i data s4+1 --> data e4*/
2934	for (j = 1; j < R4; j++)
2935	{
2936	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4+j;
2937	lIndex = lR234+iR4+j;
2938	index2D = i*R4+j;
2939
2940	pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1];
2941	diff = spaceFillingValue[gIndex] - pred2D;
2942
2943	itvNum = fabs(diff)/realPrecision + 1;
2944
2945	if (itvNum < exe_params->intvCapacity)
2946	{
2947	if (diff < 0) itvNum = -itvNum;
2948	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2949	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2950	}
2951	else
2952	{
2953	type[lIndex] = 0;
2954	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2955	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2956	memcpy(preDataBytes,vce->curBytes,8);
2957	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2958	P1[index2D] = vce->data;
2959	}
2960	}
2961	}
2962
2963
2964	/////////////////////////// Process layer-s2+1 --> layer-e2 ///////////////////////////
2965
2966	for (k = 1; k < R2; k++)
2967	{
2968	/* Process Row-s3 data s4*/
2969	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4;
2970	lIndex = lR234+kR34;
2971	index2D = 0;
2972
2973	pred1D = P1[index2D];
2974	diff = spaceFillingValue[gIndex] - pred1D;
2975
2976	itvNum = fabs(diff)/realPrecision + 1;
2977
2978	if (itvNum < exe_params->intvCapacity)
2979	{
2980	if (diff < 0) itvNum = -itvNum;
2981	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2982	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2983	}
2984	else
2985	{
2986	type[lIndex] = 0;
2987	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2988	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2989	memcpy(preDataBytes,vce->curBytes,8);
2990	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2991	P0[index2D] = vce->data;
2992	}
2993
2994
2995	/* Process Row-s3 data s4+1 --> data e4 */
2996	for (j = 1; j < R4; j++)
2997	{
2998	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4+j;
2999	lIndex = lR234+kR34+j;
3000	index2D = j;
3001
3002	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
3003	diff = spaceFillingValue[gIndex] - pred2D;
3004
3005	itvNum = fabs(diff)/realPrecision + 1;
3006
3007	if (itvNum < exe_params->intvCapacity)
3008	{
3009	if (diff < 0) itvNum = -itvNum;
3010	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3011	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3012	}
3013	else
3014	{
3015	type[lIndex] = 0;
3016	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3017	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3018	memcpy(preDataBytes,vce->curBytes,8);
3019	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3020	P0[index2D] = vce->data;
3021	}
3022	}
3023
3024	/* Process Row-s3+1 --> Row-e3 */
3025	for (i = 1; i < R3; i++)
3026	{
3027	/* Process Row-s3+i data s4 */
3028	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4;
3029	lIndex = lR234+kR34+i*R4;
3030	index2D = i*R4;
3031
3032	pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4];
3033	diff = spaceFillingValue[gIndex] - pred2D;
3034
3035	itvNum = fabs(diff)/realPrecision + 1;
3036
3037	if (itvNum < exe_params->intvCapacity)
3038	{
3039	if (diff < 0) itvNum = -itvNum;
3040	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3041	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3042	}
3043	else
3044	{
3045	type[lIndex] = 0;
3046	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3047	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3048	memcpy(preDataBytes,vce->curBytes,8);
3049	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3050	P0[index2D] = vce->data;
3051	}
3052
3053	/* Process Row-s3+i data s4+1 --> data e4 */
3054	for (j = 1; j < R4; j++)
3055	{
3056	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4+j;
3057	lIndex = lR234+kR34+i*R4+j;
3058	index2D = i*R4+j;
3059
3060	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
3061
3062	pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1];
3063	diff = spaceFillingValue[gIndex] - pred3D;
3064
3065	itvNum = fabs(diff)/realPrecision + 1;
3066
3067	if (itvNum < exe_params->intvCapacity)
3068	{
3069	if (diff < 0) itvNum = -itvNum;
3070	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3071	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3072	}
3073	else
3074	{
3075	type[lIndex] = 0;
3076	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3077	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3078	memcpy(preDataBytes,vce->curBytes,8);
3079	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3080	P0[index2D] = vce->data;
3081	}
3082	}
3083	}
3084
3085	double *Pt;
3086	Pt = P1;
3087	P1 = P0;
3088	P0 = Pt;
3089	}
3090	}
3091
3092	free(P0);
3093	free(P1);
3094	size_t exactDataNum = exactLeadNumArray->size;
3095
3096	TightDataPointStorageD* tdps;
3097
3098	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
3099	type, exactMidByteArray->array, exactMidByteArray->size,
3100	exactLeadNumArray->array,
3101	resiBitArray->array, resiBitArray->size,
3102	resiBitsLength,
3103	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
3104
3105	//free memory
3106	free_DIA(exactLeadNumArray);
3107	free_DIA(resiBitArray);
3108	free(type);
3109	free(vce);
3110	free(lce);
3111	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
3112
3113	return tdps;
3114	}
3115
3116	/**
3117	*
3118	* This is a fast implementation for optimize_intervals_double_3D()
3119	* */
3120	unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision){
3121	size_t i;
3122	size_t radiusIndex;
3123	size_t r23=r2*r3;
3124	double pred_value = 0, pred_err;
3125	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3126	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3127	size_t totalSampleSize = 0;
3128
3129	size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
3130	size_t offset_count_2;
3131	double * data_pos = oriData + r23 + r3 + offset_count;
3132	size_t n1_count = 1, n2_count = 1; // count i,j sum
3133	size_t len = r1 * r2 * r3;
3134	while(data_pos - oriData < len){
3135	totalSampleSize++;
3136	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
3137	pred_err = fabs(pred_value - *data_pos);
3138	radiusIndex = (pred_err/realPrecision+1)/2;
3139	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3140	{
3141	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3142	}
3143	intervals[radiusIndex]++;
3144	offset_count += confparams_cpr->sampleDistance;
3145	if(offset_count >= r3){
3146	n2_count ++;
3147	if(n2_count == r2){
3148	n1_count ++;
3149	n2_count = 1;
3150	data_pos += r3;
3151	}
3152	offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance;
3153	data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3154	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3155	if(offset_count == 0) offset_count ++;
3156	}
3157	else data_pos += confparams_cpr->sampleDistance;
3158	}
3159	//compute the appropriate number
3160	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3161	size_t sum = 0;
3162	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3163	{
3164	sum += intervals[i];
3165	if(sum>targetCount)
3166	break;
3167	}
3168	if(i>=confparams_cpr->maxRangeRadius)
3169	i = confparams_cpr->maxRangeRadius-1;
3170	unsigned int accIntervals = 2*(i+1);
3171	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3172
3173	if(powerOf2<32)
3174	powerOf2 = 32;
3175	free(intervals);
3176	return powerOf2;
3177	}
3178
3179	unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision)
3180	{
3181	size_t i;
3182	size_t radiusIndex;
3183	double pred_value = 0, pred_err;
3184	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3185	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3186	size_t totalSampleSize = 0;
3187
3188	size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
3189	size_t offset_count_2;
3190	double * data_pos = oriData + r2 + offset_count;
3191	size_t n1_count = 1; // count i sum
3192	size_t len = r1 * r2;
3193	while(data_pos - oriData < len){
3194	totalSampleSize++;
3195	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
3196	pred_err = fabs(pred_value - *data_pos);
3197	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3198	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3199	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3200	intervals[radiusIndex]++;
3201
3202	offset_count += confparams_cpr->sampleDistance;
3203	if(offset_count >= r2){
3204	n1_count ++;
3205	offset_count_2 = n1_count % confparams_cpr->sampleDistance;
3206	data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3207	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3208	if(offset_count == 0) offset_count ++;
3209	}
3210	else data_pos += confparams_cpr->sampleDistance;
3211	}
3212
3213	//compute the appropriate number
3214	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3215	size_t sum = 0;
3216	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3217	{
3218	sum += intervals[i];
3219	if(sum>targetCount)
3220	break;
3221	}
3222	if(i>=confparams_cpr->maxRangeRadius)
3223	i = confparams_cpr->maxRangeRadius-1;
3224	unsigned int accIntervals = 2*(i+1);
3225	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3226
3227	if(powerOf2<32)
3228	powerOf2 = 32;
3229
3230	free(intervals);
3231	return powerOf2;
3232	}
3233
3234	unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision)
3235	{
3236	size_t i = 0, radiusIndex;
3237	double pred_value = 0, pred_err;
3238	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3239	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3240	size_t totalSampleSize = 0;
3241
3242	double * data_pos = oriData + 2;
3243	while(data_pos - oriData < dataLength){
3244	totalSampleSize++;
3245	pred_value = data_pos[-1];
3246	pred_err = fabs(pred_value - *data_pos);
3247	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3248	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3249	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3250	intervals[radiusIndex]++;
3251
3252	data_pos += confparams_cpr->sampleDistance;
3253	}
3254	//compute the appropriate number
3255	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3256	size_t sum = 0;
3257	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3258	{
3259	sum += intervals[i];
3260	if(sum>targetCount)
3261	break;
3262	}
3263	if(i>=confparams_cpr->maxRangeRadius)
3264	i = confparams_cpr->maxRangeRadius-1;
3265
3266	unsigned int accIntervals = 2*(i+1);
3267	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3268
3269	if(powerOf2<32)
3270	powerOf2 = 32;
3271
3272	free(intervals);
3273	return powerOf2;
3274	}
3275
3276	/The above code is for sz 1.4.13; the following code is for sz 2.0/
3277	unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
3278	{
3279	double mean = 0.0;
3280	size_t len = r1 * r2;
3281	size_t mean_distance = (int) (sqrt(len));
3282
3283	double * data_pos = oriData;
3284	size_t mean_count = 0;
3285	while(data_pos - oriData < len){
3286	mean += *data_pos;
3287	mean_count ++;
3288	data_pos += mean_distance;
3289	}
3290	if(mean_count > 0) mean /= mean_count;
3291	size_t range = 8192;
3292	size_t radius = 4096;
3293	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
3294	memset(freq_intervals, 0, range*sizeof(size_t));
3295
3296	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
3297	int sampleDistance = confparams_cpr->sampleDistance;
3298	double predThreshold = confparams_cpr->predThreshold;
3299
3300	size_t i;
3301	size_t radiusIndex;
3302	double pred_value = 0, pred_err;
3303	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
3304	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
3305
3306	double mean_diff;
3307	ptrdiff_t freq_index;
3308	size_t freq_count = 0;
3309	size_t n1_count = 1;
3310	size_t offset_count = sampleDistance - 1;
3311	size_t offset_count_2 = 0;
3312	size_t sample_count = 0;
3313	data_pos = oriData + r2 + offset_count;
3314	while(data_pos - oriData < len){
3315	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
3316	pred_err = fabs(pred_value - *data_pos);
3317	if(pred_err < realPrecision) freq_count ++;
3318	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3319	if(radiusIndex>=maxRangeRadius)
3320	radiusIndex = maxRangeRadius - 1;
3321	intervals[radiusIndex]++;
3322
3323	mean_diff = *data_pos - mean;
3324	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
3325	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
3326	if(freq_index <= 0){
3327	freq_intervals[0] ++;
3328	}
3329	else if(freq_index >= range){
3330	freq_intervals[range - 1] ++;
3331	}
3332	else{
3333	freq_intervals[freq_index] ++;
3334	}
3335	offset_count += sampleDistance;
3336	if(offset_count >= r2){
3337	n1_count ++;
3338	offset_count_2 = n1_count % sampleDistance;
3339	data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
3340	offset_count = (sampleDistance - offset_count_2);
3341	if(offset_count == 0) offset_count ++;
3342	}
3343	else data_pos += sampleDistance;
3344	sample_count ++;
3345	}
3346	max_freq = freq_count 1.0/ sample_count;
3347
3348	//compute the appropriate number
3349	size_t targetCount = sample_count*predThreshold;
3350	size_t sum = 0;
3351	for(i=0;i<maxRangeRadius;i++)
3352	{
3353	sum += intervals[i];
3354	if(sum>targetCount)
3355	break;
3356	}
3357	if(i>=maxRangeRadius)
3358	i = maxRangeRadius-1;
3359	unsigned int accIntervals = 2*(i+1);
3360	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3361
3362	if(powerOf2<32)
3363	powerOf2 = 32;
3364
3365	// collect frequency
3366	size_t max_sum = 0;
3367	size_t max_index = 0;
3368	size_t tmp_sum;
3369	size_t * freq_pos = freq_intervals + 1;
3370	for(size_t i=1; i<range-2; i++){
3371	tmp_sum = freq_pos[0] + freq_pos[1];
3372	if(tmp_sum > max_sum){
3373	max_sum = tmp_sum;
3374	max_index = i;
3375	}
3376	freq_pos ++;
3377	}
3378	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
3379	mean_freq = max_sum 1.0 / sample_count;
3380
3381	free(freq_intervals);
3382	free(intervals);
3383	return powerOf2;
3384	}
3385
3386	unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
3387	{
3388	double mean = 0.0;
3389	size_t len = r1 * r2 * r3;
3390	size_t mean_distance = (int) (sqrt(len));
3391	double * data_pos = oriData;
3392	size_t offset_count = 0;
3393	size_t offset_count_2 = 0;
3394	size_t mean_count = 0;
3395	while(data_pos - oriData < len){
3396	mean += *data_pos;
3397	mean_count ++;
3398	data_pos += mean_distance;
3399	offset_count += mean_distance;
3400	offset_count_2 += mean_distance;
3401	if(offset_count >= r3){
3402	offset_count = 0;
3403	data_pos -= 1;
3404	}
3405	if(offset_count_2 >= r2 * r3){
3406	offset_count_2 = 0;
3407	data_pos -= 1;
3408	}
3409	}
3410	if(mean_count > 0) mean /= mean_count;
3411	size_t range = 8192;
3412	size_t radius = 4096;
3413	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
3414	memset(freq_intervals, 0, range*sizeof(size_t));
3415
3416	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
3417	int sampleDistance = confparams_cpr->sampleDistance;
3418	double predThreshold = confparams_cpr->predThreshold;
3419
3420	size_t i;
3421	size_t radiusIndex;
3422	size_t r23=r2*r3;
3423	double pred_value = 0, pred_err;
3424	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
3425	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
3426
3427	double mean_diff;
3428	ptrdiff_t freq_index;
3429	size_t freq_count = 0;
3430	size_t sample_count = 0;
3431
3432	offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
3433	data_pos = oriData + r23 + r3 + offset_count;
3434	size_t n1_count = 1, n2_count = 1; // count i,j sum
3435
3436	while(data_pos - oriData < len){
3437
3438	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
3439	pred_err = fabs(pred_value - *data_pos);
3440	if(pred_err < realPrecision) freq_count ++;
3441	radiusIndex = (pred_err/realPrecision+1)/2;
3442	if(radiusIndex>=maxRangeRadius)
3443	{
3444	radiusIndex = maxRangeRadius - 1;
3445	}
3446	intervals[radiusIndex]++;
3447
3448	mean_diff = *data_pos - mean;
3449	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
3450	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
3451	if(freq_index <= 0){
3452	freq_intervals[0] ++;
3453	}
3454	else if(freq_index >= range){
3455	freq_intervals[range - 1] ++;
3456	}
3457	else{
3458	freq_intervals[freq_index] ++;
3459	}
3460	offset_count += sampleDistance;
3461	if(offset_count >= r3){
3462	n2_count ++;
3463	if(n2_count == r2){
3464	n1_count ++;
3465	n2_count = 1;
3466	data_pos += r3;
3467	}
3468	offset_count_2 = (n1_count + n2_count) % sampleDistance;
3469	data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
3470	offset_count = (sampleDistance - offset_count_2);
3471	if(offset_count == 0) offset_count ++;
3472	}
3473	else data_pos += sampleDistance;
3474	sample_count ++;
3475	}
3476	max_freq = freq_count 1.0/ sample_count;
3477
3478	//compute the appropriate number
3479	size_t targetCount = sample_count*predThreshold;
3480	size_t sum = 0;
3481	for(i=0;i<maxRangeRadius;i++)
3482	{
3483	sum += intervals[i];
3484	if(sum>targetCount)
3485	break;
3486	}
3487	if(i>=maxRangeRadius)
3488	i = maxRangeRadius-1;
3489	unsigned int accIntervals = 2*(i+1);
3490	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3491
3492	if(powerOf2<32)
3493	powerOf2 = 32;
3494	// collect frequency
3495	size_t max_sum = 0;
3496	size_t max_index = 0;
3497	size_t tmp_sum;
3498	size_t * freq_pos = freq_intervals + 1;
3499	for(size_t i=1; i<range-2; i++){
3500	tmp_sum = freq_pos[0] + freq_pos[1];
3501	if(tmp_sum > max_sum){
3502	max_sum = tmp_sum;
3503	max_index = i;
3504	}
3505	freq_pos ++;
3506	}
3507	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
3508	mean_freq = max_sum 1.0 / sample_count;
3509
3510	free(freq_intervals);
3511	free(intervals);
3512	return powerOf2;
3513	}
3514
3515	#define MIN(a, b) a<b? a : b
3516	unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, double realPrecision, size_t comp_size){
3517
3518	unsigned int quantization_intervals;
3519	double sz_sample_correct_freq = -1;//0.5; //-1
3520	double dense_pos;
3521	double mean_flush_freq;
3522	unsigned char use_mean = 0;
3523
3524	if(exe_params->optQuantMode==1)
3525	{
3526	quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
3527	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
3528	updateQuantizationInfo(quantization_intervals);
3529	}
3530	else{
3531	quantization_intervals = exe_params->intvCapacity;
3532	}
3533
3534	// calculate block dims
3535	size_t num_x, num_y;
3536	size_t block_size = 16;
3537
3538	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
3539	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
3540
3541	size_t split_index_x, split_index_y;
3542	size_t early_blockcount_x, early_blockcount_y;
3543	size_t late_blockcount_x, late_blockcount_y;
3544	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
3545	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
3546
3547	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
3548	size_t num_blocks = num_x * num_y;
3549	size_t num_elements = r1 * r2;
3550
3551	size_t dim0_offset = r2;
3552
3553	int * result_type = (int ) malloc(num_elements sizeof(int));
3554	size_t unpred_data_max_size = max_num_block_elements;
3555	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
3556	size_t total_unpred = 0;
3557	size_t unpredictable_count;
3558	double * data_pos = oriData;
3559	int * type = result_type;
3560	size_t offset_x, offset_y;
3561	size_t current_blockcount_x, current_blockcount_y;
3562
3563	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
3564	double * reg_params_pos = reg_params;
3565	// move regression part out
3566	size_t params_offset_b = num_blocks;
3567	size_t params_offset_c = 2*num_blocks;
3568	for(size_t i=0; i<num_x; i++){
3569	for(size_t j=0; j<num_y; j++){
3570	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
3571	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
3572	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
3573	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
3574
3575	data_pos = oriData + offset_x * dim0_offset + offset_y;
3576
3577	{
3578	double * cur_data_pos = data_pos;
3579	double fx = 0.0;
3580	double fy = 0.0;
3581	double f = 0;
3582	double sum_x;
3583	double curData;
3584	for(size_t i=0; i<current_blockcount_x; i++){
3585	sum_x = 0;
3586	for(size_t j=0; j<current_blockcount_y; j++){
3587	curData = *cur_data_pos;
3588	sum_x += curData;
3589	fy += curData * j;
3590	cur_data_pos ++;
3591	}
3592	fx += sum_x * i;
3593	f += sum_x;
3594	cur_data_pos += dim0_offset - current_blockcount_y;
3595	}
3596	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
3597	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
3598	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
3599	reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
3600	}
3601
3602	reg_params_pos ++;
3603	}
3604	}
3605
3606	//Compress coefficient arrays
3607	double precision_a, precision_b, precision_c;
3608	double rel_param_err = 0.15/3;
3609	precision_a = rel_param_err * realPrecision / late_blockcount_x;
3610	precision_b = rel_param_err * realPrecision / late_blockcount_y;
3611	precision_c = rel_param_err * realPrecision;
3612
3613	double mean = 0;
3614	use_mean = 0;
3615	if(use_mean){
3616	// compute mean
3617	double sum = 0.0;
3618	size_t mean_count = 0;
3619	for(size_t i=0; i<num_elements; i++){
3620	if(fabs(oriData[i] - dense_pos) < realPrecision){
3621	sum += oriData[i];
3622	mean_count ++;
3623	}
3624	}
3625	if(mean_count > 0) mean = sum / mean_count;
3626	}
3627
3628
3629	double tmp_realPrecision = realPrecision;
3630
3631	// use two prediction buffers for higher performance
3632	double * unpredictable_data = result_unpredictable_data;
3633	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
3634	memset(indicator, 0, num_blocks * sizeof(unsigned char));
3635	size_t reg_count = 0;
3636	size_t strip_dim_0 = early_blockcount_x + 1;
3637	size_t strip_dim_1 = r2 + 1;
3638	size_t strip_dim0_offset = strip_dim_1;
3639	unsigned char * indicator_pos = indicator;
3640	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
3641	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
3642	memset(prediction_buffer_1, 0, prediction_buffer_size);
3643	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
3644	memset(prediction_buffer_2, 0, prediction_buffer_size);
3645	double * cur_pb_buf = prediction_buffer_1;
3646	double * next_pb_buf = prediction_buffer_2;
3647	double * cur_pb_buf_pos;
3648	double * next_pb_buf_pos;
3649	int intvCapacity = exe_params->intvCapacity;
3650	int intvRadius = exe_params->intvRadius;
3651	int use_reg = 0;
3652
3653	reg_params_pos = reg_params;
3654	// compress the regression coefficients on the fly
3655	double last_coeffcients[3] = {0.0};
3656	int coeff_intvCapacity_sz = 65536;
3657	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
3658	int * coeff_type[3];
3659	int * coeff_result_type = (int ) malloc(num_blocks3*sizeof(int));
3660	double * coeff_unpred_data[3];
3661	double * coeff_unpredictable_data = (double ) malloc(num_blocks3*sizeof(double));
3662	double precision[3];
3663	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
3664	for(int i=0; i<3; i++){
3665	coeff_type[i] = coeff_result_type + i * num_blocks;
3666	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
3667	}
3668	int coeff_index = 0;
3669	unsigned int coeff_unpredictable_count[3] = {0};
3670	if(use_mean){
3671	type = result_type;
3672	int intvCapacity_sz = intvCapacity - 2;
3673	for(size_t i=0; i<num_x; i++){
3674	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
3675	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
3676	data_pos = oriData + offset_x * dim0_offset;
3677
3678	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
3679	next_pb_buf_pos = next_pb_buf + 1;
3680	double * pb_pos = cur_pb_buf_pos;
3681	double * next_pb_pos = next_pb_buf_pos;
3682
3683	for(size_t j=0; j<num_y; j++){
3684	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
3685	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
3686
3687	/sampling: decide which predictor to use (regression or lorenzo)/
3688	{
3689	double * cur_data_pos;
3690	double curData;
3691	double pred_reg, pred_sz;
3692	double err_sz = 0.0, err_reg = 0.0;
3693	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
3694	// [1, 9] [3, 7] [7, 3] [9, 1]
3695	int count = 0;
3696	for(int i=1; i<current_blockcount_x; i+=2){
3697	cur_data_pos = data_pos + i * dim0_offset + i;
3698	curData = *cur_data_pos;
3699	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
3700	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
3701
3702	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
3703
3704	err_reg += fabs(pred_reg - curData);
3705
3706	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
3707	curData = *cur_data_pos;
3708	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
3709	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
3710	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
3711
3712	err_reg += fabs(pred_reg - curData);
3713
3714	count += 2;
3715	}
3716
3717	use_reg = (err_reg < err_sz);
3718	}
3719	if(use_reg)
3720	{
3721	{
3722	/predict coefficients in current block via previous reg_block/
3723	double cur_coeff;
3724	double diff, itvNum;
3725	for(int e=0; e<3; e++){
3726	cur_coeff = reg_params_pos[e*num_blocks];
3727	diff = cur_coeff - last_coeffcients[e];
3728	itvNum = fabs(diff)/precision[e] + 1;
3729	if (itvNum < coeff_intvCapacity_sz){
3730	if (diff < 0) itvNum = -itvNum;
3731	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
3732	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
3733	//ganrantee comporession error against the case of machine-epsilon
3734	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
3735	coeff_type[e][coeff_index] = 0;
3736	last_coeffcients[e] = cur_coeff;
3737	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
3738	}
3739	}
3740	else{
3741	coeff_type[e][coeff_index] = 0;
3742	last_coeffcients[e] = cur_coeff;
3743	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
3744	}
3745	}
3746	coeff_index ++;
3747	}
3748	double curData;
3749	double pred;
3750	double itvNum;
3751	double diff;
3752	size_t index = 0;
3753	size_t block_unpredictable_count = 0;
3754	double * cur_data_pos = data_pos;
3755	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
3756	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
3757	curData = *cur_data_pos;
3758	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3759	diff = curData - pred;
3760	itvNum = fabs(diff)/realPrecision + 1;
3761	if (itvNum < intvCapacity){
3762	if (diff < 0) itvNum = -itvNum;
3763	type[index] = (int) (itvNum/2) + intvRadius;
3764	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3765	//ganrantee comporession error against the case of machine-epsilon
3766	if(fabs(curData - pred)>realPrecision){
3767	type[index] = 0;
3768	pred = curData;
3769	unpredictable_data[block_unpredictable_count ++] = curData;
3770	}
3771	}
3772	else{
3773	type[index] = 0;
3774	pred = curData;
3775	unpredictable_data[block_unpredictable_count ++] = curData;
3776	}
3777	index ++;
3778	cur_data_pos ++;
3779	}
3780	/dealing with the last jj (boundary)/
3781	{
3782	size_t jj = current_blockcount_y - 1;
3783	curData = *cur_data_pos;
3784	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3785	diff = curData - pred;
3786	itvNum = fabs(diff)/realPrecision + 1;
3787	if (itvNum < intvCapacity){
3788	if (diff < 0) itvNum = -itvNum;
3789	type[index] = (int) (itvNum/2) + intvRadius;
3790	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3791	//ganrantee comporession error against the case of machine-epsilon
3792	if(fabs(curData - pred)>realPrecision){
3793	type[index] = 0;
3794	pred = curData;
3795	unpredictable_data[block_unpredictable_count ++] = curData;
3796	}
3797	}
3798	else{
3799	type[index] = 0;
3800	pred = curData;
3801	unpredictable_data[block_unpredictable_count ++] = curData;
3802	}
3803
3804	// assign value to block surfaces
3805	pb_pos[ii * strip_dim0_offset + jj] = pred;
3806	index ++;
3807	cur_data_pos ++;
3808	}
3809	cur_data_pos += dim0_offset - current_blockcount_y;
3810	}
3811	/dealing with the last ii (boundary)/
3812	{
3813	size_t ii = current_blockcount_x - 1;
3814	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
3815	curData = *cur_data_pos;
3816	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3817	diff = curData - pred;
3818	itvNum = fabs(diff)/realPrecision + 1;
3819	if (itvNum < intvCapacity){
3820	if (diff < 0) itvNum = -itvNum;
3821	type[index] = (int) (itvNum/2) + intvRadius;
3822	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3823	//ganrantee comporession error against the case of machine-epsilon
3824	if(fabs(curData - pred)>realPrecision){
3825	type[index] = 0;
3826	pred = curData;
3827	unpredictable_data[block_unpredictable_count ++] = curData;
3828	}
3829	}
3830	else{
3831	type[index] = 0;
3832	pred = curData;
3833	unpredictable_data[block_unpredictable_count ++] = curData;
3834	}
3835	// assign value to next prediction buffer
3836	next_pb_pos[jj] = pred;
3837	index ++;
3838	cur_data_pos ++;
3839	}
3840	/dealing with the last jj (boundary)/
3841	{
3842	size_t jj = current_blockcount_y - 1;
3843	curData = *cur_data_pos;
3844	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
3845	diff = curData - pred;
3846	itvNum = fabs(diff)/realPrecision + 1;
3847	if (itvNum < intvCapacity){
3848	if (diff < 0) itvNum = -itvNum;
3849	type[index] = (int) (itvNum/2) + intvRadius;
3850	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
3851	//ganrantee comporession error against the case of machine-epsilon
3852	if(fabs(curData - pred)>realPrecision){
3853	type[index] = 0;
3854	pred = curData;
3855	unpredictable_data[block_unpredictable_count ++] = curData;
3856	}
3857	}
3858	else{
3859	type[index] = 0;
3860	pred = curData;
3861	unpredictable_data[block_unpredictable_count ++] = curData;
3862	}
3863
3864	// assign value to block surfaces
3865	pb_pos[ii * strip_dim0_offset + jj] = pred;
3866	// assign value to next prediction buffer
3867	next_pb_pos[jj] = pred;
3868
3869	index ++;
3870	cur_data_pos ++;
3871	}
3872	} // end ii == -1
3873	unpredictable_count = block_unpredictable_count;
3874	total_unpred += unpredictable_count;
3875	unpredictable_data += unpredictable_count;
3876	reg_count ++;
3877	}// end use_reg
3878	else{
3879	// use SZ
3880	// SZ predication
3881	unpredictable_count = 0;
3882	double * cur_pb_pos = pb_pos;
3883	double * cur_data_pos = data_pos;
3884	double curData;
3885	double pred2D;
3886	double itvNum, diff;
3887	size_t index = 0;
3888	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
3889	for(size_t jj=0; jj<current_blockcount_y; jj++){
3890	curData = *cur_data_pos;
3891	if(fabs(curData - mean) <= realPrecision){
3892	// adjust type[index] to intvRadius for coherence with freq in reg
3893	type[index] = intvRadius;
3894	*cur_pb_pos = mean;
3895	}
3896	else
3897	{
3898	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
3899	diff = curData - pred2D;
3900	itvNum = fabs(diff)/realPrecision + 1;
3901	if (itvNum < intvCapacity_sz){
3902	if (diff < 0) itvNum = -itvNum;
3903	type[index] = (int) (itvNum/2) + intvRadius;
3904	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
3905	if(type[index] <= intvRadius) type[index] -= 1;
3906	//ganrantee comporession error against the case of machine-epsilon
3907	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
3908	type[index] = 0;
3909	*cur_pb_pos = curData;
3910	unpredictable_data[unpredictable_count ++] = curData;
3911	}
3912	}
3913	else{
3914	type[index] = 0;
3915	*cur_pb_pos = curData;
3916	unpredictable_data[unpredictable_count ++] = curData;
3917	}
3918	}
3919	index ++;
3920	cur_pb_pos ++;
3921	cur_data_pos ++;
3922	}
3923	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
3924	cur_data_pos += dim0_offset - current_blockcount_y;
3925	}
3926	/dealing with the last ii (boundary)/
3927	{
3928	// ii == current_blockcount_x - 1
3929	for(size_t jj=0; jj<current_blockcount_y; jj++){
3930	curData = *cur_data_pos;
3931	if(fabs(curData - mean) <= realPrecision){
3932	// adjust type[index] to intvRadius for coherence with freq in reg
3933	type[index] = intvRadius;
3934	*cur_pb_pos = mean;
3935	}
3936	else
3937	{
3938	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
3939	diff = curData - pred2D;
3940	itvNum = fabs(diff)/realPrecision + 1;
3941	if (itvNum < intvCapacity_sz){
3942	if (diff < 0) itvNum = -itvNum;
3943	type[index] = (int) (itvNum/2) + intvRadius;
3944	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
3945	if(type[index] <= intvRadius) type[index] -= 1;
3946	//ganrantee comporession error against the case of machine-epsilon
3947	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
3948	type[index] = 0;
3949	*cur_pb_pos = curData;
3950	unpredictable_data[unpredictable_count ++] = curData;
3951	}
3952	}
3953	else{
3954	type[index] = 0;
3955	*cur_pb_pos = curData;
3956	unpredictable_data[unpredictable_count ++] = curData;
3957	}
3958	}
3959	next_pb_pos[jj] = *cur_pb_pos;
3960	index ++;
3961	cur_pb_pos ++;
3962	cur_data_pos ++;
3963	}
3964	}
3965	total_unpred += unpredictable_count;
3966	unpredictable_data += unpredictable_count;
3967	// change indicator
3968	indicator_pos[j] = 1;
3969	}// end SZ
3970	reg_params_pos ++;
3971	data_pos += current_blockcount_y;
3972	pb_pos += current_blockcount_y;
3973	next_pb_pos += current_blockcount_y;
3974	type += current_blockcount_x * current_blockcount_y;
3975	}// end j
3976	indicator_pos += num_y;
3977	double * tmp;
3978	tmp = cur_pb_buf;
3979	cur_pb_buf = next_pb_buf;
3980	next_pb_buf = tmp;
3981	}// end i
3982	}// end use mean
3983	else{
3984	type = result_type;
3985	int intvCapacity_sz = intvCapacity - 2;
3986	for(size_t i=0; i<num_x; i++){
3987	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
3988	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
3989	data_pos = oriData + offset_x * dim0_offset;
3990
3991	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
3992	next_pb_buf_pos = next_pb_buf + 1;
3993	double * pb_pos = cur_pb_buf_pos;
3994	double * next_pb_pos = next_pb_buf_pos;
3995
3996	for(size_t j=0; j<num_y; j++){
3997	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
3998	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
3999	/sampling/
4000	{
4001	// sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
4002	double * cur_data_pos;
4003	double curData;
4004	double pred_reg, pred_sz;
4005	double err_sz = 0.0, err_reg = 0.0;
4006	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
4007	// [1, 9] [3, 7] [7, 3] [9, 1]
4008	int count = 0;
4009	for(int i=1; i<current_blockcount_x; i+=2){
4010	cur_data_pos = data_pos + i * dim0_offset + i;
4011	curData = *cur_data_pos;
4012	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4013	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
4014	err_sz += fabs(pred_sz - curData);
4015	err_reg += fabs(pred_reg - curData);
4016
4017	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
4018	curData = *cur_data_pos;
4019	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4020	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
4021	err_sz += fabs(pred_sz - curData);
4022	err_reg += fabs(pred_reg - curData);
4023
4024	count += 2;
4025	}
4026	err_sz += realPrecision * count * 0.81;
4027	use_reg = (err_reg < err_sz);
4028
4029	}
4030	if(use_reg)
4031	{
4032	{
4033	/predict coefficients in current block via previous reg_block/
4034	double cur_coeff;
4035	double diff, itvNum;
4036	for(int e=0; e<3; e++){
4037	cur_coeff = reg_params_pos[e*num_blocks];
4038	diff = cur_coeff - last_coeffcients[e];
4039	itvNum = fabs(diff)/precision[e] + 1;
4040	if (itvNum < coeff_intvCapacity_sz){
4041	if (diff < 0) itvNum = -itvNum;
4042	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4043	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4044	//ganrantee comporession error against the case of machine-epsilon
4045	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4046	coeff_type[e][coeff_index] = 0;
4047	last_coeffcients[e] = cur_coeff;
4048	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4049	}
4050	}
4051	else{
4052	coeff_type[e][coeff_index] = 0;
4053	last_coeffcients[e] = cur_coeff;
4054	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4055	}
4056	}
4057	coeff_index ++;
4058	}
4059	double curData;
4060	double pred;
4061	double itvNum;
4062	double diff;
4063	size_t index = 0;
4064	size_t block_unpredictable_count = 0;
4065	double * cur_data_pos = data_pos;
4066	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4067	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4068	curData = *cur_data_pos;
4069	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4070	diff = curData - pred;
4071	itvNum = fabs(diff)/realPrecision + 1;
4072	if (itvNum < intvCapacity){
4073	if (diff < 0) itvNum = -itvNum;
4074	type[index] = (int) (itvNum/2) + intvRadius;
4075	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4076	//ganrantee comporession error against the case of machine-epsilon
4077	if(fabs(curData - pred)>realPrecision){
4078	type[index] = 0;
4079	pred = curData;
4080	unpredictable_data[block_unpredictable_count ++] = curData;
4081	}
4082	}
4083	else{
4084	type[index] = 0;
4085	pred = curData;
4086	unpredictable_data[block_unpredictable_count ++] = curData;
4087	}
4088	index ++;
4089	cur_data_pos ++;
4090	}
4091	/dealing with the last jj (boundary)/
4092	{
4093	// jj == current_blockcount_y - 1
4094	size_t jj = current_blockcount_y - 1;
4095	curData = *cur_data_pos;
4096	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4097	diff = curData - pred;
4098	itvNum = fabs(diff)/realPrecision + 1;
4099	if (itvNum < intvCapacity){
4100	if (diff < 0) itvNum = -itvNum;
4101	type[index] = (int) (itvNum/2) + intvRadius;
4102	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4103	//ganrantee comporession error against the case of machine-epsilon
4104	if(fabs(curData - pred)>realPrecision){
4105	type[index] = 0;
4106	pred = curData;
4107	unpredictable_data[block_unpredictable_count ++] = curData;
4108	}
4109	}
4110	else{
4111	type[index] = 0;
4112	pred = curData;
4113	unpredictable_data[block_unpredictable_count ++] = curData;
4114	}
4115
4116	// assign value to block surfaces
4117	pb_pos[ii * strip_dim0_offset + jj] = pred;
4118	index ++;
4119	cur_data_pos ++;
4120	}
4121	cur_data_pos += dim0_offset - current_blockcount_y;
4122	}
4123	/dealing with the last ii (boundary)/
4124	{
4125	size_t ii = current_blockcount_x - 1;
4126	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4127	curData = *cur_data_pos;
4128	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4129	diff = curData - pred;
4130	itvNum = fabs(diff)/realPrecision + 1;
4131	if (itvNum < intvCapacity){
4132	if (diff < 0) itvNum = -itvNum;
4133	type[index] = (int) (itvNum/2) + intvRadius;
4134	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4135	//ganrantee comporession error against the case of machine-epsilon
4136	if(fabs(curData - pred)>realPrecision){
4137	type[index] = 0;
4138	pred = curData;
4139	unpredictable_data[block_unpredictable_count ++] = curData;
4140	}
4141	}
4142	else{
4143	type[index] = 0;
4144	pred = curData;
4145	unpredictable_data[block_unpredictable_count ++] = curData;
4146	}
4147	// assign value to next prediction buffer
4148	next_pb_pos[jj] = pred;
4149	index ++;
4150	cur_data_pos ++;
4151	}
4152	/dealing with the last jj (boundary)/
4153	{
4154	// jj == current_blockcount_y - 1
4155	size_t jj = current_blockcount_y - 1;
4156	curData = *cur_data_pos;
4157	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4158	diff = curData - pred;
4159	itvNum = fabs(diff)/realPrecision + 1;
4160	if (itvNum < intvCapacity){
4161	if (diff < 0) itvNum = -itvNum;
4162	type[index] = (int) (itvNum/2) + intvRadius;
4163	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4164	//ganrantee comporession error against the case of machine-epsilon
4165	if(fabs(curData - pred)>realPrecision){
4166	type[index] = 0;
4167	pred = curData;
4168	unpredictable_data[block_unpredictable_count ++] = curData;
4169	}
4170	}
4171	else{
4172	type[index] = 0;
4173	pred = curData;
4174	unpredictable_data[block_unpredictable_count ++] = curData;
4175	}
4176
4177	// assign value to block surfaces
4178	pb_pos[ii * strip_dim0_offset + jj] = pred;
4179	// assign value to next prediction buffer
4180	next_pb_pos[jj] = pred;
4181
4182	index ++;
4183	cur_data_pos ++;
4184	}
4185	} // end ii == -1
4186	unpredictable_count = block_unpredictable_count;
4187	total_unpred += unpredictable_count;
4188	unpredictable_data += unpredictable_count;
4189	reg_count ++;
4190	}// end use_reg
4191	else{
4192	// use SZ
4193	// SZ predication
4194	unpredictable_count = 0;
4195	double * cur_pb_pos = pb_pos;
4196	double * cur_data_pos = data_pos;
4197	double curData;
4198	double pred2D;
4199	double itvNum, diff;
4200	size_t index = 0;
4201	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4202	for(size_t jj=0; jj<current_blockcount_y; jj++){
4203	curData = *cur_data_pos;
4204
4205	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4206	diff = curData - pred2D;
4207	itvNum = fabs(diff)/realPrecision + 1;
4208	if (itvNum < intvCapacity_sz){
4209	if (diff < 0) itvNum = -itvNum;
4210	type[index] = (int) (itvNum/2) + intvRadius;
4211	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4212	//ganrantee comporession error against the case of machine-epsilon
4213	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4214	type[index] = 0;
4215	*cur_pb_pos = curData;
4216	unpredictable_data[unpredictable_count ++] = curData;
4217	}
4218	}
4219	else{
4220	type[index] = 0;
4221	*cur_pb_pos = curData;
4222	unpredictable_data[unpredictable_count ++] = curData;
4223	}
4224
4225	index ++;
4226	cur_pb_pos ++;
4227	cur_data_pos ++;
4228	}
4229	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
4230	cur_data_pos += dim0_offset - current_blockcount_y;
4231	}
4232	/dealing with the last ii (boundary)/
4233	{
4234	// ii == current_blockcount_x - 1
4235	for(size_t jj=0; jj<current_blockcount_y; jj++){
4236	curData = *cur_data_pos;
4237
4238	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4239	diff = curData - pred2D;
4240	itvNum = fabs(diff)/realPrecision + 1;
4241	if (itvNum < intvCapacity_sz){
4242	if (diff < 0) itvNum = -itvNum;
4243	type[index] = (int) (itvNum/2) + intvRadius;
4244	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4245	//ganrantee comporession error against the case of machine-epsilon
4246	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4247	type[index] = 0;
4248	*cur_pb_pos = curData;
4249	unpredictable_data[unpredictable_count ++] = curData;
4250	}
4251	}
4252	else{
4253	type[index] = 0;
4254	*cur_pb_pos = curData;
4255	unpredictable_data[unpredictable_count ++] = curData;
4256	}
4257	next_pb_pos[jj] = *cur_pb_pos;
4258	index ++;
4259	cur_pb_pos ++;
4260	cur_data_pos ++;
4261	}
4262	}
4263	total_unpred += unpredictable_count;
4264	unpredictable_data += unpredictable_count;
4265	// change indicator
4266	indicator_pos[j] = 1;
4267	}// end SZ
4268	reg_params_pos ++;
4269	data_pos += current_blockcount_y;
4270	pb_pos += current_blockcount_y;
4271	next_pb_pos += current_blockcount_y;
4272	type += current_blockcount_x * current_blockcount_y;
4273	}// end j
4274	indicator_pos += num_y;
4275	double * tmp;
4276	tmp = cur_pb_buf;
4277	cur_pb_buf = next_pb_buf;
4278	next_pb_buf = tmp;
4279	}// end i
4280	}
4281	free(prediction_buffer_1);
4282	free(prediction_buffer_2);
4283
4284	int stateNum = 2*quantization_intervals;
4285	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4286
4287	size_t nodeCount = 0;
4288	size_t i = 0;
4289	init(huffmanTree, result_type, num_elements);
4290	for (i = 0; i < stateNum; i++)
4291	if (huffmanTree->code[i]) nodeCount++;
4292	nodeCount = nodeCount*2-1;
4293
4294	unsigned char *treeBytes;
4295	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4296
4297	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
4298	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
4299	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
4300	unsigned char * result_pos = result;
4301	initRandomAccessBytes(result_pos);
4302	result_pos += meta_data_offset;
4303
4304	sizeToBytes(result_pos, num_elements);
4305	result_pos += exe_params->SZ_SIZE_TYPE;
4306
4307	intToBytes_bigEndian(result_pos, block_size);
4308	result_pos += sizeof(int);
4309	doubleToBytes(result_pos, realPrecision);
4310	result_pos += sizeof(double);
4311	intToBytes_bigEndian(result_pos, quantization_intervals);
4312	result_pos += sizeof(int);
4313	intToBytes_bigEndian(result_pos, treeByteSize);
4314	result_pos += sizeof(int);
4315	intToBytes_bigEndian(result_pos, nodeCount);
4316	result_pos += sizeof(int);
4317	memcpy(result_pos, treeBytes, treeByteSize);
4318	result_pos += treeByteSize;
4319	free(treeBytes);
4320
4321	memcpy(result_pos, &use_mean, sizeof(unsigned char));
4322	result_pos += sizeof(unsigned char);
4323	memcpy(result_pos, &mean, sizeof(double));
4324	result_pos += sizeof(double);
4325
4326	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
4327	result_pos += indicator_size;
4328
4329	//convert the lead/mid/resi to byte stream
4330	if(reg_count>0){
4331	for(int e=0; e<3; e++){
4332	int stateNum = 2*coeff_intvCapacity_sz;
4333	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4334	size_t nodeCount = 0;
4335	init(huffmanTree, coeff_type[e], reg_count);
4336	size_t i = 0;
4337	for (i = 0; i < huffmanTree->stateNum; i++)
4338	if (huffmanTree->code[i]) nodeCount++;
4339	nodeCount = nodeCount*2-1;
4340	unsigned char *treeBytes;
4341	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4342	doubleToBytes(result_pos, precision[e]);
4343	result_pos += sizeof(double);
4344	intToBytes_bigEndian(result_pos, coeff_intvRadius);
4345	result_pos += sizeof(int);
4346	intToBytes_bigEndian(result_pos, treeByteSize);
4347	result_pos += sizeof(int);
4348	intToBytes_bigEndian(result_pos, nodeCount);
4349	result_pos += sizeof(int);
4350	memcpy(result_pos, treeBytes, treeByteSize);
4351	result_pos += treeByteSize;
4352	free(treeBytes);
4353	size_t typeArray_size = 0;
4354	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
4355	sizeToBytes(result_pos, typeArray_size);
4356	result_pos += sizeof(size_t) + typeArray_size;
4357	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
4358	result_pos += sizeof(int);
4359	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
4360	result_pos += coeff_unpredictable_count[e]*sizeof(double);
4361	SZ_ReleaseHuffman(huffmanTree);
4362	}
4363	}
4364	free(coeff_result_type);
4365	free(coeff_unpredictable_data);
4366
4367	//record the number of unpredictable data and also store them
4368	memcpy(result_pos, &total_unpred, sizeof(size_t));
4369	result_pos += sizeof(size_t);
4370	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
4371	result_pos += total_unpred * sizeof(double);
4372	size_t typeArray_size = 0;
4373	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
4374	result_pos += typeArray_size;
4375
4376	size_t totalEncodeSize = result_pos - result;
4377	free(indicator);
4378	free(result_unpredictable_data);
4379	free(result_type);
4380	free(reg_params);
4381
4382	SZ_ReleaseHuffman(huffmanTree);
4383	*comp_size = totalEncodeSize;
4384
4385	return result;
4386	}
4387
4388	unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
4389
4390	unsigned int quantization_intervals;
4391	double sz_sample_correct_freq = -1;//0.5; //-1
4392	double dense_pos;
4393	double mean_flush_freq;
4394	unsigned char use_mean = 0;
4395
4396	// calculate block dims
4397	size_t num_x, num_y, num_z;
4398	size_t block_size = 6;
4399	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
4400	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
4401	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
4402
4403	size_t split_index_x, split_index_y, split_index_z;
4404	size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
4405	size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
4406	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
4407	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
4408	SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
4409
4410	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
4411	size_t num_blocks = num_x * num_y * num_z;
4412	size_t num_elements = r1 * r2 * r3;
4413
4414	size_t dim0_offset = r2 * r3;
4415	size_t dim1_offset = r3;
4416
4417	int * result_type = (int ) malloc(num_elements sizeof(int));
4418	size_t unpred_data_max_size = max_num_block_elements;
4419	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
4420	size_t total_unpred = 0;
4421	size_t unpredictable_count;
4422	size_t max_unpred_count = 0;
4423	double * data_pos = oriData;
4424	int * type = result_type;
4425	size_t type_offset;
4426	size_t offset_x, offset_y, offset_z;
4427	size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
4428
4429	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
4430	double * reg_params_pos = reg_params;
4431	// move regression part out
4432	size_t params_offset_b = num_blocks;
4433	size_t params_offset_c = 2*num_blocks;
4434	size_t params_offset_d = 3*num_blocks;
4435	for(size_t i=0; i<num_x; i++){
4436	for(size_t j=0; j<num_y; j++){
4437	for(size_t k=0; k<num_z; k++){
4438	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4439	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4440	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
4441	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4442	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4443	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
4444
4445	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
4446	/Calculate regression coefficients/
4447	{
4448	double * cur_data_pos = data_pos;
4449	double fx = 0.0;
4450	double fy = 0.0;
4451	double fz = 0.0;
4452	double f = 0;
4453	double sum_x, sum_y;
4454	double curData;
4455	for(size_t i=0; i<current_blockcount_x; i++){
4456	sum_x = 0;
4457	for(size_t j=0; j<current_blockcount_y; j++){
4458	sum_y = 0;
4459	for(size_t k=0; k<current_blockcount_z; k++){
4460	curData = *cur_data_pos;
4461	// f += curData;
4462	// fx += curData * i;
4463	// fy += curData * j;
4464	// fz += curData * k;
4465	sum_y += curData;
4466	fz += curData * k;
4467	cur_data_pos ++;
4468	}
4469	fy += sum_y * j;
4470	sum_x += sum_y;
4471	cur_data_pos += dim1_offset - current_blockcount_z;
4472	}
4473	fx += sum_x * i;
4474	f += sum_x;
4475	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
4476	}
4477	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
4478	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
4479	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
4480	reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
4481	reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
4482	}
4483	reg_params_pos ++;
4484	}
4485	}
4486	}
4487
4488	//Compress coefficient arrays
4489	double precision_a, precision_b, precision_c, precision_d;
4490	double rel_param_err = 0.025;
4491	precision_a = rel_param_err * realPrecision / late_blockcount_x;
4492	precision_b = rel_param_err * realPrecision / late_blockcount_y;
4493	precision_c = rel_param_err * realPrecision / late_blockcount_z;
4494	precision_d = rel_param_err * realPrecision;
4495
4496	if(exe_params->optQuantMode==1)
4497	{
4498	quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
4499	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
4500	updateQuantizationInfo(quantization_intervals);
4501	}
4502	else{
4503	quantization_intervals = exe_params->intvCapacity;
4504	}
4505
4506	double mean = 0;
4507	if(use_mean){
4508	// compute mean
4509	double sum = 0.0;
4510	size_t mean_count = 0;
4511	for(size_t i=0; i<num_elements; i++){
4512	if(fabs(oriData[i] - dense_pos) < realPrecision){
4513	sum += oriData[i];
4514	mean_count ++;
4515	}
4516	}
4517	if(mean_count > 0) mean = sum / mean_count;
4518	}
4519
4520	double tmp_realPrecision = realPrecision;
4521
4522	// use two prediction buffers for higher performance
4523	double * unpredictable_data = result_unpredictable_data;
4524	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
4525	memset(indicator, 0, num_blocks * sizeof(unsigned char));
4526	size_t reg_count = 0;
4527	size_t strip_dim_0 = early_blockcount_x + 1;
4528	size_t strip_dim_1 = r2 + 1;
4529	size_t strip_dim_2 = r3 + 1;
4530	size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
4531	size_t strip_dim1_offset = strip_dim_2;
4532	unsigned char * indicator_pos = indicator;
4533
4534	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
4535	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
4536	memset(prediction_buffer_1, 0, prediction_buffer_size);
4537	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
4538	memset(prediction_buffer_2, 0, prediction_buffer_size);
4539	double * cur_pb_buf = prediction_buffer_1;
4540	double * next_pb_buf = prediction_buffer_2;
4541	double * cur_pb_buf_pos;
4542	double * next_pb_buf_pos;
4543	int intvCapacity = exe_params->intvCapacity;
4544	int intvRadius = exe_params->intvRadius;
4545	int use_reg = 0;
4546	double noise = realPrecision * 1.22;
4547
4548	reg_params_pos = reg_params;
4549	// compress the regression coefficients on the fly
4550	double last_coeffcients[4] = {0.0};
4551	int coeff_intvCapacity_sz = 65536;
4552	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
4553	int * coeff_type[4];
4554	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
4555	double * coeff_unpred_data[4];
4556	double * coeff_unpredictable_data = (double ) malloc(num_blocks4*sizeof(double));
4557	double precision[4];
4558	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
4559	for(int i=0; i<4; i++){
4560	coeff_type[i] = coeff_result_type + i * num_blocks;
4561	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
4562	}
4563	int coeff_index = 0;
4564	unsigned int coeff_unpredictable_count[4] = {0};
4565
4566	if(use_mean){
4567	int intvCapacity_sz = intvCapacity - 2;
4568	for(size_t i=0; i<num_x; i++){
4569	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4570	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4571	for(size_t j=0; j<num_y; j++){
4572	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4573	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4574	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
4575	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
4576	type = result_type + type_offset;
4577
4578	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
4579	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
4580	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
4581
4582	size_t current_blockcount_z;
4583	double * pb_pos = cur_pb_buf_pos;
4584	double * next_pb_pos = next_pb_buf_pos;
4585	size_t strip_unpredictable_count = 0;
4586	for(size_t k=0; k<num_z; k++){
4587	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
4588
4589	/sampling and decide which predictor/
4590	{
4591	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
4592	double * cur_data_pos;
4593	double curData;
4594	double pred_reg, pred_sz;
4595	double err_sz = 0.0, err_reg = 0.0;
4596	int bmi = 0;
4597	if(i>0 && j>0 && k>0){
4598	for(int i=0; i<block_size; i++){
4599	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4600	curData = *cur_data_pos;
4601	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4602	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4603	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4604	err_reg += fabs(pred_reg - curData);
4605
4606	bmi = block_size - i;
4607	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4608	curData = *cur_data_pos;
4609	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4610	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4611	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4612	err_reg += fabs(pred_reg - curData);
4613
4614	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4615	curData = *cur_data_pos;
4616	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4617	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4618	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4619	err_reg += fabs(pred_reg - curData);
4620
4621	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4622	curData = *cur_data_pos;
4623	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4624	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4625	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4626	err_reg += fabs(pred_reg - curData);
4627	}
4628	}
4629	else{
4630	for(int i=1; i<block_size; i++){
4631	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4632	curData = *cur_data_pos;
4633	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4634	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4635	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4636	err_reg += fabs(pred_reg - curData);
4637
4638	bmi = block_size - i;
4639	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4640	curData = *cur_data_pos;
4641	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4642	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4643	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4644	err_reg += fabs(pred_reg - curData);
4645
4646	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4647	curData = *cur_data_pos;
4648	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4649	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4650	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4651	err_reg += fabs(pred_reg - curData);
4652
4653	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4654	curData = *cur_data_pos;
4655	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4656	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4657	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
4658	err_reg += fabs(pred_reg - curData);
4659
4660	}
4661	}
4662	use_reg = (err_reg < err_sz);
4663	}
4664	if(use_reg){
4665	{
4666	/predict coefficients in current block via previous reg_block/
4667	double cur_coeff;
4668	double diff, itvNum;
4669	for(int e=0; e<4; e++){
4670	cur_coeff = reg_params_pos[e*num_blocks];
4671	diff = cur_coeff - last_coeffcients[e];
4672	itvNum = fabs(diff)/precision[e] + 1;
4673	if (itvNum < coeff_intvCapacity_sz){
4674	if (diff < 0) itvNum = -itvNum;
4675	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4676	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4677	//ganrantee comporession error against the case of machine-epsilon
4678	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4679	coeff_type[e][coeff_index] = 0;
4680	last_coeffcients[e] = cur_coeff;
4681	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4682	}
4683	}
4684	else{
4685	coeff_type[e][coeff_index] = 0;
4686	last_coeffcients[e] = cur_coeff;
4687	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4688	}
4689	}
4690	coeff_index ++;
4691	}
4692	double curData;
4693	double pred;
4694	double itvNum;
4695	double diff;
4696	size_t index = 0;
4697	size_t block_unpredictable_count = 0;
4698	double * cur_data_pos = data_pos;
4699	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4700	for(size_t jj=0; jj<current_blockcount_y; jj++){
4701	for(size_t kk=0; kk<current_blockcount_z; kk++){
4702	curData = *cur_data_pos;
4703	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
4704	diff = curData - pred;
4705	itvNum = fabs(diff)/tmp_realPrecision + 1;
4706	if (itvNum < intvCapacity){
4707	if (diff < 0) itvNum = -itvNum;
4708	type[index] = (int) (itvNum/2) + intvRadius;
4709	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
4710	//ganrantee comporession error against the case of machine-epsilon
4711	if(fabs(curData - pred)>tmp_realPrecision){
4712	type[index] = 0;
4713	pred = curData;
4714	unpredictable_data[block_unpredictable_count ++] = curData;
4715	}
4716	}
4717	else{
4718	type[index] = 0;
4719	pred = curData;
4720	unpredictable_data[block_unpredictable_count ++] = curData;
4721	}
4722	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
4723	// assign value to block surfaces
4724	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
4725	}
4726	index ++;
4727	cur_data_pos ++;
4728	}
4729	cur_data_pos += dim1_offset - current_blockcount_z;
4730	}
4731	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
4732	}
4733	/dealing with the last ii (boundary)/
4734	{
4735	// ii == current_blockcount_x - 1
4736	size_t ii = current_blockcount_x - 1;
4737	for(size_t jj=0; jj<current_blockcount_y; jj++){
4738	for(size_t kk=0; kk<current_blockcount_z; kk++){
4739	curData = *cur_data_pos;
4740	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
4741	diff = curData - pred;
4742	itvNum = fabs(diff)/tmp_realPrecision + 1;
4743	if (itvNum < intvCapacity){
4744	if (diff < 0) itvNum = -itvNum;
4745	type[index] = (int) (itvNum/2) + intvRadius;
4746	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
4747	//ganrantee comporession error against the case of machine-epsilon
4748	if(fabs(curData - pred)>tmp_realPrecision){
4749	type[index] = 0;
4750	pred = curData;
4751	unpredictable_data[block_unpredictable_count ++] = curData;
4752	}
4753	}
4754	else{
4755	type[index] = 0;
4756	pred = curData;
4757	unpredictable_data[block_unpredictable_count ++] = curData;
4758	}
4759
4760	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
4761	// assign value to block surfaces
4762	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
4763	}
4764	// assign value to next prediction buffer
4765	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
4766	index ++;
4767	cur_data_pos ++;
4768	}
4769	cur_data_pos += dim1_offset - current_blockcount_z;
4770	}
4771	}
4772	unpredictable_count = block_unpredictable_count;
4773	strip_unpredictable_count += unpredictable_count;
4774	unpredictable_data += unpredictable_count;
4775
4776	reg_count ++;
4777	}
4778	else{
4779	// use SZ
4780	// SZ predication
4781	unpredictable_count = 0;
4782	double * cur_pb_pos = pb_pos;
4783	double * cur_data_pos = data_pos;
4784	double curData;
4785	double pred3D;
4786	double itvNum, diff;
4787	size_t index = 0;
4788	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4789	for(size_t jj=0; jj<current_blockcount_y; jj++){
4790	for(size_t kk=0; kk<current_blockcount_z; kk++){
4791
4792	curData = *cur_data_pos;
4793	if(fabs(curData - mean) <= realPrecision){
4794	// adjust type[index] to intvRadius for coherence with freq in reg
4795	type[index] = intvRadius;
4796	*cur_pb_pos = mean;
4797	}
4798	else
4799	{
4800	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
4801	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
4802	diff = curData - pred3D;
4803	itvNum = fabs(diff)/realPrecision + 1;
4804	if (itvNum < intvCapacity_sz){
4805	if (diff < 0) itvNum = -itvNum;
4806	type[index] = (int) (itvNum/2) + intvRadius;
4807	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4808	if(type[index] <= intvRadius) type[index] -= 1;
4809	//ganrantee comporession error against the case of machine-epsilon
4810	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4811	type[index] = 0;
4812	*cur_pb_pos = curData;
4813	unpredictable_data[unpredictable_count ++] = curData;
4814	}
4815	}
4816	else{
4817	type[index] = 0;
4818	*cur_pb_pos = curData;
4819	unpredictable_data[unpredictable_count ++] = curData;
4820	}
4821	}
4822	index ++;
4823	cur_pb_pos ++;
4824	cur_data_pos ++;
4825	}
4826	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
4827	cur_data_pos += dim1_offset - current_blockcount_z;
4828	}
4829	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
4830	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
4831	}
4832	/dealing with the last ii (boundary)/
4833	{
4834	// ii == current_blockcount_x - 1
4835	for(size_t jj=0; jj<current_blockcount_y; jj++){
4836	for(size_t kk=0; kk<current_blockcount_z; kk++){
4837
4838	curData = *cur_data_pos;
4839	if(fabs(curData - mean) <= realPrecision){
4840	// adjust type[index] to intvRadius for coherence with freq in reg
4841	type[index] = intvRadius;
4842	*cur_pb_pos = mean;
4843	}
4844	else
4845	{
4846	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
4847	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
4848	diff = curData - pred3D;
4849	itvNum = fabs(diff)/realPrecision + 1;
4850	if (itvNum < intvCapacity_sz){
4851	if (diff < 0) itvNum = -itvNum;
4852	type[index] = (int) (itvNum/2) + intvRadius;
4853	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4854	if(type[index] <= intvRadius) type[index] -= 1;
4855	//ganrantee comporession error against the case of machine-epsilon
4856	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4857	type[index] = 0;
4858	*cur_pb_pos = curData;
4859	unpredictable_data[unpredictable_count ++] = curData;
4860	}
4861	}
4862	else{
4863	type[index] = 0;
4864	*cur_pb_pos = curData;
4865	unpredictable_data[unpredictable_count ++] = curData;
4866	}
4867	}
4868	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
4869	index ++;
4870	cur_pb_pos ++;
4871	cur_data_pos ++;
4872	}
4873	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
4874	cur_data_pos += dim1_offset - current_blockcount_z;
4875	}
4876	}
4877	strip_unpredictable_count += unpredictable_count;
4878	unpredictable_data += unpredictable_count;
4879	// change indicator
4880	indicator_pos[k] = 1;
4881	}// end SZ
4882
4883	reg_params_pos ++;
4884	data_pos += current_blockcount_z;
4885	pb_pos += current_blockcount_z;
4886	next_pb_pos += current_blockcount_z;
4887	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
4888
4889	} // end k
4890
4891	if(strip_unpredictable_count > max_unpred_count){
4892	max_unpred_count = strip_unpredictable_count;
4893	}
4894	total_unpred += strip_unpredictable_count;
4895	indicator_pos += num_z;
4896	}// end j
4897	double * tmp;
4898	tmp = cur_pb_buf;
4899	cur_pb_buf = next_pb_buf;
4900	next_pb_buf = tmp;
4901	}// end i
4902	}
4903	else{
4904	int intvCapacity_sz = intvCapacity - 2;
4905	for(size_t i=0; i<num_x; i++){
4906	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4907	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4908
4909	for(size_t j=0; j<num_y; j++){
4910	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4911	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4912	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
4913	// copy bottom plane from plane buffer
4914	// memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double));
4915	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
4916	type = result_type + type_offset;
4917
4918	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
4919	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
4920	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
4921
4922	size_t current_blockcount_z;
4923	double * pb_pos = cur_pb_buf_pos;
4924	double * next_pb_pos = next_pb_buf_pos;
4925	size_t strip_unpredictable_count = 0;
4926	for(size_t k=0; k<num_z; k++){
4927	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
4928	/sampling/
4929	{
4930	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
4931	double * cur_data_pos;
4932	double curData;
4933	double pred_reg, pred_sz;
4934	double err_sz = 0.0, err_reg = 0.0;
4935	int bmi;
4936	if(i>0 && j>0 && k>0){
4937	for(int i=0; i<block_size; i++){
4938	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4939	curData = *cur_data_pos;
4940	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4941	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4942	err_sz += fabs(pred_sz - curData) + noise;
4943	err_reg += fabs(pred_reg - curData);
4944
4945	bmi = block_size - i;
4946	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4947	curData = *cur_data_pos;
4948	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4949	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4950	err_sz += fabs(pred_sz - curData) + noise;
4951	err_reg += fabs(pred_reg - curData);
4952
4953	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4954	curData = *cur_data_pos;
4955	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4956	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4957	err_sz += fabs(pred_sz - curData) + noise;
4958	err_reg += fabs(pred_reg - curData);
4959
4960	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4961	curData = *cur_data_pos;
4962	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4963	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4964	err_sz += fabs(pred_sz - curData) + noise;
4965	err_reg += fabs(pred_reg - curData);
4966	}
4967	}
4968	else{
4969	for(int i=1; i<block_size; i++){
4970	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
4971	curData = *cur_data_pos;
4972	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4973	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4974	err_sz += fabs(pred_sz - curData) + noise;
4975	err_reg += fabs(pred_reg - curData);
4976
4977	bmi = block_size - i;
4978	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
4979	curData = *cur_data_pos;
4980	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4981	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4982	err_sz += fabs(pred_sz - curData) + noise;
4983	err_reg += fabs(pred_reg - curData);
4984
4985	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
4986	curData = *cur_data_pos;
4987	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4988	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
4989	err_sz += fabs(pred_sz - curData) + noise;
4990	err_reg += fabs(pred_reg - curData);
4991
4992	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
4993	curData = *cur_data_pos;
4994	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
4995	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
4996	err_sz += fabs(pred_sz - curData) + noise;
4997	err_reg += fabs(pred_reg - curData);
4998	}
4999	}
5000	use_reg = (err_reg < err_sz);
5001
5002	}
5003	if(use_reg)
5004	{
5005	{
5006	/predict coefficients in current block via previous reg_block/
5007	double cur_coeff;
5008	double diff, itvNum;
5009	for(int e=0; e<4; e++){
5010	cur_coeff = reg_params_pos[e*num_blocks];
5011	diff = cur_coeff - last_coeffcients[e];
5012	itvNum = fabs(diff)/precision[e] + 1;
5013	if (itvNum < coeff_intvCapacity_sz){
5014	if (diff < 0) itvNum = -itvNum;
5015	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
5016	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
5017	//ganrantee comporession error against the case of machine-epsilon
5018	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
5019	coeff_type[e][coeff_index] = 0;
5020	last_coeffcients[e] = cur_coeff;
5021	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5022	}
5023	}
5024	else{
5025	coeff_type[e][coeff_index] = 0;
5026	last_coeffcients[e] = cur_coeff;
5027	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5028	}
5029	}
5030	coeff_index ++;
5031	}
5032	double curData;
5033	double pred;
5034	double itvNum;
5035	double diff;
5036	size_t index = 0;
5037	size_t block_unpredictable_count = 0;
5038	double * cur_data_pos = data_pos;
5039	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5040	for(size_t jj=0; jj<current_blockcount_y; jj++){
5041	for(size_t kk=0; kk<current_blockcount_z; kk++){
5042
5043	curData = *cur_data_pos;
5044	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5045	diff = curData - pred;
5046	itvNum = fabs(diff)/tmp_realPrecision + 1;
5047	if (itvNum < intvCapacity){
5048	if (diff < 0) itvNum = -itvNum;
5049	type[index] = (int) (itvNum/2) + intvRadius;
5050	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5051	//ganrantee comporession error against the case of machine-epsilon
5052	if(fabs(curData - pred)>tmp_realPrecision){
5053	type[index] = 0;
5054	pred = curData;
5055	unpredictable_data[block_unpredictable_count ++] = curData;
5056	}
5057	}
5058	else{
5059	type[index] = 0;
5060	pred = curData;
5061	unpredictable_data[block_unpredictable_count ++] = curData;
5062	}
5063
5064	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5065	// assign value to block surfaces
5066	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5067	}
5068	index ++;
5069	cur_data_pos ++;
5070	}
5071	cur_data_pos += dim1_offset - current_blockcount_z;
5072	}
5073	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5074	}
5075	/dealing with the last ii (boundary)/
5076	{
5077	// ii == current_blockcount_x - 1
5078	size_t ii = current_blockcount_x - 1;
5079	for(size_t jj=0; jj<current_blockcount_y; jj++){
5080	for(size_t kk=0; kk<current_blockcount_z; kk++){
5081	curData = *cur_data_pos;
5082	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5083	diff = curData - pred;
5084	itvNum = fabs(diff)/tmp_realPrecision + 1;
5085	if (itvNum < intvCapacity){
5086	if (diff < 0) itvNum = -itvNum;
5087	type[index] = (int) (itvNum/2) + intvRadius;
5088	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5089	//ganrantee comporession error against the case of machine-epsilon
5090	if(fabs(curData - pred)>tmp_realPrecision){
5091	type[index] = 0;
5092	pred = curData;
5093	unpredictable_data[block_unpredictable_count ++] = curData;
5094	}
5095	}
5096	else{
5097	type[index] = 0;
5098	pred = curData;
5099	unpredictable_data[block_unpredictable_count ++] = curData;
5100	}
5101
5102	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5103	// assign value to block surfaces
5104	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5105	}
5106	// assign value to next prediction buffer
5107	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
5108	index ++;
5109	cur_data_pos ++;
5110	}
5111	cur_data_pos += dim1_offset - current_blockcount_z;
5112	}
5113	}
5114	unpredictable_count = block_unpredictable_count;
5115	strip_unpredictable_count += unpredictable_count;
5116	unpredictable_data += unpredictable_count;
5117	reg_count ++;
5118	}
5119	else{
5120	// use SZ
5121	// SZ predication
5122	unpredictable_count = 0;
5123	double * cur_pb_pos = pb_pos;
5124	double * cur_data_pos = data_pos;
5125	double curData;
5126	double pred3D;
5127	double itvNum, diff;
5128	size_t index = 0;
5129	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5130	for(size_t jj=0; jj<current_blockcount_y; jj++){
5131	for(size_t kk=0; kk<current_blockcount_z; kk++){
5132
5133	curData = *cur_data_pos;
5134	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5135	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5136	diff = curData - pred3D;
5137	itvNum = fabs(diff)/realPrecision + 1;
5138	if (itvNum < intvCapacity_sz){
5139	if (diff < 0) itvNum = -itvNum;
5140	type[index] = (int) (itvNum/2) + intvRadius;
5141	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5142	//ganrantee comporession error against the case of machine-epsilon
5143	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5144	type[index] = 0;
5145	*cur_pb_pos = curData;
5146	unpredictable_data[unpredictable_count ++] = curData;
5147	}
5148	}
5149	else{
5150	type[index] = 0;
5151	*cur_pb_pos = curData;
5152	unpredictable_data[unpredictable_count ++] = curData;
5153	}
5154	index ++;
5155	cur_pb_pos ++;
5156	cur_data_pos ++;
5157	}
5158	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5159	cur_data_pos += dim1_offset - current_blockcount_z;
5160	}
5161	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
5162	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5163	}
5164	/dealing with the last ii (boundary)/
5165	{
5166	// ii == current_blockcount_x - 1
5167	for(size_t jj=0; jj<current_blockcount_y; jj++){
5168	for(size_t kk=0; kk<current_blockcount_z; kk++){
5169
5170	curData = *cur_data_pos;
5171	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5172	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5173	diff = curData - pred3D;
5174	itvNum = fabs(diff)/realPrecision + 1;
5175	if (itvNum < intvCapacity_sz){
5176	if (diff < 0) itvNum = -itvNum;
5177	type[index] = (int) (itvNum/2) + intvRadius;
5178	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5179	//ganrantee comporession error against the case of machine-epsilon
5180	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5181	type[index] = 0;
5182	*cur_pb_pos = curData;
5183	unpredictable_data[unpredictable_count ++] = curData;
5184	}
5185	}
5186	else{
5187	type[index] = 0;
5188	*cur_pb_pos = curData;
5189	unpredictable_data[unpredictable_count ++] = curData;
5190	}
5191	// assign value to next prediction buffer
5192	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
5193	index ++;
5194	cur_pb_pos ++;
5195	cur_data_pos ++;
5196	}
5197	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5198	cur_data_pos += dim1_offset - current_blockcount_z;
5199	}
5200	}
5201	strip_unpredictable_count += unpredictable_count;
5202	unpredictable_data += unpredictable_count;
5203	// change indicator
5204	indicator_pos[k] = 1;
5205	}// end SZ
5206
5207	reg_params_pos ++;
5208	data_pos += current_blockcount_z;
5209	pb_pos += current_blockcount_z;
5210	next_pb_pos += current_blockcount_z;
5211	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
5212
5213	}
5214
5215	if(strip_unpredictable_count > max_unpred_count){
5216	max_unpred_count = strip_unpredictable_count;
5217	}
5218	total_unpred += strip_unpredictable_count;
5219	indicator_pos += num_z;
5220	}
5221	double * tmp;
5222	tmp = cur_pb_buf;
5223	cur_pb_buf = next_pb_buf;
5224	next_pb_buf = tmp;
5225	}
5226	}
5227
5228	free(prediction_buffer_1);
5229	free(prediction_buffer_2);
5230
5231	int stateNum = 2*quantization_intervals;
5232	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
5233
5234	size_t nodeCount = 0;
5235	init(huffmanTree, result_type, num_elements);
5236	size_t i = 0;
5237	for (i = 0; i < huffmanTree->stateNum; i++)
5238	if (huffmanTree->code[i]) nodeCount++;
5239	nodeCount = nodeCount*2-1;
5240
5241	unsigned char *treeBytes;
5242	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
5243
5244	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
5245	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
5246	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
5247	unsigned char * result_pos = result;
5248	initRandomAccessBytes(result_pos);
5249
5250	result_pos += meta_data_offset;
5251
5252	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
5253	result_pos += exe_params->SZ_SIZE_TYPE;
5254
5255	intToBytes_bigEndian(result_pos, block_size);
5256	result_pos += sizeof(int);
5257	doubleToBytes(result_pos, realPrecision);
5258	result_pos += sizeof(double);
5259	intToBytes_bigEndian(result_pos, quantization_intervals);
5260	result_pos += sizeof(int);
5261	intToBytes_bigEndian(result_pos, treeByteSize);
5262	result_pos += sizeof(int);
5263	intToBytes_bigEndian(result_pos, nodeCount);
5264	result_pos += sizeof(int);
5265	memcpy(result_pos, treeBytes, treeByteSize);
5266	result_pos += treeByteSize;
5267	free(treeBytes);
5268
5269	memcpy(result_pos, &use_mean, sizeof(unsigned char));
5270	result_pos += sizeof(unsigned char);
5271	memcpy(result_pos, &mean, sizeof(double));
5272	result_pos += sizeof(double);
5273	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
5274	result_pos += indicator_size;
5275
5276	//convert the lead/mid/resi to byte stream
5277	if(reg_count > 0){
5278	for(int e=0; e<4; e++){
5279	int stateNum = 2*coeff_intvCapacity_sz;
5280	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
5281	size_t nodeCount = 0;
5282	init(huffmanTree, coeff_type[e], reg_count);
5283	size_t i = 0;
5284	for (i = 0; i < huffmanTree->stateNum; i++)
5285	if (huffmanTree->code[i]) nodeCount++;
5286	nodeCount = nodeCount*2-1;
5287	unsigned char *treeBytes;
5288	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
5289	doubleToBytes(result_pos, precision[e]);
5290	result_pos += sizeof(double);
5291	intToBytes_bigEndian(result_pos, coeff_intvRadius);
5292	result_pos += sizeof(int);
5293	intToBytes_bigEndian(result_pos, treeByteSize);
5294	result_pos += sizeof(int);
5295	intToBytes_bigEndian(result_pos, nodeCount);
5296	result_pos += sizeof(int);
5297	memcpy(result_pos, treeBytes, treeByteSize);
5298	result_pos += treeByteSize;
5299	free(treeBytes);
5300	size_t typeArray_size = 0;
5301	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
5302	sizeToBytes(result_pos, typeArray_size);
5303	result_pos += sizeof(size_t) + typeArray_size;
5304	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
5305	result_pos += sizeof(int);
5306	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
5307	result_pos += coeff_unpredictable_count[e]*sizeof(double);
5308	SZ_ReleaseHuffman(huffmanTree);
5309	}
5310	}
5311	free(coeff_result_type);
5312	free(coeff_unpredictable_data);
5313
5314	//record the number of unpredictable data and also store them
5315	memcpy(result_pos, &total_unpred, sizeof(size_t));
5316	result_pos += sizeof(size_t);
5317	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
5318	result_pos += total_unpred * sizeof(double);
5319	size_t typeArray_size = 0;
5320	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
5321	result_pos += typeArray_size;
5322	size_t totalEncodeSize = result_pos - result;
5323	free(indicator);
5324	free(result_unpredictable_data);
5325	free(result_type);
5326	free(reg_params);
5327
5328
5329	SZ_ReleaseHuffman(huffmanTree);
5330	*comp_size = totalEncodeSize;
5331	return result;
5332	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: