Context Navigation

source: thirdparty/SZ/sz/src/sz_float.c @ e6aa0eb

Revision e6aa0eb, 240.4 KB checked in by Hal Finkel <hfinkel@…>, 6 years ago (diff)
add stddef.h for ptrdiff_t
Property mode set to `100644`

Line
1	/**
2	* @file sz_float.c
3	* @author Sheng Di, Dingwen Tao, Xin Liang
4	* @date Aug, 2016
5	* @brief SZ_Init, Compression and Decompression functions
6	* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
7	* See COPYRIGHT in top-level directory.
8	*/
9
10
11	#include <stdio.h>
12	#include <stdlib.h>
13	#include <stddef.h>
14	#include <string.h>
15	#include <unistd.h>
16	#include <math.h>
17	#include "sz.h"
18	#include "CompressElement.h"
19	#include "DynamicByteArray.h"
20	#include "DynamicIntArray.h"
21	#include "TightDataPointStorageF.h"
22	#include "sz_float.h"
23	#include "sz_float_pwr.h"
24	#include "szd_float.h"
25	#include "szd_float_pwr.h"
26	#include "zlib.h"
27	#include "rw.h"
28	#include "sz_float_ts.h"
29	#include "utility.h"
30
/**
 * Produce a "skip compression" copy of a float array: the raw bytes of the
 * input are duplicated into a freshly allocated buffer.
 *
 * @param data        input array of dataLength floats
 * @param dataLength  number of floats in data
 * @param outSize     receives the size of the returned buffer in bytes
 *                    (0 if the buffer could not be allocated)
 * @return newly malloc'ed buffer holding a byte-for-byte copy of data; the
 *         caller owns it and must free() it. NULL on allocation failure.
 */
unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize)
{
	size_t byteCount = dataLength*sizeof(float);
	unsigned char* out = (unsigned char*)malloc(byteCount);
	if(out == NULL)
	{
		/* Do not report a size for a buffer that does not exist. */
		*outSize = 0;
		return NULL;
	}
	*outSize = byteCount;
	memcpy(out, data, byteCount);
	return out;
}
38	unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision)
39	{
40	size_t i = 0, radiusIndex;
41	float pred_value = 0, pred_err;
42	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
43	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
44	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
45	for(i=2;i<dataLength;i++)
46	{
47	if(i%confparams_cpr->sampleDistance==0)
48	{
49	//pred_value = 2*oriData[i-1] - oriData[i-2];
50	pred_value = oriData[i-1];
51	pred_err = fabs(pred_value - oriData[i]);
52	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
53	if(radiusIndex>=confparams_cpr->maxRangeRadius)
54	radiusIndex = confparams_cpr->maxRangeRadius - 1;
55	intervals[radiusIndex]++;
56	}
57	}
58	//compute the appropriate number
59	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
60	size_t sum = 0;
61	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
62	{
63	sum += intervals[i];
64	if(sum>targetCount)
65	break;
66	}
67	if(i>=confparams_cpr->maxRangeRadius)
68	i = confparams_cpr->maxRangeRadius-1;
69
70	unsigned int accIntervals = 2*(i+1);
71	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
72
73	if(powerOf2<32)
74	powerOf2 = 32;
75
76	free(intervals);
77	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
78	return powerOf2;
79	}
80
81	unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, double realPrecision)
82	{
83	size_t i,j, index;
84	size_t radiusIndex;
85	float pred_value = 0, pred_err;
86	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
87	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
88	size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance;
89
90	//float max = oriData[0];
91	//float min = oriData[0];
92
93	for(i=1;i<r1;i++)
94	{
95	for(j=1;j<r2;j++)
96	{
97	if((i+j)%confparams_cpr->sampleDistance==0)
98	{
99	index = i*r2+j;
100	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
101	pred_err = fabs(pred_value - oriData[index]);
102	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
103	if(radiusIndex>=confparams_cpr->maxRangeRadius)
104	radiusIndex = confparams_cpr->maxRangeRadius - 1;
105	intervals[radiusIndex]++;
106
107	// if (max < oriData[index]) max = oriData[index];
108	// if (min > oriData[index]) min = oriData[index];
109	}
110	}
111	}
112	//compute the appropriate number
113	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
114	size_t sum = 0;
115	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
116	{
117	sum += intervals[i];
118	if(sum>targetCount)
119	break;
120	}
121	if(i>=confparams_cpr->maxRangeRadius)
122	i = confparams_cpr->maxRangeRadius-1;
123	unsigned int accIntervals = 2*(i+1);
124	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
125
126	if(powerOf2<32)
127	powerOf2 = 32;
128
129	// struct timeval costStart, costEnd;
130	// double cost_est = 0;
131	//
132	// gettimeofday(&costStart, NULL);
133	//
134	// //compute estimate of bit-rate and distortion
135	// double est_br = 0;
136	// double est_psnr = 0;
137	// double c1 = log2(targetCount)+1;
138	// double c2 = -20.0log10(realPrecision) + 20.0log10(max-min) + 10.0*log10(3);
139	//
140	// for (i = 0; i < powerOf2/2; i++)
141	// {
142	// int count = intervals[i];
143	// if (count != 0)
144	// est_br += count*log2(count);
145	// est_psnr += count;
146	// }
147	//
148	// //compute estimate of bit-rate
149	// est_br -= c1*est_psnr;
150	// est_br /= totalSampleSize;
151	// est_br = -est_br;
152	//
153	// //compute estimate of psnr
154	// est_psnr /= totalSampleSize;
155	// printf ("sum of P(i) = %lf\n", est_psnr);
156	// est_psnr = -10.0*log10(est_psnr);
157	// est_psnr += c2;
158	//
159	// printf ("estimate bitrate = %.2f\n", est_br);
160	// printf ("estimate psnr = %.2f\n",est_psnr);
161	//
162	// gettimeofday(&costEnd, NULL);
163	// cost_est = ((costEnd.tv_sec1000000+costEnd.tv_usec)-(costStart.tv_sec1000000+costStart.tv_usec))/1000000.0;
164	//
165	// printf ("analysis time = %f\n", cost_est);
166
167	free(intervals);
168	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
169	return powerOf2;
170	}
171
172	unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
173	{
174	size_t i,j,k, index;
175	size_t radiusIndex;
176	size_t r23=r2*r3;
177	float pred_value = 0, pred_err;
178	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
179	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
180	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)/confparams_cpr->sampleDistance;
181
182	//float max = oriData[0];
183	//float min = oriData[0];
184
185	for(i=1;i<r1;i++)
186	{
187	for(j=1;j<r2;j++)
188	{
189	for(k=1;k<r3;k++)
190	{
191	if((i+j+k)%confparams_cpr->sampleDistance==0)
192	{
193	index = ir23+jr3+k;
194	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
195	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
196	pred_err = fabs(pred_value - oriData[index]);
197	radiusIndex = (pred_err/realPrecision+1)/2;
198	if(radiusIndex>=confparams_cpr->maxRangeRadius)
199	{
200	radiusIndex = confparams_cpr->maxRangeRadius - 1;
201	//printf("radiusIndex=%d\n", radiusIndex);
202	}
203	intervals[radiusIndex]++;
204
205	// if (max < oriData[index]) max = oriData[index];
206	// if (min > oriData[index]) min = oriData[index];
207	}
208	}
209	}
210	}
211	//compute the appropriate number
212	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
213	size_t sum = 0;
214	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
215	{
216	sum += intervals[i];
217	if(sum>targetCount)
218	break;
219	}
220	if(i>=confparams_cpr->maxRangeRadius)
221	i = confparams_cpr->maxRangeRadius-1;
222	unsigned int accIntervals = 2*(i+1);
223	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
224
225	if(powerOf2<32)
226	powerOf2 = 32;
227
228	// struct timeval costStart, costEnd;
229	// double cost_est = 0;
230	//
231	// gettimeofday(&costStart, NULL);
232	//
233	// //compute estimate of bit-rate and distortion
234	// double est_br = 0;
235	// double est_psnr = 0;
236	// double c1 = log2(targetCount)+1;
237	// double c2 = -20.0log10(realPrecision) + 20.0log10(max-min) + 10.0*log10(3);
238	//
239	// for (i = 0; i < powerOf2/2; i++)
240	// {
241	// int count = intervals[i];
242	// if (count != 0)
243	// est_br += count*log2(count);
244	// est_psnr += count;
245	// }
246	//
247	// //compute estimate of bit-rate
248	// est_br -= c1*est_psnr;
249	// est_br /= totalSampleSize;
250	// est_br = -est_br;
251	//
252	// //compute estimate of psnr
253	// est_psnr /= totalSampleSize;
254	// printf ("sum of P(i) = %lf\n", est_psnr);
255	// est_psnr = -10.0*log10(est_psnr);
256	// est_psnr += c2;
257	//
258	// printf ("estimate bitrate = %.2f\n", est_br);
259	// printf ("estimate psnr = %.2f\n",est_psnr);
260	//
261	// gettimeofday(&costEnd, NULL);
262	// cost_est = ((costEnd.tv_sec1000000+costEnd.tv_usec)-(costStart.tv_sec1000000+costStart.tv_usec))/1000000.0;
263	//
264	// printf ("analysis time = %f\n", cost_est);
265
266	free(intervals);
267	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
268	return powerOf2;
269	}
270
271
272	unsigned int optimize_intervals_float_4D(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision)
273	{
274	size_t i,j,k,l, index;
275	size_t radiusIndex;
276	size_t r234=r2r3r4;
277	size_t r34=r3*r4;
278	float pred_value = 0, pred_err;
279	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
280	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
281	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)*(r4-1)/confparams_cpr->sampleDistance;
282	for(i=1;i<r1;i++)
283	{
284	for(j=1;j<r2;j++)
285	{
286	for(k=1;k<r3;k++)
287	{
288	for (l=1;l<r4;l++)
289	{
290	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
291	{
292	index = ir234+jr34+k*r4+l;
293	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34]
294	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
295	pred_err = fabs(pred_value - oriData[index]);
296	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
297	if(radiusIndex>=confparams_cpr->maxRangeRadius)
298	radiusIndex = confparams_cpr->maxRangeRadius - 1;
299	intervals[radiusIndex]++;
300	}
301	}
302	}
303	}
304	}
305	//compute the appropriate number
306	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
307	size_t sum = 0;
308	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
309	{
310	sum += intervals[i];
311	if(sum>targetCount)
312	break;
313	}
314	if(i>=confparams_cpr->maxRangeRadius)
315	i = confparams_cpr->maxRangeRadius-1;
316
317	unsigned int accIntervals = 2*(i+1);
318	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
319
320	if(powerOf2<32)
321	powerOf2 = 32;
322
323	free(intervals);
324	return powerOf2;
325	}
326
327	TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData,
328	size_t dataLength, double realPrecision, float valueRangeSize, float medianValue_f)
329	{
330	#ifdef HAVE_TIMECMPR
331	float* decData = NULL;
332	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
333	decData = (float*)(multisteps->hist_data);
334	#endif
335
336	unsigned int quantization_intervals;
337	if(exe_params->optQuantMode==1)
338	quantization_intervals = optimize_intervals_float_1D_opt(oriData, dataLength, realPrecision);
339	else
340	quantization_intervals = exe_params->intvCapacity;
341	updateQuantizationInfo(quantization_intervals);
342
343	size_t i;
344	int reqLength;
345	float medianValue = medianValue_f;
346	short radExpo = getExponent_float(valueRangeSize/2);
347
348	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
349
350	int* type = (int) malloc(dataLengthsizeof(int));
351
352	float* spaceFillingValue = oriData; //
353
354	DynamicIntArray *exactLeadNumArray;
355	new_DIA(&exactLeadNumArray, DynArrayInitLen);
356
357	DynamicByteArray *exactMidByteArray;
358	new_DBA(&exactMidByteArray, DynArrayInitLen);
359
360	DynamicIntArray *resiBitArray;
361	new_DIA(&resiBitArray, DynArrayInitLen);
362
363	unsigned char preDataBytes[4];
364	intToBytes_bigEndian(preDataBytes, 0);
365
366	int reqBytesLength = reqLength/8;
367	int resiBitsLength = reqLength%8;
368	float last3CmprsData[3] = {0};
369
370	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
371	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
372
373	//add the first data
374	type[0] = 0;
375	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
376	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
377	memcpy(preDataBytes,vce->curBytes,4);
378	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
379	listAdd_float(last3CmprsData, vce->data);
380	#ifdef HAVE_TIMECMPR
381	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
382	decData[0] = vce->data;
383	#endif
384
385	//add the second data
386	type[1] = 0;
387	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
388	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
389	memcpy(preDataBytes,vce->curBytes,4);
390	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
391	listAdd_float(last3CmprsData, vce->data);
392	#ifdef HAVE_TIMECMPR
393	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
394	decData[1] = vce->data;
395	#endif
396	int state;
397	double checkRadius;
398	float curData;
399	float pred;
400	float predAbsErr;
401	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
402	double interval = 2*realPrecision;
403
404	for(i=2;i<dataLength;i++)
405	{
406	curData = spaceFillingValue[i];
407	//pred = 2*last3CmprsData[0] - last3CmprsData[1];
408	pred = last3CmprsData[0];
409	predAbsErr = fabs(curData - pred);
410	if(predAbsErr<checkRadius)
411	{
412	state = (predAbsErr/realPrecision+1)/2;
413	if(curData>=pred)
414	{
415	type[i] = exe_params->intvRadius+state;
416	pred = pred + state*interval;
417	}
418	else //curData<pred
419	{
420	type[i] = exe_params->intvRadius-state;
421	pred = pred - state*interval;
422	}
423
424	//double-check the prediction error in case of machine-epsilon impact
425	if(fabs(curData-pred)>realPrecision)
426	{
427	type[i] = 0;
428	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
429	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
430	memcpy(preDataBytes,vce->curBytes,4);
431	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
432
433	listAdd_float(last3CmprsData, vce->data);
434	#ifdef HAVE_TIMECMPR
435	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
436	decData[i] = vce->data;
437	#endif
438	}
439	else
440	{
441	listAdd_float(last3CmprsData, pred);
442	#ifdef HAVE_TIMECMPR
443	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
444	decData[i] = pred;
445	#endif
446	}
447	continue;
448	}
449
450	//unpredictable data processing
451	type[i] = 0;
452	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
453	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
454	memcpy(preDataBytes,vce->curBytes,4);
455	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
456
457	listAdd_float(last3CmprsData, vce->data);
458	#ifdef HAVE_TIMECMPR
459	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
460	decData[i] = vce->data;
461	#endif
462
463	}//end of for
464
465	// char* expSegmentsInBytes;
466	// int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes);
467	size_t exactDataNum = exactLeadNumArray->size;
468
469	TightDataPointStorageF* tdps;
470
471	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
472	type, exactMidByteArray->array, exactMidByteArray->size,
473	exactLeadNumArray->array,
474	resiBitArray->array, resiBitArray->size,
475	resiBitsLength,
476	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
477
478	//sdi:Debug
479	/* int sum =0;
480	for(i=0;i<dataLength;i++)
481	if(type[i]==0) sum++;
482	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
483
484	//free memory
485	free_DIA(exactLeadNumArray);
486	free_DIA(resiBitArray);
487	free(type);
488	free(vce);
489	free(lce);
490	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
491
492	return tdps;
493	}
494
495	void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, TightDataPointStorageF* tdps,
496	unsigned char** newByteData, size_t *outSize)
497	{
498	int floatSize=sizeof(float);
499	size_t k = 0, i;
500	tdps->isLossless = 1;
501	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + floatSize*dataLength;
502	newByteData = (unsigned char)malloc(totalByteLength);
503
504	unsigned char dsLengthBytes[8];
505	for (i = 0; i < 3; i++)//3
506	(*newByteData)[k++] = versionNumber[i];
507
508	if(exe_params->SZ_SIZE_TYPE==4)//1
509	(*newByteData)[k++] = 16; //00010000
510	else
511	(*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8
512
513	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
514	k = k + MetaDataByteLength;
515
516	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
517	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
518	(*newByteData)[k++] = dsLengthBytes[i];
519
520	if(sysEndianType==BIG_ENDIAN_SYSTEM)
521	memcpy((newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLengthfloatSize);
522	else
523	{
524	unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
525	for(i=0;i<dataLength;i++,p+=floatSize)
526	floatToBytes(p, oriData[i]);
527	}
528	*outSize = totalByteLength;
529	}
530
531	char SZ_compress_args_float_NoCkRngeNoGzip_1D(unsigned char** newByteData, float *oriData,
532	size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f)
533	{
534	char compressionType = 0;
535	TightDataPointStorageF* tdps = NULL;
536
537	#ifdef HAVE_TIMECMPR
538	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
539	{
540	int timestep = sz_tsc->currentStep;
541	if(timestep % confparams_cpr->snapshotCmprStep != 0)
542	{
543	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
544	compressionType = 1; //time-series based compression
545	}
546	else
547	{
548	tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
549	compressionType = 0; //snapshot-based compression
550	multisteps->lastSnapshotStep = timestep;
551	}
552	}
553	else
554	#endif
555	tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
556
557	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
558
559	if(outSize>dataLengthsizeof(float))
560	SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize);
561
562	free_TightDataPointStorageF(tdps);
563	return compressionType;
564	}
565
566	TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size_t r2, double realPrecision, float valueRangeSize, float medianValue_f)
567	{
568	#ifdef HAVE_TIMECMPR
569	float* decData = NULL;
570	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
571	decData = (float*)(multisteps->hist_data);
572	#endif
573
574	unsigned int quantization_intervals;
575	if(exe_params->optQuantMode==1)
576	{
577	quantization_intervals = optimize_intervals_float_2D_opt(oriData, r1, r2, realPrecision);
578	updateQuantizationInfo(quantization_intervals);
579	}
580	else
581	quantization_intervals = exe_params->intvCapacity;
582	size_t i,j;
583	int reqLength;
584	float pred1D, pred2D;
585	float diff = 0.0;
586	double itvNum = 0;
587	float P0, P1;
588
589	size_t dataLength = r1*r2;
590
591	P0 = (float)malloc(r2sizeof(float));
592	memset(P0, 0, r2*sizeof(float));
593	P1 = (float)malloc(r2sizeof(float));
594	memset(P1, 0, r2*sizeof(float));
595
596	float medianValue = medianValue_f;
597	short radExpo = getExponent_float(valueRangeSize/2);
598	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
599
600	int* type = (int) malloc(dataLengthsizeof(int));
601	//type[dataLength]=0;
602
603	float* spaceFillingValue = oriData; //
604
605	DynamicIntArray *exactLeadNumArray;
606	new_DIA(&exactLeadNumArray, DynArrayInitLen);
607
608	DynamicByteArray *exactMidByteArray;
609	new_DBA(&exactMidByteArray, DynArrayInitLen);
610
611	DynamicIntArray *resiBitArray;
612	new_DIA(&resiBitArray, DynArrayInitLen);
613
614	type[0] = 0;
615	unsigned char preDataBytes[4];
616	intToBytes_bigEndian(preDataBytes, 0);
617
618	int reqBytesLength = reqLength/8;
619	int resiBitsLength = reqLength%8;
620
621	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
622	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
623
624	/* Process Row-0 data 0*/
625	type[0] = 0;
626	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
627	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
628	memcpy(preDataBytes,vce->curBytes,4);
629	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
630	P1[0] = vce->data;
631	#ifdef HAVE_TIMECMPR
632	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
633	decData[0] = vce->data;
634	#endif
635
636	float curData;
637
638	/* Process Row-0 data 1*/
639	pred1D = P1[0];
640	curData = spaceFillingValue[1];
641	diff = curData - pred1D;
642
643	itvNum = fabs(diff)/realPrecision + 1;
644
645	if (itvNum < exe_params->intvCapacity)
646	{
647	if (diff < 0) itvNum = -itvNum;
648	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
649	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
650
651	//ganrantee comporession error against the case of machine-epsilon
652	if(fabs(spaceFillingValue[1]-P1[1])>realPrecision)
653	{
654	type[1] = 0;
655	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
656	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
657	memcpy(preDataBytes,vce->curBytes,4);
658	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
659
660	P1[1] = vce->data;
661	}
662	}
663	else
664	{
665	type[1] = 0;
666	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
667	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
668	memcpy(preDataBytes,vce->curBytes,4);
669	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
670	P1[1] = vce->data;
671	}
672	#ifdef HAVE_TIMECMPR
673	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
674	decData[1] = P1[1];
675	#endif
676
677	/* Process Row-0 data 2 --> data r2-1 */
678	for (j = 2; j < r2; j++)
679	{
680	pred1D = 2*P1[j-1] - P1[j-2];
681	curData = spaceFillingValue[j];
682	diff = curData - pred1D;
683
684	itvNum = fabs(diff)/realPrecision + 1;
685
686	if (itvNum < exe_params->intvCapacity)
687	{
688	if (diff < 0) itvNum = -itvNum;
689	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
690	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
691
692	//ganrantee comporession error against the case of machine-epsilon
693	if(fabs(curData-P1[j])>realPrecision)
694	{
695	type[j] = 0;
696	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
697	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
698	memcpy(preDataBytes,vce->curBytes,4);
699	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
700
701	P1[j] = vce->data;
702	}
703	}
704	else
705	{
706	type[j] = 0;
707	compressSingleFloatValue(vce,curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
708	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
709	memcpy(preDataBytes,vce->curBytes,4);
710	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
711	P1[j] = vce->data;
712	}
713	#ifdef HAVE_TIMECMPR
714	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
715	decData[j] = P1[j];
716	#endif
717	}
718
719	/* Process Row-1 --> Row-r1-1 */
720	size_t index;
721	for (i = 1; i < r1; i++)
722	{
723	/* Process row-i data 0 */
724	index = i*r2;
725	pred1D = P1[0];
726	curData = spaceFillingValue[index];
727	diff = curData - pred1D;
728
729	itvNum = fabs(diff)/realPrecision + 1;
730
731	if (itvNum < exe_params->intvCapacity)
732	{
733	if (diff < 0) itvNum = -itvNum;
734	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
735	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
736
737	//ganrantee comporession error against the case of machine-epsilon
738	if(fabs(curData-P0[0])>realPrecision)
739	{
740	type[index] = 0;
741	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
742	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
743	memcpy(preDataBytes,vce->curBytes,4);
744	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
745
746	P0[0] = vce->data;
747	}
748	}
749	else
750	{
751	type[index] = 0;
752	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
753	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
754	memcpy(preDataBytes,vce->curBytes,4);
755	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
756	P0[0] = vce->data;
757	}
758	#ifdef HAVE_TIMECMPR
759	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
760	decData[index] = P0[0];
761	#endif
762
763	/* Process row-i data 1 --> r2-1*/
764	for (j = 1; j < r2; j++)
765	{
766	index = i*r2+j;
767	pred2D = P0[j-1] + P1[j] - P1[j-1];
768
769	curData = spaceFillingValue[index];
770	diff = curData - pred2D;
771
772	itvNum = fabs(diff)/realPrecision + 1;
773
774	if (itvNum < exe_params->intvCapacity)
775	{
776	if (diff < 0) itvNum = -itvNum;
777	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
778	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
779
780	//ganrantee comporession error against the case of machine-epsilon
781	if(fabs(curData-P0[j])>realPrecision)
782	{
783	type[index] = 0;
784	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
785	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
786	memcpy(preDataBytes,vce->curBytes,4);
787	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
788
789	P0[j] = vce->data;
790	}
791	}
792	else
793	{
794	type[index] = 0;
795	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
796	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
797	memcpy(preDataBytes,vce->curBytes,4);
798	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
799	P0[j] = vce->data;
800	}
801	#ifdef HAVE_TIMECMPR
802	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
803	decData[index] = P0[j];
804	#endif
805	}
806
807	float *Pt;
808	Pt = P1;
809	P1 = P0;
810	P0 = Pt;
811	}
812
813	if(r2!=1)
814	free(P0);
815	free(P1);
816	size_t exactDataNum = exactLeadNumArray->size;
817
818	TightDataPointStorageF* tdps;
819
820	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
821	type, exactMidByteArray->array, exactMidByteArray->size,
822	exactLeadNumArray->array,
823	resiBitArray->array, resiBitArray->size,
824	resiBitsLength,
825	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
826
827	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
828	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
829
830	// for(i = 3800;i<3844;i++)
831	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
832
833	//free memory
834	free_DIA(exactLeadNumArray);
835	free_DIA(resiBitArray);
836	free(type);
837	free(vce);
838	free(lce);
839	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
840
841	return tdps;
842	}
843
844	/**
845	*
846	* Note: @r1 is high dimension
847	* @r2 is low dimension
848	* */
849	char SZ_compress_args_float_NoCkRngeNoGzip_2D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
850	{
851	size_t dataLength = r1*r2;
852	char compressionType = 0;
853	TightDataPointStorageF* tdps = NULL;
854
855	#ifdef HAVE_TIMECMPR
856	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
857	{
858	int timestep = sz_tsc->currentStep;
859	if(timestep % confparams_cpr->snapshotCmprStep != 0)
860	{
861	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
862	compressionType = 1; //time-series based compression
863	}
864	else
865	{
866	tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
867	compressionType = 0; //snapshot-based compression
868	multisteps->lastSnapshotStep = timestep;
869	}
870	}
871	else
872	#endif
873	tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
874
875	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
876
877	if(outSize>dataLengthsizeof(float))
878	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
879
880	free_TightDataPointStorageF(tdps);
881
882	return compressionType;
883	}
884
885	TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float valueRangeSize, float medianValue_f)
886	{
887	#ifdef HAVE_TIMECMPR
888	float* decData = NULL;
889	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
890	decData = (float*)(multisteps->hist_data);
891	#endif
892
893	unsigned int quantization_intervals;
894	if(exe_params->optQuantMode==1)
895	{
896	quantization_intervals = optimize_intervals_float_3D_opt(oriData, r1, r2, r3, realPrecision);
897	updateQuantizationInfo(quantization_intervals);
898	}
899	else
900	quantization_intervals = exe_params->intvCapacity;
901	size_t i,j,k;
902	int reqLength;
903	float pred1D, pred2D, pred3D;
904	float diff = 0.0;
905	double itvNum = 0;
906	float P0, P1;
907
908	size_t dataLength = r1r2r3;
909	size_t r23 = r2*r3;
910	P0 = (float)malloc(r23sizeof(float));
911	P1 = (float)malloc(r23sizeof(float));
912
913	float medianValue = medianValue_f;
914	short radExpo = getExponent_float(valueRangeSize/2);
915	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
916
917	int* type = (int) malloc(dataLengthsizeof(int));
918
919	float* spaceFillingValue = oriData; //
920
921	DynamicIntArray *exactLeadNumArray;
922	new_DIA(&exactLeadNumArray, DynArrayInitLen);
923
924	DynamicByteArray *exactMidByteArray;
925	new_DBA(&exactMidByteArray, DynArrayInitLen);
926
927	DynamicIntArray *resiBitArray;
928	new_DIA(&resiBitArray, DynArrayInitLen);
929
930	unsigned char preDataBytes[4];
931	intToBytes_bigEndian(preDataBytes, 0);
932
933	int reqBytesLength = reqLength/8;
934	int resiBitsLength = reqLength%8;
935
936	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
937	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
938
939
940	/////////////////////////// Process layer-0 ///////////////////////////
941	/* Process Row-0 data 0*/
942	type[0] = 0;
943	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
944	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
945	memcpy(preDataBytes,vce->curBytes,4);
946	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
947	P1[0] = vce->data;
948	#ifdef HAVE_TIMECMPR
949	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
950	decData[0] = P1[0];
951	#endif
952
953	float curData;
954
955	/* Process Row-0 data 1*/
956	pred1D = P1[0];
957	curData = spaceFillingValue[1];
958	diff = curData - pred1D;
959
960	itvNum = fabs(diff)/realPrecision + 1;
961
962	if (itvNum < exe_params->intvCapacity)
963	{
964	if (diff < 0) itvNum = -itvNum;
965	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
966	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
967
968	//ganrantee comporession error against the case of machine-epsilon
969	if(fabs(curData-P1[1])>realPrecision)
970	{
971	type[1] = 0;
972	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
973	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
974	memcpy(preDataBytes,vce->curBytes,4);
975	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
976
977	P1[1] = vce->data;
978	}
979	}
980	else
981	{
982	type[1] = 0;
983	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
984	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
985	memcpy(preDataBytes,vce->curBytes,4);
986	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
987	P1[1] = vce->data;
988	}
989	#ifdef HAVE_TIMECMPR
990	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
991	decData[1] = P1[1];
992	#endif
993
994	/* Process Row-0 data 2 --> data r3-1 */
995	for (j = 2; j < r3; j++)
996	{
997	pred1D = 2*P1[j-1] - P1[j-2];
998	curData = spaceFillingValue[j];
999	diff = curData - pred1D;
1000
1001	itvNum = fabs(diff)/realPrecision + 1;
1002
1003	if (itvNum < exe_params->intvCapacity)
1004	{
1005	if (diff < 0) itvNum = -itvNum;
1006	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
1007	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
1008
1009	//ganrantee comporession error against the case of machine-epsilon
1010	if(fabs(curData-P1[j])>realPrecision)
1011	{
1012	type[j] = 0;
1013	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1014	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1015	memcpy(preDataBytes,vce->curBytes,4);
1016	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1017
1018	P1[j] = vce->data;
1019	}
1020	}
1021	else
1022	{
1023	type[j] = 0;
1024	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1025	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1026	memcpy(preDataBytes,vce->curBytes,4);
1027	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1028	P1[j] = vce->data;
1029	}
1030	#ifdef HAVE_TIMECMPR
1031	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1032	decData[j] = P1[j];
1033	#endif
1034	}
1035
1036	/* Process Row-1 --> Row-r2-1 */
1037	size_t index;
1038	for (i = 1; i < r2; i++)
1039	{
1040	/* Process row-i data 0 */
1041	index = i*r3;
1042	pred1D = P1[index-r3];
1043	curData = spaceFillingValue[index];
1044	diff = curData - pred1D;
1045
1046	itvNum = fabs(diff)/realPrecision + 1;
1047
1048	if (itvNum < exe_params->intvCapacity)
1049	{
1050	if (diff < 0) itvNum = -itvNum;
1051	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1052	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1053
1054	//ganrantee comporession error against the case of machine-epsilon
1055	if(fabs(curData-P1[index])>realPrecision)
1056	{
1057	type[index] = 0;
1058	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1059	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1060	memcpy(preDataBytes,vce->curBytes,4);
1061	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1062
1063	P1[index] = vce->data;
1064	}
1065	}
1066	else
1067	{
1068	type[index] = 0;
1069	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1070	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1071	memcpy(preDataBytes,vce->curBytes,4);
1072	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1073	P1[index] = vce->data;
1074	}
1075	#ifdef HAVE_TIMECMPR
1076	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1077	decData[index] = P1[index];
1078	#endif
1079
1080	/* Process row-i data 1 --> data r3-1*/
1081	for (j = 1; j < r3; j++)
1082	{
1083	index = i*r3+j;
1084	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
1085
1086	curData = spaceFillingValue[index];
1087	diff = curData - pred2D;
1088
1089	itvNum = fabs(diff)/realPrecision + 1;
1090
1091	if (itvNum < exe_params->intvCapacity)
1092	{
1093	if (diff < 0) itvNum = -itvNum;
1094	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1095	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1096
1097	//ganrantee comporession error against the case of machine-epsilon
1098	if(fabs(curData-P1[index])>realPrecision)
1099	{
1100	type[index] = 0;
1101	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1102	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1103	memcpy(preDataBytes,vce->curBytes,4);
1104	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1105
1106	P1[index] = vce->data;
1107	}
1108	}
1109	else
1110	{
1111	type[index] = 0;
1112	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1113	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1114	memcpy(preDataBytes,vce->curBytes,4);
1115	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1116	P1[index] = vce->data;
1117	}
1118	#ifdef HAVE_TIMECMPR
1119	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1120	decData[index] = P1[index];
1121	#endif
1122	}
1123	}
1124
1125
1126	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
1127
1128	for (k = 1; k < r1; k++)
1129	{
1130	/* Process Row-0 data 0*/
1131	index = k*r23;
1132	pred1D = P1[0];
1133	curData = spaceFillingValue[index];
1134	diff = curData - pred1D;
1135
1136	itvNum = fabs(diff)/realPrecision + 1;
1137
1138	if (itvNum < exe_params->intvCapacity)
1139	{
1140	if (diff < 0) itvNum = -itvNum;
1141	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1142	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1143
1144	//ganrantee comporession error against the case of machine-epsilon
1145	if(fabs(curData-P0[0])>realPrecision)
1146	{
1147	type[index] = 0;
1148	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1149	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1150	memcpy(preDataBytes,vce->curBytes,4);
1151	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1152
1153	P0[0] = vce->data;
1154	}
1155	}
1156	else
1157	{
1158	type[index] = 0;
1159	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1160	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1161	memcpy(preDataBytes,vce->curBytes,4);
1162	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1163	P0[0] = vce->data;
1164	}
1165	#ifdef HAVE_TIMECMPR
1166	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1167	decData[index] = P0[0];
1168	#endif
1169
1170	/* Process Row-0 data 1 --> data r3-1 */
1171	for (j = 1; j < r3; j++)
1172	{
1173	//index = kr2r3+j;
1174	index ++;
1175	pred2D = P0[j-1] + P1[j] - P1[j-1];
1176	curData = spaceFillingValue[index];
1177	diff = spaceFillingValue[index] - pred2D;
1178
1179	itvNum = fabs(diff)/realPrecision + 1;
1180
1181	if (itvNum < exe_params->intvCapacity)
1182	{
1183	if (diff < 0) itvNum = -itvNum;
1184	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1185	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1186	//ganrantee comporession error against the case of machine-epsilon
1187	if(fabs(curData-P0[j])>realPrecision)
1188	{
1189	type[index] = 0;
1190	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1191	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1192	memcpy(preDataBytes,vce->curBytes,4);
1193	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1194
1195	P0[j] = vce->data;
1196	}
1197	}
1198	else
1199	{
1200	type[index] = 0;
1201	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1202	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1203	memcpy(preDataBytes,vce->curBytes,4);
1204	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1205	P0[j] = vce->data;
1206	}
1207	#ifdef HAVE_TIMECMPR
1208	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1209	decData[index] = P0[j];
1210	#endif
1211	}
1212
1213	/* Process Row-1 --> Row-r2-1 */
1214	size_t index2D;
1215	for (i = 1; i < r2; i++)
1216	{
1217	/* Process Row-i data 0 */
1218	index = kr23 + ir3;
1219	index2D = i*r3;
1220	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
1221	curData = spaceFillingValue[index];
1222	diff = spaceFillingValue[index] - pred2D;
1223
1224	itvNum = fabs(diff)/realPrecision + 1;
1225
1226	if (itvNum < exe_params->intvCapacity)
1227	{
1228	if (diff < 0) itvNum = -itvNum;
1229	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1230	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1231	//ganrantee comporession error against the case of machine-epsilon
1232	if(fabs(curData-P0[index2D])>realPrecision)
1233	{
1234	type[index] = 0;
1235	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1236	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1237	memcpy(preDataBytes,vce->curBytes,4);
1238	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1239
1240	P0[index2D] = vce->data;
1241	}
1242	}
1243	else
1244	{
1245	type[index] = 0;
1246	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1247	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1248	memcpy(preDataBytes,vce->curBytes,4);
1249	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1250	P0[index2D] = vce->data;
1251	}
1252	#ifdef HAVE_TIMECMPR
1253	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1254	decData[index] = P0[index2D];
1255	#endif
1256
1257	/* Process Row-i data 1 --> data r3-1 */
1258	for (j = 1; j < r3; j++)
1259	{
1260	// if(k==63&&i==43&&j==27)
1261	// printf("i=%d\n", i);
1262	//index = kr2r3 + i*r3 + j;
1263	index ++;
1264	index2D = i*r3 + j;
1265	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
1266	curData = spaceFillingValue[index];
1267	diff = curData - pred3D;
1268
1269	itvNum = fabs(diff)/realPrecision + 1;
1270
1271	if (itvNum < exe_params->intvCapacity)
1272	{
1273	if (diff < 0) itvNum = -itvNum;
1274	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1275	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1276
1277	//ganrantee comporession error against the case of machine-epsilon
1278	if(fabs(curData-P0[index2D])>realPrecision)
1279	{
1280	type[index] = 0;
1281	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1282	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1283	memcpy(preDataBytes,vce->curBytes,4);
1284	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1285
1286	P0[index2D] = vce->data;
1287	}
1288	}
1289	else
1290	{
1291	type[index] = 0;
1292	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1293	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1294	memcpy(preDataBytes,vce->curBytes,4);
1295	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1296	P0[index2D] = vce->data;
1297	}
1298	#ifdef HAVE_TIMECMPR
1299	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1300	decData[index] = P0[index2D];
1301	#endif
1302	}
1303	}
1304
1305	float *Pt;
1306	Pt = P1;
1307	P1 = P0;
1308	P0 = Pt;
1309	}
1310	if(r23!=1)
1311	free(P0);
1312	free(P1);
1313	size_t exactDataNum = exactLeadNumArray->size;
1314
1315	TightDataPointStorageF* tdps;
1316
1317	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
1318	type, exactMidByteArray->array, exactMidByteArray->size,
1319	exactLeadNumArray->array,
1320	resiBitArray->array, resiBitArray->size,
1321	resiBitsLength,
1322	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1323
1324	//sdi:Debug
1325	/* int sum =0;
1326	for(i=0;i<dataLength;i++)
1327	if(type[i]==0) sum++;
1328	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
1329
1330
1331	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
1332	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
1333
1334	//free memory
1335	free_DIA(exactLeadNumArray);
1336	free_DIA(resiBitArray);
1337	free(type);
1338	free(vce);
1339	free(lce);
1340	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1341
1342	return tdps;
1343	}
1344
1345	char SZ_compress_args_float_NoCkRngeNoGzip_3D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
1346	{
1347	size_t dataLength = r1r2r3;
1348	char compressionType = 0;
1349	TightDataPointStorageF* tdps = NULL;
1350
1351	#ifdef HAVE_TIMECMPR
1352	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1353	{
1354	int timestep = sz_tsc->currentStep;
1355	if(timestep % confparams_cpr->snapshotCmprStep != 0)
1356	{
1357	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
1358	compressionType = 1; //time-series based compression
1359	}
1360	else
1361	{
1362	if(sz_with_regression == SZ_NO_REGRESSION)
1363	tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
1364	else
1365	*newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
1366	compressionType = 0; //snapshot-based compression
1367	multisteps->lastSnapshotStep = timestep;
1368	}
1369	}
1370	else
1371	#endif
1372	tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
1373
1374	if(tdps!=NULL)
1375	{
1376	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
1377	if(outSize>dataLengthsizeof(float))
1378	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1379	free_TightDataPointStorageF(tdps);
1380	}
1381
1382	return compressionType;
1383	}
1384
1385
1386	TightDataPointStorageF* SZ_compress_float_4D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, float valueRangeSize, float medianValue_f)
1387	{
1388	unsigned int quantization_intervals;
1389	if(exe_params->optQuantMode==1)
1390	{
1391	quantization_intervals = optimize_intervals_float_4D(oriData, r1, r2, r3, r4, realPrecision);
1392	updateQuantizationInfo(quantization_intervals);
1393	}
1394	else
1395	quantization_intervals = exe_params->intvCapacity;
1396
1397	size_t i,j,k;
1398	int reqLength;
1399	float pred1D, pred2D, pred3D;
1400	float diff = 0.0;
1401	double itvNum = 0;
1402	float P0, P1;
1403
1404	size_t dataLength = r1r2r3*r4;
1405
1406	size_t r234 = r2r3r4;
1407	size_t r34 = r3*r4;
1408
1409	P0 = (float)malloc(r34sizeof(float));
1410	P1 = (float)malloc(r34sizeof(float));
1411
1412	float medianValue = medianValue_f;
1413	short radExpo = getExponent_float(valueRangeSize/2);
1414	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
1415
1416	int* type = (int) malloc(dataLengthsizeof(int));
1417
1418	float* spaceFillingValue = oriData; //
1419
1420	DynamicIntArray *exactLeadNumArray;
1421	new_DIA(&exactLeadNumArray, DynArrayInitLen);
1422
1423	DynamicByteArray *exactMidByteArray;
1424	new_DBA(&exactMidByteArray, DynArrayInitLen);
1425
1426	DynamicIntArray *resiBitArray;
1427	new_DIA(&resiBitArray, DynArrayInitLen);
1428
1429	unsigned char preDataBytes[4];
1430	intToBytes_bigEndian(preDataBytes, 0);
1431
1432	int reqBytesLength = reqLength/8;
1433	int resiBitsLength = reqLength%8;
1434
1435	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
1436	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
1437
1438
1439	size_t l;
1440	for (l = 0; l < r1; l++)
1441	{
1442
1443	/////////////////////////// Process layer-0 ///////////////////////////
1444	/* Process Row-0 data 0*/
1445	size_t index = l*r234;
1446	size_t index2D = 0;
1447
1448	type[index] = 0;
1449	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1450	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1451	memcpy(preDataBytes,vce->curBytes,4);
1452	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1453	P1[index2D] = vce->data;
1454
1455	/* Process Row-0 data 1*/
1456	index = l*r234+1;
1457	index2D = 1;
1458
1459	pred1D = P1[index2D-1];
1460	diff = spaceFillingValue[index] - pred1D;
1461
1462	itvNum = fabs(diff)/realPrecision + 1;
1463
1464	if (itvNum < exe_params->intvCapacity)
1465	{
1466	if (diff < 0) itvNum = -itvNum;
1467	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1468	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1469	}
1470	else
1471	{
1472	type[index] = 0;
1473	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1474	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1475	memcpy(preDataBytes,vce->curBytes,4);
1476	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1477	P1[index2D] = vce->data;
1478	}
1479
1480	/* Process Row-0 data 2 --> data r4-1 */
1481	for (j = 2; j < r4; j++)
1482	{
1483	index = l*r234+j;
1484	index2D = j;
1485
1486	pred1D = 2*P1[index2D-1] - P1[index2D-2];
1487	diff = spaceFillingValue[index] - pred1D;
1488
1489	itvNum = fabs(diff)/realPrecision + 1;
1490
1491	if (itvNum < exe_params->intvCapacity)
1492	{
1493	if (diff < 0) itvNum = -itvNum;
1494	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1495	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1496	}
1497	else
1498	{
1499	type[index] = 0;
1500	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1501	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1502	memcpy(preDataBytes,vce->curBytes,4);
1503	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1504	P1[index2D] = vce->data;
1505	}
1506	}
1507
1508	/* Process Row-1 --> Row-r3-1 */
1509	for (i = 1; i < r3; i++)
1510	{
1511	/* Process row-i data 0 */
1512	index = lr234+ir4;
1513	index2D = i*r4;
1514
1515	pred1D = P1[index2D-r4];
1516	diff = spaceFillingValue[index] - pred1D;
1517
1518	itvNum = fabs(diff)/realPrecision + 1;
1519
1520	if (itvNum < exe_params->intvCapacity)
1521	{
1522	if (diff < 0) itvNum = -itvNum;
1523	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1524	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1525	}
1526	else
1527	{
1528	type[index] = 0;
1529	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1530	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1531	memcpy(preDataBytes,vce->curBytes,4);
1532	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1533	P1[index2D] = vce->data;
1534	}
1535
1536	/* Process row-i data 1 --> data r4-1*/
1537	for (j = 1; j < r4; j++)
1538	{
1539	index = lr234+ir4+j;
1540	index2D = i*r4+j;
1541
1542	pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1];
1543
1544	diff = spaceFillingValue[index] - pred2D;
1545
1546	itvNum = fabs(diff)/realPrecision + 1;
1547
1548	if (itvNum < exe_params->intvCapacity)
1549	{
1550	if (diff < 0) itvNum = -itvNum;
1551	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1552	P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1553	}
1554	else
1555	{
1556	type[index] = 0;
1557	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1558	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1559	memcpy(preDataBytes,vce->curBytes,4);
1560	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1561	P1[index2D] = vce->data;
1562	}
1563	}
1564	}
1565
1566
1567	/////////////////////////// Process layer-1 --> layer-r2-1 ///////////////////////////
1568
1569	for (k = 1; k < r2; k++)
1570	{
1571	/* Process Row-0 data 0*/
1572	index = lr234+kr34;
1573	index2D = 0;
1574
1575	pred1D = P1[index2D];
1576	diff = spaceFillingValue[index] - pred1D;
1577
1578	itvNum = fabs(diff)/realPrecision + 1;
1579
1580	if (itvNum < exe_params->intvCapacity)
1581	{
1582	if (diff < 0) itvNum = -itvNum;
1583	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1584	P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1585	}
1586	else
1587	{
1588	type[index] = 0;
1589	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1590	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1591	memcpy(preDataBytes,vce->curBytes,4);
1592	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1593	P0[index2D] = vce->data;
1594	}
1595
1596	/* Process Row-0 data 1 --> data r4-1 */
1597	for (j = 1; j < r4; j++)
1598	{
1599	index = lr234+kr34+j;
1600	index2D = j;
1601
1602	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
1603	diff = spaceFillingValue[index] - pred2D;
1604
1605	itvNum = fabs(diff)/realPrecision + 1;
1606
1607	if (itvNum < exe_params->intvCapacity)
1608	{
1609	if (diff < 0) itvNum = -itvNum;
1610	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1611	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1612	}
1613	else
1614	{
1615	type[index] = 0;
1616	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1617	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1618	memcpy(preDataBytes,vce->curBytes,4);
1619	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1620	P0[index2D] = vce->data;
1621	}
1622	}
1623
1624	/* Process Row-1 --> Row-r3-1 */
1625	for (i = 1; i < r3; i++)
1626	{
1627	/* Process Row-i data 0 */
1628	index = lr234+kr34+i*r4;
1629	index2D = i*r4;
1630
1631	pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4];
1632	diff = spaceFillingValue[index] - pred2D;
1633
1634	itvNum = fabs(diff)/realPrecision + 1;
1635
1636	if (itvNum < exe_params->intvCapacity)
1637	{
1638	if (diff < 0) itvNum = -itvNum;
1639	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1640	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1641	}
1642	else
1643	{
1644	type[index] = 0;
1645	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1646	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1647	memcpy(preDataBytes,vce->curBytes,4);
1648	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1649	P0[index2D] = vce->data;
1650	}
1651
1652	/* Process Row-i data 1 --> data r4-1 */
1653	for (j = 1; j < r4; j++)
1654	{
1655	index = lr234+kr34+i*r4+j;
1656	index2D = i*r4+j;
1657
1658	pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1];
1659	diff = spaceFillingValue[index] - pred3D;
1660
1661
1662	itvNum = fabs(diff)/realPrecision + 1;
1663
1664	if (itvNum < exe_params->intvCapacity)
1665	{
1666	if (diff < 0) itvNum = -itvNum;
1667	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
1668	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
1669	}
1670	else
1671	{
1672	type[index] = 0;
1673	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
1674	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
1675	memcpy(preDataBytes,vce->curBytes,4);
1676	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
1677	P0[index2D] = vce->data;
1678	}
1679	}
1680	}
1681
1682	float *Pt;
1683	Pt = P1;
1684	P1 = P0;
1685	P0 = Pt;
1686	}
1687	}
1688
1689	free(P0);
1690	free(P1);
1691	size_t exactDataNum = exactLeadNumArray->size;
1692
1693	TightDataPointStorageF* tdps;
1694
1695	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
1696	type, exactMidByteArray->array, exactMidByteArray->size,
1697	exactLeadNumArray->array,
1698	resiBitArray->array, resiBitArray->size,
1699	resiBitsLength,
1700	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
1701
1702	//free memory
1703	free_DIA(exactLeadNumArray);
1704	free_DIA(resiBitArray);
1705	free(type);
1706	free(vce);
1707	free(lce);
1708	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
1709
1710	return tdps;
1711	}
1712
1713	char SZ_compress_args_float_NoCkRngeNoGzip_4D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
1714	{
1715	TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_f);
1716
1717	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
1718
1719	int dataLength = r1r2r3*r4;
1720	if(outSize>dataLengthsizeof(float))
1721	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
1722
1723	free_TightDataPointStorageF(tdps);
1724
1725	return 0;
1726	}
1727
1728	void SZ_compress_args_float_withinRange(unsigned char** newByteData, float oriData, size_t dataLength, size_t outSize)
1729	{
1730	TightDataPointStorageF* tdps = (TightDataPointStorageF*) malloc(sizeof(TightDataPointStorageF));
1731	tdps->rtypeArray = NULL;
1732	tdps->typeArray = NULL;
1733	tdps->leadNumArray = NULL;
1734	tdps->residualMidBits = NULL;
1735
1736	tdps->allSameData = 1;
1737	tdps->dataSeriesLength = dataLength;
1738	tdps->exactMidBytes = (unsigned char)malloc(sizeof(unsigned char)4);
1739	tdps->pwrErrBoundBytes = NULL;
1740	tdps->isLossless = 0;
1741	float value = oriData[0];
1742	floatToBytes(tdps->exactMidBytes, value);
1743	tdps->exactMidBytes_size = 4;
1744
1745	size_t tmpOutSize;
1746	//unsigned char *tmpByteData;
1747	convertTDPStoFlatBytes_float(tdps, newByteData, &tmpOutSize);
1748
1749	//newByteData = (unsigned char)malloc(sizeof(unsigned char)*12); //for floating-point data (1+3+4+4)
1750	//memcpy(*newByteData, tmpByteData, 12);
1751	*outSize = tmpOutSize; //8+SZ_SIZE_TYPE; //8==3+1+4(float_size)
1752	free_TightDataPointStorageF(tdps);
1753	}
1754
1755	int SZ_compress_args_float_wRngeNoGzip(unsigned char** newByteData, float *oriData,
1756	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1757	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
1758	{
1759	int status = SZ_SCES;
1760	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1761	float valueRangeSize = 0, medianValue = 0;
1762
1763	float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
1764	float max = min+valueRangeSize;
1765	double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1766
1767	if(valueRangeSize <= realPrecision)
1768	{
1769	SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
1770	}
1771	else
1772	{
1773	// SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize);
1774	if(r5==0&&r4==0&&r3==0&&r2==0)
1775	{
1776	if(errBoundMode>=PW_REL)
1777	{
1778	SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
1779	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
1780	}
1781	else
1782	SZ_compress_args_float_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
1783	}
1784	else if(r5==0&&r4==0&&r3==0)
1785	{
1786	if(errBoundMode>=PW_REL)
1787	SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max);
1788	else
1789	SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1790	}
1791	else if(r5==0&&r4==0)
1792	{
1793	if(errBoundMode>=PW_REL)
1794	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max);
1795	else
1796	SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1797	}
1798	else if(r5==0)
1799	{
1800	if(errBoundMode>=PW_REL)
1801	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max);
1802	else
1803	SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
1804	}
1805	}
1806	return status;
1807	}
1808
1809	int SZ_compress_args_float(unsigned char** newByteData, float *oriData,
1810	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
1811	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
1812	{
1813	confparams_cpr->errorBoundMode = errBoundMode;
1814	if(errBoundMode==PW_REL)
1815	{
1816	confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
1817	//confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
1818	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE && r3 != 0 )
1819	{
1820	printf("Error: Current version doesn't support 3D data compression with point-wise relative error bound being based on pwrType=AVG\n");
1821	exit(0);
1822	return SZ_NSCS;
1823	}
1824	}
1825	int status = SZ_SCES;
1826	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
1827
1828	if(dataLength <= MIN_NUM_OF_ELEMENTS)
1829	{
1830	*newByteData = SZ_skip_compress_float(oriData, dataLength, outSize);
1831	return status;
1832	}
1833
1834	float valueRangeSize = 0, medianValue = 0;
1835
1836	float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
1837	float max = min+valueRangeSize;
1838	double realPrecision = 0;
1839
1840	if(confparams_cpr->errorBoundMode==PSNR)
1841	{
1842	confparams_cpr->errorBoundMode = ABS;
1843	realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize);
1844	//printf("realPrecision=%lf\n", realPrecision);
1845	}
1846	else
1847	realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1848
1849	if(valueRangeSize <= realPrecision)
1850	{
1851	SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
1852	}
1853	else
1854	{
1855	size_t tmpOutSize = 0;
1856	unsigned char* tmpByteData;
1857
1858	if (r2==0)
1859	{
1860	if(confparams_cpr->errorBoundMode>=PW_REL)
1861	{
1862	SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
1863	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
1864	}
1865	else
1866	#ifdef HAVE_TIMECMPR
1867	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1868	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1869	else
1870	#endif
1871	SZ_compress_args_float_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1872	}
1873	else
1874	if (r3==0)
1875	{
1876	if(confparams_cpr->errorBoundMode>=PW_REL)
1877	SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
1878	else
1879	#ifdef HAVE_TIMECMPR
1880	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1881	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1882	else
1883	#endif
1884	{
1885	if(sz_with_regression == SZ_NO_REGRESSION)
1886	SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1887	else
1888	tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
1889	}
1890	}
1891	else
1892	if (r4==0)
1893	{
1894	if(confparams_cpr->errorBoundMode>=PW_REL)
1895	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
1896	else
1897	#ifdef HAVE_TIMECMPR
1898	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1899	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1900	else
1901	#endif
1902	{
1903	if(sz_with_regression == SZ_NO_REGRESSION)
1904	SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1905	else
1906	tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
1907	}
1908	}
1909	else
1910	if (r5==0)
1911	{
1912	if(confparams_cpr->errorBoundMode>=PW_REL)
1913	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
1914	//ToDO
1915	//SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr(&tmpByteData, oriData, r4, r3, r2, r1, &tmpOutSize, min, max);
1916	else
1917	#ifdef HAVE_TIMECMPR
1918	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
1919	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1920	else
1921	#endif
1922	{
1923	if(sz_with_regression == SZ_NO_REGRESSION)
1924	SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
1925	else
1926	tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
1927	}
1928	}
1929	else
1930	{
1931	printf("Error: doesn't support 5 dimensions for now.\n");
1932	status = SZ_DERR; //dimension error
1933	}
1934	//Call Gzip to do the further compression.
1935	if(confparams_cpr->szMode==SZ_BEST_SPEED)
1936	{
1937	*outSize = tmpOutSize;
1938	*newByteData = tmpByteData;
1939	}
1940	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION \|\| confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION)
1941	{
1942	*outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
1943	free(tmpByteData);
1944	}
1945	else
1946	{
1947	printf("Error: Wrong setting of confparams_cpr->szMode in the float compression.\n");
1948	status = SZ_MERR; //mode error
1949	}
1950	}
1951
1952	return status;
1953	}
1954
1955
/**
 * Derive the number of bits to keep per stored value.
 *
 * The length is 9 + radExpo - reqExpo, where reqExpo comes from
 * getPrecisionReqLength_double(realPrecision); radExpo - reqExpo is the
 * required mantissa length. The result is clamped to [9, 32]. When the upper
 * clamp applies, *medianValue is reset to 0.
 *
 * @param realPrecision error bound in effect
 * @param radExpo       exponent supplied by the caller
 * @param reqLength     out: required length in bits
 * @param medianValue   in/out: zeroed only when the length is capped at 32
 */
void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue)
{
	const short precisionExpo = getPrecisionReqLength_double(realPrecision);
	int bits = 9 + radExpo - precisionExpo;

	if (bits > 32)
	{
		bits = 32;
		*medianValue = 0;
	}
	else if (bits < 9)
	{
		bits = 9;
	}

	*reqLength = bits;
}
1968
1969	//TODO
1970	int SZ_compress_args_float_subblock(unsigned char* compressedBytes, float *oriData,
1971	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
1972	size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
1973	size_t e5, size_t e4, size_t e3, size_t e2, size_t e1,
1974	size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio)
1975	{
1976	int status = SZ_SCES;
1977	float valueRangeSize = 0, medianValue = 0;
1978	computeRangeSize_float_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1);
1979
1980	double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
1981
1982	if(valueRangeSize <= realPrecision)
1983	{
1984	//TODO
1985	//SZ_compress_args_float_withinRange_subblock();
1986	}
1987	else
1988	{
1989	if (r2==0)
1990	{
1991	if(errBoundMode>=PW_REL)
1992	{
1993	//TODO
1994	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_subblock();
1995	printf ("Current subblock version does not support point-wise relative error bound.\n");
1996	}
1997	else
1998	SZ_compress_args_float_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1);
1999	}
2000	else
2001	if (r3==0)
2002	{
2003	//TODO
2004	if(errBoundMode>=PW_REL)
2005	{
2006	//TODO
2007	//SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_subblock();
2008	printf ("Current subblock version does not support point-wise relative error bound.\n");
2009	}
2010	else
2011	SZ_compress_args_float_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1);
2012	}
2013	else
2014	if (r4==0)
2015	{
2016	if(errBoundMode>=PW_REL)
2017	{
2018	//TODO
2019	//SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_subblock();
2020	printf ("Current subblock version does not support point-wise relative error bound.\n");
2021	}
2022	else
2023	SZ_compress_args_float_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1);
2024	}
2025	else
2026	if (r5==0)
2027	{
2028	if(errBoundMode>=PW_REL)
2029	{
2030	//TODO
2031	//SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr_subblock();
2032	printf ("Current subblock version does not support point-wise relative error bound.\n");
2033	}
2034	else
2035	SZ_compress_args_float_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
2036	}
2037	else
2038	{
2039	printf("Error: doesn't support 5 dimensions for now.\n");
2040	status = SZ_DERR; //dimension error
2041	}
2042	}
2043	return status;
2044	}
2045
2046	void SZ_compress_args_float_NoCkRnge_1D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2047	size_t r1, size_t s1, size_t e1)
2048	{
2049	TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r1, s1, e1);
2050
2051	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2052	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2053	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2054	{
2055	unsigned char *tmpCompBytes;
2056	size_t tmpOutSize;
2057	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2058	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2059	free(tmpCompBytes);
2060	}
2061	else
2062	{
2063	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2064	}
2065
2066	//TODO
2067	// if(outSize>dataLengthsizeof(float))
2068	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2069
2070	free_TightDataPointStorageF(tdps);
2071	}
2072
2073	void SZ_compress_args_float_NoCkRnge_2D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2074	size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1)
2075	{
2076	TightDataPointStorageF* tdps = SZ_compress_float_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r2, r1, s2, s1, e2, e1);
2077
2078	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2079	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2080	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2081	{
2082	unsigned char *tmpCompBytes;
2083	size_t tmpOutSize;
2084	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2085	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2086	free(tmpCompBytes);
2087	}
2088	else
2089	{
2090	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2091	}
2092
2093	//TODO
2094	// if(outSize>dataLengthsizeof(float))
2095	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2096
2097	free_TightDataPointStorageF(tdps);
2098	}
2099
2100	void SZ_compress_args_float_NoCkRnge_3D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2101	size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1)
2102	{
2103	TightDataPointStorageF* tdps = SZ_compress_float_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r3, r2, r1, s3, s2, s1, e3, e2, e1);
2104
2105	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2106	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2107	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2108	{
2109	unsigned char *tmpCompBytes;
2110	size_t tmpOutSize;
2111	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2112	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2113	free(tmpCompBytes);
2114	}
2115	else
2116	{
2117	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2118	}
2119
2120	//TODO
2121	// if(outSize>dataLengthsizeof(float))
2122	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2123
2124	free_TightDataPointStorageF(tdps);
2125	}
2126
2127	void SZ_compress_args_float_NoCkRnge_4D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
2128	size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1)
2129	{
2130	TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
2131
2132	if (confparams_cpr->szMode==SZ_BEST_SPEED)
2133	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
2134	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
2135	{
2136	unsigned char *tmpCompBytes;
2137	size_t tmpOutSize;
2138	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
2139	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
2140	free(tmpCompBytes);
2141	}
2142	else
2143	{
2144	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
2145	}
2146
2147	//TODO
2148	// if(outSize>dataLengthsizeof(float))
2149	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
2150
2151	free_TightDataPointStorageF(tdps);
2152
2153	}
2154
2155	unsigned int optimize_intervals_float_1D_subblock(float *oriData, double realPrecision, size_t r1, size_t s1, size_t e1)
2156	{
2157	size_t dataLength = e1 - s1 + 1;
2158	oriData = oriData + s1;
2159
2160	size_t i = 0;
2161	unsigned long radiusIndex;
2162	float pred_value = 0, pred_err;
2163	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2164	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2165	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
2166	for(i=2;i<dataLength;i++)
2167	{
2168	if(i%confparams_cpr->sampleDistance==0)
2169	{
2170	pred_value = 2*oriData[i-1] - oriData[i-2];
2171	//pred_value = oriData[i-1];
2172	pred_err = fabs(pred_value - oriData[i]);
2173	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2174	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2175	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2176	intervals[radiusIndex]++;
2177	}
2178	}
2179	//compute the appropriate number
2180	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2181	size_t sum = 0;
2182	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2183	{
2184	sum += intervals[i];
2185	if(sum>targetCount)
2186	break;
2187	}
2188	if(i>=confparams_cpr->maxRangeRadius)
2189	i = confparams_cpr->maxRangeRadius-1;
2190
2191	unsigned int accIntervals = 2*(i+1);
2192	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2193
2194	if(powerOf2<32)
2195	powerOf2 = 32;
2196
2197	free(intervals);
2198	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
2199	return powerOf2;
2200	}
2201
2202	unsigned int optimize_intervals_float_2D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
2203	{
2204	size_t R1 = e1 - s1 + 1;
2205	size_t R2 = e2 - s2 + 1;
2206
2207	size_t i,j, index;
2208	unsigned long radiusIndex;
2209	float pred_value = 0, pred_err;
2210	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2211	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2212	size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance;
2213	for(i=s1+1;i<=e1;i++)
2214	{
2215	for(j=s2+1;j<=e2;j++)
2216	{
2217	if((i+j)%confparams_cpr->sampleDistance==0)
2218	{
2219	index = i*r2+j;
2220	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
2221	pred_err = fabs(pred_value - oriData[index]);
2222	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2223	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2224	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2225	intervals[radiusIndex]++;
2226	}
2227	}
2228	}
2229	//compute the appropriate number
2230	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2231	size_t sum = 0;
2232	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2233	{
2234	sum += intervals[i];
2235	if(sum>targetCount)
2236	break;
2237	}
2238	if(i>=confparams_cpr->maxRangeRadius)
2239	i = confparams_cpr->maxRangeRadius-1;
2240	unsigned int accIntervals = 2*(i+1);
2241	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2242
2243	if(powerOf2<32)
2244	powerOf2 = 32;
2245
2246	free(intervals);
2247	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
2248	return powerOf2;
2249	}
2250
2251	unsigned int optimize_intervals_float_3D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
2252	{
2253	size_t R1 = e1 - s1 + 1;
2254	size_t R2 = e2 - s2 + 1;
2255	size_t R3 = e3 - s3 + 1;
2256
2257	size_t r23 = r2*r3;
2258
2259	size_t i,j,k, index;
2260	unsigned long radiusIndex;
2261	float pred_value = 0, pred_err;
2262	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2263	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2264	size_t totalSampleSize = R1R2R3/confparams_cpr->sampleDistance;
2265	for(i=s1+1;i<=e1;i++)
2266	{
2267	for(j=s2+1;j<=e2;j++)
2268	{
2269	for(k=s3+1;k<=e3;k++)
2270	{
2271	if((i+j+k)%confparams_cpr->sampleDistance==0)
2272	{
2273	index = ir23+jr3+k;
2274	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
2275	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
2276	pred_err = fabs(pred_value - oriData[index]);
2277	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2278	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2279	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2280	intervals[radiusIndex]++;
2281	}
2282	}
2283	}
2284	}
2285	//compute the appropriate number
2286	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2287	size_t sum = 0;
2288	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2289	{
2290	sum += intervals[i];
2291	if(sum>targetCount)
2292	break;
2293	}
2294	if(i>=confparams_cpr->maxRangeRadius)
2295	i = confparams_cpr->maxRangeRadius-1;
2296	unsigned int accIntervals = 2*(i+1);
2297	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2298
2299	if(powerOf2<32)
2300	powerOf2 = 32;
2301
2302	free(intervals);
2303	return powerOf2;
2304	}
2305
2306	unsigned int optimize_intervals_float_4D_subblock(float *oriData, double realPrecision,
2307	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
2308	{
2309	size_t R1 = e1 - s1 + 1;
2310	size_t R2 = e2 - s2 + 1;
2311	size_t R3 = e3 - s3 + 1;
2312	size_t R4 = e4 - s4 + 1;
2313
2314	size_t r34 = r3*r4;
2315	size_t r234 = r2r3r4;
2316
2317	size_t i,j,k,l, index;
2318	unsigned long radiusIndex;
2319	float pred_value = 0, pred_err;
2320	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
2321	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
2322	size_t totalSampleSize = R1R2R3*R4/confparams_cpr->sampleDistance;
2323	for(i=s1+1;i<=e1;i++)
2324	{
2325	for(j=s2+1;j<=e2;j++)
2326	{
2327	for(k=s3+1;k<=e3;k++)
2328	{
2329	for (l=s4+1;l<=e4;l++)
2330	{
2331	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
2332	{
2333	index = ir234+jr34+k*r4+l;
2334	pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34]
2335	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
2336	pred_err = fabs(pred_value - oriData[index]);
2337	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
2338	if(radiusIndex>=confparams_cpr->maxRangeRadius)
2339	radiusIndex = confparams_cpr->maxRangeRadius - 1;
2340	intervals[radiusIndex]++;
2341	}
2342	}
2343	}
2344	}
2345	}
2346	//compute the appropriate number
2347	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
2348	size_t sum = 0;
2349	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
2350	{
2351	sum += intervals[i];
2352	if(sum>targetCount)
2353	break;
2354	}
2355	if(i>=confparams_cpr->maxRangeRadius)
2356	i = confparams_cpr->maxRangeRadius-1;
2357
2358	unsigned int accIntervals = 2*(i+1);
2359	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
2360
2361	if(powerOf2<32)
2362	powerOf2 = 32;
2363
2364	free(intervals);
2365	return powerOf2;
2366	}
2367
2368	TightDataPointStorageF* SZ_compress_float_1D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
2369	size_t r1, size_t s1, size_t e1)
2370	{
2371	size_t dataLength = e1 - s1 + 1;
2372	unsigned int quantization_intervals;
2373	if(exe_params->optQuantMode==1)
2374	quantization_intervals = optimize_intervals_float_1D_subblock(oriData, realPrecision, r1, s1, e1);
2375	else
2376	quantization_intervals = exe_params->intvCapacity;
2377	updateQuantizationInfo(quantization_intervals);
2378
2379	size_t i;
2380	int reqLength;
2381	float medianValue = medianValue_f;
2382	short radExpo = getExponent_float(valueRangeSize/2);
2383
2384	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
2385
2386	int* type = (int) malloc(dataLengthsizeof(int));
2387
2388	float* spaceFillingValue = oriData + s1;
2389
2390	DynamicIntArray *exactLeadNumArray;
2391	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2392
2393	DynamicByteArray *exactMidByteArray;
2394	new_DBA(&exactMidByteArray, DynArrayInitLen);
2395
2396	DynamicIntArray *resiBitArray;
2397	new_DIA(&resiBitArray, DynArrayInitLen);
2398
2399	type[0] = 0;
2400
2401	unsigned char preDataBytes[4];
2402	intToBytes_bigEndian(preDataBytes, 0);
2403
2404	int reqBytesLength = reqLength/8;
2405	int resiBitsLength = reqLength%8;
2406	float last3CmprsData[3] = {0};
2407
2408	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
2409	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2410
2411	//add the first data
2412	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2413	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2414	memcpy(preDataBytes,vce->curBytes,4);
2415	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2416	listAdd_float(last3CmprsData, vce->data);
2417
2418	//add the second data
2419	type[1] = 0;
2420	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2421	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2422	memcpy(preDataBytes,vce->curBytes,4);
2423	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2424	listAdd_float(last3CmprsData, vce->data);
2425
2426	int state;
2427	double checkRadius;
2428	float curData;
2429	float pred;
2430	float predAbsErr;
2431	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
2432	double interval = 2*realPrecision;
2433
2434	for(i=2;i<dataLength;i++)
2435	{
2436	curData = spaceFillingValue[i];
2437	pred = 2*last3CmprsData[0] - last3CmprsData[1];
2438	predAbsErr = fabs(curData - pred);
2439	if(predAbsErr<=checkRadius)
2440	{
2441	state = (predAbsErr/realPrecision+1)/2;
2442	if(curData>=pred)
2443	{
2444	type[i] = exe_params->intvRadius+state;
2445	pred = pred + state*interval;
2446	}
2447	else
2448	{
2449	type[i] = exe_params->intvRadius-state;
2450	pred = pred - state*interval;
2451	}
2452
2453	listAdd_float(last3CmprsData, pred);
2454	continue;
2455	}
2456
2457	//unpredictable data processing
2458	type[i] = 0;
2459	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2460	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2461	memcpy(preDataBytes,vce->curBytes,4);
2462	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2463
2464	listAdd_float(last3CmprsData, vce->data);
2465	}
2466
2467	size_t exactDataNum = exactLeadNumArray->size;
2468
2469	TightDataPointStorageF* tdps;
2470
2471	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
2472	type, exactMidByteArray->array, exactMidByteArray->size,
2473	exactLeadNumArray->array,
2474	resiBitArray->array, resiBitArray->size,
2475	resiBitsLength,
2476	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2477
2478	//free memory
2479	free_DIA(exactLeadNumArray);
2480	free_DIA(resiBitArray);
2481	free(type);
2482	free(vce);
2483	free(lce);
2484	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2485
2486	return tdps;
2487	}
2488
2489	TightDataPointStorageF* SZ_compress_float_2D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
2490	size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
2491	{
2492	unsigned int quantization_intervals;
2493	if(exe_params->optQuantMode==1)
2494	{
2495	quantization_intervals = optimize_intervals_float_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2);
2496	updateQuantizationInfo(quantization_intervals);
2497	}
2498	else
2499	quantization_intervals = exe_params->intvCapacity;
2500
2501	size_t i,j;
2502	int reqLength;
2503	float pred1D, pred2D;
2504	float diff = 0.0;
2505	double itvNum = 0;
2506	float P0, P1;
2507
2508	size_t R1 = e1 - s1 + 1;
2509	size_t R2 = e2 - s2 + 1;
2510	size_t dataLength = R1*R2;
2511
2512	P0 = (float)malloc(R2sizeof(float));
2513	memset(P0, 0, R2*sizeof(float));
2514	P1 = (float)malloc(R2sizeof(float));
2515	memset(P1, 0, R2*sizeof(float));
2516
2517	float medianValue = medianValue_f;
2518	short radExpo = getExponent_float(valueRangeSize/2);
2519	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
2520
2521	int* type = (int) malloc(dataLengthsizeof(int));
2522
2523	float* spaceFillingValue = oriData; //
2524
2525	DynamicIntArray *exactLeadNumArray;
2526	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2527
2528	DynamicByteArray *exactMidByteArray;
2529	new_DBA(&exactMidByteArray, DynArrayInitLen);
2530
2531	DynamicIntArray *resiBitArray;
2532	new_DIA(&resiBitArray, DynArrayInitLen);
2533
2534	unsigned char preDataBytes[4];
2535	intToBytes_bigEndian(preDataBytes, 0);
2536
2537	int reqBytesLength = reqLength/8;
2538	int resiBitsLength = reqLength%8;
2539
2540	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
2541	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2542
2543	/* Process Row-s1 data s2*/
2544	size_t gIndex;
2545	size_t lIndex;
2546
2547	gIndex = s1*r2+s2;
2548	lIndex = 0;
2549
2550	type[lIndex] = 0;
2551	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2552	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2553	memcpy(preDataBytes,vce->curBytes,4);
2554	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2555	P1[0] = vce->data;
2556
2557	/* Process Row-s1 data s2+1*/
2558	gIndex = s1*r2+(s2+1);
2559	lIndex = 1;
2560
2561	pred1D = P1[0];
2562	diff = spaceFillingValue[gIndex] - pred1D;
2563
2564	itvNum = fabs(diff)/realPrecision + 1;
2565
2566	if (itvNum < exe_params->intvCapacity)
2567	{
2568	if (diff < 0) itvNum = -itvNum;
2569	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2570	P1[1] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2571	}
2572	else
2573	{
2574	type[lIndex] = 0;
2575	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2576	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2577	memcpy(preDataBytes,vce->curBytes,4);
2578	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2579	P1[1] = vce->data;
2580	}
2581
2582	/* Process Row-s1 data s2+2 --> data e2 */
2583	for (j = 2; j < R2; j++)
2584	{
2585	gIndex = s1*r2+(s2+j);
2586	lIndex = j;
2587
2588	pred1D = 2*P1[j-1] - P1[j-2];
2589	diff = spaceFillingValue[gIndex] - pred1D;
2590
2591	itvNum = fabs(diff)/realPrecision + 1;
2592
2593	if (itvNum < exe_params->intvCapacity)
2594	{
2595	if (diff < 0) itvNum = -itvNum;
2596	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2597	P1[j] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2598	}
2599	else
2600	{
2601	type[lIndex] = 0;
2602	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2603	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2604	memcpy(preDataBytes,vce->curBytes,4);
2605	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2606	P1[j] = vce->data;
2607	}
2608	}
2609
2610	/* Process Row-s1+1 --> Row-e1 */
2611	for (i = 1; i < R1; i++)
2612	{
2613	/* Process row-s1+i data s2 */
2614	gIndex = (s1+i)*r2+s2;
2615	lIndex = i*R2;
2616
2617	pred1D = P1[0];
2618	diff = spaceFillingValue[gIndex] - pred1D;
2619
2620	itvNum = fabs(diff)/realPrecision + 1;
2621
2622	if (itvNum < exe_params->intvCapacity)
2623	{
2624	if (diff < 0) itvNum = -itvNum;
2625	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2626	P0[0] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2627	}
2628	else
2629	{
2630	type[lIndex] = 0;
2631	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2632	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2633	memcpy(preDataBytes,vce->curBytes,4);
2634	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2635	P0[0] = vce->data;
2636	}
2637
2638	/* Process row-s1+i data s2+1 --> e2 */
2639	for (j = 1; j < R2; j++)
2640	{
2641	gIndex = (s1+i)*r2+(s2+j);
2642	lIndex = i*R2+j;
2643
2644	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
2645
2646	pred2D = P0[j-1] + P1[j] - P1[j-1];
2647
2648	diff = spaceFillingValue[gIndex] - pred2D;
2649
2650	itvNum = fabs(diff)/realPrecision + 1;
2651
2652	if (itvNum < exe_params->intvCapacity)
2653	{
2654	if (diff < 0) itvNum = -itvNum;
2655	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2656	P0[j] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2657	}
2658	else
2659	{
2660	type[lIndex] = 0;
2661	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2662	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2663	memcpy(preDataBytes,vce->curBytes,4);
2664	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2665	P0[j] = vce->data;
2666	}
2667	}
2668
2669	float *Pt;
2670	Pt = P1;
2671	P1 = P0;
2672	P0 = Pt;
2673	}
2674
2675	free(P0);
2676	free(P1);
2677	size_t exactDataNum = exactLeadNumArray->size;
2678
2679	TightDataPointStorageF* tdps;
2680
2681	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
2682	type, exactMidByteArray->array, exactMidByteArray->size,
2683	exactLeadNumArray->array,
2684	resiBitArray->array, resiBitArray->size,
2685	resiBitsLength,
2686	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
2687
2688	//free memory
2689	free_DIA(exactLeadNumArray);
2690	free_DIA(resiBitArray);
2691	free(type);
2692	free(vce);
2693	free(lce);
2694	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
2695
2696	return tdps;
2697	}
2698
2699	TightDataPointStorageF* SZ_compress_float_3D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
2700	size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
2701	{
2702	unsigned int quantization_intervals;
2703	if(exe_params->optQuantMode==1)
2704	{
2705	quantization_intervals = optimize_intervals_float_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3);
2706	updateQuantizationInfo(quantization_intervals);
2707	}
2708	else
2709	quantization_intervals = exe_params->intvCapacity;
2710
2711	size_t i,j,k;
2712	int reqLength;
2713	float pred1D, pred2D, pred3D;
2714	float diff = 0.0;
2715	double itvNum = 0;
2716	float P0, P1;
2717
2718	size_t R1 = e1 - s1 + 1;
2719	size_t R2 = e2 - s2 + 1;
2720	size_t R3 = e3 - s3 + 1;
2721	size_t dataLength = R1R2R3;
2722
2723	size_t r23 = r2*r3;
2724	size_t R23 = R2*R3;
2725
2726	P0 = (float)malloc(R23sizeof(float));
2727	P1 = (float)malloc(R23sizeof(float));
2728
2729	float medianValue = medianValue_f;
2730	short radExpo = getExponent_float(valueRangeSize/2);
2731	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
2732
2733	int* type = (int) malloc(dataLengthsizeof(int));
2734	//type[dataLength]=0;
2735
2736	float* spaceFillingValue = oriData; //
2737
2738	DynamicIntArray *exactLeadNumArray;
2739	new_DIA(&exactLeadNumArray, DynArrayInitLen);
2740
2741	DynamicByteArray *exactMidByteArray;
2742	new_DBA(&exactMidByteArray, DynArrayInitLen);
2743
2744	DynamicIntArray *resiBitArray;
2745	new_DIA(&resiBitArray, DynArrayInitLen);
2746
2747	unsigned char preDataBytes[4];
2748	intToBytes_bigEndian(preDataBytes, 0);
2749
2750	int reqBytesLength = reqLength/8;
2751	int resiBitsLength = reqLength%8;
2752
2753	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
2754	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
2755
2756
2757	/////////////////////////// Process layer-s1 ///////////////////////////
2758	/* Process Row-s2 data s3*/
2759	size_t gIndex; //global index
2760	size_t lIndex; //local index
2761	size_t index2D; //local 2D index
2762
2763	gIndex = s1r23+s2r3+s3;
2764	lIndex = 0;
2765	index2D = 0;
2766
2767	type[lIndex] = 0;
2768	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2769	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2770	memcpy(preDataBytes,vce->curBytes,4);
2771	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2772	P1[index2D] = vce->data;
2773
2774	/* Process Row-s2 data s3+1*/
2775	gIndex = s1r23+s2r3+s3+1;
2776	lIndex = 1;
2777	index2D = 1;
2778
2779	pred1D = P1[index2D-1];
2780	diff = spaceFillingValue[gIndex] - pred1D;
2781
2782	itvNum = fabs(diff)/realPrecision + 1;
2783
2784	if (itvNum < exe_params->intvCapacity)
2785	{
2786	if (diff < 0) itvNum = -itvNum;
2787	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2788	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2789	}
2790	else
2791	{
2792	type[lIndex] = 0;
2793	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2794	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2795	memcpy(preDataBytes,vce->curBytes,4);
2796	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2797	P1[index2D] = vce->data;
2798	}
2799
2800	/* Process Row-s2 data s3+2 --> data e3 */
2801	for (j = 2; j < R3; j++)
2802	{
2803	gIndex = s1r23+s2r3+s3+j;
2804	lIndex = j;
2805	index2D = j;
2806
2807	pred1D = 2*P1[index2D-1] - P1[index2D-2];
2808	diff = spaceFillingValue[gIndex] - pred1D;
2809
2810	itvNum = fabs(diff)/realPrecision + 1;
2811
2812	if (itvNum < exe_params->intvCapacity)
2813	{
2814	if (diff < 0) itvNum = -itvNum;
2815	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2816	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2817	}
2818	else
2819	{
2820	type[lIndex] = 0;
2821	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2822	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2823	memcpy(preDataBytes,vce->curBytes,4);
2824	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2825	P1[index2D] = vce->data;
2826	}
2827	}
2828
2829	/* Process Row-s2+1 --> Row-e2 */
2830	for (i = 1; i < R2; i++)
2831	{
2832	/* Process row-s2+i data s3 */
2833	gIndex = s1r23+(s2+i)r3+s3;
2834	lIndex = i*R3;
2835	index2D = i*R3;
2836
2837	pred1D = P1[index2D-R3];
2838	diff = spaceFillingValue[gIndex] - pred1D;
2839
2840	itvNum = fabs(diff)/realPrecision + 1;
2841
2842	if (itvNum < exe_params->intvCapacity)
2843	{
2844	if (diff < 0) itvNum = -itvNum;
2845	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2846	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2847	}
2848	else
2849	{
2850	type[lIndex] = 0;
2851	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2852	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2853	memcpy(preDataBytes,vce->curBytes,4);
2854	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2855	P1[index2D] = vce->data;
2856	}
2857
2858	/* Process row-s2+i data s3+1 --> data e3*/
2859	for (j = 1; j < R3; j++)
2860	{
2861	gIndex = s1r23+(s2+i)r3+s3+j;
2862	lIndex = i*R3+j;
2863	index2D = i*R3+j;
2864
2865	pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1];
2866	diff = spaceFillingValue[gIndex] - pred2D;
2867
2868	itvNum = fabs(diff)/realPrecision + 1;
2869
2870	if (itvNum < exe_params->intvCapacity)
2871	{
2872	if (diff < 0) itvNum = -itvNum;
2873	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2874	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2875	}
2876	else
2877	{
2878	type[lIndex] = 0;
2879	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2880	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2881	memcpy(preDataBytes,vce->curBytes,4);
2882	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2883	P1[index2D] = vce->data;
2884	}
2885	}
2886	}
2887
2888
2889	/////////////////////////// Process layer-s1+1 --> layer-e1 ///////////////////////////
2890
2891	for (k = 1; k < R1; k++)
2892	{
2893	/* Process Row-s2 data s3*/
2894	gIndex = (s1+k)r23+s2r3+s3;
2895	lIndex = k*R23;
2896	index2D = 0;
2897
2898	pred1D = P1[index2D];
2899	diff = spaceFillingValue[gIndex] - pred1D;
2900
2901	itvNum = fabs(diff)/realPrecision + 1;
2902
2903	if (itvNum < exe_params->intvCapacity)
2904	{
2905	if (diff < 0) itvNum = -itvNum;
2906	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2907	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2908	}
2909	else
2910	{
2911	type[lIndex] = 0;
2912	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2913	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2914	memcpy(preDataBytes,vce->curBytes,4);
2915	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2916	P0[index2D] = vce->data;
2917	}
2918
2919	/* Process Row-s2 data s3+1 --> data e3 */
2920	for (j = 1; j < R3; j++)
2921	{
2922	gIndex = (s1+k)r23+s2r3+s3+j;
2923	lIndex = k*R23+j;
2924	index2D = j;
2925
2926	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
2927	diff = spaceFillingValue[gIndex] - pred2D;
2928
2929	itvNum = fabs(diff)/realPrecision + 1;
2930
2931	if (itvNum < exe_params->intvCapacity)
2932	{
2933	if (diff < 0) itvNum = -itvNum;
2934	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2935	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2936	}
2937	else
2938	{
2939	type[lIndex] = 0;
2940	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2941	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2942	memcpy(preDataBytes,vce->curBytes,4);
2943	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2944	P0[index2D] = vce->data;
2945	}
2946	}
2947
2948	/* Process Row-s2+1 --> Row-e2 */
2949	for (i = 1; i < R2; i++)
2950	{
2951	/* Process Row-s2+i data s3 */
2952	gIndex = (s1+k)r23+(s2+i)r3+s3;
2953	lIndex = kR23+iR3;
2954	index2D = i*R3;
2955
2956	pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3];
2957	diff = spaceFillingValue[gIndex] - pred2D;
2958
2959	itvNum = fabs(diff)/realPrecision + 1;
2960
2961	if (itvNum < exe_params->intvCapacity)
2962	{
2963	if (diff < 0) itvNum = -itvNum;
2964	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2965	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2966	}
2967	else
2968	{
2969	type[lIndex] = 0;
2970	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
2971	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
2972	memcpy(preDataBytes,vce->curBytes,4);
2973	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
2974	P0[index2D] = vce->data;
2975	}
2976
2977	/* Process Row-s2+i data s3+1 --> data e3 */
2978	for (j = 1; j < R3; j++)
2979	{
2980	gIndex = (s1+k)r23+(s2+i)r3+s3+j;
2981	lIndex = kR23+iR3+j;
2982	index2D = i*R3+j;
2983
2984	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
2985
2986	pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1];
2987	diff = spaceFillingValue[gIndex] - pred3D;
2988
2989	itvNum = fabs(diff)/realPrecision + 1;
2990
2991	if (itvNum < exe_params->intvCapacity)
2992	{
2993	if (diff < 0) itvNum = -itvNum;
2994	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
2995	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
2996	}
2997	else
2998	{
2999	type[lIndex] = 0;
3000	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3001	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3002	memcpy(preDataBytes,vce->curBytes,4);
3003	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3004	P0[index2D] = vce->data;
3005	}
3006	}
3007	}
3008
3009	float *Pt;
3010	Pt = P1;
3011	P1 = P0;
3012	P0 = Pt;
3013	}
3014
3015	free(P0);
3016	free(P1);
3017	size_t exactDataNum = exactLeadNumArray->size;
3018
3019	TightDataPointStorageF* tdps;
3020
3021	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
3022	type, exactMidByteArray->array, exactMidByteArray->size,
3023	exactLeadNumArray->array,
3024	resiBitArray->array, resiBitArray->size,
3025	resiBitsLength,
3026	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
3027
3028	//free memory
3029	free_DIA(exactLeadNumArray);
3030	free_DIA(resiBitArray);
3031	free(type);
3032	free(vce);
3033	free(lce);
3034	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
3035
3036	return tdps;
3037	}
3038
3039	TightDataPointStorageF* SZ_compress_float_4D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
3040	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
3041	{
3042	unsigned int quantization_intervals;
3043	if(exe_params->optQuantMode==1)
3044	{
3045	quantization_intervals = optimize_intervals_float_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4);
3046	updateQuantizationInfo(quantization_intervals);
3047	}
3048	else
3049	quantization_intervals = exe_params->intvCapacity;
3050
3051	size_t i,j,k;
3052	int reqLength;
3053	float pred1D, pred2D, pred3D;
3054	float diff = 0.0;
3055	double itvNum = 0;
3056	float P0, P1;
3057
3058	size_t R1 = e1 - s1 + 1;
3059	size_t R2 = e2 - s2 + 1;
3060	size_t R3 = e3 - s3 + 1;
3061	size_t R4 = e4 - s4 + 1;
3062
3063	size_t dataLength = R1R2R3*R4;
3064
3065	size_t r34 = r3*r4;
3066	size_t r234 = r2r3r4;
3067	size_t R34 = R3*R4;
3068	size_t R234 = R2R3R4;
3069
3070	P0 = (float)malloc(R34sizeof(float));
3071	P1 = (float)malloc(R34sizeof(float));
3072
3073	float medianValue = medianValue_f;
3074	short radExpo = getExponent_float(valueRangeSize/2);
3075	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
3076
3077	int* type = (int) malloc(dataLengthsizeof(int));
3078
3079	float* spaceFillingValue = oriData; //
3080
3081	DynamicIntArray *exactLeadNumArray;
3082	new_DIA(&exactLeadNumArray, DynArrayInitLen);
3083
3084	DynamicByteArray *exactMidByteArray;
3085	new_DBA(&exactMidByteArray, DynArrayInitLen);
3086
3087	DynamicIntArray *resiBitArray;
3088	new_DIA(&resiBitArray, DynArrayInitLen);
3089
3090	unsigned char preDataBytes[4];
3091	intToBytes_bigEndian(preDataBytes, 0);
3092
3093	int reqBytesLength = reqLength/8;
3094	int resiBitsLength = reqLength%8;
3095
3096	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
3097	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
3098
3099
3100	size_t l;
3101	for (l = 0; l < R1; l++)
3102	{
3103
3104	/////////////////////////// Process layer-s2 ///////////////////////////
3105	/* Process Row-s3 data s4*/
3106	size_t gIndex; //global index
3107	size_t lIndex; //local index
3108	size_t index2D; //local 2D index
3109
3110	gIndex = (s1+l)r234+s2r34+s3*r4+s4;
3111	lIndex = l*R234;
3112	index2D = 0;
3113
3114	type[lIndex] = 0;
3115	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3116	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3117	memcpy(preDataBytes,vce->curBytes,4);
3118	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3119	P1[index2D] = vce->data;
3120
3121	/* Process Row-s3 data s4+1*/
3122	gIndex = (s1+l)r234+s2r34+s3*r4+s4+1;
3123	lIndex = l*R234+1;
3124	index2D = 1;
3125
3126	pred1D = P1[index2D-1];
3127	diff = spaceFillingValue[gIndex] - pred1D;
3128
3129	itvNum = fabs(diff)/realPrecision + 1;
3130
3131	if (itvNum < exe_params->intvCapacity)
3132	{
3133	if (diff < 0) itvNum = -itvNum;
3134	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3135	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3136	}
3137	else
3138	{
3139	type[lIndex] = 0;
3140	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3141	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3142	memcpy(preDataBytes,vce->curBytes,4);
3143	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3144	P1[index2D] = vce->data;
3145	}
3146
3147	/* Process Row-s3 data s4+2 --> data e4 */
3148	for (j = 2; j < R4; j++)
3149	{
3150	gIndex = (s1+l)r234+s2r34+s3*r4+s4+j;
3151	lIndex = l*R234+j;
3152	index2D = j;
3153
3154	pred1D = 2*P1[index2D-1] - P1[index2D-2];
3155	diff = spaceFillingValue[gIndex] - pred1D;
3156
3157	itvNum = fabs(diff)/realPrecision + 1;
3158
3159	if (itvNum < exe_params->intvCapacity)
3160	{
3161	if (diff < 0) itvNum = -itvNum;
3162	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3163	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3164	}
3165	else
3166	{
3167	type[lIndex] = 0;
3168	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3169	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3170	memcpy(preDataBytes,vce->curBytes,4);
3171	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3172	P1[index2D] = vce->data;
3173	}
3174	}
3175
3176	/* Process Row-s3+1 --> Row-e3 */
3177	for (i = 1; i < R3; i++)
3178	{
3179	/* Process row-s2+i data s3 */
3180	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4;
3181	lIndex = lR234+iR4;
3182	index2D = i*R4;
3183
3184	pred1D = P1[index2D-R4];
3185	diff = spaceFillingValue[gIndex] - pred1D;
3186
3187	itvNum = fabs(diff)/realPrecision + 1;
3188
3189	if (itvNum < exe_params->intvCapacity)
3190	{
3191	if (diff < 0) itvNum = -itvNum;
3192	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3193	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3194	}
3195	else
3196	{
3197	type[lIndex] = 0;
3198	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3199	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3200	memcpy(preDataBytes,vce->curBytes,4);
3201	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3202	P1[index2D] = vce->data;
3203	}
3204
3205	/* Process row-s3+i data s4+1 --> data e4*/
3206	for (j = 1; j < R4; j++)
3207	{
3208	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4+j;
3209	lIndex = lR234+iR4+j;
3210	index2D = i*R4+j;
3211
3212	pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1];
3213	diff = spaceFillingValue[gIndex] - pred2D;
3214
3215	itvNum = fabs(diff)/realPrecision + 1;
3216
3217	if (itvNum < exe_params->intvCapacity)
3218	{
3219	if (diff < 0) itvNum = -itvNum;
3220	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3221	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3222	}
3223	else
3224	{
3225	type[lIndex] = 0;
3226	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3227	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3228	memcpy(preDataBytes,vce->curBytes,4);
3229	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3230	P1[index2D] = vce->data;
3231	}
3232	}
3233	}
3234
3235
3236	/////////////////////////// Process layer-s2+1 --> layer-e2 ///////////////////////////
3237
3238	for (k = 1; k < R2; k++)
3239	{
3240	/* Process Row-s3 data s4*/
3241	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4;
3242	lIndex = lR234+kR34;
3243	index2D = 0;
3244
3245	pred1D = P1[index2D];
3246	diff = spaceFillingValue[gIndex] - pred1D;
3247
3248	itvNum = fabs(diff)/realPrecision + 1;
3249
3250	if (itvNum < exe_params->intvCapacity)
3251	{
3252	if (diff < 0) itvNum = -itvNum;
3253	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3254	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3255	}
3256	else
3257	{
3258	type[lIndex] = 0;
3259	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3260	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3261	memcpy(preDataBytes,vce->curBytes,4);
3262	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3263	P0[index2D] = vce->data;
3264	}
3265
3266	/* Process Row-s3 data s4+1 --> data e4 */
3267	for (j = 1; j < R4; j++)
3268	{
3269	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4+j;
3270	lIndex = lR234+kR34+j;
3271	index2D = j;
3272
3273	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
3274	diff = spaceFillingValue[gIndex] - pred2D;
3275
3276	itvNum = fabs(diff)/realPrecision + 1;
3277
3278	if (itvNum < exe_params->intvCapacity)
3279	{
3280	if (diff < 0) itvNum = -itvNum;
3281	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3282	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3283	}
3284	else
3285	{
3286	type[lIndex] = 0;
3287	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3288	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3289	memcpy(preDataBytes,vce->curBytes,4);
3290	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3291	P0[index2D] = vce->data;
3292	}
3293	}
3294
3295	/* Process Row-s3+1 --> Row-e3 */
3296	for (i = 1; i < R3; i++)
3297	{
3298	/* Process Row-s3+i data s4 */
3299	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4;
3300	lIndex = lR234+kR34+i*R4;
3301	index2D = i*R4;
3302
3303	pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4];
3304	diff = spaceFillingValue[gIndex] - pred2D;
3305
3306	itvNum = fabs(diff)/realPrecision + 1;
3307
3308	if (itvNum < exe_params->intvCapacity)
3309	{
3310	if (diff < 0) itvNum = -itvNum;
3311	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3312	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3313	}
3314	else
3315	{
3316	type[lIndex] = 0;
3317	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3318	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3319	memcpy(preDataBytes,vce->curBytes,4);
3320	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3321	P0[index2D] = vce->data;
3322	}
3323
3324	/* Process Row-s3+i data s4+1 --> data e4 */
3325	for (j = 1; j < R4; j++)
3326	{
3327	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4+j;
3328	lIndex = lR234+kR34+i*R4+j;
3329	index2D = i*R4+j;
3330
3331	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
3332
3333	pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1];
3334	diff = spaceFillingValue[gIndex] - pred3D;
3335
3336	itvNum = fabs(diff)/realPrecision + 1;
3337
3338	if (itvNum < exe_params->intvCapacity)
3339	{
3340	if (diff < 0) itvNum = -itvNum;
3341	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
3342	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
3343	}
3344	else
3345	{
3346	type[lIndex] = 0;
3347	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
3348	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
3349	memcpy(preDataBytes,vce->curBytes,4);
3350	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
3351	P0[index2D] = vce->data;
3352	}
3353	}
3354	}
3355
3356	float *Pt;
3357	Pt = P1;
3358	P1 = P0;
3359	P0 = Pt;
3360	}
3361
3362	}
3363
3364	free(P0);
3365	free(P1);
3366	size_t exactDataNum = exactLeadNumArray->size;
3367
3368	TightDataPointStorageF* tdps;
3369
3370	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
3371	type, exactMidByteArray->array, exactMidByteArray->size,
3372	exactLeadNumArray->array,
3373	resiBitArray->array, resiBitArray->size,
3374	resiBitsLength,
3375	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
3376
3377	//free memory
3378	free_DIA(exactLeadNumArray);
3379	free_DIA(resiBitArray);
3380	free(type);
3381	free(vce);
3382	free(lce);
3383	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
3384
3385	return tdps;
3386	}
3387
3388	unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
3389	{
3390	size_t i;
3391	size_t radiusIndex;
3392	size_t r23=r2*r3;
3393	float pred_value = 0, pred_err;
3394	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3395	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3396	size_t totalSampleSize = 0;
3397
3398	size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
3399	size_t offset_count_2;
3400	float * data_pos = oriData + r23 + r3 + offset_count;
3401	size_t n1_count = 1, n2_count = 1; // count i,j sum
3402	size_t len = r1 * r2 * r3;
3403	while(data_pos - oriData < len){
3404	totalSampleSize++;
3405	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
3406	pred_err = fabs(pred_value - *data_pos);
3407	radiusIndex = (pred_err/realPrecision+1)/2;
3408	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3409	{
3410	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3411	}
3412	intervals[radiusIndex]++;
3413	offset_count += confparams_cpr->sampleDistance;
3414	if(offset_count >= r3){
3415	n2_count ++;
3416	if(n2_count == r2){
3417	n1_count ++;
3418	n2_count = 1;
3419	data_pos += r3;
3420	}
3421	offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance;
3422	data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3423	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3424	if(offset_count == 0) offset_count ++;
3425	}
3426	else data_pos += confparams_cpr->sampleDistance;
3427	}
3428	//compute the appropriate number
3429	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3430	size_t sum = 0;
3431	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3432	{
3433	sum += intervals[i];
3434	if(sum>targetCount)
3435	break;
3436	}
3437	if(i>=confparams_cpr->maxRangeRadius)
3438	i = confparams_cpr->maxRangeRadius-1;
3439	unsigned int accIntervals = 2*(i+1);
3440	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3441
3442	if(powerOf2<32)
3443	powerOf2 = 32;
3444	free(intervals);
3445	return powerOf2;
3446	}
3447
3448	size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){
3449
3450	size_t dim0_offset = dim_1 * dim_2;
3451	size_t dim1_offset = dim_2;
3452
3453	// data_pos = block_ori_data;
3454	// for(size_t i=0; i<block_dim_0; i++){
3455	// for(size_t j=0; j<block_dim_1; j++){
3456	// for(size_t k=0; k<block_dim_2; k++){
3457	// sum += *data_pos;
3458	// data_pos ++;
3459	// }
3460	// data_pos += dim1_offset - block_dim_2;
3461	// }
3462	// data_pos += dim0_offset - block_dim_1 * dim1_offset;
3463	// }
3464	// size_t num_elements = block_dim_0 * block_dim_1 * block_dim_2;
3465	// if(num_elements > 0) mean[0] = sum / num_elements;
3466	// else mean[0] = 0.0;
3467	mean[0] = block_ori_data[0];
3468
3469	size_t unpredictable_count = 0;
3470	size_t r1, r2, r3;
3471	r1 = block_dim_0;
3472	r2 = block_dim_1;
3473	r3 = block_dim_2;
3474
3475	float * cur_data_pos = block_ori_data;
3476	float curData;
3477	float pred1D, pred2D, pred3D;
3478	double itvNum;
3479	double diff;
3480	size_t i, j, k;
3481	size_t r23 = r2*r3;
3482	// Process Row-0 data 0
3483	pred1D = mean[0];
3484	curData = *cur_data_pos;
3485	diff = curData - pred1D;
3486	itvNum = fabs(diff)/realPrecision + 1;
3487	if (itvNum < exe_params->intvCapacity){
3488	if (diff < 0) itvNum = -itvNum;
3489	type[0] = (int) (itvNum/2) + exe_params->intvRadius;
3490	P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
3491	//ganrantee comporession error against the case of machine-epsilon
3492	if(fabs(curData-P1[0])>realPrecision){
3493	type[0] = 0;
3494	P1[0] = curData;
3495	unpredictable_data[unpredictable_count ++] = curData;
3496	}
3497	}
3498	else{
3499	type[0] = 0;
3500	P1[0] = curData;
3501	unpredictable_data[unpredictable_count ++] = curData;
3502	}
3503
3504	/* Process Row-0 data 1*/
3505	pred1D = P1[0];
3506	curData = cur_data_pos[1];
3507	diff = curData - pred1D;
3508	itvNum = fabs(diff)/realPrecision + 1;
3509	if (itvNum < exe_params->intvCapacity){
3510	if (diff < 0) itvNum = -itvNum;
3511	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
3512	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
3513	//ganrantee comporession error against the case of machine-epsilon
3514	if(fabs(curData-P1[1])>realPrecision){
3515	type[1] = 0;
3516	P1[1] = curData;
3517	unpredictable_data[unpredictable_count ++] = curData;
3518	}
3519	}
3520	else{
3521	type[1] = 0;
3522	P1[1] = curData;
3523	unpredictable_data[unpredictable_count ++] = curData;
3524	}
3525	/* Process Row-0 data 2 --> data r3-1 */
3526	for (j = 2; j < r3; j++){
3527	pred1D = 2*P1[j-1] - P1[j-2];
3528	curData = cur_data_pos[j];
3529	diff = curData - pred1D;
3530	itvNum = fabs(diff)/realPrecision + 1;
3531	if (itvNum < exe_params->intvCapacity){
3532	if (diff < 0) itvNum = -itvNum;
3533	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
3534	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
3535	//ganrantee comporession error against the case of machine-epsilon
3536	if(fabs(curData-P1[j])>realPrecision){
3537	type[j] = 0;
3538	P1[j] = curData;
3539	unpredictable_data[unpredictable_count ++] = curData;
3540	}
3541	}
3542	else{
3543	type[j] = 0;
3544	P1[j] = curData;
3545	unpredictable_data[unpredictable_count ++] = curData;
3546	}
3547	}
3548	cur_data_pos += dim1_offset;
3549
3550	/* Process Row-1 --> Row-r2-1 */
3551	size_t index;
3552	for (i = 1; i < r2; i++)
3553	{
3554	/* Process row-i data 0 */
3555	index = i*r3;
3556	pred1D = P1[index-r3];
3557	curData = *cur_data_pos;
3558	diff = curData - pred1D;
3559
3560	itvNum = fabs(diff)/realPrecision + 1;
3561
3562	if (itvNum < exe_params->intvCapacity)
3563	{
3564	if (diff < 0) itvNum = -itvNum;
3565	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3566	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3567
3568	//ganrantee comporession error against the case of machine-epsilon
3569	if(fabs(curData-P1[index])>realPrecision)
3570	{
3571	type[index] = 0;
3572	P1[index] = curData;
3573	unpredictable_data[unpredictable_count ++] = curData;
3574	}
3575	}
3576	else
3577	{
3578	type[index] = 0;
3579	P1[index] = curData;
3580	unpredictable_data[unpredictable_count ++] = curData;
3581	}
3582
3583	/* Process row-i data 1 --> data r3-1*/
3584	for (j = 1; j < r3; j++)
3585	{
3586	index = i*r3+j;
3587	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
3588
3589	curData = cur_data_pos[j];
3590	diff = curData - pred2D;
3591
3592	itvNum = fabs(diff)/realPrecision + 1;
3593
3594	if (itvNum < exe_params->intvCapacity)
3595	{
3596	if (diff < 0) itvNum = -itvNum;
3597	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3598	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3599
3600	//ganrantee comporession error against the case of machine-epsilon
3601	if(fabs(curData-P1[index])>realPrecision)
3602	{
3603	type[index] = 0;
3604	P1[index] = curData;
3605	unpredictable_data[unpredictable_count ++] = curData;
3606	}
3607	}
3608	else
3609	{
3610	type[index] = 0;
3611	P1[index] = curData;
3612	unpredictable_data[unpredictable_count ++] = curData;
3613	}
3614	}
3615	cur_data_pos += dim1_offset;
3616	}
3617	cur_data_pos += dim0_offset - r2 * dim1_offset;
3618
3619	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
3620
3621	for (k = 1; k < r1; k++)
3622	{
3623	/* Process Row-0 data 0*/
3624	index = k*r23;
3625	pred1D = P1[0];
3626	curData = *cur_data_pos;
3627	diff = curData - pred1D;
3628	itvNum = fabs(diff)/realPrecision + 1;
3629	if (itvNum < exe_params->intvCapacity)
3630	{
3631	if (diff < 0) itvNum = -itvNum;
3632	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3633	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3634	//ganrantee comporession error against the case of machine-epsilon
3635	if(fabs(curData-P0[0])>realPrecision)
3636	{
3637	type[index] = 0;
3638	P0[0] = curData;
3639	unpredictable_data[unpredictable_count ++] = curData;
3640	}
3641	}
3642	else
3643	{
3644	type[index] = 0;
3645	P0[0] = curData;
3646	unpredictable_data[unpredictable_count ++] = curData;
3647	}
3648	/* Process Row-0 data 1 --> data r3-1 */
3649	for (j = 1; j < r3; j++)
3650	{
3651	//index = kr2r3+j;
3652	index ++;
3653	pred2D = P0[j-1] + P1[j] - P1[j-1];
3654	curData = cur_data_pos[j];
3655	diff = curData - pred2D;
3656	itvNum = fabs(diff)/realPrecision + 1;
3657	if (itvNum < exe_params->intvCapacity)
3658	{
3659	if (diff < 0) itvNum = -itvNum;
3660	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3661	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3662	//ganrantee comporession error against the case of machine-epsilon
3663	if(fabs(curData-P0[j])>realPrecision)
3664	{
3665	type[index] = 0;
3666	P0[j] = curData;
3667	unpredictable_data[unpredictable_count ++] = curData;
3668	}
3669	}
3670	else
3671	{
3672	type[index] = 0;
3673	P0[j] = curData;
3674	unpredictable_data[unpredictable_count ++] = curData;
3675	}
3676	}
3677
3678	cur_data_pos += dim1_offset;
3679	/* Process Row-1 --> Row-r2-1 */
3680	size_t index2D;
3681	for (i = 1; i < r2; i++)
3682	{
3683	/* Process Row-i data 0 */
3684	index = kr23 + ir3;
3685	index2D = i*r3;
3686	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
3687	curData = *cur_data_pos;
3688	diff = curData - pred2D;
3689
3690	itvNum = fabs(diff)/realPrecision + 1;
3691
3692	if (itvNum < exe_params->intvCapacity)
3693	{
3694	if (diff < 0) itvNum = -itvNum;
3695	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3696	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3697	//ganrantee comporession error against the case of machine-epsilon
3698	if(fabs(curData-P0[index2D])>realPrecision)
3699	{
3700	type[index] = 0;
3701	P0[index2D] = curData;
3702	unpredictable_data[unpredictable_count ++] = curData;
3703	}
3704	}
3705	else
3706	{
3707	type[index] = 0;
3708	P0[index2D] = curData;
3709	unpredictable_data[unpredictable_count ++] = curData;
3710	}
3711
3712	/* Process Row-i data 1 --> data r3-1 */
3713	for (j = 1; j < r3; j++)
3714	{
3715	//index = kr2r3 + i*r3 + j;
3716	index ++;
3717	index2D = i*r3 + j;
3718	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
3719	curData = cur_data_pos[j];
3720	diff = curData - pred3D;
3721
3722	itvNum = fabs(diff)/realPrecision + 1;
3723
3724	if (itvNum < exe_params->intvCapacity)
3725	{
3726	if (diff < 0) itvNum = -itvNum;
3727	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3728	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
3729
3730	//ganrantee comporession error against the case of machine-epsilon
3731	if(fabs(curData-P0[index2D])>realPrecision)
3732	{
3733	type[index] = 0;
3734	P0[index2D] = curData;
3735	unpredictable_data[unpredictable_count ++] = curData;
3736	}
3737	}
3738	else
3739	{
3740	type[index] = 0;
3741	P0[index2D] = curData;
3742	unpredictable_data[unpredictable_count ++] = curData;
3743	}
3744	}
3745	cur_data_pos += dim1_offset;
3746	}
3747	cur_data_pos += dim0_offset - r2 * dim1_offset;
3748	float *Pt;
3749	Pt = P1;
3750	P1 = P0;
3751	P0 = Pt;
3752	}
3753
3754	return unpredictable_count;
3755	}
3756
3757	unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r2, double realPrecision)
3758	{
3759	size_t i;
3760	size_t radiusIndex;
3761	float pred_value = 0, pred_err;
3762	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3763	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3764	size_t totalSampleSize = 0;
3765
3766	size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
3767	size_t offset_count_2;
3768	float * data_pos = oriData + r2 + offset_count;
3769	size_t n1_count = 1; // count i sum
3770	size_t len = r1 * r2;
3771	while(data_pos - oriData < len){
3772	totalSampleSize++;
3773	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
3774	pred_err = fabs(pred_value - *data_pos);
3775	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3776	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3777	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3778	intervals[radiusIndex]++;
3779
3780	offset_count += confparams_cpr->sampleDistance;
3781	if(offset_count >= r2){
3782	n1_count ++;
3783	offset_count_2 = n1_count % confparams_cpr->sampleDistance;
3784	data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
3785	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
3786	if(offset_count == 0) offset_count ++;
3787	}
3788	else data_pos += confparams_cpr->sampleDistance;
3789	}
3790
3791	//compute the appropriate number
3792	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3793	size_t sum = 0;
3794	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3795	{
3796	sum += intervals[i];
3797	if(sum>targetCount)
3798	break;
3799	}
3800	if(i>=confparams_cpr->maxRangeRadius)
3801	i = confparams_cpr->maxRangeRadius-1;
3802	unsigned int accIntervals = 2*(i+1);
3803	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3804
3805	if(powerOf2<32)
3806	powerOf2 = 32;
3807
3808	free(intervals);
3809	return powerOf2;
3810	}
3811
3812	unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision)
3813	{
3814	size_t i = 0, radiusIndex;
3815	float pred_value = 0, pred_err;
3816	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
3817	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
3818	size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance;
3819
3820	float * data_pos = oriData + 2;
3821	while(data_pos - oriData < dataLength){
3822	totalSampleSize++;
3823	pred_value = data_pos[-1];
3824	pred_err = fabs(pred_value - *data_pos);
3825	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
3826	if(radiusIndex>=confparams_cpr->maxRangeRadius)
3827	radiusIndex = confparams_cpr->maxRangeRadius - 1;
3828	intervals[radiusIndex]++;
3829
3830	data_pos += confparams_cpr->sampleDistance;
3831	}
3832	//compute the appropriate number
3833	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
3834	size_t sum = 0;
3835	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
3836	{
3837	sum += intervals[i];
3838	if(sum>targetCount)
3839	break;
3840	}
3841	if(i>=confparams_cpr->maxRangeRadius)
3842	i = confparams_cpr->maxRangeRadius-1;
3843
3844	unsigned int accIntervals = 2*(i+1);
3845	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
3846
3847	if(powerOf2<32)
3848	powerOf2 = 32;
3849
3850	free(intervals);
3851	return powerOf2;
3852	}
3853
3854	size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data){
3855
3856	mean[0] = block_ori_data[0];
3857	unsigned short unpredictable_count = 0;
3858
3859	float curData;
3860	double itvNum;
3861	double diff;
3862	float last_over_thres = mean[0];
3863	float pred1D;
3864	size_t type_index = 0;
3865	float * data_pos = block_ori_data;
3866	for(size_t i=0; i<block_dim_0; i++){
3867	curData = *data_pos;
3868
3869	pred1D = last_over_thres;
3870	diff = curData - pred1D;
3871	itvNum = fabs(diff)/realPrecision + 1;
3872	if (itvNum < exe_params->intvCapacity){
3873	if (diff < 0) itvNum = -itvNum;
3874	type[type_index] = (int) (itvNum/2) + exe_params->intvRadius;
3875	last_over_thres = pred1D + 2 * (type[type_index] - exe_params->intvRadius) * realPrecision;
3876	if(fabs(curData-last_over_thres)>realPrecision){
3877	type[type_index] = 0;
3878	last_over_thres = curData;
3879	unpredictable_data[unpredictable_count ++] = curData;
3880	}
3881
3882	}
3883	else{
3884	type[type_index] = 0;
3885	unpredictable_data[unpredictable_count ++] = curData;
3886	last_over_thres = curData;
3887	}
3888	type_index ++;
3889	data_pos ++;
3890	}
3891	return unpredictable_count;
3892
3893	}
3894
3895	size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t block_dim_0, size_t block_dim_1, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){
3896
3897	size_t dim0_offset = dim_1;
3898	mean[0] = block_ori_data[0];
3899
3900	size_t unpredictable_count = 0;
3901	size_t r1, r2;
3902	r1 = block_dim_0;
3903	r2 = block_dim_1;
3904
3905	float * cur_data_pos = block_ori_data;
3906	float curData;
3907	float pred1D, pred2D;
3908	double itvNum;
3909	double diff;
3910	size_t i, j;
3911	/* Process Row-0 data 0*/
3912	curData = *cur_data_pos;
3913	pred1D = mean[0];
3914	diff = curData - pred1D;
3915	itvNum = fabs(diff)/realPrecision + 1;
3916	if (itvNum < exe_params->intvCapacity){
3917	if (diff < 0) itvNum = -itvNum;
3918	type[0] = (int) (itvNum/2) + exe_params->intvRadius;
3919	P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
3920	//ganrantee comporession error against the case of machine-epsilon
3921	if(fabs(curData-P1[0])>realPrecision){
3922	type[0] = 0;
3923	P1[0] = curData;
3924	unpredictable_data[unpredictable_count ++] = curData;
3925	}
3926	}
3927	else{
3928	type[0] = 0;
3929	P1[0] = curData;
3930	unpredictable_data[unpredictable_count ++] = curData;
3931	}
3932
3933	/* Process Row-0 data 1*/
3934	curData = cur_data_pos[1];
3935	pred1D = P1[0];
3936	diff = curData - pred1D;
3937	itvNum = fabs(diff)/realPrecision + 1;
3938	if (itvNum < exe_params->intvCapacity){
3939	if (diff < 0) itvNum = -itvNum;
3940	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
3941	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
3942	//ganrantee comporession error against the case of machine-epsilon
3943	if(fabs(curData-P1[1])>realPrecision){
3944	type[1] = 0;
3945	P1[1] = curData;
3946	unpredictable_data[unpredictable_count ++] = curData;
3947	}
3948	}
3949	else{
3950	type[1] = 0;
3951	P1[1] = curData;
3952	unpredictable_data[unpredictable_count ++] = curData;
3953	}
3954
3955	/* Process Row-0 data 2 --> data r2-1 */
3956	for (j = 2; j < r2; j++)
3957	{
3958	curData = cur_data_pos[j];
3959	pred1D = 2*P1[j-1] - P1[j-2];
3960	diff = curData - pred1D;
3961	itvNum = fabs(diff)/realPrecision + 1;
3962	if (itvNum < exe_params->intvCapacity){
3963	if (diff < 0) itvNum = -itvNum;
3964	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
3965	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
3966	//ganrantee comporession error against the case of machine-epsilon
3967	if(fabs(curData-P1[j])>realPrecision){
3968	type[j] = 0;
3969	P1[j] = curData;
3970	unpredictable_data[unpredictable_count ++] = curData;
3971	}
3972	}
3973	else{
3974	type[j] = 0;
3975	P1[j] = curData;
3976	unpredictable_data[unpredictable_count ++] = curData;
3977	}
3978	}
3979	cur_data_pos += dim0_offset;
3980	/* Process Row-1 --> Row-r1-1 */
3981	size_t index;
3982	for (i = 1; i < r1; i++)
3983	{
3984	/* Process row-i data 0 */
3985	index = i*r2;
3986	curData = *cur_data_pos;
3987	pred1D = P1[0];
3988	diff = curData - pred1D;
3989	itvNum = fabs(diff)/realPrecision + 1;
3990	if (itvNum < exe_params->intvCapacity){
3991	if (diff < 0) itvNum = -itvNum;
3992	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
3993	P0[0] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
3994	//ganrantee comporession error against the case of machine-epsilon
3995	if(fabs(curData-P0[0])>realPrecision){
3996	type[index] = 0;
3997	P0[0] = curData;
3998	unpredictable_data[unpredictable_count ++] = curData;
3999	}
4000	}
4001	else{
4002	type[index] = 0;
4003	P0[0] = curData;
4004	unpredictable_data[unpredictable_count ++] = curData;
4005	}
4006
4007	/* Process row-i data 1 --> r2-1*/
4008	for (j = 1; j < r2; j++)
4009	{
4010	index = i*r2+j;
4011	curData = cur_data_pos[j];
4012	pred2D = P0[j-1] + P1[j] - P1[j-1];
4013	diff = curData - pred2D;
4014	itvNum = fabs(diff)/realPrecision + 1;
4015	if (itvNum < exe_params->intvCapacity)
4016	{
4017	if (diff < 0) itvNum = -itvNum;
4018	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
4019	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
4020
4021	//ganrantee comporession error against the case of machine-epsilon
4022	if(fabs(curData-P0[j])>realPrecision)
4023	{
4024	type[index] = 0;
4025	P0[j] = curData;
4026	unpredictable_data[unpredictable_count ++] = curData;
4027	}
4028	}
4029	else
4030	{
4031	type[index] = 0;
4032	P0[j] = curData;
4033	unpredictable_data[unpredictable_count ++] = curData;
4034	}
4035	}
4036	cur_data_pos += dim0_offset;
4037
4038	float *Pt;
4039	Pt = P1;
4040	P1 = P0;
4041	P0 = Pt;
4042	}
4043	return unpredictable_count;
4044	}
4045
/* The code above is for SZ 1.4.13; the code below is for SZ 2.0. */
4047
4048	unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float oriData, size_t r1, size_t r2, double realPrecision, float dense_pos, float * max_freq, float * mean_freq)
4049	{
4050	float mean = 0.0;
4051	size_t len = r1 * r2;
4052	size_t mean_distance = (int) (sqrt(len));
4053
4054	float * data_pos = oriData;
4055	size_t mean_count = 0;
4056	while(data_pos - oriData < len){
4057	mean += *data_pos;
4058	mean_count ++;
4059	data_pos += mean_distance;
4060	}
4061	if(mean_count > 0) mean /= mean_count;
4062	size_t range = 8192;
4063	size_t radius = 4096;
4064	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
4065	memset(freq_intervals, 0, range*sizeof(size_t));
4066
4067	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
4068	int sampleDistance = confparams_cpr->sampleDistance;
4069	float predThreshold = confparams_cpr->predThreshold;
4070
4071	size_t i;
4072	size_t radiusIndex;
4073	float pred_value = 0, pred_err;
4074	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
4075	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
4076
4077	float mean_diff;
4078	ptrdiff_t freq_index;
4079	size_t freq_count = 0;
4080	size_t n1_count = 1;
4081	size_t offset_count = sampleDistance - 1;
4082	size_t offset_count_2 = 0;
4083	size_t sample_count = 0;
4084	data_pos = oriData + r2 + offset_count;
4085	while(data_pos - oriData < len){
4086	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
4087	pred_err = fabs(pred_value - *data_pos);
4088	if(pred_err < realPrecision) freq_count ++;
4089	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
4090	if(radiusIndex>=maxRangeRadius)
4091	radiusIndex = maxRangeRadius - 1;
4092	intervals[radiusIndex]++;
4093
4094	mean_diff = *data_pos - mean;
4095	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
4096	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
4097	if(freq_index <= 0){
4098	freq_intervals[0] ++;
4099	}
4100	else if(freq_index >= range){
4101	freq_intervals[range - 1] ++;
4102	}
4103	else{
4104	freq_intervals[freq_index] ++;
4105	}
4106	offset_count += sampleDistance;
4107	if(offset_count >= r2){
4108	n1_count ++;
4109	offset_count_2 = n1_count % sampleDistance;
4110	data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
4111	offset_count = (sampleDistance - offset_count_2);
4112	if(offset_count == 0) offset_count ++;
4113	}
4114	else data_pos += sampleDistance;
4115	sample_count ++;
4116	}
4117	max_freq = freq_count 1.0/ sample_count;
4118
4119	//compute the appropriate number
4120	size_t targetCount = sample_count*predThreshold;
4121	size_t sum = 0;
4122	for(i=0;i<maxRangeRadius;i++)
4123	{
4124	sum += intervals[i];
4125	if(sum>targetCount)
4126	break;
4127	}
4128	if(i>=maxRangeRadius)
4129	i = maxRangeRadius-1;
4130	unsigned int accIntervals = 2*(i+1);
4131	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
4132
4133	if(powerOf2<32)
4134	powerOf2 = 32;
4135
4136	// collect frequency
4137	size_t max_sum = 0;
4138	size_t max_index = 0;
4139	size_t tmp_sum;
4140	size_t * freq_pos = freq_intervals + 1;
4141	for(size_t i=1; i<range-2; i++){
4142	tmp_sum = freq_pos[0] + freq_pos[1];
4143	if(tmp_sum > max_sum){
4144	max_sum = tmp_sum;
4145	max_index = i;
4146	}
4147	freq_pos ++;
4148	}
4149	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
4150	mean_freq = max_sum 1.0 / sample_count;
4151
4152	free(freq_intervals);
4153	free(intervals);
4154	return powerOf2;
4155	}
4156
4157	// 2D: modified for higher performance
/* Smaller of a and b. Arguments and result are parenthesized so the macro
 * can be embedded in larger expressions; each argument may be evaluated
 * twice, so do not pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
4159	unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float oriData, size_t r1, size_t r2, double realPrecision, size_t comp_size){
4160
4161	unsigned int quantization_intervals;
4162	float sz_sample_correct_freq = -1;//0.5; //-1
4163	float dense_pos;
4164	float mean_flush_freq;
4165	unsigned char use_mean = 0;
4166
4167	if(exe_params->optQuantMode==1)
4168	{
4169	quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
4170	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
4171	updateQuantizationInfo(quantization_intervals);
4172	}
4173	else{
4174	quantization_intervals = exe_params->intvCapacity;
4175	}
4176
4177	// calculate block dims
4178	size_t num_x, num_y;
4179	size_t block_size = 16;
4180
4181	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
4182	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
4183
4184	size_t split_index_x, split_index_y;
4185	size_t early_blockcount_x, early_blockcount_y;
4186	size_t late_blockcount_x, late_blockcount_y;
4187	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
4188	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
4189
4190	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
4191	size_t num_blocks = num_x * num_y;
4192	size_t num_elements = r1 * r2;
4193
4194	size_t dim0_offset = r2;
4195
4196	int * result_type = (int ) malloc(num_elements sizeof(int));
4197	size_t unpred_data_max_size = max_num_block_elements;
4198	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
4199	size_t total_unpred = 0;
4200	size_t unpredictable_count;
4201	float * data_pos = oriData;
4202	int * type = result_type;
4203	size_t offset_x, offset_y;
4204	size_t current_blockcount_x, current_blockcount_y;
4205
4206	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
4207	float * reg_params_pos = reg_params;
4208	// move regression part out
4209	size_t params_offset_b = num_blocks;
4210	size_t params_offset_c = 2*num_blocks;
4211	for(size_t i=0; i<num_x; i++){
4212	for(size_t j=0; j<num_y; j++){
4213	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4214	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4215	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4216	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4217
4218	data_pos = oriData + offset_x * dim0_offset + offset_y;
4219
4220	{
4221	float * cur_data_pos = data_pos;
4222	float fx = 0.0;
4223	float fy = 0.0;
4224	float f = 0;
4225	double sum_x;
4226	float curData;
4227	for(size_t i=0; i<current_blockcount_x; i++){
4228	sum_x = 0;
4229	for(size_t j=0; j<current_blockcount_y; j++){
4230	curData = *cur_data_pos;
4231	sum_x += curData;
4232	fy += curData * j;
4233	cur_data_pos ++;
4234	}
4235	fx += sum_x * i;
4236	f += sum_x;
4237	cur_data_pos += dim0_offset - current_blockcount_y;
4238	}
4239	float coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
4240	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
4241	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
4242	reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
4243	}
4244
4245	reg_params_pos ++;
4246	}
4247	}
4248
4249	//Compress coefficient arrays
4250	double precision_a, precision_b, precision_c;
4251	float rel_param_err = 0.15/3;
4252	precision_a = rel_param_err * realPrecision / late_blockcount_x;
4253	precision_b = rel_param_err * realPrecision / late_blockcount_y;
4254	precision_c = rel_param_err * realPrecision;
4255
4256	float mean = 0;
4257	use_mean = 0;
4258	if(use_mean){
4259	// compute mean
4260	double sum = 0.0;
4261	size_t mean_count = 0;
4262	for(size_t i=0; i<num_elements; i++){
4263	if(fabs(oriData[i] - dense_pos) < realPrecision){
4264	sum += oriData[i];
4265	mean_count ++;
4266	}
4267	}
4268	if(mean_count > 0) mean = sum / mean_count;
4269	}
4270
4271
4272	double tmp_realPrecision = realPrecision;
4273
4274	// use two prediction buffers for higher performance
4275	float * unpredictable_data = result_unpredictable_data;
4276	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
4277	memset(indicator, 0, num_blocks * sizeof(unsigned char));
4278	size_t reg_count = 0;
4279	size_t strip_dim_0 = early_blockcount_x + 1;
4280	size_t strip_dim_1 = r2 + 1;
4281	size_t strip_dim0_offset = strip_dim_1;
4282	unsigned char * indicator_pos = indicator;
4283	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
4284	float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
4285	memset(prediction_buffer_1, 0, prediction_buffer_size);
4286	float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
4287	memset(prediction_buffer_2, 0, prediction_buffer_size);
4288	float * cur_pb_buf = prediction_buffer_1;
4289	float * next_pb_buf = prediction_buffer_2;
4290	float * cur_pb_buf_pos;
4291	float * next_pb_buf_pos;
4292	int intvCapacity = exe_params->intvCapacity;
4293	int intvRadius = exe_params->intvRadius;
4294	int use_reg = 0;
4295
4296	reg_params_pos = reg_params;
4297	// compress the regression coefficients on the fly
4298	float last_coeffcients[3] = {0.0};
4299	int coeff_intvCapacity_sz = 65536;
4300	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
4301	int * coeff_type[3];
4302	int * coeff_result_type = (int ) malloc(num_blocks3*sizeof(int));
4303	float * coeff_unpred_data[3];
4304	float * coeff_unpredictable_data = (float ) malloc(num_blocks3*sizeof(float));
4305	double precision[3];
4306	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
4307	for(int i=0; i<3; i++){
4308	coeff_type[i] = coeff_result_type + i * num_blocks;
4309	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
4310	}
4311	int coeff_index = 0;
4312	unsigned int coeff_unpredictable_count[3] = {0};
4313	if(use_mean){
4314	type = result_type;
4315	int intvCapacity_sz = intvCapacity - 2;
4316	for(size_t i=0; i<num_x; i++){
4317	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4318	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4319	data_pos = oriData + offset_x * dim0_offset;
4320
4321	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
4322	next_pb_buf_pos = next_pb_buf + 1;
4323	float * pb_pos = cur_pb_buf_pos;
4324	float * next_pb_pos = next_pb_buf_pos;
4325
4326	for(size_t j=0; j<num_y; j++){
4327	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4328	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4329
4330	/sampling: decide which predictor to use (regression or lorenzo)/
4331	{
4332	float * cur_data_pos;
4333	float curData;
4334	float pred_reg, pred_sz;
4335	float err_sz = 0.0, err_reg = 0.0;
4336	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
4337	// [1, 9] [3, 7] [7, 3] [9, 1]
4338	int count = 0;
4339	for(int i=1; i<current_blockcount_x; i+=2){
4340	cur_data_pos = data_pos + i * dim0_offset + i;
4341	curData = *cur_data_pos;
4342	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4343	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
4344
4345	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
4346
4347	err_reg += fabs(pred_reg - curData);
4348
4349	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
4350	curData = *cur_data_pos;
4351	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4352	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
4353	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
4354
4355	err_reg += fabs(pred_reg - curData);
4356
4357	count += 2;
4358	}
4359
4360	use_reg = (err_reg < err_sz);
4361	}
4362	if(use_reg)
4363	{
4364	{
4365	/predict coefficients in current block via previous reg_block/
4366	float cur_coeff;
4367	double diff, itvNum;
4368	for(int e=0; e<3; e++){
4369	cur_coeff = reg_params_pos[e*num_blocks];
4370	diff = cur_coeff - last_coeffcients[e];
4371	itvNum = fabs(diff)/precision[e] + 1;
4372	if (itvNum < coeff_intvCapacity_sz){
4373	if (diff < 0) itvNum = -itvNum;
4374	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4375	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4376	//ganrantee comporession error against the case of machine-epsilon
4377	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4378	coeff_type[e][coeff_index] = 0;
4379	last_coeffcients[e] = cur_coeff;
4380	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4381	}
4382	}
4383	else{
4384	coeff_type[e][coeff_index] = 0;
4385	last_coeffcients[e] = cur_coeff;
4386	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4387	}
4388	}
4389	coeff_index ++;
4390	}
4391	float curData;
4392	float pred;
4393	double itvNum;
4394	double diff;
4395	size_t index = 0;
4396	size_t block_unpredictable_count = 0;
4397	float * cur_data_pos = data_pos;
4398	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4399	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4400	curData = *cur_data_pos;
4401	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4402	diff = curData - pred;
4403	itvNum = fabs(diff)/realPrecision + 1;
4404	if (itvNum < intvCapacity){
4405	if (diff < 0) itvNum = -itvNum;
4406	type[index] = (int) (itvNum/2) + intvRadius;
4407	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4408	//ganrantee comporession error against the case of machine-epsilon
4409	if(fabs(curData - pred)>realPrecision){
4410	type[index] = 0;
4411	pred = curData;
4412	unpredictable_data[block_unpredictable_count ++] = curData;
4413	}
4414	}
4415	else{
4416	type[index] = 0;
4417	pred = curData;
4418	unpredictable_data[block_unpredictable_count ++] = curData;
4419	}
4420	index ++;
4421	cur_data_pos ++;
4422	}
4423	/dealing with the last jj (boundary)/
4424	{
4425	size_t jj = current_blockcount_y - 1;
4426	curData = *cur_data_pos;
4427	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4428	diff = curData - pred;
4429	itvNum = fabs(diff)/realPrecision + 1;
4430	if (itvNum < intvCapacity){
4431	if (diff < 0) itvNum = -itvNum;
4432	type[index] = (int) (itvNum/2) + intvRadius;
4433	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4434	//ganrantee comporession error against the case of machine-epsilon
4435	if(fabs(curData - pred)>realPrecision){
4436	type[index] = 0;
4437	pred = curData;
4438	unpredictable_data[block_unpredictable_count ++] = curData;
4439	}
4440	}
4441	else{
4442	type[index] = 0;
4443	pred = curData;
4444	unpredictable_data[block_unpredictable_count ++] = curData;
4445	}
4446
4447	// assign value to block surfaces
4448	pb_pos[ii * strip_dim0_offset + jj] = pred;
4449	index ++;
4450	cur_data_pos ++;
4451	}
4452	cur_data_pos += dim0_offset - current_blockcount_y;
4453	}
4454	/dealing with the last ii (boundary)/
4455	{
4456	size_t ii = current_blockcount_x - 1;
4457	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4458	curData = *cur_data_pos;
4459	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4460	diff = curData - pred;
4461	itvNum = fabs(diff)/realPrecision + 1;
4462	if (itvNum < intvCapacity){
4463	if (diff < 0) itvNum = -itvNum;
4464	type[index] = (int) (itvNum/2) + intvRadius;
4465	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4466	//ganrantee comporession error against the case of machine-epsilon
4467	if(fabs(curData - pred)>realPrecision){
4468	type[index] = 0;
4469	pred = curData;
4470	unpredictable_data[block_unpredictable_count ++] = curData;
4471	}
4472	}
4473	else{
4474	type[index] = 0;
4475	pred = curData;
4476	unpredictable_data[block_unpredictable_count ++] = curData;
4477	}
4478	// assign value to next prediction buffer
4479	next_pb_pos[jj] = pred;
4480	index ++;
4481	cur_data_pos ++;
4482	}
4483	/dealing with the last jj (boundary)/
4484	{
4485	size_t jj = current_blockcount_y - 1;
4486	curData = *cur_data_pos;
4487	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4488	diff = curData - pred;
4489	itvNum = fabs(diff)/realPrecision + 1;
4490	if (itvNum < intvCapacity){
4491	if (diff < 0) itvNum = -itvNum;
4492	type[index] = (int) (itvNum/2) + intvRadius;
4493	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4494	//ganrantee comporession error against the case of machine-epsilon
4495	if(fabs(curData - pred)>realPrecision){
4496	type[index] = 0;
4497	pred = curData;
4498	unpredictable_data[block_unpredictable_count ++] = curData;
4499	}
4500	}
4501	else{
4502	type[index] = 0;
4503	pred = curData;
4504	unpredictable_data[block_unpredictable_count ++] = curData;
4505	}
4506
4507	// assign value to block surfaces
4508	pb_pos[ii * strip_dim0_offset + jj] = pred;
4509	// assign value to next prediction buffer
4510	next_pb_pos[jj] = pred;
4511
4512	index ++;
4513	cur_data_pos ++;
4514	}
4515	} // end ii == -1
4516	unpredictable_count = block_unpredictable_count;
4517	total_unpred += unpredictable_count;
4518	unpredictable_data += unpredictable_count;
4519	reg_count ++;
4520	}// end use_reg
4521	else{
4522	// use SZ
4523	// SZ predication
4524	unpredictable_count = 0;
4525	float * cur_pb_pos = pb_pos;
4526	float * cur_data_pos = data_pos;
4527	float curData;
4528	float pred2D;
4529	double itvNum, diff;
4530	size_t index = 0;
4531	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4532	for(size_t jj=0; jj<current_blockcount_y; jj++){
4533	curData = *cur_data_pos;
4534	if(fabs(curData - mean) <= realPrecision){
4535	// adjust type[index] to intvRadius for coherence with freq in reg
4536	type[index] = intvRadius;
4537	*cur_pb_pos = mean;
4538	}
4539	else
4540	{
4541	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4542	diff = curData - pred2D;
4543	itvNum = fabs(diff)/realPrecision + 1;
4544	if (itvNum < intvCapacity_sz){
4545	if (diff < 0) itvNum = -itvNum;
4546	type[index] = (int) (itvNum/2) + intvRadius;
4547	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4548	if(type[index] <= intvRadius) type[index] -= 1;
4549	//ganrantee comporession error against the case of machine-epsilon
4550	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4551	type[index] = 0;
4552	*cur_pb_pos = curData;
4553	unpredictable_data[unpredictable_count ++] = curData;
4554	}
4555	}
4556	else{
4557	type[index] = 0;
4558	*cur_pb_pos = curData;
4559	unpredictable_data[unpredictable_count ++] = curData;
4560	}
4561	}
4562	index ++;
4563	cur_pb_pos ++;
4564	cur_data_pos ++;
4565	}
4566	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
4567	cur_data_pos += dim0_offset - current_blockcount_y;
4568	}
4569	/dealing with the last ii (boundary)/
4570	{
4571	// ii == current_blockcount_x - 1
4572	for(size_t jj=0; jj<current_blockcount_y; jj++){
4573	curData = *cur_data_pos;
4574	if(fabs(curData - mean) <= realPrecision){
4575	// adjust type[index] to intvRadius for coherence with freq in reg
4576	type[index] = intvRadius;
4577	*cur_pb_pos = mean;
4578	}
4579	else
4580	{
4581	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4582	diff = curData - pred2D;
4583	itvNum = fabs(diff)/realPrecision + 1;
4584	if (itvNum < intvCapacity_sz){
4585	if (diff < 0) itvNum = -itvNum;
4586	type[index] = (int) (itvNum/2) + intvRadius;
4587	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4588	if(type[index] <= intvRadius) type[index] -= 1;
4589	//ganrantee comporession error against the case of machine-epsilon
4590	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4591	type[index] = 0;
4592	*cur_pb_pos = curData;
4593	unpredictable_data[unpredictable_count ++] = curData;
4594	}
4595	}
4596	else{
4597	type[index] = 0;
4598	*cur_pb_pos = curData;
4599	unpredictable_data[unpredictable_count ++] = curData;
4600	}
4601	}
4602	next_pb_pos[jj] = *cur_pb_pos;
4603	index ++;
4604	cur_pb_pos ++;
4605	cur_data_pos ++;
4606	}
4607	}
4608	total_unpred += unpredictable_count;
4609	unpredictable_data += unpredictable_count;
4610	// change indicator
4611	indicator_pos[j] = 1;
4612	}// end SZ
4613	reg_params_pos ++;
4614	data_pos += current_blockcount_y;
4615	pb_pos += current_blockcount_y;
4616	next_pb_pos += current_blockcount_y;
4617	type += current_blockcount_x * current_blockcount_y;
4618	}// end j
4619	indicator_pos += num_y;
4620	float * tmp;
4621	tmp = cur_pb_buf;
4622	cur_pb_buf = next_pb_buf;
4623	next_pb_buf = tmp;
4624	}// end i
4625	}// end use mean
4626	else{
4627	type = result_type;
4628	int intvCapacity_sz = intvCapacity - 2;
4629	for(size_t i=0; i<num_x; i++){
4630	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
4631	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
4632	data_pos = oriData + offset_x * dim0_offset;
4633
4634	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
4635	next_pb_buf_pos = next_pb_buf + 1;
4636	float * pb_pos = cur_pb_buf_pos;
4637	float * next_pb_pos = next_pb_buf_pos;
4638
4639	for(size_t j=0; j<num_y; j++){
4640	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
4641	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
4642	/sampling/
4643	{
4644	// sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
4645	float * cur_data_pos;
4646	float curData;
4647	float pred_reg, pred_sz;
4648	float err_sz = 0.0, err_reg = 0.0;
4649	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
4650	// [1, 9] [3, 7] [7, 3] [9, 1]
4651	int count = 0;
4652	for(int i=1; i<current_blockcount_x; i+=2){
4653	cur_data_pos = data_pos + i * dim0_offset + i;
4654	curData = *cur_data_pos;
4655	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4656	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
4657	err_sz += fabs(pred_sz - curData);
4658	err_reg += fabs(pred_reg - curData);
4659
4660	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
4661	curData = *cur_data_pos;
4662	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
4663	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
4664	err_sz += fabs(pred_sz - curData);
4665	err_reg += fabs(pred_reg - curData);
4666
4667	count += 2;
4668	}
4669	err_sz += realPrecision * count * 0.81;
4670	use_reg = (err_reg < err_sz);
4671
4672	}
4673	if(use_reg)
4674	{
4675	{
4676	/predict coefficients in current block via previous reg_block/
4677	float cur_coeff;
4678	double diff, itvNum;
4679	for(int e=0; e<3; e++){
4680	cur_coeff = reg_params_pos[e*num_blocks];
4681	diff = cur_coeff - last_coeffcients[e];
4682	itvNum = fabs(diff)/precision[e] + 1;
4683	if (itvNum < coeff_intvCapacity_sz){
4684	if (diff < 0) itvNum = -itvNum;
4685	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
4686	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
4687	//ganrantee comporession error against the case of machine-epsilon
4688	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
4689	coeff_type[e][coeff_index] = 0;
4690	last_coeffcients[e] = cur_coeff;
4691	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4692	}
4693	}
4694	else{
4695	coeff_type[e][coeff_index] = 0;
4696	last_coeffcients[e] = cur_coeff;
4697	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
4698	}
4699	}
4700	coeff_index ++;
4701	}
4702	float curData;
4703	float pred;
4704	double itvNum;
4705	double diff;
4706	size_t index = 0;
4707	size_t block_unpredictable_count = 0;
4708	float * cur_data_pos = data_pos;
4709	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4710	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4711	curData = *cur_data_pos;
4712	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4713	diff = curData - pred;
4714	itvNum = fabs(diff)/realPrecision + 1;
4715	if (itvNum < intvCapacity){
4716	if (diff < 0) itvNum = -itvNum;
4717	type[index] = (int) (itvNum/2) + intvRadius;
4718	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4719	//ganrantee comporession error against the case of machine-epsilon
4720	if(fabs(curData - pred)>realPrecision){
4721	type[index] = 0;
4722	pred = curData;
4723	unpredictable_data[block_unpredictable_count ++] = curData;
4724	}
4725	}
4726	else{
4727	type[index] = 0;
4728	pred = curData;
4729	unpredictable_data[block_unpredictable_count ++] = curData;
4730	}
4731	index ++;
4732	cur_data_pos ++;
4733	}
4734	/dealing with the last jj (boundary)/
4735	{
4736	// jj == current_blockcount_y - 1
4737	size_t jj = current_blockcount_y - 1;
4738	curData = *cur_data_pos;
4739	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4740	diff = curData - pred;
4741	itvNum = fabs(diff)/realPrecision + 1;
4742	if (itvNum < intvCapacity){
4743	if (diff < 0) itvNum = -itvNum;
4744	type[index] = (int) (itvNum/2) + intvRadius;
4745	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4746	//ganrantee comporession error against the case of machine-epsilon
4747	if(fabs(curData - pred)>realPrecision){
4748	type[index] = 0;
4749	pred = curData;
4750	unpredictable_data[block_unpredictable_count ++] = curData;
4751	}
4752	}
4753	else{
4754	type[index] = 0;
4755	pred = curData;
4756	unpredictable_data[block_unpredictable_count ++] = curData;
4757	}
4758
4759	// assign value to block surfaces
4760	pb_pos[ii * strip_dim0_offset + jj] = pred;
4761	index ++;
4762	cur_data_pos ++;
4763	}
4764	cur_data_pos += dim0_offset - current_blockcount_y;
4765	}
4766	/dealing with the last ii (boundary)/
4767	{
4768	size_t ii = current_blockcount_x - 1;
4769	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
4770	curData = *cur_data_pos;
4771	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4772	diff = curData - pred;
4773	itvNum = fabs(diff)/realPrecision + 1;
4774	if (itvNum < intvCapacity){
4775	if (diff < 0) itvNum = -itvNum;
4776	type[index] = (int) (itvNum/2) + intvRadius;
4777	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4778	//ganrantee comporession error against the case of machine-epsilon
4779	if(fabs(curData - pred)>realPrecision){
4780	type[index] = 0;
4781	pred = curData;
4782	unpredictable_data[block_unpredictable_count ++] = curData;
4783	}
4784	}
4785	else{
4786	type[index] = 0;
4787	pred = curData;
4788	unpredictable_data[block_unpredictable_count ++] = curData;
4789	}
4790	// assign value to next prediction buffer
4791	next_pb_pos[jj] = pred;
4792	index ++;
4793	cur_data_pos ++;
4794	}
4795	/dealing with the last jj (boundary)/
4796	{
4797	// jj == current_blockcount_y - 1
4798	size_t jj = current_blockcount_y - 1;
4799	curData = *cur_data_pos;
4800	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
4801	diff = curData - pred;
4802	itvNum = fabs(diff)/realPrecision + 1;
4803	if (itvNum < intvCapacity){
4804	if (diff < 0) itvNum = -itvNum;
4805	type[index] = (int) (itvNum/2) + intvRadius;
4806	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
4807	//ganrantee comporession error against the case of machine-epsilon
4808	if(fabs(curData - pred)>realPrecision){
4809	type[index] = 0;
4810	pred = curData;
4811	unpredictable_data[block_unpredictable_count ++] = curData;
4812	}
4813	}
4814	else{
4815	type[index] = 0;
4816	pred = curData;
4817	unpredictable_data[block_unpredictable_count ++] = curData;
4818	}
4819
4820	// assign value to block surfaces
4821	pb_pos[ii * strip_dim0_offset + jj] = pred;
4822	// assign value to next prediction buffer
4823	next_pb_pos[jj] = pred;
4824
4825	index ++;
4826	cur_data_pos ++;
4827	}
4828	} // end ii == -1
4829	unpredictable_count = block_unpredictable_count;
4830	total_unpred += unpredictable_count;
4831	unpredictable_data += unpredictable_count;
4832	reg_count ++;
4833	}// end use_reg
4834	else{
4835	// use SZ
4836	// SZ predication
4837	unpredictable_count = 0;
4838	float * cur_pb_pos = pb_pos;
4839	float * cur_data_pos = data_pos;
4840	float curData;
4841	float pred2D;
4842	double itvNum, diff;
4843	size_t index = 0;
4844	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
4845	for(size_t jj=0; jj<current_blockcount_y; jj++){
4846	curData = *cur_data_pos;
4847
4848	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4849	diff = curData - pred2D;
4850	itvNum = fabs(diff)/realPrecision + 1;
4851	if (itvNum < intvCapacity_sz){
4852	if (diff < 0) itvNum = -itvNum;
4853	type[index] = (int) (itvNum/2) + intvRadius;
4854	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4855	//ganrantee comporession error against the case of machine-epsilon
4856	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4857	type[index] = 0;
4858	*cur_pb_pos = curData;
4859	unpredictable_data[unpredictable_count ++] = curData;
4860	}
4861	}
4862	else{
4863	type[index] = 0;
4864	*cur_pb_pos = curData;
4865	unpredictable_data[unpredictable_count ++] = curData;
4866	}
4867
4868	index ++;
4869	cur_pb_pos ++;
4870	cur_data_pos ++;
4871	}
4872	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
4873	cur_data_pos += dim0_offset - current_blockcount_y;
4874	}
4875	/dealing with the last ii (boundary)/
4876	{
4877	// ii == current_blockcount_x - 1
4878	for(size_t jj=0; jj<current_blockcount_y; jj++){
4879	curData = *cur_data_pos;
4880
4881	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
4882	diff = curData - pred2D;
4883	itvNum = fabs(diff)/realPrecision + 1;
4884	if (itvNum < intvCapacity_sz){
4885	if (diff < 0) itvNum = -itvNum;
4886	type[index] = (int) (itvNum/2) + intvRadius;
4887	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
4888	//ganrantee comporession error against the case of machine-epsilon
4889	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
4890	type[index] = 0;
4891	*cur_pb_pos = curData;
4892	unpredictable_data[unpredictable_count ++] = curData;
4893	}
4894	}
4895	else{
4896	type[index] = 0;
4897	*cur_pb_pos = curData;
4898	unpredictable_data[unpredictable_count ++] = curData;
4899	}
4900	next_pb_pos[jj] = *cur_pb_pos;
4901	index ++;
4902	cur_pb_pos ++;
4903	cur_data_pos ++;
4904	}
4905	}
4906	total_unpred += unpredictable_count;
4907	unpredictable_data += unpredictable_count;
4908	// change indicator
4909	indicator_pos[j] = 1;
4910	}// end SZ
4911	reg_params_pos ++;
4912	data_pos += current_blockcount_y;
4913	pb_pos += current_blockcount_y;
4914	next_pb_pos += current_blockcount_y;
4915	type += current_blockcount_x * current_blockcount_y;
4916	}// end j
4917	indicator_pos += num_y;
4918	float * tmp;
4919	tmp = cur_pb_buf;
4920	cur_pb_buf = next_pb_buf;
4921	next_pb_buf = tmp;
4922	}// end i
4923	}
4924	free(prediction_buffer_1);
4925	free(prediction_buffer_2);
4926
4927	int stateNum = 2*quantization_intervals;
4928	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4929
4930	size_t nodeCount = 0;
4931	size_t i = 0;
4932	init(huffmanTree, result_type, num_elements);
4933	for (i = 0; i < stateNum; i++)
4934	if (huffmanTree->code[i]) nodeCount++;
4935	nodeCount = nodeCount*2-1;
4936
4937	unsigned char *treeBytes;
4938	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4939
4940	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
4941	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
4942	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
4943	unsigned char * result_pos = result;
4944	initRandomAccessBytes(result_pos);
4945	result_pos += meta_data_offset;
4946
4947	sizeToBytes(result_pos, num_elements);
4948	result_pos += exe_params->SZ_SIZE_TYPE;
4949
4950	intToBytes_bigEndian(result_pos, block_size);
4951	result_pos += sizeof(int);
4952	doubleToBytes(result_pos, realPrecision);
4953	result_pos += sizeof(double);
4954	intToBytes_bigEndian(result_pos, quantization_intervals);
4955	result_pos += sizeof(int);
4956	intToBytes_bigEndian(result_pos, treeByteSize);
4957	result_pos += sizeof(int);
4958	intToBytes_bigEndian(result_pos, nodeCount);
4959	result_pos += sizeof(int);
4960	memcpy(result_pos, treeBytes, treeByteSize);
4961	result_pos += treeByteSize;
4962	free(treeBytes);
4963
4964	memcpy(result_pos, &use_mean, sizeof(unsigned char));
4965	result_pos += sizeof(unsigned char);
4966	memcpy(result_pos, &mean, sizeof(float));
4967	result_pos += sizeof(float);
4968
4969	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
4970	result_pos += indicator_size;
4971
4972	//convert the lead/mid/resi to byte stream
4973	if(reg_count>0){
4974	for(int e=0; e<3; e++){
4975	int stateNum = 2*coeff_intvCapacity_sz;
4976	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
4977	size_t nodeCount = 0;
4978	init(huffmanTree, coeff_type[e], reg_count);
4979	size_t i = 0;
4980	for (i = 0; i < huffmanTree->stateNum; i++)
4981	if (huffmanTree->code[i]) nodeCount++;
4982	nodeCount = nodeCount*2-1;
4983	unsigned char *treeBytes;
4984	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
4985	doubleToBytes(result_pos, precision[e]);
4986	result_pos += sizeof(double);
4987	intToBytes_bigEndian(result_pos, coeff_intvRadius);
4988	result_pos += sizeof(int);
4989	intToBytes_bigEndian(result_pos, treeByteSize);
4990	result_pos += sizeof(int);
4991	intToBytes_bigEndian(result_pos, nodeCount);
4992	result_pos += sizeof(int);
4993	memcpy(result_pos, treeBytes, treeByteSize);
4994	result_pos += treeByteSize;
4995	free(treeBytes);
4996	size_t typeArray_size = 0;
4997	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
4998	sizeToBytes(result_pos, typeArray_size);
4999	result_pos += sizeof(size_t) + typeArray_size;
5000	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
5001	result_pos += sizeof(int);
5002	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
5003	result_pos += coeff_unpredictable_count[e]*sizeof(float);
5004	SZ_ReleaseHuffman(huffmanTree);
5005	}
5006	}
5007	free(coeff_result_type);
5008	free(coeff_unpredictable_data);
5009
5010	//record the number of unpredictable data and also store them
5011	memcpy(result_pos, &total_unpred, sizeof(size_t));
5012	result_pos += sizeof(size_t);
5013	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
5014	result_pos += total_unpred * sizeof(float);
5015	size_t typeArray_size = 0;
5016	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
5017	result_pos += typeArray_size;
5018
5019	size_t totalEncodeSize = result_pos - result;
5020	free(indicator);
5021	free(result_unpredictable_data);
5022	free(result_type);
5023	free(reg_params);
5024
5025	SZ_ReleaseHuffman(huffmanTree);
5026	*comp_size = totalEncodeSize;
5027
5028	return result;
5029	}
5030
5031	unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float dense_pos, float * max_freq, float * mean_freq)
5032	{
5033	float mean = 0.0;
5034	size_t len = r1 * r2 * r3;
5035	size_t mean_distance = (int) (sqrt(len));
5036	float * data_pos = oriData;
5037	size_t offset_count = 0;
5038	size_t offset_count_2 = 0;
5039	size_t mean_count = 0;
5040	while(data_pos - oriData < len){
5041	mean += *data_pos;
5042	mean_count ++;
5043	data_pos += mean_distance;
5044	offset_count += mean_distance;
5045	offset_count_2 += mean_distance;
5046	if(offset_count >= r3){
5047	offset_count = 0;
5048	data_pos -= 1;
5049	}
5050	if(offset_count_2 >= r2 * r3){
5051	offset_count_2 = 0;
5052	data_pos -= 1;
5053	}
5054	}
5055	if(mean_count > 0) mean /= mean_count;
5056	size_t range = 8192;
5057	size_t radius = 4096;
5058	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
5059	memset(freq_intervals, 0, range*sizeof(size_t));
5060
5061	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
5062	int sampleDistance = confparams_cpr->sampleDistance;
5063	float predThreshold = confparams_cpr->predThreshold;
5064
5065	size_t i;
5066	size_t radiusIndex;
5067	size_t r23=r2*r3;
5068	float pred_value = 0, pred_err;
5069	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
5070	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
5071
5072	float mean_diff;
5073	ptrdiff_t freq_index;
5074	size_t freq_count = 0;
5075	size_t sample_count = 0;
5076
5077	offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
5078	data_pos = oriData + r23 + r3 + offset_count;
5079	size_t n1_count = 1, n2_count = 1; // count i,j sum
5080
5081	while(data_pos - oriData < len){
5082
5083	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
5084	pred_err = fabs(pred_value - *data_pos);
5085	if(pred_err < realPrecision) freq_count ++;
5086	radiusIndex = (pred_err/realPrecision+1)/2;
5087	if(radiusIndex>=maxRangeRadius)
5088	{
5089	radiusIndex = maxRangeRadius - 1;
5090	}
5091	intervals[radiusIndex]++;
5092
5093	mean_diff = *data_pos - mean;
5094	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
5095	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
5096	if(freq_index <= 0){
5097	freq_intervals[0] ++;
5098	}
5099	else if(freq_index >= range){
5100	freq_intervals[range - 1] ++;
5101	}
5102	else{
5103	freq_intervals[freq_index] ++;
5104	}
5105	offset_count += sampleDistance;
5106	if(offset_count >= r3){
5107	n2_count ++;
5108	if(n2_count == r2){
5109	n1_count ++;
5110	n2_count = 1;
5111	data_pos += r3;
5112	}
5113	offset_count_2 = (n1_count + n2_count) % sampleDistance;
5114	data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
5115	offset_count = (sampleDistance - offset_count_2);
5116	if(offset_count == 0) offset_count ++;
5117	}
5118	else data_pos += sampleDistance;
5119	sample_count ++;
5120	}
5121	max_freq = freq_count 1.0/ sample_count;
5122
5123	//compute the appropriate number
5124	size_t targetCount = sample_count*predThreshold;
5125	size_t sum = 0;
5126	for(i=0;i<maxRangeRadius;i++)
5127	{
5128	sum += intervals[i];
5129	if(sum>targetCount)
5130	break;
5131	}
5132	if(i>=maxRangeRadius)
5133	i = maxRangeRadius-1;
5134	unsigned int accIntervals = 2*(i+1);
5135	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
5136
5137	if(powerOf2<32)
5138	powerOf2 = 32;
5139	// collect frequency
5140	size_t max_sum = 0;
5141	size_t max_index = 0;
5142	size_t tmp_sum;
5143	size_t * freq_pos = freq_intervals + 1;
5144	for(size_t i=1; i<range-2; i++){
5145	tmp_sum = freq_pos[0] + freq_pos[1];
5146	if(tmp_sum > max_sum){
5147	max_sum = tmp_sum;
5148	max_index = i;
5149	}
5150	freq_pos ++;
5151	}
5152	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
5153	mean_freq = max_sum 1.0 / sample_count;
5154
5155	free(freq_intervals);
5156	free(intervals);
5157	return powerOf2;
5158	}
5159
5160
5161	// 3D: modified for higher performance
5162	unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
5163
5164	#ifdef HAVE_TIMECMPR
5165	float* decData = NULL;
5166	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5167	decData = (float*)(multisteps->hist_data);
5168	#endif
5169
5170	unsigned int quantization_intervals;
5171	float sz_sample_correct_freq = -1;//0.5; //-1
5172	float dense_pos;
5173	float mean_flush_freq;
5174	unsigned char use_mean = 0;
5175
5176	// calculate block dims
5177	size_t num_x, num_y, num_z;
5178	size_t block_size = 6;
5179	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
5180	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
5181	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
5182
5183	size_t split_index_x, split_index_y, split_index_z;
5184	size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
5185	size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
5186	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
5187	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
5188	SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
5189
5190	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
5191	size_t num_blocks = num_x * num_y * num_z;
5192	size_t num_elements = r1 * r2 * r3;
5193
5194	size_t dim0_offset = r2 * r3;
5195	size_t dim1_offset = r3;
5196
5197	int * result_type = (int ) malloc(num_elements sizeof(int));
5198	size_t unpred_data_max_size = max_num_block_elements;
5199	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
5200	size_t total_unpred = 0;
5201	size_t unpredictable_count;
5202	size_t max_unpred_count = 0;
5203	float * data_pos = oriData;
5204	int * type = result_type;
5205	size_t type_offset;
5206	size_t offset_x, offset_y, offset_z;
5207	size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
5208
5209	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
5210	float * reg_params_pos = reg_params;
5211	// move regression part out
5212	size_t params_offset_b = num_blocks;
5213	size_t params_offset_c = 2*num_blocks;
5214	size_t params_offset_d = 3*num_blocks;
5215	for(size_t i=0; i<num_x; i++){
5216	for(size_t j=0; j<num_y; j++){
5217	for(size_t k=0; k<num_z; k++){
5218	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
5219	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
5220	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
5221	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
5222	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
5223	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
5224
5225	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
5226	/Calculate regression coefficients/
5227	{
5228	float * cur_data_pos = data_pos;
5229	float fx = 0.0;
5230	float fy = 0.0;
5231	float fz = 0.0;
5232	float f = 0;
5233	float sum_x, sum_y;
5234	float curData;
5235	for(size_t i=0; i<current_blockcount_x; i++){
5236	sum_x = 0;
5237	for(size_t j=0; j<current_blockcount_y; j++){
5238	sum_y = 0;
5239	for(size_t k=0; k<current_blockcount_z; k++){
5240	curData = *cur_data_pos;
5241	// f += curData;
5242	// fx += curData * i;
5243	// fy += curData * j;
5244	// fz += curData * k;
5245	sum_y += curData;
5246	fz += curData * k;
5247	cur_data_pos ++;
5248	}
5249	fy += sum_y * j;
5250	sum_x += sum_y;
5251	cur_data_pos += dim1_offset - current_blockcount_z;
5252	}
5253	fx += sum_x * i;
5254	f += sum_x;
5255	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5256	}
5257	float coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
5258	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
5259	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
5260	reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
5261	reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
5262	}
5263	reg_params_pos ++;
5264	}
5265	}
5266	}
5267
5268	//Compress coefficient arrays
5269	double precision_a, precision_b, precision_c, precision_d;
5270	float rel_param_err = 0.025;
5271	precision_a = rel_param_err * realPrecision / late_blockcount_x;
5272	precision_b = rel_param_err * realPrecision / late_blockcount_y;
5273	precision_c = rel_param_err * realPrecision / late_blockcount_z;
5274	precision_d = rel_param_err * realPrecision;
5275
5276	if(exe_params->optQuantMode==1)
5277	{
5278	quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
5279	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
5280	updateQuantizationInfo(quantization_intervals);
5281	}
5282	else{
5283	quantization_intervals = exe_params->intvCapacity;
5284	}
5285
5286	float mean = 0;
5287	if(use_mean){
5288	// compute mean
5289	double sum = 0.0;
5290	size_t mean_count = 0;
5291	for(size_t i=0; i<num_elements; i++){
5292	if(fabs(oriData[i] - dense_pos) < realPrecision){
5293	sum += oriData[i];
5294	mean_count ++;
5295	}
5296	}
5297	if(mean_count > 0) mean = sum / mean_count;
5298	}
5299
5300	double tmp_realPrecision = realPrecision;
5301
5302	// use two prediction buffers for higher performance
5303	float * unpredictable_data = result_unpredictable_data;
5304	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
5305	memset(indicator, 0, num_blocks * sizeof(unsigned char));
5306	size_t reg_count = 0;
5307	size_t strip_dim_0 = early_blockcount_x + 1;
5308	size_t strip_dim_1 = r2 + 1;
5309	size_t strip_dim_2 = r3 + 1;
5310	size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
5311	size_t strip_dim1_offset = strip_dim_2;
5312	unsigned char * indicator_pos = indicator;
5313
5314	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
5315	float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
5316	memset(prediction_buffer_1, 0, prediction_buffer_size);
5317	float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
5318	memset(prediction_buffer_2, 0, prediction_buffer_size);
5319	float * cur_pb_buf = prediction_buffer_1;
5320	float * next_pb_buf = prediction_buffer_2;
5321	float * cur_pb_buf_pos;
5322	float * next_pb_buf_pos;
5323	int intvCapacity = exe_params->intvCapacity;
5324	int intvRadius = exe_params->intvRadius;
5325	int use_reg = 0;
5326	float noise = realPrecision * 1.22;
5327
5328	reg_params_pos = reg_params;
5329	// compress the regression coefficients on the fly
5330	float last_coeffcients[4] = {0.0};
5331	int coeff_intvCapacity_sz = 65536;
5332	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
5333	int * coeff_type[4];
5334	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
5335	float * coeff_unpred_data[4];
5336	float * coeff_unpredictable_data = (float ) malloc(num_blocks4*sizeof(float));
5337	double precision[4];
5338	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
5339	for(int i=0; i<4; i++){
5340	coeff_type[i] = coeff_result_type + i * num_blocks;
5341	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
5342	}
5343	int coeff_index = 0;
5344	unsigned int coeff_unpredictable_count[4] = {0};
5345
5346	if(use_mean){
5347	int intvCapacity_sz = intvCapacity - 2;
5348	for(size_t i=0; i<num_x; i++){
5349	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
5350	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
5351	for(size_t j=0; j<num_y; j++){
5352	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
5353	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
5354	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
5355	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
5356	type = result_type + type_offset;
5357
5358	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
5359	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
5360	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
5361
5362	size_t current_blockcount_z;
5363	float * pb_pos = cur_pb_buf_pos;
5364	float * next_pb_pos = next_pb_buf_pos;
5365	size_t strip_unpredictable_count = 0;
5366	for(size_t k=0; k<num_z; k++){
5367	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
5368	#ifdef HAVE_TIMECMPR
5369	size_t offset_z = 0;
5370	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
5371	size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
5372	#endif
5373	/sampling and decide which predictor/
5374	{
5375	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
5376	float * cur_data_pos;
5377	float curData;
5378	float pred_reg, pred_sz;
5379	float err_sz = 0.0, err_reg = 0.0;
5380	int bmi = 0;
5381	if(i>0 && j>0 && k>0){
5382	for(int i=0; i<block_size; i++){
5383	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5384	curData = *cur_data_pos;
5385	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5386	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5387	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5388	err_reg += fabs(pred_reg - curData);
5389
5390	bmi = block_size - i;
5391	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5392	curData = *cur_data_pos;
5393	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5394	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5395	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5396	err_reg += fabs(pred_reg - curData);
5397
5398	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5399	curData = *cur_data_pos;
5400	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5401	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5402	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5403	err_reg += fabs(pred_reg - curData);
5404
5405	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5406	curData = *cur_data_pos;
5407	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5408	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5409	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5410	err_reg += fabs(pred_reg - curData);
5411	}
5412	}
5413	else{
5414	for(int i=1; i<block_size; i++){
5415	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5416	curData = *cur_data_pos;
5417	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5418	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5419	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5420	err_reg += fabs(pred_reg - curData);
5421
5422	bmi = block_size - i;
5423	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5424	curData = *cur_data_pos;
5425	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5426	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5427	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5428	err_reg += fabs(pred_reg - curData);
5429
5430	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5431	curData = *cur_data_pos;
5432	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5433	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5434	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5435	err_reg += fabs(pred_reg - curData);
5436
5437	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5438	curData = *cur_data_pos;
5439	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5440	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5441	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
5442	err_reg += fabs(pred_reg - curData);
5443
5444	}
5445	}
5446	use_reg = (err_reg < err_sz);
5447	}
5448	if(use_reg){
5449	{
5450	/*predict coefficients in current block via previous reg_block*/
5451	float cur_coeff;
5452	double diff, itvNum;
5453	for(int e=0; e<4; e++){
5454	cur_coeff = reg_params_pos[e*num_blocks];
5455	diff = cur_coeff - last_coeffcients[e];
5456	itvNum = fabs(diff)/precision[e] + 1;
5457	if (itvNum < coeff_intvCapacity_sz){
5458	if (diff < 0) itvNum = -itvNum;
5459	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
5460	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
5461	//guarantee compression error against the case of machine-epsilon
5462	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
5463	coeff_type[e][coeff_index] = 0;
5464	last_coeffcients[e] = cur_coeff;
5465	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5466	}
5467	}
5468	else{
5469	coeff_type[e][coeff_index] = 0;
5470	last_coeffcients[e] = cur_coeff;
5471	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5472	}
5473	}
5474	coeff_index ++;
5475	}
5476	float curData;
5477	float pred;
5478	double itvNum;
5479	double diff;
5480	size_t index = 0;
5481	size_t block_unpredictable_count = 0;
5482	float * cur_data_pos = data_pos;
5483	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5484	for(size_t jj=0; jj<current_blockcount_y; jj++){
5485	for(size_t kk=0; kk<current_blockcount_z; kk++){
5486	curData = *cur_data_pos;
5487	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5488	diff = curData - pred;
5489	itvNum = fabs(diff)/tmp_realPrecision + 1;
5490	if (itvNum < intvCapacity){
5491	if (diff < 0) itvNum = -itvNum;
5492	type[index] = (int) (itvNum/2) + intvRadius;
5493	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5494	//guarantee compression error against the case of machine-epsilon
5495	if(fabs(curData - pred)>tmp_realPrecision){
5496	type[index] = 0;
5497	pred = curData;
5498	unpredictable_data[block_unpredictable_count ++] = curData;
5499	}
5500	}
5501	else{
5502	type[index] = 0;
5503	pred = curData;
5504	unpredictable_data[block_unpredictable_count ++] = curData;
5505	}
5506
5507	#ifdef HAVE_TIMECMPR
5508	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5509	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5510	decData[block_offset + point_offset] = pred;
5511	#endif
5512
5513	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5514	// assign value to block surfaces
5515	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5516	}
5517	index ++;
5518	cur_data_pos ++;
5519	}
5520	cur_data_pos += dim1_offset - current_blockcount_z;
5521	}
5522	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5523	}
5524	/*dealing with the last ii (boundary)*/
5525	{
5526	// ii == current_blockcount_x - 1
5527	size_t ii = current_blockcount_x - 1;
5528	for(size_t jj=0; jj<current_blockcount_y; jj++){
5529	for(size_t kk=0; kk<current_blockcount_z; kk++){
5530	curData = *cur_data_pos;
5531	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5532	diff = curData - pred;
5533	itvNum = fabs(diff)/tmp_realPrecision + 1;
5534	if (itvNum < intvCapacity){
5535	if (diff < 0) itvNum = -itvNum;
5536	type[index] = (int) (itvNum/2) + intvRadius;
5537	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5538	//guarantee compression error against the case of machine-epsilon
5539	if(fabs(curData - pred)>tmp_realPrecision){
5540	type[index] = 0;
5541	pred = curData;
5542	unpredictable_data[block_unpredictable_count ++] = curData;
5543	}
5544	}
5545	else{
5546	type[index] = 0;
5547	pred = curData;
5548	unpredictable_data[block_unpredictable_count ++] = curData;
5549	}
5550
5551	#ifdef HAVE_TIMECMPR
5552	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5553	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5554	decData[block_offset + point_offset] = pred;
5555	#endif
5556
5557	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5558	// assign value to block surfaces
5559	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5560	}
5561	// assign value to next prediction buffer
5562	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
5563	index ++;
5564	cur_data_pos ++;
5565	}
5566	cur_data_pos += dim1_offset - current_blockcount_z;
5567	}
5568	}
5569	unpredictable_count = block_unpredictable_count;
5570	strip_unpredictable_count += unpredictable_count;
5571	unpredictable_data += unpredictable_count;
5572
5573	reg_count ++;
5574	}
5575	else{
5576	// use SZ
5577	// SZ prediction
5578	unpredictable_count = 0;
5579	float * cur_pb_pos = pb_pos;
5580	float * cur_data_pos = data_pos;
5581	float curData;
5582	float pred3D;
5583	double itvNum, diff;
5584	size_t index = 0;
5585	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5586	for(size_t jj=0; jj<current_blockcount_y; jj++){
5587	for(size_t kk=0; kk<current_blockcount_z; kk++){
5588
5589	curData = *cur_data_pos;
5590	if(fabs(curData - mean) <= realPrecision){
5591	// adjust type[index] to intvRadius for coherence with freq in reg
5592	type[index] = intvRadius;
5593	*cur_pb_pos = mean;
5594	}
5595	else
5596	{
5597	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5598	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5599	diff = curData - pred3D;
5600	itvNum = fabs(diff)/realPrecision + 1;
5601	if (itvNum < intvCapacity_sz){
5602	if (diff < 0) itvNum = -itvNum;
5603	type[index] = (int) (itvNum/2) + intvRadius;
5604	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5605	if(type[index] <= intvRadius) type[index] -= 1;
5606	//guarantee compression error against the case of machine-epsilon
5607	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5608	type[index] = 0;
5609	*cur_pb_pos = curData;
5610	unpredictable_data[unpredictable_count ++] = curData;
5611	}
5612	}
5613	else{
5614	type[index] = 0;
5615	*cur_pb_pos = curData;
5616	unpredictable_data[unpredictable_count ++] = curData;
5617	}
5618	}
5619	#ifdef HAVE_TIMECMPR
5620	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5621	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5622	decData[block_offset + point_offset] = *cur_pb_pos;
5623	#endif
5624
5625	index ++;
5626	cur_pb_pos ++;
5627	cur_data_pos ++;
5628	}
5629	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5630	cur_data_pos += dim1_offset - current_blockcount_z;
5631	}
5632	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
5633	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5634	}
5635	/*dealing with the last ii (boundary)*/
5636	{
5637	// ii == current_blockcount_x - 1
5638	for(size_t jj=0; jj<current_blockcount_y; jj++){
5639	for(size_t kk=0; kk<current_blockcount_z; kk++){
5640
5641	curData = *cur_data_pos;
5642	if(fabs(curData - mean) <= realPrecision){
5643	// adjust type[index] to intvRadius for coherence with freq in reg
5644	type[index] = intvRadius;
5645	*cur_pb_pos = mean;
5646	}
5647	else
5648	{
5649	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5650	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5651	diff = curData - pred3D;
5652	itvNum = fabs(diff)/realPrecision + 1;
5653	if (itvNum < intvCapacity_sz){
5654	if (diff < 0) itvNum = -itvNum;
5655	type[index] = (int) (itvNum/2) + intvRadius;
5656	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5657	if(type[index] <= intvRadius) type[index] -= 1;
5658	//guarantee compression error against the case of machine-epsilon
5659	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5660	type[index] = 0;
5661	*cur_pb_pos = curData;
5662	unpredictable_data[unpredictable_count ++] = curData;
5663	}
5664	}
5665	else{
5666	type[index] = 0;
5667	*cur_pb_pos = curData;
5668	unpredictable_data[unpredictable_count ++] = curData;
5669	}
5670	}
5671	#ifdef HAVE_TIMECMPR
5672	size_t ii = current_blockcount_x - 1;
5673	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5674	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5675	decData[block_offset + point_offset] = *cur_pb_pos;
5676	#endif
5677
5678	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
5679	index ++;
5680	cur_pb_pos ++;
5681	cur_data_pos ++;
5682	}
5683	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5684	cur_data_pos += dim1_offset - current_blockcount_z;
5685	}
5686	}
5687	strip_unpredictable_count += unpredictable_count;
5688	unpredictable_data += unpredictable_count;
5689	// change indicator
5690	indicator_pos[k] = 1;
5691	}// end SZ
5692
5693	reg_params_pos ++;
5694	data_pos += current_blockcount_z;
5695	pb_pos += current_blockcount_z;
5696	next_pb_pos += current_blockcount_z;
5697	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
5698
5699	} // end k
5700
5701	if(strip_unpredictable_count > max_unpred_count){
5702	max_unpred_count = strip_unpredictable_count;
5703	}
5704	total_unpred += strip_unpredictable_count;
5705	indicator_pos += num_z;
5706	}// end j
5707	float * tmp;
5708	tmp = cur_pb_buf;
5709	cur_pb_buf = next_pb_buf;
5710	next_pb_buf = tmp;
5711	}// end i
5712	}
5713	else{
5714	int intvCapacity_sz = intvCapacity - 2;
5715	for(size_t i=0; i<num_x; i++){
5716	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
5717	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
5718
5719	for(size_t j=0; j<num_y; j++){
5720	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
5721	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
5722	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
5723	// copy bottom plane from plane buffer
5724	// memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(float));
5725	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
5726	type = result_type + type_offset;
5727
5728	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
5729	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
5730	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
5731
5732	size_t current_blockcount_z;
5733	float * pb_pos = cur_pb_buf_pos;
5734	float * next_pb_pos = next_pb_buf_pos;
5735	size_t strip_unpredictable_count = 0;
5736	for(size_t k=0; k<num_z; k++){
5737	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
5738	#ifdef HAVE_TIMECMPR
5739	size_t offset_z = 0;
5740	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
5741	size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
5742	#endif
5743	/*sampling*/
5744	{
5745	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
5746	float * cur_data_pos;
5747	float curData;
5748	float pred_reg, pred_sz;
5749	float err_sz = 0.0, err_reg = 0.0;
5750	int bmi;
5751	if(i>0 && j>0 && k>0){
5752	for(int i=0; i<block_size; i++){
5753	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5754	curData = *cur_data_pos;
5755	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5756	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5757	err_sz += fabs(pred_sz - curData) + noise;
5758	err_reg += fabs(pred_reg - curData);
5759
5760	bmi = block_size - i;
5761	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5762	curData = *cur_data_pos;
5763	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5764	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5765	err_sz += fabs(pred_sz - curData) + noise;
5766	err_reg += fabs(pred_reg - curData);
5767
5768	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5769	curData = *cur_data_pos;
5770	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5771	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5772	err_sz += fabs(pred_sz - curData) + noise;
5773	err_reg += fabs(pred_reg - curData);
5774
5775	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5776	curData = *cur_data_pos;
5777	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5778	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5779	err_sz += fabs(pred_sz - curData) + noise;
5780	err_reg += fabs(pred_reg - curData);
5781	}
5782	}
5783	else{
5784	for(int i=1; i<block_size; i++){
5785	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
5786	curData = *cur_data_pos;
5787	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5788	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5789	err_sz += fabs(pred_sz - curData) + noise;
5790	err_reg += fabs(pred_reg - curData);
5791
5792	bmi = block_size - i;
5793	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
5794	curData = *cur_data_pos;
5795	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5796	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5797	err_sz += fabs(pred_sz - curData) + noise;
5798	err_reg += fabs(pred_reg - curData);
5799
5800	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
5801	curData = *cur_data_pos;
5802	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5803	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
5804	err_sz += fabs(pred_sz - curData) + noise;
5805	err_reg += fabs(pred_reg - curData);
5806
5807	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
5808	curData = *cur_data_pos;
5809	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
5810	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
5811	err_sz += fabs(pred_sz - curData) + noise;
5812	err_reg += fabs(pred_reg - curData);
5813	}
5814	}
5815	use_reg = (err_reg < err_sz);
5816
5817	}
5818	if(use_reg)
5819	{
5820	{
5821	/*predict coefficients in current block via previous reg_block*/
5822	float cur_coeff;
5823	double diff, itvNum;
5824	for(int e=0; e<4; e++){
5825	cur_coeff = reg_params_pos[e*num_blocks];
5826	diff = cur_coeff - last_coeffcients[e];
5827	itvNum = fabs(diff)/precision[e] + 1;
5828	if (itvNum < coeff_intvCapacity_sz){
5829	if (diff < 0) itvNum = -itvNum;
5830	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
5831	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
5832	//guarantee compression error against the case of machine-epsilon
5833	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
5834	coeff_type[e][coeff_index] = 0;
5835	last_coeffcients[e] = cur_coeff;
5836	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5837	}
5838	}
5839	else{
5840	coeff_type[e][coeff_index] = 0;
5841	last_coeffcients[e] = cur_coeff;
5842	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
5843	}
5844	}
5845	coeff_index ++;
5846	}
5847	float curData;
5848	float pred;
5849	double itvNum;
5850	double diff;
5851	size_t index = 0;
5852	size_t block_unpredictable_count = 0;
5853	float * cur_data_pos = data_pos;
5854	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5855	for(size_t jj=0; jj<current_blockcount_y; jj++){
5856	for(size_t kk=0; kk<current_blockcount_z; kk++){
5857
5858	curData = *cur_data_pos;
5859	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5860	diff = curData - pred;
5861	itvNum = fabs(diff)/tmp_realPrecision + 1;
5862	if (itvNum < intvCapacity){
5863	if (diff < 0) itvNum = -itvNum;
5864	type[index] = (int) (itvNum/2) + intvRadius;
5865	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5866	//guarantee compression error against the case of machine-epsilon
5867	if(fabs(curData - pred)>tmp_realPrecision){
5868	type[index] = 0;
5869	pred = curData;
5870	unpredictable_data[block_unpredictable_count ++] = curData;
5871	}
5872	}
5873	else{
5874	type[index] = 0;
5875	pred = curData;
5876	unpredictable_data[block_unpredictable_count ++] = curData;
5877	}
5878
5879	#ifdef HAVE_TIMECMPR
5880	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5881	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5882	decData[block_offset + point_offset] = pred;
5883	#endif
5884
5885
5886	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5887	// assign value to block surfaces
5888	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5889	}
5890	index ++;
5891	cur_data_pos ++;
5892	}
5893	cur_data_pos += dim1_offset - current_blockcount_z;
5894	}
5895	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5896	}
5897	/*dealing with the last ii (boundary)*/
5898	{
5899	// ii == current_blockcount_x - 1
5900	size_t ii = current_blockcount_x - 1;
5901	for(size_t jj=0; jj<current_blockcount_y; jj++){
5902	for(size_t kk=0; kk<current_blockcount_z; kk++){
5903	curData = *cur_data_pos;
5904	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
5905	diff = curData - pred;
5906	itvNum = fabs(diff)/tmp_realPrecision + 1;
5907	if (itvNum < intvCapacity){
5908	if (diff < 0) itvNum = -itvNum;
5909	type[index] = (int) (itvNum/2) + intvRadius;
5910	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
5911	//guarantee compression error against the case of machine-epsilon
5912	if(fabs(curData - pred)>tmp_realPrecision){
5913	type[index] = 0;
5914	pred = curData;
5915	unpredictable_data[block_unpredictable_count ++] = curData;
5916	}
5917	}
5918	else{
5919	type[index] = 0;
5920	pred = curData;
5921	unpredictable_data[block_unpredictable_count ++] = curData;
5922	}
5923
5924	#ifdef HAVE_TIMECMPR
5925	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5926	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5927	decData[block_offset + point_offset] = pred;
5928	#endif
5929
5930	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
5931	// assign value to block surfaces
5932	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
5933	}
5934	// assign value to next prediction buffer
5935	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
5936	index ++;
5937	cur_data_pos ++;
5938	}
5939	cur_data_pos += dim1_offset - current_blockcount_z;
5940	}
5941	}
5942	unpredictable_count = block_unpredictable_count;
5943	strip_unpredictable_count += unpredictable_count;
5944	unpredictable_data += unpredictable_count;
5945	reg_count ++;
5946	}
5947	else{
5948	// use SZ
5949	// SZ prediction
5950	unpredictable_count = 0;
5951	float * cur_pb_pos = pb_pos;
5952	float * cur_data_pos = data_pos;
5953	float curData;
5954	float pred3D;
5955	double itvNum, diff;
5956	size_t index = 0;
5957	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
5958	for(size_t jj=0; jj<current_blockcount_y; jj++){
5959	for(size_t kk=0; kk<current_blockcount_z; kk++){
5960
5961	curData = *cur_data_pos;
5962	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
5963	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
5964	diff = curData - pred3D;
5965	itvNum = fabs(diff)/realPrecision + 1;
5966	if (itvNum < intvCapacity_sz){
5967	if (diff < 0) itvNum = -itvNum;
5968	type[index] = (int) (itvNum/2) + intvRadius;
5969	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
5970	//guarantee compression error against the case of machine-epsilon
5971	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
5972	type[index] = 0;
5973	*cur_pb_pos = curData;
5974	unpredictable_data[unpredictable_count ++] = curData;
5975	}
5976	}
5977	else{
5978	type[index] = 0;
5979	*cur_pb_pos = curData;
5980	unpredictable_data[unpredictable_count ++] = curData;
5981	}
5982
5983	#ifdef HAVE_TIMECMPR
5984	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
5985	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
5986	decData[block_offset + point_offset] = *cur_pb_pos;
5987	#endif
5988	index ++;
5989	cur_pb_pos ++;
5990	cur_data_pos ++;
5991	}
5992	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
5993	cur_data_pos += dim1_offset - current_blockcount_z;
5994	}
5995	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
5996	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
5997	}
5998	/*dealing with the last ii (boundary)*/
5999	{
6000	// ii == current_blockcount_x - 1
6001	for(size_t jj=0; jj<current_blockcount_y; jj++){
6002	for(size_t kk=0; kk<current_blockcount_z; kk++){
6003
6004	curData = *cur_data_pos;
6005	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
6006	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6007	diff = curData - pred3D;
6008	itvNum = fabs(diff)/realPrecision + 1;
6009	if (itvNum < intvCapacity_sz){
6010	if (diff < 0) itvNum = -itvNum;
6011	type[index] = (int) (itvNum/2) + intvRadius;
6012	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
6013	//guarantee compression error against the case of machine-epsilon
6014	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
6015	type[index] = 0;
6016	*cur_pb_pos = curData;
6017	unpredictable_data[unpredictable_count ++] = curData;
6018	}
6019	}
6020	else{
6021	type[index] = 0;
6022	*cur_pb_pos = curData;
6023	unpredictable_data[unpredictable_count ++] = curData;
6024	}
6025
6026	#ifdef HAVE_TIMECMPR
6027	size_t ii = current_blockcount_x - 1;
6028	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
6029	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
6030	decData[block_offset + point_offset] = *cur_pb_pos;
6031	#endif
6032
6033	// assign value to next prediction buffer
6034	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
6035	index ++;
6036	cur_pb_pos ++;
6037	cur_data_pos ++;
6038	}
6039	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
6040	cur_data_pos += dim1_offset - current_blockcount_z;
6041	}
6042	}
6043	strip_unpredictable_count += unpredictable_count;
6044	unpredictable_data += unpredictable_count;
6045	// change indicator
6046	indicator_pos[k] = 1;
6047	}// end SZ
6048
6049	reg_params_pos ++;
6050	data_pos += current_blockcount_z;
6051	pb_pos += current_blockcount_z;
6052	next_pb_pos += current_blockcount_z;
6053	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
6054
6055	}
6056
6057	if(strip_unpredictable_count > max_unpred_count){
6058	max_unpred_count = strip_unpredictable_count;
6059	}
6060	total_unpred += strip_unpredictable_count;
6061	indicator_pos += num_z;
6062	}
6063	float * tmp;
6064	tmp = cur_pb_buf;
6065	cur_pb_buf = next_pb_buf;
6066	next_pb_buf = tmp;
6067	}
6068	}
6069
6070	free(prediction_buffer_1);
6071	free(prediction_buffer_2);
6072
6073	int stateNum = 2*quantization_intervals;
6074	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6075
6076	size_t nodeCount = 0;
6077	init(huffmanTree, result_type, num_elements);
6078	size_t i = 0;
6079	for (i = 0; i < huffmanTree->stateNum; i++)
6080	if (huffmanTree->code[i]) nodeCount++;
6081	nodeCount = nodeCount*2-1;
6082
6083	unsigned char *treeBytes;
6084	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6085
6086	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
6087	// total size metadata # elements real precision intervals nodeCount huffman block index unpredictable count mean unpred size elements
6088	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
6089	unsigned char * result_pos = result;
6090	initRandomAccessBytes(result_pos);
6091
6092	result_pos += meta_data_offset;
6093
6094	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
6095	result_pos += exe_params->SZ_SIZE_TYPE;
6096
6097	intToBytes_bigEndian(result_pos, block_size);
6098	result_pos += sizeof(int);
6099	doubleToBytes(result_pos, realPrecision);
6100	result_pos += sizeof(double);
6101	intToBytes_bigEndian(result_pos, quantization_intervals);
6102	result_pos += sizeof(int);
6103	intToBytes_bigEndian(result_pos, treeByteSize);
6104	result_pos += sizeof(int);
6105	intToBytes_bigEndian(result_pos, nodeCount);
6106	result_pos += sizeof(int);
6107	memcpy(result_pos, treeBytes, treeByteSize);
6108	result_pos += treeByteSize;
6109	free(treeBytes);
6110
6111	memcpy(result_pos, &use_mean, sizeof(unsigned char));
6112	result_pos += sizeof(unsigned char);
6113	memcpy(result_pos, &mean, sizeof(float));
6114	result_pos += sizeof(float);
6115	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
6116	result_pos += indicator_size;
6117
6118	//convert the lead/mid/resi to byte stream
6119	if(reg_count > 0){
6120	for(int e=0; e<4; e++){
6121	int stateNum = 2*coeff_intvCapacity_sz;
6122	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6123	size_t nodeCount = 0;
6124	init(huffmanTree, coeff_type[e], reg_count);
6125	size_t i = 0;
6126	for (i = 0; i < huffmanTree->stateNum; i++)
6127	if (huffmanTree->code[i]) nodeCount++;
6128	nodeCount = nodeCount*2-1;
6129	unsigned char *treeBytes;
6130	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6131	doubleToBytes(result_pos, precision[e]);
6132	result_pos += sizeof(double);
6133	intToBytes_bigEndian(result_pos, coeff_intvRadius);
6134	result_pos += sizeof(int);
6135	intToBytes_bigEndian(result_pos, treeByteSize);
6136	result_pos += sizeof(int);
6137	intToBytes_bigEndian(result_pos, nodeCount);
6138	result_pos += sizeof(int);
6139	memcpy(result_pos, treeBytes, treeByteSize);
6140	result_pos += treeByteSize;
6141	free(treeBytes);
6142	size_t typeArray_size = 0;
6143	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
6144	sizeToBytes(result_pos, typeArray_size);
6145	result_pos += sizeof(size_t) + typeArray_size;
6146	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
6147	result_pos += sizeof(int);
6148	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
6149	result_pos += coeff_unpredictable_count[e]*sizeof(float);
6150	SZ_ReleaseHuffman(huffmanTree);
6151	}
6152	}
6153	free(coeff_result_type);
6154	free(coeff_unpredictable_data);
6155
6156	//record the number of unpredictable data and also store them
6157	memcpy(result_pos, &total_unpred, sizeof(size_t));
6158	result_pos += sizeof(size_t);
6159	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
6160	result_pos += total_unpred * sizeof(float);
6161	size_t typeArray_size = 0;
6162	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
6163	result_pos += typeArray_size;
6164	size_t totalEncodeSize = result_pos - result;
6165	free(indicator);
6166	free(result_unpredictable_data);
6167	free(result_type);
6168	free(reg_params);
6169
6170
6171	SZ_ReleaseHuffman(huffmanTree);
6172	*comp_size = totalEncodeSize;
6173	return result;
6174	}
6175
6176	unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
6177
6178	unsigned int quantization_intervals;
6179	float sz_sample_correct_freq = -1;//0.5; //-1
6180	float dense_pos;
6181	float mean_flush_freq;
6182	unsigned char use_mean = 0;
6183
6184	// calculate block dims
6185	size_t num_x, num_y, num_z;
6186	size_t block_size = 6;
6187	num_x = (r1 - 1) / block_size + 1;
6188	num_y = (r2 - 1) / block_size + 1;
6189	num_z = (r3 - 1) / block_size + 1;
6190
6191	size_t max_num_block_elements = block_size * block_size * block_size;
6192	size_t num_blocks = num_x * num_y * num_z;
6193	size_t num_elements = r1 * r2 * r3;
6194
6195	size_t dim0_offset = r2 * r3;
6196	size_t dim1_offset = r3;
6197
6198	int * result_type = (int ) malloc(num_blocksmax_num_block_elements * sizeof(int));
6199	size_t unpred_data_max_size = max_num_block_elements;
6200	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
6201	size_t total_unpred = 0;
6202	size_t unpredictable_count;
6203	float * data_pos = oriData;
6204	int * type = result_type;
6205	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
6206	float * reg_params_pos = reg_params;
6207	// move regression part out
6208	size_t params_offset_b = num_blocks;
6209	size_t params_offset_c = 2*num_blocks;
6210	size_t params_offset_d = 3*num_blocks;
6211	float * pred_buffer = (float ) malloc((block_size+1)(block_size+1)(block_size+1)sizeof(float));
6212	float * pred_buffer_pos = NULL;
6213	float * block_data_pos_x = NULL;
6214	float * block_data_pos_y = NULL;
6215	float * block_data_pos_z = NULL;
6216	for(size_t i=0; i<num_x; i++){
6217	for(size_t j=0; j<num_y; j++){
6218	for(size_t k=0; k<num_z; k++){
6219	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
6220	pred_buffer_pos = pred_buffer;
6221	block_data_pos_x = data_pos;
6222	// use the buffer as block_sizeblock_sizeblock_size
6223	for(int ii=0; ii<block_size; ii++){
6224	block_data_pos_y = block_data_pos_x;
6225	for(int jj=0; jj<block_size; jj++){
6226	block_data_pos_z = block_data_pos_y;
6227	for(int kk=0; kk<block_size; kk++){
6228	pred_buffer_pos = block_data_pos_z;
6229	if(k*block_size + kk + 1 < r3) block_data_pos_z ++;
6230	pred_buffer_pos ++;
6231	}
6232	if(j*block_size + jj + 1 < r2) block_data_pos_y += dim1_offset;
6233	}
6234	if(i*block_size + ii + 1 < r1) block_data_pos_x += dim0_offset;
6235	}
6236	/Calculate regression coefficients/
6237	{
6238	float * cur_data_pos = pred_buffer;
6239	float fx = 0.0;
6240	float fy = 0.0;
6241	float fz = 0.0;
6242	float f = 0;
6243	float sum_x, sum_y;
6244	float curData;
6245	for(size_t i=0; i<block_size; i++){
6246	sum_x = 0;
6247	for(size_t j=0; j<block_size; j++){
6248	sum_y = 0;
6249	for(size_t k=0; k<block_size; k++){
6250	curData = *cur_data_pos;
6251	sum_y += curData;
6252	fz += curData * k;
6253	cur_data_pos ++;
6254	}
6255	fy += sum_y * j;
6256	sum_x += sum_y;
6257	}
6258	fx += sum_x * i;
6259	f += sum_x;
6260	}
6261	float coeff = 1.0 / (block_size * block_size * block_size);
6262	reg_params_pos[0] = (2 * fx / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
6263	reg_params_pos[params_offset_b] = (2 * fy / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
6264	reg_params_pos[params_offset_c] = (2 * fz / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
6265	reg_params_pos[params_offset_d] = f * coeff - ((block_size - 1) * reg_params_pos[0] / 2 + (block_size - 1) * reg_params_pos[params_offset_b] / 2 + (block_size - 1) * reg_params_pos[params_offset_c] / 2);
6266	}
6267	reg_params_pos ++;
6268	}
6269	}
6270	}
6271
6272	//Compress coefficient arrays
6273	double precision_a, precision_b, precision_c, precision_d;
6274	float rel_param_err = 0.025;
6275	precision_a = rel_param_err * realPrecision / block_size;
6276	precision_b = rel_param_err * realPrecision / block_size;
6277	precision_c = rel_param_err * realPrecision / block_size;
6278	precision_d = rel_param_err * realPrecision;
6279
6280	if(exe_params->optQuantMode==1)
6281	{
6282	quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
6283	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
6284	updateQuantizationInfo(quantization_intervals);
6285	}
6286	else{
6287	quantization_intervals = exe_params->intvCapacity;
6288	}
6289
6290	float mean = 0;
6291	if(use_mean){
6292	// compute mean
6293	double sum = 0.0;
6294	size_t mean_count = 0;
6295	for(size_t i=0; i<num_elements; i++){
6296	if(fabs(oriData[i] - dense_pos) < realPrecision){
6297	sum += oriData[i];
6298	mean_count ++;
6299	}
6300	}
6301	if(mean_count > 0) mean = sum / mean_count;
6302	}
6303
6304	double tmp_realPrecision = realPrecision;
6305
6306	// use two prediction buffers for higher performance
6307	float * unpredictable_data = result_unpredictable_data;
6308	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
6309	memset(indicator, 0, num_blocks * sizeof(unsigned char));
6310	size_t reg_count = 0;
6311	unsigned char * indicator_pos = indicator;
6312
6313	int intvCapacity = exe_params->intvCapacity;
6314	int intvRadius = exe_params->intvRadius;
6315	int use_reg = 0;
6316	float noise = realPrecision * 1.22;
6317
6318	reg_params_pos = reg_params;
6319	// compress the regression coefficients on the fly
6320	float last_coeffcients[4] = {0.0};
6321	int coeff_intvCapacity_sz = 65536;
6322	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
6323	int * coeff_type[4];
6324	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
6325	float * coeff_unpred_data[4];
6326	float * coeff_unpredictable_data = (float ) malloc(num_blocks4*sizeof(float));
6327	double precision[4];
6328	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
6329	for(int i=0; i<4; i++){
6330	coeff_type[i] = coeff_result_type + i * num_blocks;
6331	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
6332	}
6333	int coeff_index = 0;
6334	unsigned int coeff_unpredictable_count[4] = {0};
6335
6336	memset(pred_buffer, 0, (block_size+1)(block_size+1)(block_size+1)*sizeof(float));
6337	int pred_buffer_block_size = block_size + 1;
6338	int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size;
6339	int strip_dim1_offset = pred_buffer_block_size;
6340
6341	if(use_mean){
6342	int intvCapacity_sz = intvCapacity - 2;
6343	type = result_type;
6344	for(size_t i=0; i<num_x; i++){
6345	for(size_t j=0; j<num_y; j++){
6346	for(size_t k=0; k<num_z; k++){
6347	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
6348	// add 1 in x, y, z offset
6349	pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6350	block_data_pos_x = data_pos;
6351	for(int ii=0; ii<block_size; ii++){
6352	block_data_pos_y = block_data_pos_x;
6353	for(int jj=0; jj<block_size; jj++){
6354	block_data_pos_z = block_data_pos_y;
6355	for(int kk=0; kk<block_size; kk++){
6356	pred_buffer_pos = block_data_pos_z;
6357	if(k*block_size + kk + 1< r3) block_data_pos_z ++;
6358	pred_buffer_pos ++;
6359	}
6360	// add 1 in z offset
6361	pred_buffer_pos ++;
6362	if(j*block_size + jj + 1< r2) block_data_pos_y += dim1_offset;
6363	}
6364	// add 1 in y offset
6365	pred_buffer_pos += pred_buffer_block_size;
6366	if(i*block_size + ii + 1< r1) block_data_pos_x += dim0_offset;
6367	}
6368	/sampling and decide which predictor/
6369	{
6370	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
6371	float * cur_data_pos;
6372	float curData;
6373	float pred_reg, pred_sz;
6374	float err_sz = 0.0, err_reg = 0.0;
6375	int bmi = 0;
6376	for(int i=2; i<=block_size; i++){
6377	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + i;
6378	curData = *cur_data_pos;
6379	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6380	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6381	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6382	err_reg += fabs(pred_reg - curData);
6383
6384	bmi = block_size - i;
6385	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + bmi;
6386	curData = *cur_data_pos;
6387	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6388	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6389	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6390	err_reg += fabs(pred_reg - curData);
6391
6392	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + i;
6393	curData = *cur_data_pos;
6394	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6395	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6396	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6397	err_reg += fabs(pred_reg - curData);
6398
6399	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
6400	curData = *cur_data_pos;
6401	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6402	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6403	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
6404	err_reg += fabs(pred_reg - curData);
6405	}
6406
6407	use_reg = (err_reg < err_sz);
6408	}
6409	if(use_reg){
6410	{
6411	/predict coefficients in current block via previous reg_block/
6412	float cur_coeff;
6413	double diff, itvNum;
6414	for(int e=0; e<4; e++){
6415	cur_coeff = reg_params_pos[e*num_blocks];
6416	diff = cur_coeff - last_coeffcients[e];
6417	itvNum = fabs(diff)/precision[e] + 1;
6418	if (itvNum < coeff_intvCapacity_sz){
6419	if (diff < 0) itvNum = -itvNum;
6420	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
6421	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
6422	//ganrantee comporession error against the case of machine-epsilon
6423	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
6424	coeff_type[e][coeff_index] = 0;
6425	last_coeffcients[e] = cur_coeff;
6426	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6427	}
6428	}
6429	else{
6430	coeff_type[e][coeff_index] = 0;
6431	last_coeffcients[e] = cur_coeff;
6432	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6433	}
6434	}
6435	coeff_index ++;
6436	}
6437	float curData;
6438	float pred;
6439	double itvNum;
6440	double diff;
6441	size_t index = 0;
6442	size_t block_unpredictable_count = 0;
6443	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6444	for(size_t ii=0; ii<block_size; ii++){
6445	for(size_t jj=0; jj<block_size; jj++){
6446	for(size_t kk=0; kk<block_size; kk++){
6447	curData = *cur_data_pos;
6448	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
6449	diff = curData - pred;
6450	itvNum = fabs(diff)/tmp_realPrecision + 1;
6451	if (itvNum < intvCapacity){
6452	if (diff < 0) itvNum = -itvNum;
6453	type[index] = (int) (itvNum/2) + intvRadius;
6454	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
6455	//ganrantee comporession error against the case of machine-epsilon
6456	if(fabs(curData - pred)>tmp_realPrecision){
6457	type[index] = 0;
6458	pred = curData;
6459	unpredictable_data[block_unpredictable_count ++] = curData;
6460	}
6461	}
6462	else{
6463	type[index] = 0;
6464	pred = curData;
6465	unpredictable_data[block_unpredictable_count ++] = curData;
6466	}
6467	index ++;
6468	cur_data_pos ++;
6469	}
6470	cur_data_pos ++;
6471	}
6472	cur_data_pos += pred_buffer_block_size;
6473	}
6474
6475	total_unpred += block_unpredictable_count;
6476	unpredictable_data += block_unpredictable_count;
6477	reg_count ++;
6478	}
6479	else{
6480	// use SZ
6481	// SZ predication
6482	unpredictable_count = 0;
6483	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6484	float curData;
6485	float pred3D;
6486	double itvNum, diff;
6487	size_t index = 0;
6488	for(size_t ii=0; ii<block_size; ii++){
6489	for(size_t jj=0; jj<block_size; jj++){
6490	for(size_t kk=0; kk<block_size; kk++){
6491
6492	curData = *cur_data_pos;
6493	if(fabs(curData - mean) <= realPrecision){
6494	type[index] = 1;
6495	*cur_data_pos = mean;
6496	}
6497	else
6498	{
6499	pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
6500	- cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6501	diff = curData - pred3D;
6502	itvNum = fabs(diff)/realPrecision + 1;
6503	if (itvNum < intvCapacity_sz){
6504	if (diff < 0) itvNum = -itvNum;
6505	type[index] = (int) (itvNum/2) + intvRadius;
6506	cur_data_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
6507	//ganrantee comporession error against the case of machine-epsilon
6508	if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
6509	type[index] = 0;
6510	*cur_data_pos = curData;
6511	unpredictable_data[unpredictable_count ++] = curData;
6512	}
6513	}
6514	else{
6515	type[index] = 0;
6516	*cur_data_pos = curData;
6517	unpredictable_data[unpredictable_count ++] = curData;
6518	}
6519	}
6520	index ++;
6521	cur_data_pos ++;
6522	}
6523	cur_data_pos ++;
6524	}
6525	cur_data_pos += pred_buffer_block_size;
6526	}
6527	total_unpred += unpredictable_count;
6528	unpredictable_data += unpredictable_count;
6529	// change indicator
6530	indicator_pos[k] = 1;
6531	}// end SZ
6532	reg_params_pos ++;
6533	type += block_size * block_size * block_size;
6534	} // end k
6535	indicator_pos += num_z;
6536	}// end j
6537	}// end i
6538	}
6539	else{
6540	int intvCapacity_sz = intvCapacity - 2;
6541	type = result_type;
6542	for(size_t i=0; i<num_x; i++){
6543	for(size_t j=0; j<num_y; j++){
6544	for(size_t k=0; k<num_z; k++){
6545	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
6546	// add 1 in x, y, z offset
6547	pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6548	block_data_pos_x = data_pos;
6549	for(int ii=0; ii<block_size; ii++){
6550	block_data_pos_y = block_data_pos_x;
6551	for(int jj=0; jj<block_size; jj++){
6552	block_data_pos_z = block_data_pos_y;
6553	for(int kk=0; kk<block_size; kk++){
6554	pred_buffer_pos = block_data_pos_z;
6555	if(k*block_size + kk < r3) block_data_pos_z ++;
6556	pred_buffer_pos ++;
6557	}
6558	// add 1 in z offset
6559	pred_buffer_pos ++;
6560	if(j*block_size + jj < r2) block_data_pos_y += dim1_offset;
6561	}
6562	// add 1 in y offset
6563	pred_buffer_pos += pred_buffer_block_size;
6564	if(i*block_size + ii < r1) block_data_pos_x += dim0_offset;
6565	}
6566	/sampling/
6567	{
6568	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
6569	float * cur_data_pos;
6570	float curData;
6571	float pred_reg, pred_sz;
6572	float err_sz = 0.0, err_reg = 0.0;
6573	int bmi;
6574	for(int i=2; i<=block_size; i++){
6575	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + i;
6576	curData = *cur_data_pos;
6577	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6578	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6579	err_sz += fabs(pred_sz - curData) + noise;
6580	err_reg += fabs(pred_reg - curData);
6581
6582	bmi = block_size - i;
6583	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + bmi;
6584	curData = *cur_data_pos;
6585	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6586	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6587	err_sz += fabs(pred_sz - curData) + noise;
6588	err_reg += fabs(pred_reg - curData);
6589
6590	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + i;
6591	curData = *cur_data_pos;
6592	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6593	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
6594	err_sz += fabs(pred_sz - curData) + noise;
6595	err_reg += fabs(pred_reg - curData);
6596
6597	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
6598	curData = *cur_data_pos;
6599	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6600	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
6601	err_sz += fabs(pred_sz - curData) + noise;
6602	err_reg += fabs(pred_reg - curData);
6603	}
6604
6605	use_reg = (err_reg < err_sz);
6606
6607	}
6608	if(use_reg)
6609	{
6610	{
6611	/predict coefficients in current block via previous reg_block/
6612	float cur_coeff;
6613	double diff, itvNum;
6614	for(int e=0; e<4; e++){
6615	cur_coeff = reg_params_pos[e*num_blocks];
6616	diff = cur_coeff - last_coeffcients[e];
6617	itvNum = fabs(diff)/precision[e] + 1;
6618	if (itvNum < coeff_intvCapacity_sz){
6619	if (diff < 0) itvNum = -itvNum;
6620	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
6621	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
6622	//ganrantee comporession error against the case of machine-epsilon
6623	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
6624	coeff_type[e][coeff_index] = 0;
6625	last_coeffcients[e] = cur_coeff;
6626	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6627	}
6628	}
6629	else{
6630	coeff_type[e][coeff_index] = 0;
6631	last_coeffcients[e] = cur_coeff;
6632	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
6633	}
6634	}
6635	coeff_index ++;
6636	}
6637	float curData;
6638	float pred;
6639	double itvNum;
6640	double diff;
6641	size_t index = 0;
6642	size_t block_unpredictable_count = 0;
6643	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6644	for(size_t ii=0; ii<block_size; ii++){
6645	for(size_t jj=0; jj<block_size; jj++){
6646	for(size_t kk=0; kk<block_size; kk++){
6647	curData = *cur_data_pos;
6648	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
6649	diff = curData - pred;
6650	itvNum = fabs(diff)/tmp_realPrecision + 1;
6651	if (itvNum < intvCapacity){
6652	if (diff < 0) itvNum = -itvNum;
6653	type[index] = (int) (itvNum/2) + intvRadius;
6654	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
6655	//ganrantee comporession error against the case of machine-epsilon
6656	if(fabs(curData - pred)>tmp_realPrecision){
6657	type[index] = 0;
6658	pred = curData;
6659	unpredictable_data[block_unpredictable_count ++] = curData;
6660	}
6661	}
6662	else{
6663	type[index] = 0;
6664	pred = curData;
6665	unpredictable_data[block_unpredictable_count ++] = curData;
6666	}
6667	index ++;
6668	cur_data_pos ++;
6669	}
6670	cur_data_pos ++;
6671	}
6672	cur_data_pos += pred_buffer_block_size;
6673	}
6674	total_unpred += block_unpredictable_count;
6675	unpredictable_data += block_unpredictable_count;
6676	reg_count ++;
6677	}
6678	else{
6679	// use SZ
6680	// SZ predication
6681	unpredictable_count = 0;
6682	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
6683	float curData;
6684	float pred3D;
6685	double itvNum, diff;
6686	size_t index = 0;
6687	for(size_t ii=0; ii<block_size; ii++){
6688	for(size_t jj=0; jj<block_size; jj++){
6689	for(size_t kk=0; kk<block_size; kk++){
6690	curData = *cur_data_pos;
6691	pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
6692	- cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
6693	diff = curData - pred3D;
6694	itvNum = fabs(diff)/realPrecision + 1;
6695	if (itvNum < intvCapacity_sz){
6696	if (diff < 0) itvNum = -itvNum;
6697	type[index] = (int) (itvNum/2) + intvRadius;
6698	cur_data_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
6699	//ganrantee comporession error against the case of machine-epsilon
6700	if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
6701	type[index] = 0;
6702	*cur_data_pos = curData;
6703	unpredictable_data[unpredictable_count ++] = curData;
6704	}
6705	}
6706	else{
6707	type[index] = 0;
6708	*cur_data_pos = curData;
6709	unpredictable_data[unpredictable_count ++] = curData;
6710	}
6711	index ++;
6712	cur_data_pos ++;
6713	}
6714	cur_data_pos ++;
6715	}
6716	cur_data_pos += pred_buffer_block_size;
6717	}
6718	total_unpred += unpredictable_count;
6719	unpredictable_data += unpredictable_count;
6720	// change indicator
6721	indicator_pos[k] = 1;
6722	}// end SZ
6723	reg_params_pos ++;
6724	type += block_size * block_size * block_size;
6725	}
6726	indicator_pos += num_z;
6727	}
6728	}
6729	}
6730	free(pred_buffer);
6731	int stateNum = 2*quantization_intervals;
6732	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6733
6734	size_t nodeCount = 0;
6735	init(huffmanTree, result_type, num_blocks*max_num_block_elements);
6736	size_t i = 0;
6737	for (i = 0; i < huffmanTree->stateNum; i++)
6738	if (huffmanTree->code[i]) nodeCount++;
6739	nodeCount = nodeCount*2-1;
6740
6741	unsigned char *treeBytes;
6742	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6743
6744	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
6745	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
6746	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
6747	unsigned char * result_pos = result;
6748	initRandomAccessBytes(result_pos);
6749
6750	result_pos += meta_data_offset;
6751
6752	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
6753	result_pos += exe_params->SZ_SIZE_TYPE;
6754
6755	intToBytes_bigEndian(result_pos, block_size);
6756	result_pos += sizeof(int);
6757	doubleToBytes(result_pos, realPrecision);
6758	result_pos += sizeof(double);
6759	intToBytes_bigEndian(result_pos, quantization_intervals);
6760	result_pos += sizeof(int);
6761	intToBytes_bigEndian(result_pos, treeByteSize);
6762	result_pos += sizeof(int);
6763	intToBytes_bigEndian(result_pos, nodeCount);
6764	result_pos += sizeof(int);
6765	memcpy(result_pos, treeBytes, treeByteSize);
6766	result_pos += treeByteSize;
6767	free(treeBytes);
6768
6769	memcpy(result_pos, &use_mean, sizeof(unsigned char));
6770	result_pos += sizeof(unsigned char);
6771	memcpy(result_pos, &mean, sizeof(float));
6772	result_pos += sizeof(float);
6773	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
6774	result_pos += indicator_size;
6775
6776	//convert the lead/mid/resi to byte stream
6777	if(reg_count > 0){
6778	for(int e=0; e<4; e++){
6779	int stateNum = 2*coeff_intvCapacity_sz;
6780	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
6781	size_t nodeCount = 0;
6782	init(huffmanTree, coeff_type[e], reg_count);
6783	size_t i = 0;
6784	for (i = 0; i < huffmanTree->stateNum; i++)
6785	if (huffmanTree->code[i]) nodeCount++;
6786	nodeCount = nodeCount*2-1;
6787	unsigned char *treeBytes;
6788	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
6789	doubleToBytes(result_pos, precision[e]);
6790	result_pos += sizeof(double);
6791	intToBytes_bigEndian(result_pos, coeff_intvRadius);
6792	result_pos += sizeof(int);
6793	intToBytes_bigEndian(result_pos, treeByteSize);
6794	result_pos += sizeof(int);
6795	intToBytes_bigEndian(result_pos, nodeCount);
6796	result_pos += sizeof(int);
6797	memcpy(result_pos, treeBytes, treeByteSize);
6798	result_pos += treeByteSize;
6799	free(treeBytes);
6800	size_t typeArray_size = 0;
6801	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
6802	sizeToBytes(result_pos, typeArray_size);
6803	result_pos += sizeof(size_t) + typeArray_size;
6804	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
6805	result_pos += sizeof(int);
6806	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
6807	result_pos += coeff_unpredictable_count[e]*sizeof(float);
6808	SZ_ReleaseHuffman(huffmanTree);
6809	}
6810	}
6811	free(coeff_result_type);
6812	free(coeff_unpredictable_data);
6813
6814	//record the number of unpredictable data and also store them
6815	memcpy(result_pos, &total_unpred, sizeof(size_t));
6816	result_pos += sizeof(size_t);
6817	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
6818	result_pos += total_unpred * sizeof(float);
6819	size_t typeArray_size = 0;
6820	encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size);
6821	result_pos += typeArray_size;
6822	size_t totalEncodeSize = result_pos - result;
6823	free(indicator);
6824	free(result_unpredictable_data);
6825	free(result_type);
6826	free(reg_params);
6827
6828
6829	SZ_ReleaseHuffman(huffmanTree);
6830	*comp_size = totalEncodeSize;
6831	return result;
6832	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: