Context Navigation

source: thirdparty/SZ/sz/src/sz_float.c @ e6aa0eb

Revision e6aa0eb, 240.4 KB checked in by Hal Finkel <hfinkel@…>, 6 years ago (diff)
add stddef.h for ptrdiff_t
Property mode set to `100644`

Rev	Line
[2c47b73]	1	/**
	2	* @file sz_float.c
[9ee2ce3]	3	* @author Sheng Di, Dingwen Tao, Xin Liang
[2c47b73]	4	* @date Aug, 2016
	5	* @brief SZ_Init, Compression and Decompression functions
	6	* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
	7	* See COPYRIGHT in top-level directory.
	8	*/
	9
	10
	11	#include <stdio.h>
	12	#include <stdlib.h>
[e6aa0eb]	13	#include <stddef.h>
[2c47b73]	14	#include <string.h>
	15	#include <unistd.h>
	16	#include <math.h>
	17	#include "sz.h"
	18	#include "CompressElement.h"
	19	#include "DynamicByteArray.h"
	20	#include "DynamicIntArray.h"
	21	#include "TightDataPointStorageF.h"
	22	#include "sz_float.h"
	23	#include "sz_float_pwr.h"
	24	#include "szd_float.h"
	25	#include "szd_float_pwr.h"
	26	#include "zlib.h"
	27	#include "rw.h"
	28	#include "sz_float_ts.h"
[9ee2ce3]	29	#include "utility.h"
[2c47b73]	30
	/**
	 * Produce an uncompressed ("skip compression") byte copy of a float array.
	 *
	 * @param data       input array holding dataLength floats
	 * @param dataLength number of floats in data
	 * @param outSize    receives the size in bytes of the returned buffer
	 * @return a newly malloc'ed buffer holding a byte-for-byte copy of data
	 *         (the caller frees it), or NULL if the allocation failed
	 */
	unsigned char* SZ_skip_compress_float(float* data, size_t dataLength, size_t* outSize)
	{
		size_t byteLength = dataLength*sizeof(float);
		unsigned char* out = (unsigned char*)malloc(byteLength);

		*outSize = byteLength;
		/* malloc may return NULL (failure, or a zero-sized request): never copy into it */
		if(out != NULL)
			memcpy(out, data, byteLength);
		return out;
	}
	38	unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision)
	39	{
	40	size_t i = 0, radiusIndex;
	41	float pred_value = 0, pred_err;
	42	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	43	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	44	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
	45	for(i=2;i<dataLength;i++)
	46	{
	47	if(i%confparams_cpr->sampleDistance==0)
	48	{
	49	//pred_value = 2*oriData[i-1] - oriData[i-2];
	50	pred_value = oriData[i-1];
	51	pred_err = fabs(pred_value - oriData[i]);
	52	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	53	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	54	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	55	intervals[radiusIndex]++;
	56	}
	57	}
	58	//compute the appropriate number
	59	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	60	size_t sum = 0;
	61	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	62	{
	63	sum += intervals[i];
	64	if(sum>targetCount)
	65	break;
	66	}
	67	if(i>=confparams_cpr->maxRangeRadius)
	68	i = confparams_cpr->maxRangeRadius-1;
	69
	70	unsigned int accIntervals = 2*(i+1);
	71	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	72
	73	if(powerOf2<32)
	74	powerOf2 = 32;
	75
	76	free(intervals);
	77	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
	78	return powerOf2;
	79	}
	80
	81	unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, double realPrecision)
	82	{
	83	size_t i,j, index;
	84	size_t radiusIndex;
	85	float pred_value = 0, pred_err;
	86	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	87	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	88	size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance;
	89
	90	//float max = oriData[0];
	91	//float min = oriData[0];
	92
	93	for(i=1;i<r1;i++)
	94	{
	95	for(j=1;j<r2;j++)
	96	{
	97	if((i+j)%confparams_cpr->sampleDistance==0)
	98	{
	99	index = i*r2+j;
	100	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
	101	pred_err = fabs(pred_value - oriData[index]);
	102	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	103	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	104	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	105	intervals[radiusIndex]++;
	106
	107	// if (max < oriData[index]) max = oriData[index];
	108	// if (min > oriData[index]) min = oriData[index];
	109	}
	110	}
	111	}
	112	//compute the appropriate number
	113	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	114	size_t sum = 0;
	115	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	116	{
	117	sum += intervals[i];
	118	if(sum>targetCount)
	119	break;
	120	}
	121	if(i>=confparams_cpr->maxRangeRadius)
	122	i = confparams_cpr->maxRangeRadius-1;
	123	unsigned int accIntervals = 2*(i+1);
	124	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	125
	126	if(powerOf2<32)
	127	powerOf2 = 32;
	128
	129	// struct timeval costStart, costEnd;
	130	// double cost_est = 0;
	131	//
	132	// gettimeofday(&costStart, NULL);
	133	//
	134	// //compute estimate of bit-rate and distortion
	135	// double est_br = 0;
	136	// double est_psnr = 0;
	137	// double c1 = log2(targetCount)+1;
	138	// double c2 = -20.0log10(realPrecision) + 20.0log10(max-min) + 10.0*log10(3);
	139	//
	140	// for (i = 0; i < powerOf2/2; i++)
	141	// {
	142	// int count = intervals[i];
	143	// if (count != 0)
	144	// est_br += count*log2(count);
	145	// est_psnr += count;
	146	// }
	147	//
	148	// //compute estimate of bit-rate
	149	// est_br -= c1*est_psnr;
	150	// est_br /= totalSampleSize;
	151	// est_br = -est_br;
	152	//
	153	// //compute estimate of psnr
	154	// est_psnr /= totalSampleSize;
	155	// printf ("sum of P(i) = %lf\n", est_psnr);
	156	// est_psnr = -10.0*log10(est_psnr);
	157	// est_psnr += c2;
	158	//
	159	// printf ("estimate bitrate = %.2f\n", est_br);
	160	// printf ("estimate psnr = %.2f\n",est_psnr);
	161	//
	162	// gettimeofday(&costEnd, NULL);
	163	// cost_est = ((costEnd.tv_sec1000000+costEnd.tv_usec)-(costStart.tv_sec1000000+costStart.tv_usec))/1000000.0;
	164	//
	165	// printf ("analysis time = %f\n", cost_est);
	166
	167	free(intervals);
	168	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
	169	return powerOf2;
	170	}
	171
	172	unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
	173	{
	174	size_t i,j,k, index;
	175	size_t radiusIndex;
	176	size_t r23=r2*r3;
	177	float pred_value = 0, pred_err;
	178	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	179	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	180	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)/confparams_cpr->sampleDistance;
	181
	182	//float max = oriData[0];
	183	//float min = oriData[0];
	184
	185	for(i=1;i<r1;i++)
	186	{
	187	for(j=1;j<r2;j++)
	188	{
	189	for(k=1;k<r3;k++)
	190	{
	191	if((i+j+k)%confparams_cpr->sampleDistance==0)
	192	{
	193	index = ir23+jr3+k;
	194	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
	195	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
	196	pred_err = fabs(pred_value - oriData[index]);
	197	radiusIndex = (pred_err/realPrecision+1)/2;
	198	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	199	{
	200	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	201	//printf("radiusIndex=%d\n", radiusIndex);
	202	}
	203	intervals[radiusIndex]++;
	204
	205	// if (max < oriData[index]) max = oriData[index];
	206	// if (min > oriData[index]) min = oriData[index];
	207	}
	208	}
	209	}
	210	}
	211	//compute the appropriate number
	212	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	213	size_t sum = 0;
	214	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	215	{
	216	sum += intervals[i];
	217	if(sum>targetCount)
	218	break;
	219	}
	220	if(i>=confparams_cpr->maxRangeRadius)
	221	i = confparams_cpr->maxRangeRadius-1;
	222	unsigned int accIntervals = 2*(i+1);
	223	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	224
	225	if(powerOf2<32)
	226	powerOf2 = 32;
	227
	228	// struct timeval costStart, costEnd;
	229	// double cost_est = 0;
	230	//
	231	// gettimeofday(&costStart, NULL);
	232	//
	233	// //compute estimate of bit-rate and distortion
	234	// double est_br = 0;
	235	// double est_psnr = 0;
	236	// double c1 = log2(targetCount)+1;
	237	// double c2 = -20.0log10(realPrecision) + 20.0log10(max-min) + 10.0*log10(3);
	238	//
	239	// for (i = 0; i < powerOf2/2; i++)
	240	// {
	241	// int count = intervals[i];
	242	// if (count != 0)
	243	// est_br += count*log2(count);
	244	// est_psnr += count;
	245	// }
	246	//
	247	// //compute estimate of bit-rate
	248	// est_br -= c1*est_psnr;
	249	// est_br /= totalSampleSize;
	250	// est_br = -est_br;
	251	//
	252	// //compute estimate of psnr
	253	// est_psnr /= totalSampleSize;
	254	// printf ("sum of P(i) = %lf\n", est_psnr);
	255	// est_psnr = -10.0*log10(est_psnr);
	256	// est_psnr += c2;
	257	//
	258	// printf ("estimate bitrate = %.2f\n", est_br);
	259	// printf ("estimate psnr = %.2f\n",est_psnr);
	260	//
	261	// gettimeofday(&costEnd, NULL);
	262	// cost_est = ((costEnd.tv_sec1000000+costEnd.tv_usec)-(costStart.tv_sec1000000+costStart.tv_usec))/1000000.0;
	263	//
	264	// printf ("analysis time = %f\n", cost_est);
	265
	266	free(intervals);
	267	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
	268	return powerOf2;
	269	}
	270
	271
	272	unsigned int optimize_intervals_float_4D(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision)
	273	{
	274	size_t i,j,k,l, index;
	275	size_t radiusIndex;
	276	size_t r234=r2r3r4;
	277	size_t r34=r3*r4;
	278	float pred_value = 0, pred_err;
	279	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	280	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	281	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)*(r4-1)/confparams_cpr->sampleDistance;
	282	for(i=1;i<r1;i++)
	283	{
	284	for(j=1;j<r2;j++)
	285	{
	286	for(k=1;k<r3;k++)
	287	{
	288	for (l=1;l<r4;l++)
	289	{
	290	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
	291	{
	292	index = ir234+jr34+k*r4+l;
	293	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34]
	294	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
	295	pred_err = fabs(pred_value - oriData[index]);
	296	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	297	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	298	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	299	intervals[radiusIndex]++;
	300	}
	301	}
	302	}
	303	}
	304	}
	305	//compute the appropriate number
	306	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	307	size_t sum = 0;
	308	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	309	{
	310	sum += intervals[i];
	311	if(sum>targetCount)
	312	break;
	313	}
	314	if(i>=confparams_cpr->maxRangeRadius)
	315	i = confparams_cpr->maxRangeRadius-1;
	316
	317	unsigned int accIntervals = 2*(i+1);
	318	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	319
	320	if(powerOf2<32)
	321	powerOf2 = 32;
	322
	323	free(intervals);
	324	return powerOf2;
	325	}
	326
	327	TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData,
	328	size_t dataLength, double realPrecision, float valueRangeSize, float medianValue_f)
	329	{
	330	#ifdef HAVE_TIMECMPR
	331	float* decData = NULL;
	332	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	333	decData = (float*)(multisteps->hist_data);
	334	#endif
	335
	336	unsigned int quantization_intervals;
	337	if(exe_params->optQuantMode==1)
	338	quantization_intervals = optimize_intervals_float_1D_opt(oriData, dataLength, realPrecision);
	339	else
	340	quantization_intervals = exe_params->intvCapacity;
	341	updateQuantizationInfo(quantization_intervals);
	342
	343	size_t i;
	344	int reqLength;
	345	float medianValue = medianValue_f;
	346	short radExpo = getExponent_float(valueRangeSize/2);
	347
	348	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	349
	350	int* type = (int) malloc(dataLengthsizeof(int));
	351
	352	float* spaceFillingValue = oriData; //
	353
	354	DynamicIntArray *exactLeadNumArray;
	355	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	356
	357	DynamicByteArray *exactMidByteArray;
	358	new_DBA(&exactMidByteArray, DynArrayInitLen);
	359
	360	DynamicIntArray *resiBitArray;
	361	new_DIA(&resiBitArray, DynArrayInitLen);
	362
	363	unsigned char preDataBytes[4];
	364	intToBytes_bigEndian(preDataBytes, 0);
	365
	366	int reqBytesLength = reqLength/8;
	367	int resiBitsLength = reqLength%8;
	368	float last3CmprsData[3] = {0};
	369
	370	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	371	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	372
	373	//add the first data
	374	type[0] = 0;
	375	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	376	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	377	memcpy(preDataBytes,vce->curBytes,4);
	378	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	379	listAdd_float(last3CmprsData, vce->data);
	380	#ifdef HAVE_TIMECMPR
	381	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	382	decData[0] = vce->data;
	383	#endif
	384
	385	//add the second data
	386	type[1] = 0;
	387	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	388	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	389	memcpy(preDataBytes,vce->curBytes,4);
	390	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	391	listAdd_float(last3CmprsData, vce->data);
	392	#ifdef HAVE_TIMECMPR
	393	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	394	decData[1] = vce->data;
	395	#endif
	396	int state;
	397	double checkRadius;
	398	float curData;
	399	float pred;
	400	float predAbsErr;
	401	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
	402	double interval = 2*realPrecision;
	403
	404	for(i=2;i<dataLength;i++)
	405	{
	406	curData = spaceFillingValue[i];
	407	//pred = 2*last3CmprsData[0] - last3CmprsData[1];
	408	pred = last3CmprsData[0];
	409	predAbsErr = fabs(curData - pred);
[9ee2ce3]	410	if(predAbsErr<checkRadius)
[2c47b73]	411	{
	412	state = (predAbsErr/realPrecision+1)/2;
	413	if(curData>=pred)
	414	{
	415	type[i] = exe_params->intvRadius+state;
	416	pred = pred + state*interval;
	417	}
	418	else //curData<pred
	419	{
	420	type[i] = exe_params->intvRadius-state;
	421	pred = pred - state*interval;
	422	}
	423
	424	//double-check the prediction error in case of machine-epsilon impact
	425	if(fabs(curData-pred)>realPrecision)
	426	{
	427	type[i] = 0;
	428	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	429	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	430	memcpy(preDataBytes,vce->curBytes,4);
	431	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	432
	433	listAdd_float(last3CmprsData, vce->data);
	434	#ifdef HAVE_TIMECMPR
	435	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	436	decData[i] = vce->data;
	437	#endif
	438	}
	439	else
	440	{
	441	listAdd_float(last3CmprsData, pred);
	442	#ifdef HAVE_TIMECMPR
	443	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	444	decData[i] = pred;
	445	#endif
	446	}
	447	continue;
	448	}
	449
	450	//unpredictable data processing
	451	type[i] = 0;
	452	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	453	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	454	memcpy(preDataBytes,vce->curBytes,4);
	455	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	456
	457	listAdd_float(last3CmprsData, vce->data);
	458	#ifdef HAVE_TIMECMPR
	459	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	460	decData[i] = vce->data;
	461	#endif
	462
	463	}//end of for
	464
	465	// char* expSegmentsInBytes;
	466	// int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes);
	467	size_t exactDataNum = exactLeadNumArray->size;
	468
	469	TightDataPointStorageF* tdps;
	470
	471	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	472	type, exactMidByteArray->array, exactMidByteArray->size,
	473	exactLeadNumArray->array,
	474	resiBitArray->array, resiBitArray->size,
	475	resiBitsLength,
	476	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	477
	478	//sdi:Debug
	479	/* int sum =0;
	480	for(i=0;i<dataLength;i++)
	481	if(type[i]==0) sum++;
	482	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
	483
	484	//free memory
	485	free_DIA(exactLeadNumArray);
	486	free_DIA(resiBitArray);
	487	free(type);
	488	free(vce);
	489	free(lce);
	490	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	491
	492	return tdps;
	493	}
	494
	495	void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, TightDataPointStorageF* tdps,
	496	unsigned char** newByteData, size_t *outSize)
	497	{
	498	int floatSize=sizeof(float);
	499	size_t k = 0, i;
	500	tdps->isLossless = 1;
	501	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + floatSize*dataLength;
	502	newByteData = (unsigned char)malloc(totalByteLength);
	503
	504	unsigned char dsLengthBytes[8];
	505	for (i = 0; i < 3; i++)//3
	506	(*newByteData)[k++] = versionNumber[i];
	507
	508	if(exe_params->SZ_SIZE_TYPE==4)//1
	509	(*newByteData)[k++] = 16; //00010000
	510	else
	511	(*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8
	512
	513	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
	514	k = k + MetaDataByteLength;
	515
	516	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
	517	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
	518	(*newByteData)[k++] = dsLengthBytes[i];
	519
	520	if(sysEndianType==BIG_ENDIAN_SYSTEM)
	521	memcpy((newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLengthfloatSize);
	522	else
	523	{
	524	unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
	525	for(i=0;i<dataLength;i++,p+=floatSize)
	526	floatToBytes(p, oriData[i]);
	527	}
	528	*outSize = totalByteLength;
	529	}
	530
	531	char SZ_compress_args_float_NoCkRngeNoGzip_1D(unsigned char** newByteData, float *oriData,
	532	size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f)
	533	{
	534	char compressionType = 0;
	535	TightDataPointStorageF* tdps = NULL;
	536
	537	#ifdef HAVE_TIMECMPR
	538	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	539	{
	540	int timestep = sz_tsc->currentStep;
	541	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	542	{
	543	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
	544	compressionType = 1; //time-series based compression
	545	}
	546	else
	547	{
	548	tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
	549	compressionType = 0; //snapshot-based compression
	550	multisteps->lastSnapshotStep = timestep;
	551	}
	552	}
	553	else
	554	#endif
	555	tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
	556
	557	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
	558
	559	if(outSize>dataLengthsizeof(float))
	560	SZ_compress_args_float_StoreOriData(oriData, dataLength+2, tdps, newByteData, outSize);
	561
	562	free_TightDataPointStorageF(tdps);
	563	return compressionType;
	564	}
	565
	566	TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size_t r2, double realPrecision, float valueRangeSize, float medianValue_f)
	567	{
	568	#ifdef HAVE_TIMECMPR
	569	float* decData = NULL;
	570	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	571	decData = (float*)(multisteps->hist_data);
	572	#endif
	573
	574	unsigned int quantization_intervals;
	575	if(exe_params->optQuantMode==1)
	576	{
	577	quantization_intervals = optimize_intervals_float_2D_opt(oriData, r1, r2, realPrecision);
	578	updateQuantizationInfo(quantization_intervals);
	579	}
	580	else
	581	quantization_intervals = exe_params->intvCapacity;
	582	size_t i,j;
	583	int reqLength;
	584	float pred1D, pred2D;
	585	float diff = 0.0;
	586	double itvNum = 0;
	587	float P0, P1;
	588
	589	size_t dataLength = r1*r2;
	590
	591	P0 = (float)malloc(r2sizeof(float));
	592	memset(P0, 0, r2*sizeof(float));
	593	P1 = (float)malloc(r2sizeof(float));
	594	memset(P1, 0, r2*sizeof(float));
	595
	596	float medianValue = medianValue_f;
	597	short radExpo = getExponent_float(valueRangeSize/2);
	598	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	599
	600	int* type = (int) malloc(dataLengthsizeof(int));
	601	//type[dataLength]=0;
	602
	603	float* spaceFillingValue = oriData; //
	604
	605	DynamicIntArray *exactLeadNumArray;
	606	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	607
	608	DynamicByteArray *exactMidByteArray;
	609	new_DBA(&exactMidByteArray, DynArrayInitLen);
	610
	611	DynamicIntArray *resiBitArray;
	612	new_DIA(&resiBitArray, DynArrayInitLen);
	613
	614	type[0] = 0;
	615	unsigned char preDataBytes[4];
	616	intToBytes_bigEndian(preDataBytes, 0);
	617
	618	int reqBytesLength = reqLength/8;
	619	int resiBitsLength = reqLength%8;
	620
	621	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	622	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	623
	624	/* Process Row-0 data 0*/
	625	type[0] = 0;
	626	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	627	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	628	memcpy(preDataBytes,vce->curBytes,4);
	629	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	630	P1[0] = vce->data;
	631	#ifdef HAVE_TIMECMPR
	632	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	633	decData[0] = vce->data;
	634	#endif
	635
	636	float curData;
	637
	638	/* Process Row-0 data 1*/
	639	pred1D = P1[0];
	640	curData = spaceFillingValue[1];
	641	diff = curData - pred1D;
	642
	643	itvNum = fabs(diff)/realPrecision + 1;
	644
	645	if (itvNum < exe_params->intvCapacity)
	646	{
	647	if (diff < 0) itvNum = -itvNum;
	648	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	649	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	650
	651	//ganrantee comporession error against the case of machine-epsilon
	652	if(fabs(spaceFillingValue[1]-P1[1])>realPrecision)
	653	{
	654	type[1] = 0;
	655	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	656	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	657	memcpy(preDataBytes,vce->curBytes,4);
	658	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	659
	660	P1[1] = vce->data;
	661	}
	662	}
	663	else
	664	{
	665	type[1] = 0;
	666	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	667	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	668	memcpy(preDataBytes,vce->curBytes,4);
	669	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	670	P1[1] = vce->data;
	671	}
	672	#ifdef HAVE_TIMECMPR
	673	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	674	decData[1] = P1[1];
	675	#endif
	676
	677	/* Process Row-0 data 2 --> data r2-1 */
	678	for (j = 2; j < r2; j++)
	679	{
	680	pred1D = 2*P1[j-1] - P1[j-2];
	681	curData = spaceFillingValue[j];
	682	diff = curData - pred1D;
	683
	684	itvNum = fabs(diff)/realPrecision + 1;
	685
	686	if (itvNum < exe_params->intvCapacity)
	687	{
	688	if (diff < 0) itvNum = -itvNum;
	689	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	690	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	691
	692	//ganrantee comporession error against the case of machine-epsilon
	693	if(fabs(curData-P1[j])>realPrecision)
	694	{
	695	type[j] = 0;
	696	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	697	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	698	memcpy(preDataBytes,vce->curBytes,4);
	699	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	700
	701	P1[j] = vce->data;
	702	}
	703	}
	704	else
	705	{
	706	type[j] = 0;
	707	compressSingleFloatValue(vce,curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	708	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	709	memcpy(preDataBytes,vce->curBytes,4);
	710	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	711	P1[j] = vce->data;
	712	}
	713	#ifdef HAVE_TIMECMPR
	714	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	715	decData[j] = P1[j];
	716	#endif
	717	}
	718
	719	/* Process Row-1 --> Row-r1-1 */
	720	size_t index;
	721	for (i = 1; i < r1; i++)
	722	{
	723	/* Process row-i data 0 */
	724	index = i*r2;
	725	pred1D = P1[0];
	726	curData = spaceFillingValue[index];
	727	diff = curData - pred1D;
	728
	729	itvNum = fabs(diff)/realPrecision + 1;
	730
	731	if (itvNum < exe_params->intvCapacity)
	732	{
	733	if (diff < 0) itvNum = -itvNum;
	734	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	735	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	736
	737	//ganrantee comporession error against the case of machine-epsilon
	738	if(fabs(curData-P0[0])>realPrecision)
	739	{
	740	type[index] = 0;
	741	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	742	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	743	memcpy(preDataBytes,vce->curBytes,4);
	744	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	745
	746	P0[0] = vce->data;
	747	}
	748	}
	749	else
	750	{
	751	type[index] = 0;
	752	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	753	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	754	memcpy(preDataBytes,vce->curBytes,4);
	755	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	756	P0[0] = vce->data;
	757	}
	758	#ifdef HAVE_TIMECMPR
	759	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	760	decData[index] = P0[0];
	761	#endif
	762
	763	/* Process row-i data 1 --> r2-1*/
	764	for (j = 1; j < r2; j++)
	765	{
	766	index = i*r2+j;
	767	pred2D = P0[j-1] + P1[j] - P1[j-1];
	768
	769	curData = spaceFillingValue[index];
	770	diff = curData - pred2D;
	771
	772	itvNum = fabs(diff)/realPrecision + 1;
	773
	774	if (itvNum < exe_params->intvCapacity)
	775	{
	776	if (diff < 0) itvNum = -itvNum;
	777	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	778	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	779
	780	//ganrantee comporession error against the case of machine-epsilon
	781	if(fabs(curData-P0[j])>realPrecision)
	782	{
	783	type[index] = 0;
	784	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	785	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	786	memcpy(preDataBytes,vce->curBytes,4);
	787	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	788
	789	P0[j] = vce->data;
	790	}
	791	}
	792	else
	793	{
	794	type[index] = 0;
	795	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	796	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	797	memcpy(preDataBytes,vce->curBytes,4);
	798	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	799	P0[j] = vce->data;
	800	}
	801	#ifdef HAVE_TIMECMPR
	802	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	803	decData[index] = P0[j];
	804	#endif
	805	}
	806
	807	float *Pt;
	808	Pt = P1;
	809	P1 = P0;
	810	P0 = Pt;
	811	}
	812
	813	if(r2!=1)
	814	free(P0);
	815	free(P1);
	816	size_t exactDataNum = exactLeadNumArray->size;
	817
	818	TightDataPointStorageF* tdps;
	819
	820	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	821	type, exactMidByteArray->array, exactMidByteArray->size,
	822	exactLeadNumArray->array,
	823	resiBitArray->array, resiBitArray->size,
	824	resiBitsLength,
	825	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	826
	827	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	828	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	829
	830	// for(i = 3800;i<3844;i++)
	831	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
	832
	833	//free memory
	834	free_DIA(exactLeadNumArray);
	835	free_DIA(resiBitArray);
	836	free(type);
	837	free(vce);
	838	free(lce);
	839	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	840
	841	return tdps;
	842	}
	843
	844	/**
	845	*
	846	* Note: @r1 is high dimension
	847	* @r2 is low dimension
	848	* */
	849	char SZ_compress_args_float_NoCkRngeNoGzip_2D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
	850	{
	851	size_t dataLength = r1*r2;
	852	char compressionType = 0;
	853	TightDataPointStorageF* tdps = NULL;
	854
	855	#ifdef HAVE_TIMECMPR
	856	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	857	{
	858	int timestep = sz_tsc->currentStep;
	859	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	860	{
	861	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
	862	compressionType = 1; //time-series based compression
	863	}
	864	else
	865	{
	866	tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
	867	compressionType = 0; //snapshot-based compression
	868	multisteps->lastSnapshotStep = timestep;
	869	}
	870	}
	871	else
	872	#endif
	873	tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
	874
	875	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
	876
	877	if(outSize>dataLengthsizeof(float))
	878	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	879
	880	free_TightDataPointStorageF(tdps);
	881
	882	return compressionType;
	883	}
	884
	885	TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float valueRangeSize, float medianValue_f)
	886	{
	887	#ifdef HAVE_TIMECMPR
	888	float* decData = NULL;
	889	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	890	decData = (float*)(multisteps->hist_data);
	891	#endif
	892
	893	unsigned int quantization_intervals;
	894	if(exe_params->optQuantMode==1)
	895	{
	896	quantization_intervals = optimize_intervals_float_3D_opt(oriData, r1, r2, r3, realPrecision);
	897	updateQuantizationInfo(quantization_intervals);
	898	}
	899	else
	900	quantization_intervals = exe_params->intvCapacity;
	901	size_t i,j,k;
	902	int reqLength;
	903	float pred1D, pred2D, pred3D;
	904	float diff = 0.0;
	905	double itvNum = 0;
	906	float P0, P1;
	907
	908	size_t dataLength = r1r2r3;
	909	size_t r23 = r2*r3;
	910	P0 = (float)malloc(r23sizeof(float));
	911	P1 = (float)malloc(r23sizeof(float));
	912
	913	float medianValue = medianValue_f;
	914	short radExpo = getExponent_float(valueRangeSize/2);
	915	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	916
	917	int* type = (int) malloc(dataLengthsizeof(int));
	918
	919	float* spaceFillingValue = oriData; //
	920
	921	DynamicIntArray *exactLeadNumArray;
	922	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	923
	924	DynamicByteArray *exactMidByteArray;
	925	new_DBA(&exactMidByteArray, DynArrayInitLen);
	926
	927	DynamicIntArray *resiBitArray;
	928	new_DIA(&resiBitArray, DynArrayInitLen);
	929
	930	unsigned char preDataBytes[4];
	931	intToBytes_bigEndian(preDataBytes, 0);
	932
	933	int reqBytesLength = reqLength/8;
	934	int resiBitsLength = reqLength%8;
	935
	936	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	937	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	938
	939
	940	/////////////////////////// Process layer-0 ///////////////////////////
	941	/* Process Row-0 data 0*/
	942	type[0] = 0;
	943	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	944	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	945	memcpy(preDataBytes,vce->curBytes,4);
	946	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	947	P1[0] = vce->data;
	948	#ifdef HAVE_TIMECMPR
	949	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	950	decData[0] = P1[0];
	951	#endif
	952
	953	float curData;
	954
	955	/* Process Row-0 data 1*/
	956	pred1D = P1[0];
	957	curData = spaceFillingValue[1];
	958	diff = curData - pred1D;
	959
	960	itvNum = fabs(diff)/realPrecision + 1;
	961
	962	if (itvNum < exe_params->intvCapacity)
	963	{
	964	if (diff < 0) itvNum = -itvNum;
	965	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	966	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	967
	968	//ganrantee comporession error against the case of machine-epsilon
	969	if(fabs(curData-P1[1])>realPrecision)
	970	{
	971	type[1] = 0;
	972	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	973	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	974	memcpy(preDataBytes,vce->curBytes,4);
	975	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	976
	977	P1[1] = vce->data;
	978	}
	979	}
	980	else
	981	{
	982	type[1] = 0;
	983	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	984	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	985	memcpy(preDataBytes,vce->curBytes,4);
	986	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	987	P1[1] = vce->data;
	988	}
	989	#ifdef HAVE_TIMECMPR
	990	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	991	decData[1] = P1[1];
	992	#endif
	993
	994	/* Process Row-0 data 2 --> data r3-1 */
	995	for (j = 2; j < r3; j++)
	996	{
	997	pred1D = 2*P1[j-1] - P1[j-2];
	998	curData = spaceFillingValue[j];
	999	diff = curData - pred1D;
	1000
	1001	itvNum = fabs(diff)/realPrecision + 1;
	1002
	1003	if (itvNum < exe_params->intvCapacity)
	1004	{
	1005	if (diff < 0) itvNum = -itvNum;
	1006	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	1007	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	1008
	1009	//ganrantee comporession error against the case of machine-epsilon
	1010	if(fabs(curData-P1[j])>realPrecision)
	1011	{
	1012	type[j] = 0;
	1013	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1014	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1015	memcpy(preDataBytes,vce->curBytes,4);
	1016	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1017
	1018	P1[j] = vce->data;
	1019	}
	1020	}
	1021	else
	1022	{
	1023	type[j] = 0;
	1024	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1025	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1026	memcpy(preDataBytes,vce->curBytes,4);
	1027	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1028	P1[j] = vce->data;
	1029	}
	1030	#ifdef HAVE_TIMECMPR
	1031	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1032	decData[j] = P1[j];
	1033	#endif
	1034	}
	1035
	1036	/* Process Row-1 --> Row-r2-1 */
	1037	size_t index;
	1038	for (i = 1; i < r2; i++)
	1039	{
	1040	/* Process row-i data 0 */
	1041	index = i*r3;
	1042	pred1D = P1[index-r3];
	1043	curData = spaceFillingValue[index];
	1044	diff = curData - pred1D;
	1045
	1046	itvNum = fabs(diff)/realPrecision + 1;
	1047
	1048	if (itvNum < exe_params->intvCapacity)
	1049	{
	1050	if (diff < 0) itvNum = -itvNum;
	1051	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1052	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1053
	1054	//ganrantee comporession error against the case of machine-epsilon
	1055	if(fabs(curData-P1[index])>realPrecision)
	1056	{
	1057	type[index] = 0;
	1058	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1059	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1060	memcpy(preDataBytes,vce->curBytes,4);
	1061	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1062
	1063	P1[index] = vce->data;
	1064	}
	1065	}
	1066	else
	1067	{
	1068	type[index] = 0;
	1069	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1070	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1071	memcpy(preDataBytes,vce->curBytes,4);
	1072	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1073	P1[index] = vce->data;
	1074	}
	1075	#ifdef HAVE_TIMECMPR
	1076	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1077	decData[index] = P1[index];
	1078	#endif
	1079
	1080	/* Process row-i data 1 --> data r3-1*/
	1081	for (j = 1; j < r3; j++)
	1082	{
	1083	index = i*r3+j;
	1084	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
	1085
	1086	curData = spaceFillingValue[index];
	1087	diff = curData - pred2D;
	1088
	1089	itvNum = fabs(diff)/realPrecision + 1;
	1090
	1091	if (itvNum < exe_params->intvCapacity)
	1092	{
	1093	if (diff < 0) itvNum = -itvNum;
	1094	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1095	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1096
	1097	//ganrantee comporession error against the case of machine-epsilon
	1098	if(fabs(curData-P1[index])>realPrecision)
	1099	{
	1100	type[index] = 0;
	1101	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1102	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1103	memcpy(preDataBytes,vce->curBytes,4);
	1104	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1105
	1106	P1[index] = vce->data;
	1107	}
	1108	}
	1109	else
	1110	{
	1111	type[index] = 0;
	1112	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1113	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1114	memcpy(preDataBytes,vce->curBytes,4);
	1115	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1116	P1[index] = vce->data;
	1117	}
	1118	#ifdef HAVE_TIMECMPR
	1119	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1120	decData[index] = P1[index];
	1121	#endif
	1122	}
	1123	}
	1124
	1125
	1126	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
	1127
	1128	for (k = 1; k < r1; k++)
	1129	{
	1130	/* Process Row-0 data 0*/
	1131	index = k*r23;
	1132	pred1D = P1[0];
	1133	curData = spaceFillingValue[index];
	1134	diff = curData - pred1D;
	1135
	1136	itvNum = fabs(diff)/realPrecision + 1;
	1137
	1138	if (itvNum < exe_params->intvCapacity)
	1139	{
	1140	if (diff < 0) itvNum = -itvNum;
	1141	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1142	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1143
	1144	//ganrantee comporession error against the case of machine-epsilon
	1145	if(fabs(curData-P0[0])>realPrecision)
	1146	{
	1147	type[index] = 0;
	1148	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1149	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1150	memcpy(preDataBytes,vce->curBytes,4);
	1151	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1152
	1153	P0[0] = vce->data;
	1154	}
	1155	}
	1156	else
	1157	{
	1158	type[index] = 0;
	1159	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1160	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1161	memcpy(preDataBytes,vce->curBytes,4);
	1162	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1163	P0[0] = vce->data;
	1164	}
	1165	#ifdef HAVE_TIMECMPR
	1166	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1167	decData[index] = P0[0];
	1168	#endif
	1169
	1170	/* Process Row-0 data 1 --> data r3-1 */
	1171	for (j = 1; j < r3; j++)
	1172	{
	1173	//index = kr2r3+j;
	1174	index ++;
	1175	pred2D = P0[j-1] + P1[j] - P1[j-1];
	1176	curData = spaceFillingValue[index];
	1177	diff = spaceFillingValue[index] - pred2D;
	1178
	1179	itvNum = fabs(diff)/realPrecision + 1;
	1180
	1181	if (itvNum < exe_params->intvCapacity)
	1182	{
	1183	if (diff < 0) itvNum = -itvNum;
	1184	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1185	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1186	//ganrantee comporession error against the case of machine-epsilon
	1187	if(fabs(curData-P0[j])>realPrecision)
	1188	{
	1189	type[index] = 0;
	1190	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1191	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1192	memcpy(preDataBytes,vce->curBytes,4);
	1193	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1194
	1195	P0[j] = vce->data;
	1196	}
	1197	}
	1198	else
	1199	{
	1200	type[index] = 0;
	1201	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1202	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1203	memcpy(preDataBytes,vce->curBytes,4);
	1204	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1205	P0[j] = vce->data;
	1206	}
	1207	#ifdef HAVE_TIMECMPR
	1208	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1209	decData[index] = P0[j];
	1210	#endif
	1211	}
	1212
	1213	/* Process Row-1 --> Row-r2-1 */
	1214	size_t index2D;
	1215	for (i = 1; i < r2; i++)
	1216	{
	1217	/* Process Row-i data 0 */
	1218	index = kr23 + ir3;
	1219	index2D = i*r3;
	1220	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
	1221	curData = spaceFillingValue[index];
	1222	diff = spaceFillingValue[index] - pred2D;
	1223
	1224	itvNum = fabs(diff)/realPrecision + 1;
	1225
	1226	if (itvNum < exe_params->intvCapacity)
	1227	{
	1228	if (diff < 0) itvNum = -itvNum;
	1229	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1230	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1231	//ganrantee comporession error against the case of machine-epsilon
	1232	if(fabs(curData-P0[index2D])>realPrecision)
	1233	{
	1234	type[index] = 0;
	1235	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1236	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1237	memcpy(preDataBytes,vce->curBytes,4);
	1238	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1239
	1240	P0[index2D] = vce->data;
	1241	}
	1242	}
	1243	else
	1244	{
	1245	type[index] = 0;
	1246	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1247	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1248	memcpy(preDataBytes,vce->curBytes,4);
	1249	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1250	P0[index2D] = vce->data;
	1251	}
	1252	#ifdef HAVE_TIMECMPR
	1253	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1254	decData[index] = P0[index2D];
	1255	#endif
	1256
	1257	/* Process Row-i data 1 --> data r3-1 */
	1258	for (j = 1; j < r3; j++)
	1259	{
	1260	// if(k==63&&i==43&&j==27)
	1261	// printf("i=%d\n", i);
	1262	//index = kr2r3 + i*r3 + j;
	1263	index ++;
	1264	index2D = i*r3 + j;
	1265	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
	1266	curData = spaceFillingValue[index];
	1267	diff = curData - pred3D;
	1268
	1269	itvNum = fabs(diff)/realPrecision + 1;
	1270
	1271	if (itvNum < exe_params->intvCapacity)
	1272	{
	1273	if (diff < 0) itvNum = -itvNum;
	1274	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1275	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1276
	1277	//ganrantee comporession error against the case of machine-epsilon
	1278	if(fabs(curData-P0[index2D])>realPrecision)
	1279	{
	1280	type[index] = 0;
	1281	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1282	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1283	memcpy(preDataBytes,vce->curBytes,4);
	1284	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1285
	1286	P0[index2D] = vce->data;
	1287	}
	1288	}
	1289	else
	1290	{
	1291	type[index] = 0;
	1292	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1293	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1294	memcpy(preDataBytes,vce->curBytes,4);
	1295	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1296	P0[index2D] = vce->data;
	1297	}
	1298	#ifdef HAVE_TIMECMPR
	1299	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1300	decData[index] = P0[index2D];
	1301	#endif
	1302	}
	1303	}
	1304
	1305	float *Pt;
	1306	Pt = P1;
	1307	P1 = P0;
	1308	P0 = Pt;
	1309	}
	1310	if(r23!=1)
	1311	free(P0);
	1312	free(P1);
	1313	size_t exactDataNum = exactLeadNumArray->size;
	1314
	1315	TightDataPointStorageF* tdps;
	1316
	1317	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	1318	type, exactMidByteArray->array, exactMidByteArray->size,
	1319	exactLeadNumArray->array,
	1320	resiBitArray->array, resiBitArray->size,
	1321	resiBitsLength,
	1322	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	1323
	1324	//sdi:Debug
	1325	/* int sum =0;
	1326	for(i=0;i<dataLength;i++)
	1327	if(type[i]==0) sum++;
	1328	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
	1329
	1330
	1331	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	1332	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	1333
	1334	//free memory
	1335	free_DIA(exactLeadNumArray);
	1336	free_DIA(resiBitArray);
	1337	free(type);
	1338	free(vce);
	1339	free(lce);
	1340	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	1341
	1342	return tdps;
	1343	}
	1344
	1345	char SZ_compress_args_float_NoCkRngeNoGzip_3D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
	1346	{
	1347	size_t dataLength = r1r2r3;
	1348	char compressionType = 0;
	1349	TightDataPointStorageF* tdps = NULL;
	1350
	1351	#ifdef HAVE_TIMECMPR
	1352	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1353	{
	1354	int timestep = sz_tsc->currentStep;
	1355	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	1356	{
	1357	tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
	1358	compressionType = 1; //time-series based compression
	1359	}
	1360	else
[9ee2ce3]	1361	{
	1362	if(sz_with_regression == SZ_NO_REGRESSION)
	1363	tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
	1364	else
	1365	*newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
[2c47b73]	1366	compressionType = 0; //snapshot-based compression
	1367	multisteps->lastSnapshotStep = timestep;
	1368	}
	1369	}
	1370	else
	1371	#endif
	1372	tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
	1373
[9ee2ce3]	1374	if(tdps!=NULL)
	1375	{
	1376	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
	1377	if(outSize>dataLengthsizeof(float))
	1378	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1379	free_TightDataPointStorageF(tdps);
	1380	}
[2c47b73]	1381
	1382	return compressionType;
	1383	}
	1384
	1385
	1386	TightDataPointStorageF* SZ_compress_float_4D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, float valueRangeSize, float medianValue_f)
	1387	{
	1388	unsigned int quantization_intervals;
	1389	if(exe_params->optQuantMode==1)
	1390	{
	1391	quantization_intervals = optimize_intervals_float_4D(oriData, r1, r2, r3, r4, realPrecision);
	1392	updateQuantizationInfo(quantization_intervals);
	1393	}
	1394	else
	1395	quantization_intervals = exe_params->intvCapacity;
	1396
	1397	size_t i,j,k;
	1398	int reqLength;
	1399	float pred1D, pred2D, pred3D;
	1400	float diff = 0.0;
	1401	double itvNum = 0;
	1402	float P0, P1;
	1403
	1404	size_t dataLength = r1r2r3*r4;
	1405
	1406	size_t r234 = r2r3r4;
	1407	size_t r34 = r3*r4;
	1408
	1409	P0 = (float)malloc(r34sizeof(float));
	1410	P1 = (float)malloc(r34sizeof(float));
	1411
	1412	float medianValue = medianValue_f;
	1413	short radExpo = getExponent_float(valueRangeSize/2);
	1414	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	1415
	1416	int* type = (int) malloc(dataLengthsizeof(int));
	1417
	1418	float* spaceFillingValue = oriData; //
	1419
	1420	DynamicIntArray *exactLeadNumArray;
	1421	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	1422
	1423	DynamicByteArray *exactMidByteArray;
	1424	new_DBA(&exactMidByteArray, DynArrayInitLen);
	1425
	1426	DynamicIntArray *resiBitArray;
	1427	new_DIA(&resiBitArray, DynArrayInitLen);
	1428
	1429	unsigned char preDataBytes[4];
	1430	intToBytes_bigEndian(preDataBytes, 0);
	1431
	1432	int reqBytesLength = reqLength/8;
	1433	int resiBitsLength = reqLength%8;
	1434
	1435	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	1436	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	1437
	1438
	1439	size_t l;
	1440	for (l = 0; l < r1; l++)
	1441	{
	1442
	1443	/////////////////////////// Process layer-0 ///////////////////////////
	1444	/* Process Row-0 data 0*/
	1445	size_t index = l*r234;
	1446	size_t index2D = 0;
	1447
	1448	type[index] = 0;
	1449	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1450	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1451	memcpy(preDataBytes,vce->curBytes,4);
	1452	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1453	P1[index2D] = vce->data;
	1454
	1455	/* Process Row-0 data 1*/
	1456	index = l*r234+1;
	1457	index2D = 1;
	1458
	1459	pred1D = P1[index2D-1];
	1460	diff = spaceFillingValue[index] - pred1D;
	1461
	1462	itvNum = fabs(diff)/realPrecision + 1;
	1463
	1464	if (itvNum < exe_params->intvCapacity)
	1465	{
	1466	if (diff < 0) itvNum = -itvNum;
	1467	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1468	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1469	}
	1470	else
	1471	{
	1472	type[index] = 0;
	1473	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1474	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1475	memcpy(preDataBytes,vce->curBytes,4);
	1476	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1477	P1[index2D] = vce->data;
	1478	}
	1479
	1480	/* Process Row-0 data 2 --> data r4-1 */
	1481	for (j = 2; j < r4; j++)
	1482	{
	1483	index = l*r234+j;
	1484	index2D = j;
	1485
	1486	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	1487	diff = spaceFillingValue[index] - pred1D;
	1488
	1489	itvNum = fabs(diff)/realPrecision + 1;
	1490
	1491	if (itvNum < exe_params->intvCapacity)
	1492	{
	1493	if (diff < 0) itvNum = -itvNum;
	1494	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1495	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1496	}
	1497	else
	1498	{
	1499	type[index] = 0;
	1500	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1501	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1502	memcpy(preDataBytes,vce->curBytes,4);
	1503	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1504	P1[index2D] = vce->data;
	1505	}
	1506	}
	1507
	1508	/* Process Row-1 --> Row-r3-1 */
	1509	for (i = 1; i < r3; i++)
	1510	{
	1511	/* Process row-i data 0 */
	1512	index = lr234+ir4;
	1513	index2D = i*r4;
	1514
	1515	pred1D = P1[index2D-r4];
	1516	diff = spaceFillingValue[index] - pred1D;
	1517
	1518	itvNum = fabs(diff)/realPrecision + 1;
	1519
	1520	if (itvNum < exe_params->intvCapacity)
	1521	{
	1522	if (diff < 0) itvNum = -itvNum;
	1523	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1524	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1525	}
	1526	else
	1527	{
	1528	type[index] = 0;
	1529	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1530	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1531	memcpy(preDataBytes,vce->curBytes,4);
	1532	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1533	P1[index2D] = vce->data;
	1534	}
	1535
	1536	/* Process row-i data 1 --> data r4-1*/
	1537	for (j = 1; j < r4; j++)
	1538	{
	1539	index = lr234+ir4+j;
	1540	index2D = i*r4+j;
	1541
	1542	pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1];
	1543
	1544	diff = spaceFillingValue[index] - pred2D;
	1545
	1546	itvNum = fabs(diff)/realPrecision + 1;
	1547
	1548	if (itvNum < exe_params->intvCapacity)
	1549	{
	1550	if (diff < 0) itvNum = -itvNum;
	1551	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1552	P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1553	}
	1554	else
	1555	{
	1556	type[index] = 0;
	1557	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1558	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1559	memcpy(preDataBytes,vce->curBytes,4);
	1560	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1561	P1[index2D] = vce->data;
	1562	}
	1563	}
	1564	}
	1565
	1566
	1567	/////////////////////////// Process layer-1 --> layer-r2-1 ///////////////////////////
	1568
	1569	for (k = 1; k < r2; k++)
	1570	{
	1571	/* Process Row-0 data 0*/
	1572	index = lr234+kr34;
	1573	index2D = 0;
	1574
	1575	pred1D = P1[index2D];
	1576	diff = spaceFillingValue[index] - pred1D;
	1577
	1578	itvNum = fabs(diff)/realPrecision + 1;
	1579
	1580	if (itvNum < exe_params->intvCapacity)
	1581	{
	1582	if (diff < 0) itvNum = -itvNum;
	1583	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1584	P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1585	}
	1586	else
	1587	{
	1588	type[index] = 0;
	1589	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1590	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1591	memcpy(preDataBytes,vce->curBytes,4);
	1592	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1593	P0[index2D] = vce->data;
	1594	}
	1595
	1596	/* Process Row-0 data 1 --> data r4-1 */
	1597	for (j = 1; j < r4; j++)
	1598	{
	1599	index = lr234+kr34+j;
	1600	index2D = j;
	1601
	1602	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	1603	diff = spaceFillingValue[index] - pred2D;
	1604
	1605	itvNum = fabs(diff)/realPrecision + 1;
	1606
	1607	if (itvNum < exe_params->intvCapacity)
	1608	{
	1609	if (diff < 0) itvNum = -itvNum;
	1610	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1611	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1612	}
	1613	else
	1614	{
	1615	type[index] = 0;
	1616	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1617	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1618	memcpy(preDataBytes,vce->curBytes,4);
	1619	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1620	P0[index2D] = vce->data;
	1621	}
	1622	}
	1623
	1624	/* Process Row-1 --> Row-r3-1 */
	1625	for (i = 1; i < r3; i++)
	1626	{
	1627	/* Process Row-i data 0 */
	1628	index = lr234+kr34+i*r4;
	1629	index2D = i*r4;
	1630
	1631	pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4];
	1632	diff = spaceFillingValue[index] - pred2D;
	1633
	1634	itvNum = fabs(diff)/realPrecision + 1;
	1635
	1636	if (itvNum < exe_params->intvCapacity)
	1637	{
	1638	if (diff < 0) itvNum = -itvNum;
	1639	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1640	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1641	}
	1642	else
	1643	{
	1644	type[index] = 0;
	1645	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1646	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1647	memcpy(preDataBytes,vce->curBytes,4);
	1648	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1649	P0[index2D] = vce->data;
	1650	}
	1651
	1652	/* Process Row-i data 1 --> data r4-1 */
	1653	for (j = 1; j < r4; j++)
	1654	{
	1655	index = lr234+kr34+i*r4+j;
	1656	index2D = i*r4+j;
	1657
	1658	pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1];
	1659	diff = spaceFillingValue[index] - pred3D;
	1660
	1661
	1662	itvNum = fabs(diff)/realPrecision + 1;
	1663
	1664	if (itvNum < exe_params->intvCapacity)
	1665	{
	1666	if (diff < 0) itvNum = -itvNum;
	1667	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1668	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1669	}
	1670	else
	1671	{
	1672	type[index] = 0;
	1673	compressSingleFloatValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1674	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1675	memcpy(preDataBytes,vce->curBytes,4);
	1676	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1677	P0[index2D] = vce->data;
	1678	}
	1679	}
	1680	}
	1681
	1682	float *Pt;
	1683	Pt = P1;
	1684	P1 = P0;
	1685	P0 = Pt;
	1686	}
	1687	}
	1688
	1689	free(P0);
	1690	free(P1);
	1691	size_t exactDataNum = exactLeadNumArray->size;
	1692
	1693	TightDataPointStorageF* tdps;
	1694
	1695	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	1696	type, exactMidByteArray->array, exactMidByteArray->size,
	1697	exactLeadNumArray->array,
	1698	resiBitArray->array, resiBitArray->size,
	1699	resiBitsLength,
	1700	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	1701
	1702	//free memory
	1703	free_DIA(exactLeadNumArray);
	1704	free_DIA(resiBitArray);
	1705	free(type);
	1706	free(vce);
	1707	free(lce);
	1708	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	1709
	1710	return tdps;
	1711	}
	1712
	1713	char SZ_compress_args_float_NoCkRngeNoGzip_4D(unsigned char** newByteData, float oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f)
	1714	{
	1715	TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_f);
	1716
	1717	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
	1718
	1719	int dataLength = r1r2r3*r4;
	1720	if(outSize>dataLengthsizeof(float))
	1721	SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1722
	1723	free_TightDataPointStorageF(tdps);
	1724
	1725	return 0;
	1726	}
	1727
	1728	void SZ_compress_args_float_withinRange(unsigned char** newByteData, float oriData, size_t dataLength, size_t outSize)
	1729	{
	1730	TightDataPointStorageF* tdps = (TightDataPointStorageF*) malloc(sizeof(TightDataPointStorageF));
	1731	tdps->rtypeArray = NULL;
	1732	tdps->typeArray = NULL;
	1733	tdps->leadNumArray = NULL;
	1734	tdps->residualMidBits = NULL;
	1735
	1736	tdps->allSameData = 1;
	1737	tdps->dataSeriesLength = dataLength;
	1738	tdps->exactMidBytes = (unsigned char)malloc(sizeof(unsigned char)4);
	1739	tdps->pwrErrBoundBytes = NULL;
	1740	tdps->isLossless = 0;
	1741	float value = oriData[0];
	1742	floatToBytes(tdps->exactMidBytes, value);
	1743	tdps->exactMidBytes_size = 4;
	1744
	1745	size_t tmpOutSize;
	1746	//unsigned char *tmpByteData;
	1747	convertTDPStoFlatBytes_float(tdps, newByteData, &tmpOutSize);
	1748
	1749	//newByteData = (unsigned char)malloc(sizeof(unsigned char)*12); //for floating-point data (1+3+4+4)
	1750	//memcpy(*newByteData, tmpByteData, 12);
	1751	*outSize = tmpOutSize; //8+SZ_SIZE_TYPE; //8==3+1+4(float_size)
	1752	free_TightDataPointStorageF(tdps);
	1753	}
	1754
	1755	int SZ_compress_args_float_wRngeNoGzip(unsigned char** newByteData, float *oriData,
	1756	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
	1757	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
	1758	{
	1759	int status = SZ_SCES;
	1760	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
	1761	float valueRangeSize = 0, medianValue = 0;
	1762
	1763	float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
	1764	float max = min+valueRangeSize;
	1765	double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1766
	1767	if(valueRangeSize <= realPrecision)
	1768	{
	1769	SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
	1770	}
	1771	else
	1772	{
	1773	// SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize);
	1774	if(r5==0&&r4==0&&r3==0&&r2==0)
	1775	{
	1776	if(errBoundMode>=PW_REL)
	1777	{
[9ee2ce3]	1778	SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
	1779	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
[2c47b73]	1780	}
	1781	else
	1782	SZ_compress_args_float_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1783	}
	1784	else if(r5==0&&r4==0&&r3==0)
	1785	{
	1786	if(errBoundMode>=PW_REL)
[9ee2ce3]	1787	SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r2, r1, outSize, min, max);
[2c47b73]	1788	else
	1789	SZ_compress_args_float_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1790	}
	1791	else if(r5==0&&r4==0)
	1792	{
	1793	if(errBoundMode>=PW_REL)
[9ee2ce3]	1794	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r3, r2, r1, outSize, min, max);
[2c47b73]	1795	else
	1796	SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1797	}
	1798	else if(r5==0)
	1799	{
	1800	if(errBoundMode>=PW_REL)
[9ee2ce3]	1801	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r4*r3, r2, r1, outSize, min, max);
[2c47b73]	1802	else
	1803	SZ_compress_args_float_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1804	}
	1805	}
	1806	return status;
	1807	}
	1808
	1809	int SZ_compress_args_float(unsigned char** newByteData, float *oriData,
	1810	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
	1811	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
	1812	{
	1813	confparams_cpr->errorBoundMode = errBoundMode;
	1814	if(errBoundMode==PW_REL)
	1815	{
	1816	confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
	1817	//confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
	1818	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE && r3 != 0 )
	1819	{
	1820	printf("Error: Current version doesn't support 3D data compression with point-wise relative error bound being based on pwrType=AVG\n");
	1821	exit(0);
	1822	return SZ_NSCS;
	1823	}
	1824	}
	1825	int status = SZ_SCES;
	1826	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
	1827
	1828	if(dataLength <= MIN_NUM_OF_ELEMENTS)
	1829	{
	1830	*newByteData = SZ_skip_compress_float(oriData, dataLength, outSize);
	1831	return status;
	1832	}
	1833
	1834	float valueRangeSize = 0, medianValue = 0;
	1835
	1836	float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
	1837	float max = min+valueRangeSize;
	1838	double realPrecision = 0;
	1839
	1840	if(confparams_cpr->errorBoundMode==PSNR)
	1841	{
	1842	confparams_cpr->errorBoundMode = ABS;
	1843	realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, (double)valueRangeSize);
	1844	//printf("realPrecision=%lf\n", realPrecision);
	1845	}
	1846	else
	1847	realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1848
	1849	if(valueRangeSize <= realPrecision)
	1850	{
	1851	SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
	1852	}
	1853	else
	1854	{
	1855	size_t tmpOutSize = 0;
	1856	unsigned char* tmpByteData;
	1857
	1858	if (r2==0)
	1859	{
	1860	if(confparams_cpr->errorBoundMode>=PW_REL)
	1861	{
[9ee2ce3]	1862	SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
	1863	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
[2c47b73]	1864	}
	1865	else
	1866	#ifdef HAVE_TIMECMPR
	1867	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1868	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1869	else
	1870	#endif
	1871	SZ_compress_args_float_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1872	}
	1873	else
	1874	if (r3==0)
	1875	{
	1876	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1877	SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1878	else
	1879	#ifdef HAVE_TIMECMPR
	1880	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1881	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1882	else
	1883	#endif
[9ee2ce3]	1884	{
	1885	if(sz_with_regression == SZ_NO_REGRESSION)
	1886	SZ_compress_args_float_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1887	else
	1888	tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
	1889	}
[2c47b73]	1890	}
	1891	else
	1892	if (r4==0)
	1893	{
	1894	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1895	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1896	else
	1897	#ifdef HAVE_TIMECMPR
	1898	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
[9ee2ce3]	1899	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
[2c47b73]	1900	else
	1901	#endif
[9ee2ce3]	1902	{
	1903	if(sz_with_regression == SZ_NO_REGRESSION)
	1904	SZ_compress_args_float_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1905	else
	1906	tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
	1907	}
[2c47b73]	1908	}
	1909	else
	1910	if (r5==0)
	1911	{
	1912	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1913	SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1914	//ToDO
	1915	//SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr(&tmpByteData, oriData, r4, r3, r2, r1, &tmpOutSize, min, max);
	1916	else
	1917	#ifdef HAVE_TIMECMPR
	1918	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1919	multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1920	else
	1921	#endif
[9ee2ce3]	1922	{
	1923	if(sz_with_regression == SZ_NO_REGRESSION)
	1924	SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1925	else
	1926	tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
	1927	}
[2c47b73]	1928	}
	1929	else
	1930	{
	1931	printf("Error: doesn't support 5 dimensions for now.\n");
	1932	status = SZ_DERR; //dimension error
	1933	}
	1934	//Call Gzip to do the further compression.
	1935	if(confparams_cpr->szMode==SZ_BEST_SPEED)
	1936	{
	1937	*outSize = tmpOutSize;
	1938	*newByteData = tmpByteData;
	1939	}
	1940	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION \|\| confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION)
	1941	{
[9ee2ce3]	1942	*outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
[2c47b73]	1943	free(tmpByteData);
	1944	}
	1945	else
	1946	{
	1947	printf("Error: Wrong setting of confparams_cpr->szMode in the float compression.\n");
	1948	status = SZ_MERR; //mode error
	1949	}
	1950	}
	1951
	1952	return status;
	1953	}
	1954
	1955
	1956	void computeReqLength_float(double realPrecision, short radExpo, int* reqLength, float* medianValue)
	1957	{
	1958	short reqExpo = getPrecisionReqLength_double(realPrecision);
	1959	*reqLength = 9+radExpo - reqExpo; //radExpo-reqExpo == reqMantiLength
	1960	if(*reqLength<9)
	1961	*reqLength = 9;
	1962	if(*reqLength>32)
	1963	{
	1964	*reqLength = 32;
	1965	*medianValue = 0;
	1966	}
	1967	}
	1968
	1969	//TODO
	1970	int SZ_compress_args_float_subblock(unsigned char* compressedBytes, float *oriData,
	1971	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
	1972	size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
	1973	size_t e5, size_t e4, size_t e3, size_t e2, size_t e1,
	1974	size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio)
	1975	{
	1976	int status = SZ_SCES;
	1977	float valueRangeSize = 0, medianValue = 0;
	1978	computeRangeSize_float_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1);
	1979
	1980	double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1981
	1982	if(valueRangeSize <= realPrecision)
	1983	{
	1984	//TODO
	1985	//SZ_compress_args_float_withinRange_subblock();
	1986	}
	1987	else
	1988	{
	1989	if (r2==0)
	1990	{
	1991	if(errBoundMode>=PW_REL)
	1992	{
	1993	//TODO
	1994	//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_subblock();
	1995	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1996	}
	1997	else
	1998	SZ_compress_args_float_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1);
	1999	}
	2000	else
	2001	if (r3==0)
	2002	{
	2003	//TODO
	2004	if(errBoundMode>=PW_REL)
	2005	{
	2006	//TODO
	2007	//SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_subblock();
	2008	printf ("Current subblock version does not support point-wise relative error bound.\n");
	2009	}
	2010	else
	2011	SZ_compress_args_float_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1);
	2012	}
	2013	else
	2014	if (r4==0)
	2015	{
	2016	if(errBoundMode>=PW_REL)
	2017	{
	2018	//TODO
	2019	//SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_subblock();
	2020	printf ("Current subblock version does not support point-wise relative error bound.\n");
	2021	}
	2022	else
	2023	SZ_compress_args_float_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1);
	2024	}
	2025	else
	2026	if (r5==0)
	2027	{
	2028	if(errBoundMode>=PW_REL)
	2029	{
	2030	//TODO
	2031	//SZ_compress_args_float_NoCkRngeNoGzip_4D_pwr_subblock();
	2032	printf ("Current subblock version does not support point-wise relative error bound.\n");
	2033	}
	2034	else
	2035	SZ_compress_args_float_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
	2036	}
	2037	else
	2038	{
	2039	printf("Error: doesn't support 5 dimensions for now.\n");
	2040	status = SZ_DERR; //dimension error
	2041	}
	2042	}
	2043	return status;
	2044	}
	2045
	2046	void SZ_compress_args_float_NoCkRnge_1D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
	2047	size_t r1, size_t s1, size_t e1)
	2048	{
	2049	TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r1, s1, e1);
	2050
	2051	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	2052	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
	2053	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	2054	{
	2055	unsigned char *tmpCompBytes;
	2056	size_t tmpOutSize;
	2057	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
	2058	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	2059	free(tmpCompBytes);
	2060	}
	2061	else
	2062	{
	2063	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	2064	}
	2065
	2066	//TODO
	2067	// if(outSize>dataLengthsizeof(float))
	2068	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	2069
	2070	free_TightDataPointStorageF(tdps);
	2071	}
	2072
	2073	void SZ_compress_args_float_NoCkRnge_2D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
	2074	size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1)
	2075	{
	2076	TightDataPointStorageF* tdps = SZ_compress_float_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r2, r1, s2, s1, e2, e1);
	2077
	2078	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	2079	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
	2080	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	2081	{
	2082	unsigned char *tmpCompBytes;
	2083	size_t tmpOutSize;
	2084	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
	2085	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	2086	free(tmpCompBytes);
	2087	}
	2088	else
	2089	{
	2090	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	2091	}
	2092
	2093	//TODO
	2094	// if(outSize>dataLengthsizeof(float))
	2095	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	2096
	2097	free_TightDataPointStorageF(tdps);
	2098	}
	2099
	2100	void SZ_compress_args_float_NoCkRnge_3D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
	2101	size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1)
	2102	{
	2103	TightDataPointStorageF* tdps = SZ_compress_float_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r3, r2, r1, s3, s2, s1, e3, e2, e1);
	2104
	2105	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	2106	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
	2107	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	2108	{
	2109	unsigned char *tmpCompBytes;
	2110	size_t tmpOutSize;
	2111	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
	2112	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	2113	free(tmpCompBytes);
	2114	}
	2115	else
	2116	{
	2117	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	2118	}
	2119
	2120	//TODO
	2121	// if(outSize>dataLengthsizeof(float))
	2122	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	2123
	2124	free_TightDataPointStorageF(tdps);
	2125	}
	2126
	2127	void SZ_compress_args_float_NoCkRnge_4D_subblock(unsigned char* compressedBytes, float oriData, double realPrecision, size_t outSize, float valueRangeSize, float medianValue_f,
	2128	size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1)
	2129	{
	2130	TightDataPointStorageF* tdps = SZ_compress_float_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_f, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
	2131
	2132	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	2133	convertTDPStoFlatBytes_float_args(tdps, compressedBytes, outSize);
	2134	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	2135	{
	2136	unsigned char *tmpCompBytes;
	2137	size_t tmpOutSize;
	2138	convertTDPStoFlatBytes_float(tdps, &tmpCompBytes, &tmpOutSize);
	2139	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	2140	free(tmpCompBytes);
	2141	}
	2142	else
	2143	{
	2144	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	2145	}
	2146
	2147	//TODO
	2148	// if(outSize>dataLengthsizeof(float))
	2149	// SZ_compress_args_float_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	2150
	2151	free_TightDataPointStorageF(tdps);
	2152
	2153	}
	2154
	2155	unsigned int optimize_intervals_float_1D_subblock(float *oriData, double realPrecision, size_t r1, size_t s1, size_t e1)
	2156	{
	2157	size_t dataLength = e1 - s1 + 1;
	2158	oriData = oriData + s1;
	2159
	2160	size_t i = 0;
	2161	unsigned long radiusIndex;
	2162	float pred_value = 0, pred_err;
	2163	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	2164	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	2165	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
	2166	for(i=2;i<dataLength;i++)
	2167	{
	2168	if(i%confparams_cpr->sampleDistance==0)
	2169	{
	2170	pred_value = 2*oriData[i-1] - oriData[i-2];
	2171	//pred_value = oriData[i-1];
	2172	pred_err = fabs(pred_value - oriData[i]);
	2173	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2174	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2175	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2176	intervals[radiusIndex]++;
	2177	}
	2178	}
	2179	//compute the appropriate number
	2180	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2181	size_t sum = 0;
	2182	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2183	{
	2184	sum += intervals[i];
	2185	if(sum>targetCount)
	2186	break;
	2187	}
	2188	if(i>=confparams_cpr->maxRangeRadius)
	2189	i = confparams_cpr->maxRangeRadius-1;
	2190
	2191	unsigned int accIntervals = 2*(i+1);
	2192	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2193
	2194	if(powerOf2<32)
	2195	powerOf2 = 32;
	2196
	2197	free(intervals);
	2198	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
	2199	return powerOf2;
	2200	}
	2201
	2202	unsigned int optimize_intervals_float_2D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
	2203	{
	2204	size_t R1 = e1 - s1 + 1;
	2205	size_t R2 = e2 - s2 + 1;
	2206
	2207	size_t i,j, index;
	2208	unsigned long radiusIndex;
	2209	float pred_value = 0, pred_err;
	2210	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	2211	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	2212	size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance;
	2213	for(i=s1+1;i<=e1;i++)
	2214	{
	2215	for(j=s2+1;j<=e2;j++)
	2216	{
	2217	if((i+j)%confparams_cpr->sampleDistance==0)
	2218	{
	2219	index = i*r2+j;
	2220	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
	2221	pred_err = fabs(pred_value - oriData[index]);
	2222	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2223	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2224	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2225	intervals[radiusIndex]++;
	2226	}
	2227	}
	2228	}
	2229	//compute the appropriate number
	2230	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2231	size_t sum = 0;
	2232	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2233	{
	2234	sum += intervals[i];
	2235	if(sum>targetCount)
	2236	break;
	2237	}
	2238	if(i>=confparams_cpr->maxRangeRadius)
	2239	i = confparams_cpr->maxRangeRadius-1;
	2240	unsigned int accIntervals = 2*(i+1);
	2241	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2242
	2243	if(powerOf2<32)
	2244	powerOf2 = 32;
	2245
	2246	free(intervals);
	2247	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
	2248	return powerOf2;
	2249	}
	2250
	2251	unsigned int optimize_intervals_float_3D_subblock(float *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
	2252	{
	2253	size_t R1 = e1 - s1 + 1;
	2254	size_t R2 = e2 - s2 + 1;
	2255	size_t R3 = e3 - s3 + 1;
	2256
	2257	size_t r23 = r2*r3;
	2258
	2259	size_t i,j,k, index;
	2260	unsigned long radiusIndex;
	2261	float pred_value = 0, pred_err;
	2262	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	2263	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	2264	size_t totalSampleSize = R1R2R3/confparams_cpr->sampleDistance;
	2265	for(i=s1+1;i<=e1;i++)
	2266	{
	2267	for(j=s2+1;j<=e2;j++)
	2268	{
	2269	for(k=s3+1;k<=e3;k++)
	2270	{
	2271	if((i+j+k)%confparams_cpr->sampleDistance==0)
	2272	{
	2273	index = ir23+jr3+k;
	2274	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
	2275	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
	2276	pred_err = fabs(pred_value - oriData[index]);
	2277	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2278	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2279	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2280	intervals[radiusIndex]++;
	2281	}
	2282	}
	2283	}
	2284	}
	2285	//compute the appropriate number
	2286	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2287	size_t sum = 0;
	2288	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2289	{
	2290	sum += intervals[i];
	2291	if(sum>targetCount)
	2292	break;
	2293	}
	2294	if(i>=confparams_cpr->maxRangeRadius)
	2295	i = confparams_cpr->maxRangeRadius-1;
	2296	unsigned int accIntervals = 2*(i+1);
	2297	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2298
	2299	if(powerOf2<32)
	2300	powerOf2 = 32;
	2301
	2302	free(intervals);
	2303	return powerOf2;
	2304	}
	2305
	2306	unsigned int optimize_intervals_float_4D_subblock(float *oriData, double realPrecision,
	2307	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
	2308	{
	2309	size_t R1 = e1 - s1 + 1;
	2310	size_t R2 = e2 - s2 + 1;
	2311	size_t R3 = e3 - s3 + 1;
	2312	size_t R4 = e4 - s4 + 1;
	2313
	2314	size_t r34 = r3*r4;
	2315	size_t r234 = r2r3r4;
	2316
	2317	size_t i,j,k,l, index;
	2318	unsigned long radiusIndex;
	2319	float pred_value = 0, pred_err;
	2320	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	2321	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	2322	size_t totalSampleSize = R1R2R3*R4/confparams_cpr->sampleDistance;
	2323	for(i=s1+1;i<=e1;i++)
	2324	{
	2325	for(j=s2+1;j<=e2;j++)
	2326	{
	2327	for(k=s3+1;k<=e3;k++)
	2328	{
	2329	for (l=s4+1;l<=e4;l++)
	2330	{
	2331	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
	2332	{
	2333	index = ir234+jr34+k*r4+l;
	2334	pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34]
	2335	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
	2336	pred_err = fabs(pred_value - oriData[index]);
	2337	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2338	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2339	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2340	intervals[radiusIndex]++;
	2341	}
	2342	}
	2343	}
	2344	}
	2345	}
	2346	//compute the appropriate number
	2347	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2348	size_t sum = 0;
	2349	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2350	{
	2351	sum += intervals[i];
	2352	if(sum>targetCount)
	2353	break;
	2354	}
	2355	if(i>=confparams_cpr->maxRangeRadius)
	2356	i = confparams_cpr->maxRangeRadius-1;
	2357
	2358	unsigned int accIntervals = 2*(i+1);
	2359	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2360
	2361	if(powerOf2<32)
	2362	powerOf2 = 32;
	2363
	2364	free(intervals);
	2365	return powerOf2;
	2366	}
	2367
	2368	TightDataPointStorageF* SZ_compress_float_1D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
	2369	size_t r1, size_t s1, size_t e1)
	2370	{
	2371	size_t dataLength = e1 - s1 + 1;
	2372	unsigned int quantization_intervals;
	2373	if(exe_params->optQuantMode==1)
	2374	quantization_intervals = optimize_intervals_float_1D_subblock(oriData, realPrecision, r1, s1, e1);
	2375	else
	2376	quantization_intervals = exe_params->intvCapacity;
	2377	updateQuantizationInfo(quantization_intervals);
	2378
	2379	size_t i;
	2380	int reqLength;
	2381	float medianValue = medianValue_f;
	2382	short radExpo = getExponent_float(valueRangeSize/2);
	2383
	2384	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	2385
	2386	int* type = (int) malloc(dataLengthsizeof(int));
	2387
	2388	float* spaceFillingValue = oriData + s1;
	2389
	2390	DynamicIntArray *exactLeadNumArray;
	2391	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2392
	2393	DynamicByteArray *exactMidByteArray;
	2394	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2395
	2396	DynamicIntArray *resiBitArray;
	2397	new_DIA(&resiBitArray, DynArrayInitLen);
	2398
	2399	type[0] = 0;
	2400
	2401	unsigned char preDataBytes[4];
	2402	intToBytes_bigEndian(preDataBytes, 0);
	2403
	2404	int reqBytesLength = reqLength/8;
	2405	int resiBitsLength = reqLength%8;
	2406	float last3CmprsData[3] = {0};
	2407
	2408	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	2409	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2410
	2411	//add the first data
	2412	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2413	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2414	memcpy(preDataBytes,vce->curBytes,4);
	2415	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2416	listAdd_float(last3CmprsData, vce->data);
	2417
	2418	//add the second data
	2419	type[1] = 0;
	2420	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2421	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2422	memcpy(preDataBytes,vce->curBytes,4);
	2423	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2424	listAdd_float(last3CmprsData, vce->data);
	2425
	2426	int state;
	2427	double checkRadius;
	2428	float curData;
	2429	float pred;
	2430	float predAbsErr;
	2431	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
	2432	double interval = 2*realPrecision;
	2433
	2434	for(i=2;i<dataLength;i++)
	2435	{
	2436	curData = spaceFillingValue[i];
	2437	pred = 2*last3CmprsData[0] - last3CmprsData[1];
	2438	predAbsErr = fabs(curData - pred);
	2439	if(predAbsErr<=checkRadius)
	2440	{
	2441	state = (predAbsErr/realPrecision+1)/2;
	2442	if(curData>=pred)
	2443	{
	2444	type[i] = exe_params->intvRadius+state;
	2445	pred = pred + state*interval;
	2446	}
	2447	else
	2448	{
	2449	type[i] = exe_params->intvRadius-state;
	2450	pred = pred - state*interval;
	2451	}
	2452
	2453	listAdd_float(last3CmprsData, pred);
	2454	continue;
	2455	}
	2456
	2457	//unpredictable data processing
	2458	type[i] = 0;
	2459	compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2460	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2461	memcpy(preDataBytes,vce->curBytes,4);
	2462	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2463
	2464	listAdd_float(last3CmprsData, vce->data);
	2465	}
	2466
	2467	size_t exactDataNum = exactLeadNumArray->size;
	2468
	2469	TightDataPointStorageF* tdps;
	2470
	2471	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	2472	type, exactMidByteArray->array, exactMidByteArray->size,
	2473	exactLeadNumArray->array,
	2474	resiBitArray->array, resiBitArray->size,
	2475	resiBitsLength,
	2476	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2477
	2478	//free memory
	2479	free_DIA(exactLeadNumArray);
	2480	free_DIA(resiBitArray);
	2481	free(type);
	2482	free(vce);
	2483	free(lce);
	2484	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2485
	2486	return tdps;
	2487	}
	2488
	2489	TightDataPointStorageF* SZ_compress_float_2D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
	2490	size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
	2491	{
	2492	unsigned int quantization_intervals;
	2493	if(exe_params->optQuantMode==1)
	2494	{
	2495	quantization_intervals = optimize_intervals_float_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2);
	2496	updateQuantizationInfo(quantization_intervals);
	2497	}
	2498	else
	2499	quantization_intervals = exe_params->intvCapacity;
	2500
	2501	size_t i,j;
	2502	int reqLength;
	2503	float pred1D, pred2D;
	2504	float diff = 0.0;
	2505	double itvNum = 0;
	2506	float P0, P1;
	2507
	2508	size_t R1 = e1 - s1 + 1;
	2509	size_t R2 = e2 - s2 + 1;
	2510	size_t dataLength = R1*R2;
	2511
	2512	P0 = (float)malloc(R2sizeof(float));
	2513	memset(P0, 0, R2*sizeof(float));
	2514	P1 = (float)malloc(R2sizeof(float));
	2515	memset(P1, 0, R2*sizeof(float));
	2516
	2517	float medianValue = medianValue_f;
	2518	short radExpo = getExponent_float(valueRangeSize/2);
	2519	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	2520
	2521	int* type = (int) malloc(dataLengthsizeof(int));
	2522
	2523	float* spaceFillingValue = oriData; //
	2524
	2525	DynamicIntArray *exactLeadNumArray;
	2526	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2527
	2528	DynamicByteArray *exactMidByteArray;
	2529	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2530
	2531	DynamicIntArray *resiBitArray;
	2532	new_DIA(&resiBitArray, DynArrayInitLen);
	2533
	2534	unsigned char preDataBytes[4];
	2535	intToBytes_bigEndian(preDataBytes, 0);
	2536
	2537	int reqBytesLength = reqLength/8;
	2538	int resiBitsLength = reqLength%8;
	2539
	2540	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	2541	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2542
	2543	/* Process Row-s1 data s2*/
	2544	size_t gIndex;
	2545	size_t lIndex;
	2546
	2547	gIndex = s1*r2+s2;
	2548	lIndex = 0;
	2549
	2550	type[lIndex] = 0;
	2551	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2552	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2553	memcpy(preDataBytes,vce->curBytes,4);
	2554	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2555	P1[0] = vce->data;
	2556
	2557	/* Process Row-s1 data s2+1*/
	2558	gIndex = s1*r2+(s2+1);
	2559	lIndex = 1;
	2560
	2561	pred1D = P1[0];
	2562	diff = spaceFillingValue[gIndex] - pred1D;
	2563
	2564	itvNum = fabs(diff)/realPrecision + 1;
	2565
	2566	if (itvNum < exe_params->intvCapacity)
	2567	{
	2568	if (diff < 0) itvNum = -itvNum;
	2569	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2570	P1[1] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2571	}
	2572	else
	2573	{
	2574	type[lIndex] = 0;
	2575	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2576	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2577	memcpy(preDataBytes,vce->curBytes,4);
	2578	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2579	P1[1] = vce->data;
	2580	}
	2581
	2582	/* Process Row-s1 data s2+2 --> data e2 */
	2583	for (j = 2; j < R2; j++)
	2584	{
	2585	gIndex = s1*r2+(s2+j);
	2586	lIndex = j;
	2587
	2588	pred1D = 2*P1[j-1] - P1[j-2];
	2589	diff = spaceFillingValue[gIndex] - pred1D;
	2590
	2591	itvNum = fabs(diff)/realPrecision + 1;
	2592
	2593	if (itvNum < exe_params->intvCapacity)
	2594	{
	2595	if (diff < 0) itvNum = -itvNum;
	2596	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2597	P1[j] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2598	}
	2599	else
	2600	{
	2601	type[lIndex] = 0;
	2602	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2603	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2604	memcpy(preDataBytes,vce->curBytes,4);
	2605	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2606	P1[j] = vce->data;
	2607	}
	2608	}
	2609
	2610	/* Process Row-s1+1 --> Row-e1 */
	2611	for (i = 1; i < R1; i++)
	2612	{
	2613	/* Process row-s1+i data s2 */
	2614	gIndex = (s1+i)*r2+s2;
	2615	lIndex = i*R2;
	2616
	2617	pred1D = P1[0];
	2618	diff = spaceFillingValue[gIndex] - pred1D;
	2619
	2620	itvNum = fabs(diff)/realPrecision + 1;
	2621
	2622	if (itvNum < exe_params->intvCapacity)
	2623	{
	2624	if (diff < 0) itvNum = -itvNum;
	2625	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2626	P0[0] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2627	}
	2628	else
	2629	{
	2630	type[lIndex] = 0;
	2631	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2632	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2633	memcpy(preDataBytes,vce->curBytes,4);
	2634	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2635	P0[0] = vce->data;
	2636	}
	2637
	2638	/* Process row-s1+i data s2+1 --> e2 */
	2639	for (j = 1; j < R2; j++)
	2640	{
	2641	gIndex = (s1+i)*r2+(s2+j);
	2642	lIndex = i*R2+j;
	2643
	2644	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
	2645
	2646	pred2D = P0[j-1] + P1[j] - P1[j-1];
	2647
	2648	diff = spaceFillingValue[gIndex] - pred2D;
	2649
	2650	itvNum = fabs(diff)/realPrecision + 1;
	2651
	2652	if (itvNum < exe_params->intvCapacity)
	2653	{
	2654	if (diff < 0) itvNum = -itvNum;
	2655	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2656	P0[j] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2657	}
	2658	else
	2659	{
	2660	type[lIndex] = 0;
	2661	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2662	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2663	memcpy(preDataBytes,vce->curBytes,4);
	2664	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2665	P0[j] = vce->data;
	2666	}
	2667	}
	2668
	2669	float *Pt;
	2670	Pt = P1;
	2671	P1 = P0;
	2672	P0 = Pt;
	2673	}
	2674
	2675	free(P0);
	2676	free(P1);
	2677	size_t exactDataNum = exactLeadNumArray->size;
	2678
	2679	TightDataPointStorageF* tdps;
	2680
	2681	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	2682	type, exactMidByteArray->array, exactMidByteArray->size,
	2683	exactLeadNumArray->array,
	2684	resiBitArray->array, resiBitArray->size,
	2685	resiBitsLength,
	2686	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2687
	2688	//free memory
	2689	free_DIA(exactLeadNumArray);
	2690	free_DIA(resiBitArray);
	2691	free(type);
	2692	free(vce);
	2693	free(lce);
	2694	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2695
	2696	return tdps;
	2697	}
	2698
	2699	TightDataPointStorageF* SZ_compress_float_3D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
	2700	size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
	2701	{
	2702	unsigned int quantization_intervals;
	2703	if(exe_params->optQuantMode==1)
	2704	{
	2705	quantization_intervals = optimize_intervals_float_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3);
	2706	updateQuantizationInfo(quantization_intervals);
	2707	}
	2708	else
	2709	quantization_intervals = exe_params->intvCapacity;
	2710
	2711	size_t i,j,k;
	2712	int reqLength;
	2713	float pred1D, pred2D, pred3D;
	2714	float diff = 0.0;
	2715	double itvNum = 0;
	2716	float P0, P1;
	2717
	2718	size_t R1 = e1 - s1 + 1;
	2719	size_t R2 = e2 - s2 + 1;
	2720	size_t R3 = e3 - s3 + 1;
	2721	size_t dataLength = R1R2R3;
	2722
	2723	size_t r23 = r2*r3;
	2724	size_t R23 = R2*R3;
	2725
	2726	P0 = (float)malloc(R23sizeof(float));
	2727	P1 = (float)malloc(R23sizeof(float));
	2728
	2729	float medianValue = medianValue_f;
	2730	short radExpo = getExponent_float(valueRangeSize/2);
	2731	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	2732
	2733	int* type = (int) malloc(dataLengthsizeof(int));
	2734	//type[dataLength]=0;
	2735
	2736	float* spaceFillingValue = oriData; //
	2737
	2738	DynamicIntArray *exactLeadNumArray;
	2739	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2740
	2741	DynamicByteArray *exactMidByteArray;
	2742	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2743
	2744	DynamicIntArray *resiBitArray;
	2745	new_DIA(&resiBitArray, DynArrayInitLen);
	2746
	2747	unsigned char preDataBytes[4];
	2748	intToBytes_bigEndian(preDataBytes, 0);
	2749
	2750	int reqBytesLength = reqLength/8;
	2751	int resiBitsLength = reqLength%8;
	2752
	2753	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	2754	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2755
	2756
	2757	/////////////////////////// Process layer-s1 ///////////////////////////
	2758	/* Process Row-s2 data s3*/
	2759	size_t gIndex; //global index
	2760	size_t lIndex; //local index
	2761	size_t index2D; //local 2D index
	2762
	2763	gIndex = s1r23+s2r3+s3;
	2764	lIndex = 0;
	2765	index2D = 0;
	2766
	2767	type[lIndex] = 0;
	2768	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2769	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2770	memcpy(preDataBytes,vce->curBytes,4);
	2771	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2772	P1[index2D] = vce->data;
	2773
	2774	/* Process Row-s2 data s3+1*/
	2775	gIndex = s1r23+s2r3+s3+1;
	2776	lIndex = 1;
	2777	index2D = 1;
	2778
	2779	pred1D = P1[index2D-1];
	2780	diff = spaceFillingValue[gIndex] - pred1D;
	2781
	2782	itvNum = fabs(diff)/realPrecision + 1;
	2783
	2784	if (itvNum < exe_params->intvCapacity)
	2785	{
	2786	if (diff < 0) itvNum = -itvNum;
	2787	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2788	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2789	}
	2790	else
	2791	{
	2792	type[lIndex] = 0;
	2793	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2794	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2795	memcpy(preDataBytes,vce->curBytes,4);
	2796	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2797	P1[index2D] = vce->data;
	2798	}
	2799
	2800	/* Process Row-s2 data s3+2 --> data e3 */
	2801	for (j = 2; j < R3; j++)
	2802	{
	2803	gIndex = s1r23+s2r3+s3+j;
	2804	lIndex = j;
	2805	index2D = j;
	2806
	2807	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	2808	diff = spaceFillingValue[gIndex] - pred1D;
	2809
	2810	itvNum = fabs(diff)/realPrecision + 1;
	2811
	2812	if (itvNum < exe_params->intvCapacity)
	2813	{
	2814	if (diff < 0) itvNum = -itvNum;
	2815	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2816	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2817	}
	2818	else
	2819	{
	2820	type[lIndex] = 0;
	2821	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2822	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2823	memcpy(preDataBytes,vce->curBytes,4);
	2824	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2825	P1[index2D] = vce->data;
	2826	}
	2827	}
	2828
	2829	/* Process Row-s2+1 --> Row-e2 */
	2830	for (i = 1; i < R2; i++)
	2831	{
	2832	/* Process row-s2+i data s3 */
	2833	gIndex = s1r23+(s2+i)r3+s3;
	2834	lIndex = i*R3;
	2835	index2D = i*R3;
	2836
	2837	pred1D = P1[index2D-R3];
	2838	diff = spaceFillingValue[gIndex] - pred1D;
	2839
	2840	itvNum = fabs(diff)/realPrecision + 1;
	2841
	2842	if (itvNum < exe_params->intvCapacity)
	2843	{
	2844	if (diff < 0) itvNum = -itvNum;
	2845	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2846	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2847	}
	2848	else
	2849	{
	2850	type[lIndex] = 0;
	2851	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2852	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2853	memcpy(preDataBytes,vce->curBytes,4);
	2854	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2855	P1[index2D] = vce->data;
	2856	}
	2857
	2858	/* Process row-s2+i data s3+1 --> data e3*/
	2859	for (j = 1; j < R3; j++)
	2860	{
	2861	gIndex = s1r23+(s2+i)r3+s3+j;
	2862	lIndex = i*R3+j;
	2863	index2D = i*R3+j;
	2864
	2865	pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1];
	2866	diff = spaceFillingValue[gIndex] - pred2D;
	2867
	2868	itvNum = fabs(diff)/realPrecision + 1;
	2869
	2870	if (itvNum < exe_params->intvCapacity)
	2871	{
	2872	if (diff < 0) itvNum = -itvNum;
	2873	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2874	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2875	}
	2876	else
	2877	{
	2878	type[lIndex] = 0;
	2879	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2880	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2881	memcpy(preDataBytes,vce->curBytes,4);
	2882	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2883	P1[index2D] = vce->data;
	2884	}
	2885	}
	2886	}
	2887
	2888
	2889	/////////////////////////// Process layer-s1+1 --> layer-e1 ///////////////////////////
	2890
	2891	for (k = 1; k < R1; k++)
	2892	{
	2893	/* Process Row-s2 data s3*/
	2894	gIndex = (s1+k)r23+s2r3+s3;
	2895	lIndex = k*R23;
	2896	index2D = 0;
	2897
	2898	pred1D = P1[index2D];
	2899	diff = spaceFillingValue[gIndex] - pred1D;
	2900
	2901	itvNum = fabs(diff)/realPrecision + 1;
	2902
	2903	if (itvNum < exe_params->intvCapacity)
	2904	{
	2905	if (diff < 0) itvNum = -itvNum;
	2906	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2907	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2908	}
	2909	else
	2910	{
	2911	type[lIndex] = 0;
	2912	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2913	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2914	memcpy(preDataBytes,vce->curBytes,4);
	2915	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2916	P0[index2D] = vce->data;
	2917	}
	2918
	2919	/* Process Row-s2 data s3+1 --> data e3 */
	2920	for (j = 1; j < R3; j++)
	2921	{
	2922	gIndex = (s1+k)r23+s2r3+s3+j;
	2923	lIndex = k*R23+j;
	2924	index2D = j;
	2925
	2926	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	2927	diff = spaceFillingValue[gIndex] - pred2D;
	2928
	2929	itvNum = fabs(diff)/realPrecision + 1;
	2930
	2931	if (itvNum < exe_params->intvCapacity)
	2932	{
	2933	if (diff < 0) itvNum = -itvNum;
	2934	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2935	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2936	}
	2937	else
	2938	{
	2939	type[lIndex] = 0;
	2940	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2941	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2942	memcpy(preDataBytes,vce->curBytes,4);
	2943	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2944	P0[index2D] = vce->data;
	2945	}
	2946	}
	2947
	2948	/* Process Row-s2+1 --> Row-e2 */
	2949	for (i = 1; i < R2; i++)
	2950	{
	2951	/* Process Row-s2+i data s3 */
	2952	gIndex = (s1+k)r23+(s2+i)r3+s3;
	2953	lIndex = kR23+iR3;
	2954	index2D = i*R3;
	2955
	2956	pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3];
	2957	diff = spaceFillingValue[gIndex] - pred2D;
	2958
	2959	itvNum = fabs(diff)/realPrecision + 1;
	2960
	2961	if (itvNum < exe_params->intvCapacity)
	2962	{
	2963	if (diff < 0) itvNum = -itvNum;
	2964	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2965	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2966	}
	2967	else
	2968	{
	2969	type[lIndex] = 0;
	2970	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2971	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2972	memcpy(preDataBytes,vce->curBytes,4);
	2973	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2974	P0[index2D] = vce->data;
	2975	}
	2976
	2977	/* Process Row-s2+i data s3+1 --> data e3 */
	2978	for (j = 1; j < R3; j++)
	2979	{
	2980	gIndex = (s1+k)r23+(s2+i)r3+s3+j;
	2981	lIndex = kR23+iR3+j;
	2982	index2D = i*R3+j;
	2983
	2984	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
	2985
	2986	pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1];
	2987	diff = spaceFillingValue[gIndex] - pred3D;
	2988
	2989	itvNum = fabs(diff)/realPrecision + 1;
	2990
	2991	if (itvNum < exe_params->intvCapacity)
	2992	{
	2993	if (diff < 0) itvNum = -itvNum;
	2994	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2995	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2996	}
	2997	else
	2998	{
	2999	type[lIndex] = 0;
	3000	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3001	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3002	memcpy(preDataBytes,vce->curBytes,4);
	3003	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3004	P0[index2D] = vce->data;
	3005	}
	3006	}
	3007	}
	3008
	3009	float *Pt;
	3010	Pt = P1;
	3011	P1 = P0;
	3012	P0 = Pt;
	3013	}
	3014
	3015	free(P0);
	3016	free(P1);
	3017	size_t exactDataNum = exactLeadNumArray->size;
	3018
	3019	TightDataPointStorageF* tdps;
	3020
	3021	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	3022	type, exactMidByteArray->array, exactMidByteArray->size,
	3023	exactLeadNumArray->array,
	3024	resiBitArray->array, resiBitArray->size,
	3025	resiBitsLength,
	3026	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	3027
	3028	//free memory
	3029	free_DIA(exactLeadNumArray);
	3030	free_DIA(resiBitArray);
	3031	free(type);
	3032	free(vce);
	3033	free(lce);
	3034	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	3035
	3036	return tdps;
	3037	}
	3038
	3039	TightDataPointStorageF* SZ_compress_float_4D_MDQ_subblock(float *oriData, double realPrecision, float valueRangeSize, float medianValue_f,
	3040	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
	3041	{
	3042	unsigned int quantization_intervals;
	3043	if(exe_params->optQuantMode==1)
	3044	{
	3045	quantization_intervals = optimize_intervals_float_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4);
	3046	updateQuantizationInfo(quantization_intervals);
	3047	}
	3048	else
	3049	quantization_intervals = exe_params->intvCapacity;
	3050
	3051	size_t i,j,k;
	3052	int reqLength;
	3053	float pred1D, pred2D, pred3D;
	3054	float diff = 0.0;
	3055	double itvNum = 0;
	3056	float P0, P1;
	3057
	3058	size_t R1 = e1 - s1 + 1;
	3059	size_t R2 = e2 - s2 + 1;
	3060	size_t R3 = e3 - s3 + 1;
	3061	size_t R4 = e4 - s4 + 1;
	3062
	3063	size_t dataLength = R1R2R3*R4;
	3064
	3065	size_t r34 = r3*r4;
	3066	size_t r234 = r2r3r4;
	3067	size_t R34 = R3*R4;
	3068	size_t R234 = R2R3R4;
	3069
	3070	P0 = (float)malloc(R34sizeof(float));
	3071	P1 = (float)malloc(R34sizeof(float));
	3072
	3073	float medianValue = medianValue_f;
	3074	short radExpo = getExponent_float(valueRangeSize/2);
	3075	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
	3076
	3077	int* type = (int) malloc(dataLengthsizeof(int));
	3078
	3079	float* spaceFillingValue = oriData; //
	3080
	3081	DynamicIntArray *exactLeadNumArray;
	3082	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	3083
	3084	DynamicByteArray *exactMidByteArray;
	3085	new_DBA(&exactMidByteArray, DynArrayInitLen);
	3086
	3087	DynamicIntArray *resiBitArray;
	3088	new_DIA(&resiBitArray, DynArrayInitLen);
	3089
	3090	unsigned char preDataBytes[4];
	3091	intToBytes_bigEndian(preDataBytes, 0);
	3092
	3093	int reqBytesLength = reqLength/8;
	3094	int resiBitsLength = reqLength%8;
	3095
	3096	FloatValueCompressElement vce = (FloatValueCompressElement)malloc(sizeof(FloatValueCompressElement));
	3097	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	3098
	3099
	3100	size_t l;
	3101	for (l = 0; l < R1; l++)
	3102	{
	3103
	3104	/////////////////////////// Process layer-s2 ///////////////////////////
	3105	/* Process Row-s3 data s4*/
	3106	size_t gIndex; //global index
	3107	size_t lIndex; //local index
	3108	size_t index2D; //local 2D index
	3109
	3110	gIndex = (s1+l)r234+s2r34+s3*r4+s4;
	3111	lIndex = l*R234;
	3112	index2D = 0;
	3113
	3114	type[lIndex] = 0;
	3115	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3116	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3117	memcpy(preDataBytes,vce->curBytes,4);
	3118	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3119	P1[index2D] = vce->data;
	3120
	3121	/* Process Row-s3 data s4+1*/
	3122	gIndex = (s1+l)r234+s2r34+s3*r4+s4+1;
	3123	lIndex = l*R234+1;
	3124	index2D = 1;
	3125
	3126	pred1D = P1[index2D-1];
	3127	diff = spaceFillingValue[gIndex] - pred1D;
	3128
	3129	itvNum = fabs(diff)/realPrecision + 1;
	3130
	3131	if (itvNum < exe_params->intvCapacity)
	3132	{
	3133	if (diff < 0) itvNum = -itvNum;
	3134	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3135	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3136	}
	3137	else
	3138	{
	3139	type[lIndex] = 0;
	3140	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3141	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3142	memcpy(preDataBytes,vce->curBytes,4);
	3143	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3144	P1[index2D] = vce->data;
	3145	}
	3146
	3147	/* Process Row-s3 data s4+2 --> data e4 */
	3148	for (j = 2; j < R4; j++)
	3149	{
	3150	gIndex = (s1+l)r234+s2r34+s3*r4+s4+j;
	3151	lIndex = l*R234+j;
	3152	index2D = j;
	3153
	3154	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	3155	diff = spaceFillingValue[gIndex] - pred1D;
	3156
	3157	itvNum = fabs(diff)/realPrecision + 1;
	3158
	3159	if (itvNum < exe_params->intvCapacity)
	3160	{
	3161	if (diff < 0) itvNum = -itvNum;
	3162	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3163	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3164	}
	3165	else
	3166	{
	3167	type[lIndex] = 0;
	3168	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3169	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3170	memcpy(preDataBytes,vce->curBytes,4);
	3171	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3172	P1[index2D] = vce->data;
	3173	}
	3174	}
	3175
	3176	/* Process Row-s3+1 --> Row-e3 */
	3177	for (i = 1; i < R3; i++)
	3178	{
	3179	/* Process row-s2+i data s3 */
	3180	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4;
	3181	lIndex = lR234+iR4;
	3182	index2D = i*R4;
	3183
	3184	pred1D = P1[index2D-R4];
	3185	diff = spaceFillingValue[gIndex] - pred1D;
	3186
	3187	itvNum = fabs(diff)/realPrecision + 1;
	3188
	3189	if (itvNum < exe_params->intvCapacity)
	3190	{
	3191	if (diff < 0) itvNum = -itvNum;
	3192	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3193	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3194	}
	3195	else
	3196	{
	3197	type[lIndex] = 0;
	3198	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3199	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3200	memcpy(preDataBytes,vce->curBytes,4);
	3201	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3202	P1[index2D] = vce->data;
	3203	}
	3204
	3205	/* Process row-s3+i data s4+1 --> data e4*/
	3206	for (j = 1; j < R4; j++)
	3207	{
	3208	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4+j;
	3209	lIndex = lR234+iR4+j;
	3210	index2D = i*R4+j;
	3211
	3212	pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1];
	3213	diff = spaceFillingValue[gIndex] - pred2D;
	3214
	3215	itvNum = fabs(diff)/realPrecision + 1;
	3216
	3217	if (itvNum < exe_params->intvCapacity)
	3218	{
	3219	if (diff < 0) itvNum = -itvNum;
	3220	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3221	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3222	}
	3223	else
	3224	{
	3225	type[lIndex] = 0;
	3226	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3227	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3228	memcpy(preDataBytes,vce->curBytes,4);
	3229	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3230	P1[index2D] = vce->data;
	3231	}
	3232	}
	3233	}
	3234
	3235
	3236	/////////////////////////// Process layer-s2+1 --> layer-e2 ///////////////////////////
	3237
	3238	for (k = 1; k < R2; k++)
	3239	{
	3240	/* Process Row-s3 data s4*/
	3241	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4;
	3242	lIndex = lR234+kR34;
	3243	index2D = 0;
	3244
	3245	pred1D = P1[index2D];
	3246	diff = spaceFillingValue[gIndex] - pred1D;
	3247
	3248	itvNum = fabs(diff)/realPrecision + 1;
	3249
	3250	if (itvNum < exe_params->intvCapacity)
	3251	{
	3252	if (diff < 0) itvNum = -itvNum;
	3253	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3254	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3255	}
	3256	else
	3257	{
	3258	type[lIndex] = 0;
	3259	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3260	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3261	memcpy(preDataBytes,vce->curBytes,4);
	3262	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3263	P0[index2D] = vce->data;
	3264	}
	3265
	3266	/* Process Row-s3 data s4+1 --> data e4 */
	3267	for (j = 1; j < R4; j++)
	3268	{
	3269	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4+j;
	3270	lIndex = lR234+kR34+j;
	3271	index2D = j;
	3272
	3273	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	3274	diff = spaceFillingValue[gIndex] - pred2D;
	3275
	3276	itvNum = fabs(diff)/realPrecision + 1;
	3277
	3278	if (itvNum < exe_params->intvCapacity)
	3279	{
	3280	if (diff < 0) itvNum = -itvNum;
	3281	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3282	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3283	}
	3284	else
	3285	{
	3286	type[lIndex] = 0;
	3287	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3288	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3289	memcpy(preDataBytes,vce->curBytes,4);
	3290	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3291	P0[index2D] = vce->data;
	3292	}
	3293	}
	3294
	3295	/* Process Row-s3+1 --> Row-e3 */
	3296	for (i = 1; i < R3; i++)
	3297	{
	3298	/* Process Row-s3+i data s4 */
	3299	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4;
	3300	lIndex = lR234+kR34+i*R4;
	3301	index2D = i*R4;
	3302
	3303	pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4];
	3304	diff = spaceFillingValue[gIndex] - pred2D;
	3305
	3306	itvNum = fabs(diff)/realPrecision + 1;
	3307
	3308	if (itvNum < exe_params->intvCapacity)
	3309	{
	3310	if (diff < 0) itvNum = -itvNum;
	3311	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3312	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3313	}
	3314	else
	3315	{
	3316	type[lIndex] = 0;
	3317	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3318	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3319	memcpy(preDataBytes,vce->curBytes,4);
	3320	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3321	P0[index2D] = vce->data;
	3322	}
	3323
	3324	/* Process Row-s3+i data s4+1 --> data e4 */
	3325	for (j = 1; j < R4; j++)
	3326	{
	3327	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4+j;
	3328	lIndex = lR234+kR34+i*R4+j;
	3329	index2D = i*R4+j;
	3330
	3331	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
	3332
	3333	pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1];
	3334	diff = spaceFillingValue[gIndex] - pred3D;
	3335
	3336	itvNum = fabs(diff)/realPrecision + 1;
	3337
	3338	if (itvNum < exe_params->intvCapacity)
	3339	{
	3340	if (diff < 0) itvNum = -itvNum;
	3341	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3342	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3343	}
	3344	else
	3345	{
	3346	type[lIndex] = 0;
	3347	compressSingleFloatValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3348	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3349	memcpy(preDataBytes,vce->curBytes,4);
	3350	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3351	P0[index2D] = vce->data;
	3352	}
	3353	}
	3354	}
	3355
	3356	float *Pt;
	3357	Pt = P1;
	3358	P1 = P0;
	3359	P0 = Pt;
	3360	}
	3361
	3362	}
	3363
	3364	free(P0);
	3365	free(P1);
	3366	size_t exactDataNum = exactLeadNumArray->size;
	3367
	3368	TightDataPointStorageF* tdps;
	3369
	3370	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
	3371	type, exactMidByteArray->array, exactMidByteArray->size,
	3372	exactLeadNumArray->array,
	3373	resiBitArray->array, resiBitArray->size,
	3374	resiBitsLength,
	3375	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	3376
	3377	//free memory
	3378	free_DIA(exactLeadNumArray);
	3379	free_DIA(resiBitArray);
	3380	free(type);
	3381	free(vce);
	3382	free(lce);
	3383	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	3384
	3385	return tdps;
	3386	}
	3387
	3388	unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
	3389	{
	3390	size_t i;
	3391	size_t radiusIndex;
	3392	size_t r23=r2*r3;
	3393	float pred_value = 0, pred_err;
	3394	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3395	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
[9ee2ce3]	3396	size_t totalSampleSize = 0;
[2c47b73]	3397
	3398	size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
	3399	size_t offset_count_2;
	3400	float * data_pos = oriData + r23 + r3 + offset_count;
	3401	size_t n1_count = 1, n2_count = 1; // count i,j sum
	3402	size_t len = r1 * r2 * r3;
	3403	while(data_pos - oriData < len){
	3404	totalSampleSize++;
	3405	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
	3406	pred_err = fabs(pred_value - *data_pos);
	3407	radiusIndex = (pred_err/realPrecision+1)/2;
	3408	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3409	{
	3410	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3411	}
	3412	intervals[radiusIndex]++;
	3413	offset_count += confparams_cpr->sampleDistance;
	3414	if(offset_count >= r3){
	3415	n2_count ++;
	3416	if(n2_count == r2){
	3417	n1_count ++;
	3418	n2_count = 1;
	3419	data_pos += r3;
	3420	}
	3421	offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance;
	3422	data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
	3423	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
	3424	if(offset_count == 0) offset_count ++;
	3425	}
	3426	else data_pos += confparams_cpr->sampleDistance;
	3427	}
	3428	//compute the appropriate number
	3429	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3430	size_t sum = 0;
	3431	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3432	{
	3433	sum += intervals[i];
	3434	if(sum>targetCount)
	3435	break;
	3436	}
	3437	if(i>=confparams_cpr->maxRangeRadius)
	3438	i = confparams_cpr->maxRangeRadius-1;
	3439	unsigned int accIntervals = 2*(i+1);
	3440	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3441
	3442	if(powerOf2<32)
	3443	powerOf2 = 32;
	3444	free(intervals);
	3445	return powerOf2;
	3446	}
	3447
	3448	size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){
	3449
	3450	size_t dim0_offset = dim_1 * dim_2;
	3451	size_t dim1_offset = dim_2;
	3452
	3453	// data_pos = block_ori_data;
	3454	// for(size_t i=0; i<block_dim_0; i++){
	3455	// for(size_t j=0; j<block_dim_1; j++){
	3456	// for(size_t k=0; k<block_dim_2; k++){
	3457	// sum += *data_pos;
	3458	// data_pos ++;
	3459	// }
	3460	// data_pos += dim1_offset - block_dim_2;
	3461	// }
	3462	// data_pos += dim0_offset - block_dim_1 * dim1_offset;
	3463	// }
	3464	// size_t num_elements = block_dim_0 * block_dim_1 * block_dim_2;
	3465	// if(num_elements > 0) mean[0] = sum / num_elements;
	3466	// else mean[0] = 0.0;
	3467	mean[0] = block_ori_data[0];
	3468
	3469	size_t unpredictable_count = 0;
	3470	size_t r1, r2, r3;
	3471	r1 = block_dim_0;
	3472	r2 = block_dim_1;
	3473	r3 = block_dim_2;
	3474
	3475	float * cur_data_pos = block_ori_data;
	3476	float curData;
	3477	float pred1D, pred2D, pred3D;
	3478	double itvNum;
	3479	double diff;
	3480	size_t i, j, k;
	3481	size_t r23 = r2*r3;
	3482	// Process Row-0 data 0
	3483	pred1D = mean[0];
	3484	curData = *cur_data_pos;
	3485	diff = curData - pred1D;
	3486	itvNum = fabs(diff)/realPrecision + 1;
	3487	if (itvNum < exe_params->intvCapacity){
	3488	if (diff < 0) itvNum = -itvNum;
	3489	type[0] = (int) (itvNum/2) + exe_params->intvRadius;
	3490	P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
	3491	//ganrantee comporession error against the case of machine-epsilon
	3492	if(fabs(curData-P1[0])>realPrecision){
	3493	type[0] = 0;
	3494	P1[0] = curData;
	3495	unpredictable_data[unpredictable_count ++] = curData;
	3496	}
	3497	}
	3498	else{
	3499	type[0] = 0;
	3500	P1[0] = curData;
	3501	unpredictable_data[unpredictable_count ++] = curData;
	3502	}
	3503
	3504	/* Process Row-0 data 1*/
	3505	pred1D = P1[0];
	3506	curData = cur_data_pos[1];
	3507	diff = curData - pred1D;
	3508	itvNum = fabs(diff)/realPrecision + 1;
	3509	if (itvNum < exe_params->intvCapacity){
	3510	if (diff < 0) itvNum = -itvNum;
	3511	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	3512	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	3513	//ganrantee comporession error against the case of machine-epsilon
	3514	if(fabs(curData-P1[1])>realPrecision){
	3515	type[1] = 0;
	3516	P1[1] = curData;
	3517	unpredictable_data[unpredictable_count ++] = curData;
	3518	}
	3519	}
	3520	else{
	3521	type[1] = 0;
	3522	P1[1] = curData;
	3523	unpredictable_data[unpredictable_count ++] = curData;
	3524	}
	3525	/* Process Row-0 data 2 --> data r3-1 */
	3526	for (j = 2; j < r3; j++){
	3527	pred1D = 2*P1[j-1] - P1[j-2];
	3528	curData = cur_data_pos[j];
	3529	diff = curData - pred1D;
	3530	itvNum = fabs(diff)/realPrecision + 1;
	3531	if (itvNum < exe_params->intvCapacity){
	3532	if (diff < 0) itvNum = -itvNum;
	3533	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	3534	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	3535	//ganrantee comporession error against the case of machine-epsilon
	3536	if(fabs(curData-P1[j])>realPrecision){
	3537	type[j] = 0;
	3538	P1[j] = curData;
	3539	unpredictable_data[unpredictable_count ++] = curData;
	3540	}
	3541	}
	3542	else{
	3543	type[j] = 0;
	3544	P1[j] = curData;
	3545	unpredictable_data[unpredictable_count ++] = curData;
	3546	}
	3547	}
	3548	cur_data_pos += dim1_offset;
	3549
	3550	/* Process Row-1 --> Row-r2-1 */
	3551	size_t index;
	3552	for (i = 1; i < r2; i++)
	3553	{
	3554	/* Process row-i data 0 */
	3555	index = i*r3;
	3556	pred1D = P1[index-r3];
	3557	curData = *cur_data_pos;
	3558	diff = curData - pred1D;
	3559
	3560	itvNum = fabs(diff)/realPrecision + 1;
	3561
	3562	if (itvNum < exe_params->intvCapacity)
	3563	{
	3564	if (diff < 0) itvNum = -itvNum;
	3565	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	3566	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	3567
	3568	//ganrantee comporession error against the case of machine-epsilon
	3569	if(fabs(curData-P1[index])>realPrecision)
	3570	{
	3571	type[index] = 0;
	3572	P1[index] = curData;
	3573	unpredictable_data[unpredictable_count ++] = curData;
	3574	}
	3575	}
	3576	else
	3577	{
	3578	type[index] = 0;
	3579	P1[index] = curData;
	3580	unpredictable_data[unpredictable_count ++] = curData;
	3581	}
	3582
	3583	/* Process row-i data 1 --> data r3-1*/
	3584	for (j = 1; j < r3; j++)
	3585	{
	3586	index = i*r3+j;
	3587	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
	3588
	3589	curData = cur_data_pos[j];
	3590	diff = curData - pred2D;
	3591
	3592	itvNum = fabs(diff)/realPrecision + 1;
	3593
	3594	if (itvNum < exe_params->intvCapacity)
	3595	{
	3596	if (diff < 0) itvNum = -itvNum;
	3597	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	3598	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	3599
	3600	//ganrantee comporession error against the case of machine-epsilon
	3601	if(fabs(curData-P1[index])>realPrecision)
	3602	{
	3603	type[index] = 0;
	3604	P1[index] = curData;
	3605	unpredictable_data[unpredictable_count ++] = curData;
	3606	}
	3607	}
	3608	else
	3609	{
	3610	type[index] = 0;
	3611	P1[index] = curData;
	3612	unpredictable_data[unpredictable_count ++] = curData;
	3613	}
	3614	}
	3615	cur_data_pos += dim1_offset;
	3616	}
	3617	cur_data_pos += dim0_offset - r2 * dim1_offset;
	3618
	3619	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
	3620
	3621	for (k = 1; k < r1; k++)
	3622	{
	3623	/* Process Row-0 data 0*/
	3624	index = k*r23;
	3625	pred1D = P1[0];
	3626	curData = *cur_data_pos;
	3627	diff = curData - pred1D;
	3628	itvNum = fabs(diff)/realPrecision + 1;
	3629	if (itvNum < exe_params->intvCapacity)
	3630	{
	3631	if (diff < 0) itvNum = -itvNum;
	3632	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	3633	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	3634	//ganrantee comporession error against the case of machine-epsilon
	3635	if(fabs(curData-P0[0])>realPrecision)
	3636	{
	3637	type[index] = 0;
	3638	P0[0] = curData;
	3639	unpredictable_data[unpredictable_count ++] = curData;
	3640	}
	3641	}
	3642	else
	3643	{
	3644	type[index] = 0;
	3645	P0[0] = curData;
	3646	unpredictable_data[unpredictable_count ++] = curData;
	3647	}
	3648	/* Process Row-0 data 1 --> data r3-1 */
	3649	for (j = 1; j < r3; j++)
	3650	{
	3651	//index = kr2r3+j;
	3652	index ++;
	3653	pred2D = P0[j-1] + P1[j] - P1[j-1];
	3654	curData = cur_data_pos[j];
	3655	diff = curData - pred2D;
	3656	itvNum = fabs(diff)/realPrecision + 1;
	3657	if (itvNum < exe_params->intvCapacity)
	3658	{
	3659	if (diff < 0) itvNum = -itvNum;
	3660	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	3661	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	3662	//ganrantee comporession error against the case of machine-epsilon
	3663	if(fabs(curData-P0[j])>realPrecision)
	3664	{
	3665	type[index] = 0;
	3666	P0[j] = curData;
	3667	unpredictable_data[unpredictable_count ++] = curData;
	3668	}
	3669	}
	3670	else
	3671	{
	3672	type[index] = 0;
	3673	P0[j] = curData;
	3674	unpredictable_data[unpredictable_count ++] = curData;
	3675	}
	3676	}
	3677
	3678	cur_data_pos += dim1_offset;
	3679	/* Process Row-1 --> Row-r2-1 */
	3680	size_t index2D;
	3681	for (i = 1; i < r2; i++)
	3682	{
	3683	/* Process Row-i data 0 */
	3684	index = kr23 + ir3;
	3685	index2D = i*r3;
	3686	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
	3687	curData = *cur_data_pos;
	3688	diff = curData - pred2D;
	3689
	3690	itvNum = fabs(diff)/realPrecision + 1;
	3691
	3692	if (itvNum < exe_params->intvCapacity)
	3693	{
	3694	if (diff < 0) itvNum = -itvNum;
	3695	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	3696	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	3697	//ganrantee comporession error against the case of machine-epsilon
	3698	if(fabs(curData-P0[index2D])>realPrecision)
	3699	{
	3700	type[index] = 0;
	3701	P0[index2D] = curData;
	3702	unpredictable_data[unpredictable_count ++] = curData;
	3703	}
	3704	}
	3705	else
	3706	{
	3707	type[index] = 0;
	3708	P0[index2D] = curData;
	3709	unpredictable_data[unpredictable_count ++] = curData;
	3710	}
	3711
	3712	/* Process Row-i data 1 --> data r3-1 */
	3713	for (j = 1; j < r3; j++)
	3714	{
	3715	//index = kr2r3 + i*r3 + j;
	3716	index ++;
	3717	index2D = i*r3 + j;
	3718	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
	3719	curData = cur_data_pos[j];
	3720	diff = curData - pred3D;
	3721
	3722	itvNum = fabs(diff)/realPrecision + 1;
	3723
	3724	if (itvNum < exe_params->intvCapacity)
	3725	{
	3726	if (diff < 0) itvNum = -itvNum;
	3727	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	3728	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	3729
	3730	//ganrantee comporession error against the case of machine-epsilon
	3731	if(fabs(curData-P0[index2D])>realPrecision)
	3732	{
	3733	type[index] = 0;
	3734	P0[index2D] = curData;
	3735	unpredictable_data[unpredictable_count ++] = curData;
	3736	}
	3737	}
	3738	else
	3739	{
	3740	type[index] = 0;
	3741	P0[index2D] = curData;
	3742	unpredictable_data[unpredictable_count ++] = curData;
	3743	}
	3744	}
	3745	cur_data_pos += dim1_offset;
	3746	}
	3747	cur_data_pos += dim0_offset - r2 * dim1_offset;
	3748	float *Pt;
	3749	Pt = P1;
	3750	P1 = P0;
	3751	P0 = Pt;
	3752	}
	3753
	3754	return unpredictable_count;
	3755	}
	3756
	3757	unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r2, double realPrecision)
	3758	{
	3759	size_t i;
	3760	size_t radiusIndex;
	3761	float pred_value = 0, pred_err;
	3762	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3763	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
[9ee2ce3]	3764	size_t totalSampleSize = 0;
[2c47b73]	3765
	3766	size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
	3767	size_t offset_count_2;
	3768	float * data_pos = oriData + r2 + offset_count;
	3769	size_t n1_count = 1; // count i sum
	3770	size_t len = r1 * r2;
	3771	while(data_pos - oriData < len){
	3772	totalSampleSize++;
	3773	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
	3774	pred_err = fabs(pred_value - *data_pos);
	3775	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3776	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3777	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3778	intervals[radiusIndex]++;
	3779
	3780	offset_count += confparams_cpr->sampleDistance;
	3781	if(offset_count >= r2){
	3782	n1_count ++;
	3783	offset_count_2 = n1_count % confparams_cpr->sampleDistance;
	3784	data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
	3785	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
	3786	if(offset_count == 0) offset_count ++;
	3787	}
	3788	else data_pos += confparams_cpr->sampleDistance;
	3789	}
	3790
	3791	//compute the appropriate number
	3792	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3793	size_t sum = 0;
	3794	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3795	{
	3796	sum += intervals[i];
	3797	if(sum>targetCount)
	3798	break;
	3799	}
	3800	if(i>=confparams_cpr->maxRangeRadius)
	3801	i = confparams_cpr->maxRangeRadius-1;
	3802	unsigned int accIntervals = 2*(i+1);
	3803	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3804
	3805	if(powerOf2<32)
	3806	powerOf2 = 32;
	3807
	3808	free(intervals);
	3809	return powerOf2;
	3810	}
	3811
	3812	unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision)
	3813	{
	3814	size_t i = 0, radiusIndex;
	3815	float pred_value = 0, pred_err;
	3816	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3817	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	3818	size_t totalSampleSize = 0;//dataLength/confparams_cpr->sampleDistance;
	3819
	3820	float * data_pos = oriData + 2;
	3821	while(data_pos - oriData < dataLength){
	3822	totalSampleSize++;
	3823	pred_value = data_pos[-1];
	3824	pred_err = fabs(pred_value - *data_pos);
	3825	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3826	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3827	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3828	intervals[radiusIndex]++;
	3829
	3830	data_pos += confparams_cpr->sampleDistance;
	3831	}
	3832	//compute the appropriate number
	3833	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3834	size_t sum = 0;
	3835	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3836	{
	3837	sum += intervals[i];
	3838	if(sum>targetCount)
	3839	break;
	3840	}
	3841	if(i>=confparams_cpr->maxRangeRadius)
	3842	i = confparams_cpr->maxRangeRadius-1;
	3843
	3844	unsigned int accIntervals = 2*(i+1);
	3845	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3846
	3847	if(powerOf2<32)
	3848	powerOf2 = 32;
	3849
	3850	free(intervals);
	3851	return powerOf2;
	3852	}
	3853
	3854	size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t block_dim_0, double realPrecision, int * type, float * unpredictable_data){
	3855
	3856	mean[0] = block_ori_data[0];
	3857	unsigned short unpredictable_count = 0;
	3858
	3859	float curData;
	3860	double itvNum;
	3861	double diff;
	3862	float last_over_thres = mean[0];
	3863	float pred1D;
	3864	size_t type_index = 0;
	3865	float * data_pos = block_ori_data;
	3866	for(size_t i=0; i<block_dim_0; i++){
	3867	curData = *data_pos;
	3868
	3869	pred1D = last_over_thres;
	3870	diff = curData - pred1D;
	3871	itvNum = fabs(diff)/realPrecision + 1;
	3872	if (itvNum < exe_params->intvCapacity){
	3873	if (diff < 0) itvNum = -itvNum;
	3874	type[type_index] = (int) (itvNum/2) + exe_params->intvRadius;
	3875	last_over_thres = pred1D + 2 * (type[type_index] - exe_params->intvRadius) * realPrecision;
	3876	if(fabs(curData-last_over_thres)>realPrecision){
	3877	type[type_index] = 0;
	3878	last_over_thres = curData;
	3879	unpredictable_data[unpredictable_count ++] = curData;
	3880	}
	3881
	3882	}
	3883	else{
	3884	type[type_index] = 0;
	3885	unpredictable_data[unpredictable_count ++] = curData;
	3886	last_over_thres = curData;
	3887	}
	3888	type_index ++;
	3889	data_pos ++;
	3890	}
	3891	return unpredictable_count;
	3892
	3893	}
	3894
	3895	size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, size_t dim_0, size_t dim_1, size_t block_dim_0, size_t block_dim_1, double realPrecision, float * P0, float * P1, int * type, float * unpredictable_data){
	3896
	3897	size_t dim0_offset = dim_1;
	3898	mean[0] = block_ori_data[0];
	3899
	3900	size_t unpredictable_count = 0;
	3901	size_t r1, r2;
	3902	r1 = block_dim_0;
	3903	r2 = block_dim_1;
	3904
	3905	float * cur_data_pos = block_ori_data;
	3906	float curData;
	3907	float pred1D, pred2D;
	3908	double itvNum;
	3909	double diff;
	3910	size_t i, j;
	3911	/* Process Row-0 data 0*/
	3912	curData = *cur_data_pos;
	3913	pred1D = mean[0];
	3914	diff = curData - pred1D;
	3915	itvNum = fabs(diff)/realPrecision + 1;
	3916	if (itvNum < exe_params->intvCapacity){
	3917	if (diff < 0) itvNum = -itvNum;
	3918	type[0] = (int) (itvNum/2) + exe_params->intvRadius;
	3919	P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
	3920	//ganrantee comporession error against the case of machine-epsilon
	3921	if(fabs(curData-P1[0])>realPrecision){
	3922	type[0] = 0;
	3923	P1[0] = curData;
	3924	unpredictable_data[unpredictable_count ++] = curData;
	3925	}
	3926	}
	3927	else{
	3928	type[0] = 0;
	3929	P1[0] = curData;
	3930	unpredictable_data[unpredictable_count ++] = curData;
	3931	}
	3932
	3933	/* Process Row-0 data 1*/
	3934	curData = cur_data_pos[1];
	3935	pred1D = P1[0];
	3936	diff = curData - pred1D;
	3937	itvNum = fabs(diff)/realPrecision + 1;
	3938	if (itvNum < exe_params->intvCapacity){
	3939	if (diff < 0) itvNum = -itvNum;
	3940	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	3941	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	3942	//ganrantee comporession error against the case of machine-epsilon
	3943	if(fabs(curData-P1[1])>realPrecision){
	3944	type[1] = 0;
	3945	P1[1] = curData;
	3946	unpredictable_data[unpredictable_count ++] = curData;
	3947	}
	3948	}
	3949	else{
	3950	type[1] = 0;
	3951	P1[1] = curData;
	3952	unpredictable_data[unpredictable_count ++] = curData;
	3953	}
	3954
	3955	/* Process Row-0 data 2 --> data r2-1 */
	3956	for (j = 2; j < r2; j++)
	3957	{
	3958	curData = cur_data_pos[j];
	3959	pred1D = 2*P1[j-1] - P1[j-2];
	3960	diff = curData - pred1D;
	3961	itvNum = fabs(diff)/realPrecision + 1;
	3962	if (itvNum < exe_params->intvCapacity){
	3963	if (diff < 0) itvNum = -itvNum;
	3964	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	3965	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	3966	//ganrantee comporession error against the case of machine-epsilon
	3967	if(fabs(curData-P1[j])>realPrecision){
	3968	type[j] = 0;
	3969	P1[j] = curData;
	3970	unpredictable_data[unpredictable_count ++] = curData;
	3971	}
	3972	}
	3973	else{
	3974	type[j] = 0;
	3975	P1[j] = curData;
	3976	unpredictable_data[unpredictable_count ++] = curData;
	3977	}
	3978	}
	3979	cur_data_pos += dim0_offset;
	3980	/* Process Row-1 --> Row-r1-1 */
	3981	size_t index;
	3982	for (i = 1; i < r1; i++)
	3983	{
	3984	/* Process row-i data 0 */
	3985	index = i*r2;
	3986	curData = *cur_data_pos;
	3987	pred1D = P1[0];
	3988	diff = curData - pred1D;
	3989	itvNum = fabs(diff)/realPrecision + 1;
	3990	if (itvNum < exe_params->intvCapacity){
	3991	if (diff < 0) itvNum = -itvNum;
	3992	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	3993	P0[0] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	3994	//ganrantee comporession error against the case of machine-epsilon
	3995	if(fabs(curData-P0[0])>realPrecision){
	3996	type[index] = 0;
	3997	P0[0] = curData;
	3998	unpredictable_data[unpredictable_count ++] = curData;
	3999	}
	4000	}
	4001	else{
	4002	type[index] = 0;
	4003	P0[0] = curData;
	4004	unpredictable_data[unpredictable_count ++] = curData;
	4005	}
	4006
	4007	/* Process row-i data 1 --> r2-1*/
	4008	for (j = 1; j < r2; j++)
	4009	{
	4010	index = i*r2+j;
	4011	curData = cur_data_pos[j];
	4012	pred2D = P0[j-1] + P1[j] - P1[j-1];
	4013	diff = curData - pred2D;
	4014	itvNum = fabs(diff)/realPrecision + 1;
	4015	if (itvNum < exe_params->intvCapacity)
	4016	{
	4017	if (diff < 0) itvNum = -itvNum;
	4018	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	4019	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	4020
	4021	//ganrantee comporession error against the case of machine-epsilon
	4022	if(fabs(curData-P0[j])>realPrecision)
	4023	{
	4024	type[index] = 0;
	4025	P0[j] = curData;
	4026	unpredictable_data[unpredictable_count ++] = curData;
	4027	}
	4028	}
	4029	else
	4030	{
	4031	type[index] = 0;
	4032	P0[j] = curData;
	4033	unpredictable_data[unpredictable_count ++] = curData;
	4034	}
	4035	}
	4036	cur_data_pos += dim0_offset;
	4037
	4038	float *Pt;
	4039	Pt = P1;
	4040	P1 = P0;
	4041	P0 = Pt;
	4042	}
	4043	return unpredictable_count;
	4044	}
	4045
[9ee2ce3]	4046	/* The above code is for sz 1.4.13; the following code is for sz 2.0 */
	4047
	4048	unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float oriData, size_t r1, size_t r2, double realPrecision, float dense_pos, float * max_freq, float * mean_freq)
	4049	{
	4050	float mean = 0.0;
	4051	size_t len = r1 * r2;
	4052	size_t mean_distance = (int) (sqrt(len));
	4053
	4054	float * data_pos = oriData;
	4055	size_t mean_count = 0;
	4056	while(data_pos - oriData < len){
	4057	mean += *data_pos;
	4058	mean_count ++;
	4059	data_pos += mean_distance;
	4060	}
	4061	if(mean_count > 0) mean /= mean_count;
	4062	size_t range = 8192;
	4063	size_t radius = 4096;
	4064	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
	4065	memset(freq_intervals, 0, range*sizeof(size_t));
	4066
	4067	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
	4068	int sampleDistance = confparams_cpr->sampleDistance;
	4069	float predThreshold = confparams_cpr->predThreshold;
	4070
	4071	size_t i;
	4072	size_t radiusIndex;
	4073	float pred_value = 0, pred_err;
	4074	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
	4075	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
	4076
	4077	float mean_diff;
	4078	ptrdiff_t freq_index;
	4079	size_t freq_count = 0;
	4080	size_t n1_count = 1;
	4081	size_t offset_count = sampleDistance - 1;
	4082	size_t offset_count_2 = 0;
	4083	size_t sample_count = 0;
	4084	data_pos = oriData + r2 + offset_count;
	4085	while(data_pos - oriData < len){
	4086	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
	4087	pred_err = fabs(pred_value - *data_pos);
	4088	if(pred_err < realPrecision) freq_count ++;
	4089	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	4090	if(radiusIndex>=maxRangeRadius)
	4091	radiusIndex = maxRangeRadius - 1;
	4092	intervals[radiusIndex]++;
	4093
	4094	mean_diff = *data_pos - mean;
	4095	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
	4096	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
	4097	if(freq_index <= 0){
	4098	freq_intervals[0] ++;
	4099	}
	4100	else if(freq_index >= range){
	4101	freq_intervals[range - 1] ++;
	4102	}
	4103	else{
	4104	freq_intervals[freq_index] ++;
	4105	}
	4106	offset_count += sampleDistance;
	4107	if(offset_count >= r2){
	4108	n1_count ++;
	4109	offset_count_2 = n1_count % sampleDistance;
	4110	data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
	4111	offset_count = (sampleDistance - offset_count_2);
	4112	if(offset_count == 0) offset_count ++;
	4113	}
	4114	else data_pos += sampleDistance;
	4115	sample_count ++;
	4116	}
	4117	max_freq = freq_count 1.0/ sample_count;
	4118
	4119	//compute the appropriate number
	4120	size_t targetCount = sample_count*predThreshold;
	4121	size_t sum = 0;
	4122	for(i=0;i<maxRangeRadius;i++)
	4123	{
	4124	sum += intervals[i];
	4125	if(sum>targetCount)
	4126	break;
	4127	}
	4128	if(i>=maxRangeRadius)
	4129	i = maxRangeRadius-1;
	4130	unsigned int accIntervals = 2*(i+1);
	4131	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	4132
	4133	if(powerOf2<32)
	4134	powerOf2 = 32;
	4135
	4136	// collect frequency
	4137	size_t max_sum = 0;
	4138	size_t max_index = 0;
	4139	size_t tmp_sum;
	4140	size_t * freq_pos = freq_intervals + 1;
	4141	for(size_t i=1; i<range-2; i++){
	4142	tmp_sum = freq_pos[0] + freq_pos[1];
	4143	if(tmp_sum > max_sum){
	4144	max_sum = tmp_sum;
	4145	max_index = i;
	4146	}
	4147	freq_pos ++;
	4148	}
	4149	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
	4150	mean_freq = max_sum 1.0 / sample_count;
	4151
	4152	free(freq_intervals);
	4153	free(intervals);
	4154	return powerOf2;
	4155	}
	4156
	4157	// 2D: modified for higher performance
	/* Smaller of two values. Each argument may be evaluated twice, so pass
	 * side-effect-free expressions only. */
	#define MIN(a, b) ((a) < (b) ? (a) : (b))
	4159	unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(float oriData, size_t r1, size_t r2, double realPrecision, size_t comp_size){
	4160
	4161	unsigned int quantization_intervals;
	4162	float sz_sample_correct_freq = -1;//0.5; //-1
	4163	float dense_pos;
	4164	float mean_flush_freq;
	4165	unsigned char use_mean = 0;
	4166
	4167	if(exe_params->optQuantMode==1)
	4168	{
	4169	quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
	4170	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
	4171	updateQuantizationInfo(quantization_intervals);
	4172	}
	4173	else{
	4174	quantization_intervals = exe_params->intvCapacity;
	4175	}
	4176
	4177	// calculate block dims
	4178	size_t num_x, num_y;
	4179	size_t block_size = 16;
	4180
	4181	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
	4182	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
	4183
	4184	size_t split_index_x, split_index_y;
	4185	size_t early_blockcount_x, early_blockcount_y;
	4186	size_t late_blockcount_x, late_blockcount_y;
	4187	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
	4188	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
	4189
	4190	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
	4191	size_t num_blocks = num_x * num_y;
	4192	size_t num_elements = r1 * r2;
	4193
	4194	size_t dim0_offset = r2;
	4195
	4196	int * result_type = (int ) malloc(num_elements sizeof(int));
	4197	size_t unpred_data_max_size = max_num_block_elements;
	4198	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
	4199	size_t total_unpred = 0;
	4200	size_t unpredictable_count;
	4201	float * data_pos = oriData;
	4202	int * type = result_type;
	4203	size_t offset_x, offset_y;
	4204	size_t current_blockcount_x, current_blockcount_y;
	4205
	4206	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
	4207	float * reg_params_pos = reg_params;
	4208	// move regression part out
	4209	size_t params_offset_b = num_blocks;
	4210	size_t params_offset_c = 2*num_blocks;
	4211	for(size_t i=0; i<num_x; i++){
	4212	for(size_t j=0; j<num_y; j++){
	4213	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4214	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4215	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4216	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4217
	4218	data_pos = oriData + offset_x * dim0_offset + offset_y;
	4219
	4220	{
	4221	float * cur_data_pos = data_pos;
	4222	float fx = 0.0;
	4223	float fy = 0.0;
	4224	float f = 0;
	4225	double sum_x;
	4226	float curData;
	4227	for(size_t i=0; i<current_blockcount_x; i++){
	4228	sum_x = 0;
	4229	for(size_t j=0; j<current_blockcount_y; j++){
	4230	curData = *cur_data_pos;
	4231	sum_x += curData;
	4232	fy += curData * j;
	4233	cur_data_pos ++;
	4234	}
	4235	fx += sum_x * i;
	4236	f += sum_x;
	4237	cur_data_pos += dim0_offset - current_blockcount_y;
	4238	}
	4239	float coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
	4240	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
	4241	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
	4242	reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
	4243	}
	4244
	4245	reg_params_pos ++;
	4246	}
	4247	}
	4248
	4249	//Compress coefficient arrays
	4250	double precision_a, precision_b, precision_c;
	4251	float rel_param_err = 0.15/3;
	4252	precision_a = rel_param_err * realPrecision / late_blockcount_x;
	4253	precision_b = rel_param_err * realPrecision / late_blockcount_y;
	4254	precision_c = rel_param_err * realPrecision;
	4255
	4256	float mean = 0;
	4257	use_mean = 0;
	4258	if(use_mean){
	4259	// compute mean
	4260	double sum = 0.0;
	4261	size_t mean_count = 0;
	4262	for(size_t i=0; i<num_elements; i++){
	4263	if(fabs(oriData[i] - dense_pos) < realPrecision){
	4264	sum += oriData[i];
	4265	mean_count ++;
	4266	}
	4267	}
	4268	if(mean_count > 0) mean = sum / mean_count;
	4269	}
	4270
	4271
	4272	double tmp_realPrecision = realPrecision;
	4273
	4274	// use two prediction buffers for higher performance
	4275	float * unpredictable_data = result_unpredictable_data;
	4276	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
	4277	memset(indicator, 0, num_blocks * sizeof(unsigned char));
	4278	size_t reg_count = 0;
	4279	size_t strip_dim_0 = early_blockcount_x + 1;
	4280	size_t strip_dim_1 = r2 + 1;
	4281	size_t strip_dim0_offset = strip_dim_1;
	4282	unsigned char * indicator_pos = indicator;
	4283	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
	4284	float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
	4285	memset(prediction_buffer_1, 0, prediction_buffer_size);
	4286	float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
	4287	memset(prediction_buffer_2, 0, prediction_buffer_size);
	4288	float * cur_pb_buf = prediction_buffer_1;
	4289	float * next_pb_buf = prediction_buffer_2;
	4290	float * cur_pb_buf_pos;
	4291	float * next_pb_buf_pos;
	4292	int intvCapacity = exe_params->intvCapacity;
	4293	int intvRadius = exe_params->intvRadius;
	4294	int use_reg = 0;
	4295
	4296	reg_params_pos = reg_params;
	4297	// compress the regression coefficients on the fly
	4298	float last_coeffcients[3] = {0.0};
	4299	int coeff_intvCapacity_sz = 65536;
	4300	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
	4301	int * coeff_type[3];
	4302	int * coeff_result_type = (int ) malloc(num_blocks3*sizeof(int));
	4303	float * coeff_unpred_data[3];
	4304	float * coeff_unpredictable_data = (float ) malloc(num_blocks3*sizeof(float));
	4305	double precision[3];
	4306	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
	4307	for(int i=0; i<3; i++){
	4308	coeff_type[i] = coeff_result_type + i * num_blocks;
	4309	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
	4310	}
	4311	int coeff_index = 0;
	4312	unsigned int coeff_unpredictable_count[3] = {0};
	4313	if(use_mean){
	4314	type = result_type;
	4315	int intvCapacity_sz = intvCapacity - 2;
	4316	for(size_t i=0; i<num_x; i++){
	4317	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4318	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4319	data_pos = oriData + offset_x * dim0_offset;
	4320
	4321	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
	4322	next_pb_buf_pos = next_pb_buf + 1;
	4323	float * pb_pos = cur_pb_buf_pos;
	4324	float * next_pb_pos = next_pb_buf_pos;
	4325
	4326	for(size_t j=0; j<num_y; j++){
	4327	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4328	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4329
	4330	/sampling: decide which predictor to use (regression or lorenzo)/
	4331	{
	4332	float * cur_data_pos;
	4333	float curData;
	4334	float pred_reg, pred_sz;
	4335	float err_sz = 0.0, err_reg = 0.0;
	4336	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
	4337	// [1, 9] [3, 7] [7, 3] [9, 1]
	4338	int count = 0;
	4339	for(int i=1; i<current_blockcount_x; i+=2){
	4340	cur_data_pos = data_pos + i * dim0_offset + i;
	4341	curData = *cur_data_pos;
	4342	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4343	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
	4344
	4345	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
	4346
	4347	err_reg += fabs(pred_reg - curData);
	4348
	4349	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
	4350	curData = *cur_data_pos;
	4351	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4352	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
	4353	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
	4354
	4355	err_reg += fabs(pred_reg - curData);
	4356
	4357	count += 2;
	4358	}
	4359
	4360	use_reg = (err_reg < err_sz);
	4361	}
	4362	if(use_reg)
	4363	{
	4364	{
	4365	/predict coefficients in current block via previous reg_block/
	4366	float cur_coeff;
	4367	double diff, itvNum;
	4368	for(int e=0; e<3; e++){
	4369	cur_coeff = reg_params_pos[e*num_blocks];
	4370	diff = cur_coeff - last_coeffcients[e];
	4371	itvNum = fabs(diff)/precision[e] + 1;
	4372	if (itvNum < coeff_intvCapacity_sz){
	4373	if (diff < 0) itvNum = -itvNum;
	4374	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	4375	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	4376	//ganrantee comporession error against the case of machine-epsilon
	4377	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	4378	coeff_type[e][coeff_index] = 0;
	4379	last_coeffcients[e] = cur_coeff;
	4380	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4381	}
	4382	}
	4383	else{
	4384	coeff_type[e][coeff_index] = 0;
	4385	last_coeffcients[e] = cur_coeff;
	4386	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4387	}
	4388	}
	4389	coeff_index ++;
	4390	}
	4391	float curData;
	4392	float pred;
	4393	double itvNum;
	4394	double diff;
	4395	size_t index = 0;
	4396	size_t block_unpredictable_count = 0;
	4397	float * cur_data_pos = data_pos;
	4398	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4399	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4400	curData = *cur_data_pos;
	4401	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4402	diff = curData - pred;
	4403	itvNum = fabs(diff)/realPrecision + 1;
	4404	if (itvNum < intvCapacity){
	4405	if (diff < 0) itvNum = -itvNum;
	4406	type[index] = (int) (itvNum/2) + intvRadius;
	4407	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4408	//ganrantee comporession error against the case of machine-epsilon
	4409	if(fabs(curData - pred)>realPrecision){
	4410	type[index] = 0;
	4411	pred = curData;
	4412	unpredictable_data[block_unpredictable_count ++] = curData;
	4413	}
	4414	}
	4415	else{
	4416	type[index] = 0;
	4417	pred = curData;
	4418	unpredictable_data[block_unpredictable_count ++] = curData;
	4419	}
	4420	index ++;
	4421	cur_data_pos ++;
	4422	}
	4423	/dealing with the last jj (boundary)/
	4424	{
	4425	size_t jj = current_blockcount_y - 1;
	4426	curData = *cur_data_pos;
	4427	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4428	diff = curData - pred;
	4429	itvNum = fabs(diff)/realPrecision + 1;
	4430	if (itvNum < intvCapacity){
	4431	if (diff < 0) itvNum = -itvNum;
	4432	type[index] = (int) (itvNum/2) + intvRadius;
	4433	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4434	//ganrantee comporession error against the case of machine-epsilon
	4435	if(fabs(curData - pred)>realPrecision){
	4436	type[index] = 0;
	4437	pred = curData;
	4438	unpredictable_data[block_unpredictable_count ++] = curData;
	4439	}
	4440	}
	4441	else{
	4442	type[index] = 0;
	4443	pred = curData;
	4444	unpredictable_data[block_unpredictable_count ++] = curData;
	4445	}
	4446
	4447	// assign value to block surfaces
	4448	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4449	index ++;
	4450	cur_data_pos ++;
	4451	}
	4452	cur_data_pos += dim0_offset - current_blockcount_y;
	4453	}
	4454	/dealing with the last ii (boundary)/
	4455	{
	4456	size_t ii = current_blockcount_x - 1;
	4457	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4458	curData = *cur_data_pos;
	4459	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4460	diff = curData - pred;
	4461	itvNum = fabs(diff)/realPrecision + 1;
	4462	if (itvNum < intvCapacity){
	4463	if (diff < 0) itvNum = -itvNum;
	4464	type[index] = (int) (itvNum/2) + intvRadius;
	4465	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4466	//ganrantee comporession error against the case of machine-epsilon
	4467	if(fabs(curData - pred)>realPrecision){
	4468	type[index] = 0;
	4469	pred = curData;
	4470	unpredictable_data[block_unpredictable_count ++] = curData;
	4471	}
	4472	}
	4473	else{
	4474	type[index] = 0;
	4475	pred = curData;
	4476	unpredictable_data[block_unpredictable_count ++] = curData;
	4477	}
	4478	// assign value to next prediction buffer
	4479	next_pb_pos[jj] = pred;
	4480	index ++;
	4481	cur_data_pos ++;
	4482	}
	4483	/dealing with the last jj (boundary)/
	4484	{
	4485	size_t jj = current_blockcount_y - 1;
	4486	curData = *cur_data_pos;
	4487	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4488	diff = curData - pred;
	4489	itvNum = fabs(diff)/realPrecision + 1;
	4490	if (itvNum < intvCapacity){
	4491	if (diff < 0) itvNum = -itvNum;
	4492	type[index] = (int) (itvNum/2) + intvRadius;
	4493	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4494	//ganrantee comporession error against the case of machine-epsilon
	4495	if(fabs(curData - pred)>realPrecision){
	4496	type[index] = 0;
	4497	pred = curData;
	4498	unpredictable_data[block_unpredictable_count ++] = curData;
	4499	}
	4500	}
	4501	else{
	4502	type[index] = 0;
	4503	pred = curData;
	4504	unpredictable_data[block_unpredictable_count ++] = curData;
	4505	}
	4506
	4507	// assign value to block surfaces
	4508	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4509	// assign value to next prediction buffer
	4510	next_pb_pos[jj] = pred;
	4511
	4512	index ++;
	4513	cur_data_pos ++;
	4514	}
	4515	} // end ii == -1
	4516	unpredictable_count = block_unpredictable_count;
	4517	total_unpred += unpredictable_count;
	4518	unpredictable_data += unpredictable_count;
	4519	reg_count ++;
	4520	}// end use_reg
	4521	else{
	4522	// use SZ
	4523	// SZ predication
	4524	unpredictable_count = 0;
	4525	float * cur_pb_pos = pb_pos;
	4526	float * cur_data_pos = data_pos;
	4527	float curData;
	4528	float pred2D;
	4529	double itvNum, diff;
	4530	size_t index = 0;
	4531	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4532	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4533	curData = *cur_data_pos;
	4534	if(fabs(curData - mean) <= realPrecision){
	4535	// adjust type[index] to intvRadius for coherence with freq in reg
	4536	type[index] = intvRadius;
	4537	*cur_pb_pos = mean;
	4538	}
	4539	else
	4540	{
	4541	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4542	diff = curData - pred2D;
	4543	itvNum = fabs(diff)/realPrecision + 1;
	4544	if (itvNum < intvCapacity_sz){
	4545	if (diff < 0) itvNum = -itvNum;
	4546	type[index] = (int) (itvNum/2) + intvRadius;
	4547	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4548	if(type[index] <= intvRadius) type[index] -= 1;
	4549	//ganrantee comporession error against the case of machine-epsilon
	4550	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4551	type[index] = 0;
	4552	*cur_pb_pos = curData;
	4553	unpredictable_data[unpredictable_count ++] = curData;
	4554	}
	4555	}
	4556	else{
	4557	type[index] = 0;
	4558	*cur_pb_pos = curData;
	4559	unpredictable_data[unpredictable_count ++] = curData;
	4560	}
	4561	}
	4562	index ++;
	4563	cur_pb_pos ++;
	4564	cur_data_pos ++;
	4565	}
	4566	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
	4567	cur_data_pos += dim0_offset - current_blockcount_y;
	4568	}
	4569	/dealing with the last ii (boundary)/
	4570	{
	4571	// ii == current_blockcount_x - 1
	4572	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4573	curData = *cur_data_pos;
	4574	if(fabs(curData - mean) <= realPrecision){
	4575	// adjust type[index] to intvRadius for coherence with freq in reg
	4576	type[index] = intvRadius;
	4577	*cur_pb_pos = mean;
	4578	}
	4579	else
	4580	{
	4581	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4582	diff = curData - pred2D;
	4583	itvNum = fabs(diff)/realPrecision + 1;
	4584	if (itvNum < intvCapacity_sz){
	4585	if (diff < 0) itvNum = -itvNum;
	4586	type[index] = (int) (itvNum/2) + intvRadius;
	4587	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4588	if(type[index] <= intvRadius) type[index] -= 1;
	4589	//ganrantee comporession error against the case of machine-epsilon
	4590	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4591	type[index] = 0;
	4592	*cur_pb_pos = curData;
	4593	unpredictable_data[unpredictable_count ++] = curData;
	4594	}
	4595	}
	4596	else{
	4597	type[index] = 0;
	4598	*cur_pb_pos = curData;
	4599	unpredictable_data[unpredictable_count ++] = curData;
	4600	}
	4601	}
	4602	next_pb_pos[jj] = *cur_pb_pos;
	4603	index ++;
	4604	cur_pb_pos ++;
	4605	cur_data_pos ++;
	4606	}
	4607	}
	4608	total_unpred += unpredictable_count;
	4609	unpredictable_data += unpredictable_count;
	4610	// change indicator
	4611	indicator_pos[j] = 1;
	4612	}// end SZ
	4613	reg_params_pos ++;
	4614	data_pos += current_blockcount_y;
	4615	pb_pos += current_blockcount_y;
	4616	next_pb_pos += current_blockcount_y;
	4617	type += current_blockcount_x * current_blockcount_y;
	4618	}// end j
	4619	indicator_pos += num_y;
	4620	float * tmp;
	4621	tmp = cur_pb_buf;
	4622	cur_pb_buf = next_pb_buf;
	4623	next_pb_buf = tmp;
	4624	}// end i
	4625	}// end use mean
	4626	else{
	4627	type = result_type;
	4628	int intvCapacity_sz = intvCapacity - 2;
	4629	for(size_t i=0; i<num_x; i++){
	4630	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4631	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4632	data_pos = oriData + offset_x * dim0_offset;
	4633
	4634	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
	4635	next_pb_buf_pos = next_pb_buf + 1;
	4636	float * pb_pos = cur_pb_buf_pos;
	4637	float * next_pb_pos = next_pb_buf_pos;
	4638
	4639	for(size_t j=0; j<num_y; j++){
	4640	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4641	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4642	/sampling/
	4643	{
	4644	// sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
	4645	float * cur_data_pos;
	4646	float curData;
	4647	float pred_reg, pred_sz;
	4648	float err_sz = 0.0, err_reg = 0.0;
	4649	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
	4650	// [1, 9] [3, 7] [7, 3] [9, 1]
	4651	int count = 0;
	4652	for(int i=1; i<current_blockcount_x; i+=2){
	4653	cur_data_pos = data_pos + i * dim0_offset + i;
	4654	curData = *cur_data_pos;
	4655	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4656	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
	4657	err_sz += fabs(pred_sz - curData);
	4658	err_reg += fabs(pred_reg - curData);
	4659
	4660	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
	4661	curData = *cur_data_pos;
	4662	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4663	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
	4664	err_sz += fabs(pred_sz - curData);
	4665	err_reg += fabs(pred_reg - curData);
	4666
	4667	count += 2;
	4668	}
	4669	err_sz += realPrecision * count * 0.81;
	4670	use_reg = (err_reg < err_sz);
	4671
	4672	}
	4673	if(use_reg)
	4674	{
	4675	{
	4676	/predict coefficients in current block via previous reg_block/
	4677	float cur_coeff;
	4678	double diff, itvNum;
	4679	for(int e=0; e<3; e++){
	4680	cur_coeff = reg_params_pos[e*num_blocks];
	4681	diff = cur_coeff - last_coeffcients[e];
	4682	itvNum = fabs(diff)/precision[e] + 1;
	4683	if (itvNum < coeff_intvCapacity_sz){
	4684	if (diff < 0) itvNum = -itvNum;
	4685	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	4686	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	4687	//ganrantee comporession error against the case of machine-epsilon
	4688	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	4689	coeff_type[e][coeff_index] = 0;
	4690	last_coeffcients[e] = cur_coeff;
	4691	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4692	}
	4693	}
	4694	else{
	4695	coeff_type[e][coeff_index] = 0;
	4696	last_coeffcients[e] = cur_coeff;
	4697	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4698	}
	4699	}
	4700	coeff_index ++;
	4701	}
	4702	float curData;
	4703	float pred;
	4704	double itvNum;
	4705	double diff;
	4706	size_t index = 0;
	4707	size_t block_unpredictable_count = 0;
	4708	float * cur_data_pos = data_pos;
	4709	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4710	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4711	curData = *cur_data_pos;
	4712	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4713	diff = curData - pred;
	4714	itvNum = fabs(diff)/realPrecision + 1;
	4715	if (itvNum < intvCapacity){
	4716	if (diff < 0) itvNum = -itvNum;
	4717	type[index] = (int) (itvNum/2) + intvRadius;
	4718	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4719	//ganrantee comporession error against the case of machine-epsilon
	4720	if(fabs(curData - pred)>realPrecision){
	4721	type[index] = 0;
	4722	pred = curData;
	4723	unpredictable_data[block_unpredictable_count ++] = curData;
	4724	}
	4725	}
	4726	else{
	4727	type[index] = 0;
	4728	pred = curData;
	4729	unpredictable_data[block_unpredictable_count ++] = curData;
	4730	}
	4731	index ++;
	4732	cur_data_pos ++;
	4733	}
	4734	/dealing with the last jj (boundary)/
	4735	{
	4736	// jj == current_blockcount_y - 1
	4737	size_t jj = current_blockcount_y - 1;
	4738	curData = *cur_data_pos;
	4739	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4740	diff = curData - pred;
	4741	itvNum = fabs(diff)/realPrecision + 1;
	4742	if (itvNum < intvCapacity){
	4743	if (diff < 0) itvNum = -itvNum;
	4744	type[index] = (int) (itvNum/2) + intvRadius;
	4745	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4746	//ganrantee comporession error against the case of machine-epsilon
	4747	if(fabs(curData - pred)>realPrecision){
	4748	type[index] = 0;
	4749	pred = curData;
	4750	unpredictable_data[block_unpredictable_count ++] = curData;
	4751	}
	4752	}
	4753	else{
	4754	type[index] = 0;
	4755	pred = curData;
	4756	unpredictable_data[block_unpredictable_count ++] = curData;
	4757	}
	4758
	4759	// assign value to block surfaces
	4760	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4761	index ++;
	4762	cur_data_pos ++;
	4763	}
	4764	cur_data_pos += dim0_offset - current_blockcount_y;
	4765	}
	4766	/dealing with the last ii (boundary)/
	4767	{
	4768	size_t ii = current_blockcount_x - 1;
	4769	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4770	curData = *cur_data_pos;
	4771	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4772	diff = curData - pred;
	4773	itvNum = fabs(diff)/realPrecision + 1;
	4774	if (itvNum < intvCapacity){
	4775	if (diff < 0) itvNum = -itvNum;
	4776	type[index] = (int) (itvNum/2) + intvRadius;
	4777	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4778	//ganrantee comporession error against the case of machine-epsilon
	4779	if(fabs(curData - pred)>realPrecision){
	4780	type[index] = 0;
	4781	pred = curData;
	4782	unpredictable_data[block_unpredictable_count ++] = curData;
	4783	}
	4784	}
	4785	else{
	4786	type[index] = 0;
	4787	pred = curData;
	4788	unpredictable_data[block_unpredictable_count ++] = curData;
	4789	}
	4790	// assign value to next prediction buffer
	4791	next_pb_pos[jj] = pred;
	4792	index ++;
	4793	cur_data_pos ++;
	4794	}
	4795	/dealing with the last jj (boundary)/
	4796	{
	4797	// jj == current_blockcount_y - 1
	4798	size_t jj = current_blockcount_y - 1;
	4799	curData = *cur_data_pos;
	4800	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4801	diff = curData - pred;
	4802	itvNum = fabs(diff)/realPrecision + 1;
	4803	if (itvNum < intvCapacity){
	4804	if (diff < 0) itvNum = -itvNum;
	4805	type[index] = (int) (itvNum/2) + intvRadius;
	4806	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4807	//ganrantee comporession error against the case of machine-epsilon
	4808	if(fabs(curData - pred)>realPrecision){
	4809	type[index] = 0;
	4810	pred = curData;
	4811	unpredictable_data[block_unpredictable_count ++] = curData;
	4812	}
	4813	}
	4814	else{
	4815	type[index] = 0;
	4816	pred = curData;
	4817	unpredictable_data[block_unpredictable_count ++] = curData;
	4818	}
	4819
	4820	// assign value to block surfaces
	4821	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4822	// assign value to next prediction buffer
	4823	next_pb_pos[jj] = pred;
	4824
	4825	index ++;
	4826	cur_data_pos ++;
	4827	}
	4828	} // end ii == -1
	4829	unpredictable_count = block_unpredictable_count;
	4830	total_unpred += unpredictable_count;
	4831	unpredictable_data += unpredictable_count;
	4832	reg_count ++;
	4833	}// end use_reg
	4834	else{
	4835	// use SZ
	4836	// SZ predication
	4837	unpredictable_count = 0;
	4838	float * cur_pb_pos = pb_pos;
	4839	float * cur_data_pos = data_pos;
	4840	float curData;
	4841	float pred2D;
	4842	double itvNum, diff;
	4843	size_t index = 0;
	4844	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4845	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4846	curData = *cur_data_pos;
	4847
	4848	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4849	diff = curData - pred2D;
	4850	itvNum = fabs(diff)/realPrecision + 1;
	4851	if (itvNum < intvCapacity_sz){
	4852	if (diff < 0) itvNum = -itvNum;
	4853	type[index] = (int) (itvNum/2) + intvRadius;
	4854	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4855	//ganrantee comporession error against the case of machine-epsilon
	4856	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4857	type[index] = 0;
	4858	*cur_pb_pos = curData;
	4859	unpredictable_data[unpredictable_count ++] = curData;
	4860	}
	4861	}
	4862	else{
	4863	type[index] = 0;
	4864	*cur_pb_pos = curData;
	4865	unpredictable_data[unpredictable_count ++] = curData;
	4866	}
	4867
	4868	index ++;
	4869	cur_pb_pos ++;
	4870	cur_data_pos ++;
	4871	}
	4872	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
	4873	cur_data_pos += dim0_offset - current_blockcount_y;
	4874	}
	4875	/dealing with the last ii (boundary)/
	4876	{
	4877	// ii == current_blockcount_x - 1
	4878	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4879	curData = *cur_data_pos;
	4880
	4881	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4882	diff = curData - pred2D;
	4883	itvNum = fabs(diff)/realPrecision + 1;
	4884	if (itvNum < intvCapacity_sz){
	4885	if (diff < 0) itvNum = -itvNum;
	4886	type[index] = (int) (itvNum/2) + intvRadius;
	4887	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4888	//ganrantee comporession error against the case of machine-epsilon
	4889	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4890	type[index] = 0;
	4891	*cur_pb_pos = curData;
	4892	unpredictable_data[unpredictable_count ++] = curData;
	4893	}
	4894	}
	4895	else{
	4896	type[index] = 0;
	4897	*cur_pb_pos = curData;
	4898	unpredictable_data[unpredictable_count ++] = curData;
	4899	}
	4900	next_pb_pos[jj] = *cur_pb_pos;
	4901	index ++;
	4902	cur_pb_pos ++;
	4903	cur_data_pos ++;
	4904	}
	4905	}
	4906	total_unpred += unpredictable_count;
	4907	unpredictable_data += unpredictable_count;
	4908	// change indicator
	4909	indicator_pos[j] = 1;
	4910	}// end SZ
	4911	reg_params_pos ++;
	4912	data_pos += current_blockcount_y;
	4913	pb_pos += current_blockcount_y;
	4914	next_pb_pos += current_blockcount_y;
	4915	type += current_blockcount_x * current_blockcount_y;
	4916	}// end j
	4917	indicator_pos += num_y;
	4918	float * tmp;
	4919	tmp = cur_pb_buf;
	4920	cur_pb_buf = next_pb_buf;
	4921	next_pb_buf = tmp;
	4922	}// end i
	4923	}
	4924	free(prediction_buffer_1);
	4925	free(prediction_buffer_2);
	4926
	4927	int stateNum = 2*quantization_intervals;
	4928	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	4929
	4930	size_t nodeCount = 0;
	4931	size_t i = 0;
	4932	init(huffmanTree, result_type, num_elements);
	4933	for (i = 0; i < stateNum; i++)
	4934	if (huffmanTree->code[i]) nodeCount++;
	4935	nodeCount = nodeCount*2-1;
	4936
	4937	unsigned char *treeBytes;
	4938	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	4939
	4940	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
	4941	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
	4942	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
	4943	unsigned char * result_pos = result;
	4944	initRandomAccessBytes(result_pos);
	4945	result_pos += meta_data_offset;
	4946
	4947	sizeToBytes(result_pos, num_elements);
	4948	result_pos += exe_params->SZ_SIZE_TYPE;
	4949
	4950	intToBytes_bigEndian(result_pos, block_size);
	4951	result_pos += sizeof(int);
	4952	doubleToBytes(result_pos, realPrecision);
	4953	result_pos += sizeof(double);
	4954	intToBytes_bigEndian(result_pos, quantization_intervals);
	4955	result_pos += sizeof(int);
	4956	intToBytes_bigEndian(result_pos, treeByteSize);
	4957	result_pos += sizeof(int);
	4958	intToBytes_bigEndian(result_pos, nodeCount);
	4959	result_pos += sizeof(int);
	4960	memcpy(result_pos, treeBytes, treeByteSize);
	4961	result_pos += treeByteSize;
	4962	free(treeBytes);
	4963
	4964	memcpy(result_pos, &use_mean, sizeof(unsigned char));
	4965	result_pos += sizeof(unsigned char);
	4966	memcpy(result_pos, &mean, sizeof(float));
	4967	result_pos += sizeof(float);
	4968
	4969	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
	4970	result_pos += indicator_size;
	4971
	4972	//convert the lead/mid/resi to byte stream
	4973	if(reg_count>0){
	4974	for(int e=0; e<3; e++){
	4975	int stateNum = 2*coeff_intvCapacity_sz;
	4976	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	4977	size_t nodeCount = 0;
	4978	init(huffmanTree, coeff_type[e], reg_count);
	4979	size_t i = 0;
	4980	for (i = 0; i < huffmanTree->stateNum; i++)
	4981	if (huffmanTree->code[i]) nodeCount++;
	4982	nodeCount = nodeCount*2-1;
	4983	unsigned char *treeBytes;
	4984	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	4985	doubleToBytes(result_pos, precision[e]);
	4986	result_pos += sizeof(double);
	4987	intToBytes_bigEndian(result_pos, coeff_intvRadius);
	4988	result_pos += sizeof(int);
	4989	intToBytes_bigEndian(result_pos, treeByteSize);
	4990	result_pos += sizeof(int);
	4991	intToBytes_bigEndian(result_pos, nodeCount);
	4992	result_pos += sizeof(int);
	4993	memcpy(result_pos, treeBytes, treeByteSize);
	4994	result_pos += treeByteSize;
	4995	free(treeBytes);
	4996	size_t typeArray_size = 0;
	4997	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
	4998	sizeToBytes(result_pos, typeArray_size);
	4999	result_pos += sizeof(size_t) + typeArray_size;
	5000	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
	5001	result_pos += sizeof(int);
	5002	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
	5003	result_pos += coeff_unpredictable_count[e]*sizeof(float);
	5004	SZ_ReleaseHuffman(huffmanTree);
	5005	}
	5006	}
	5007	free(coeff_result_type);
	5008	free(coeff_unpredictable_data);
	5009
	5010	//record the number of unpredictable data and also store them
	5011	memcpy(result_pos, &total_unpred, sizeof(size_t));
	5012	result_pos += sizeof(size_t);
	5013	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
	5014	result_pos += total_unpred * sizeof(float);
	5015	size_t typeArray_size = 0;
	5016	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
	5017	result_pos += typeArray_size;
	5018
	5019	size_t totalEncodeSize = result_pos - result;
	5020	free(indicator);
	5021	free(result_unpredictable_data);
	5022	free(result_type);
	5023	free(reg_params);
	5024
	5025	SZ_ReleaseHuffman(huffmanTree);
	5026	*comp_size = totalEncodeSize;
	5027
	5028	return result;
	5029	}
	5030
	5031	unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float dense_pos, float * max_freq, float * mean_freq)
	5032	{
	5033	float mean = 0.0;
	5034	size_t len = r1 * r2 * r3;
	5035	size_t mean_distance = (int) (sqrt(len));
	5036	float * data_pos = oriData;
	5037	size_t offset_count = 0;
	5038	size_t offset_count_2 = 0;
	5039	size_t mean_count = 0;
	5040	while(data_pos - oriData < len){
	5041	mean += *data_pos;
	5042	mean_count ++;
	5043	data_pos += mean_distance;
	5044	offset_count += mean_distance;
	5045	offset_count_2 += mean_distance;
	5046	if(offset_count >= r3){
	5047	offset_count = 0;
	5048	data_pos -= 1;
	5049	}
	5050	if(offset_count_2 >= r2 * r3){
	5051	offset_count_2 = 0;
	5052	data_pos -= 1;
	5053	}
	5054	}
	5055	if(mean_count > 0) mean /= mean_count;
	5056	size_t range = 8192;
	5057	size_t radius = 4096;
	5058	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
	5059	memset(freq_intervals, 0, range*sizeof(size_t));
	5060
	5061	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
	5062	int sampleDistance = confparams_cpr->sampleDistance;
	5063	float predThreshold = confparams_cpr->predThreshold;
	5064
	5065	size_t i;
	5066	size_t radiusIndex;
	5067	size_t r23=r2*r3;
	5068	float pred_value = 0, pred_err;
	5069	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
	5070	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
	5071
	5072	float mean_diff;
	5073	ptrdiff_t freq_index;
	5074	size_t freq_count = 0;
	5075	size_t sample_count = 0;
	5076
	5077	offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
	5078	data_pos = oriData + r23 + r3 + offset_count;
	5079	size_t n1_count = 1, n2_count = 1; // count i,j sum
	5080
	5081	while(data_pos - oriData < len){
	5082
	5083	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
	5084	pred_err = fabs(pred_value - *data_pos);
	5085	if(pred_err < realPrecision) freq_count ++;
	5086	radiusIndex = (pred_err/realPrecision+1)/2;
	5087	if(radiusIndex>=maxRangeRadius)
	5088	{
	5089	radiusIndex = maxRangeRadius - 1;
	5090	}
	5091	intervals[radiusIndex]++;
	5092
	5093	mean_diff = *data_pos - mean;
	5094	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
	5095	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
	5096	if(freq_index <= 0){
	5097	freq_intervals[0] ++;
	5098	}
	5099	else if(freq_index >= range){
	5100	freq_intervals[range - 1] ++;
	5101	}
	5102	else{
	5103	freq_intervals[freq_index] ++;
	5104	}
	5105	offset_count += sampleDistance;
	5106	if(offset_count >= r3){
	5107	n2_count ++;
	5108	if(n2_count == r2){
	5109	n1_count ++;
	5110	n2_count = 1;
	5111	data_pos += r3;
	5112	}
	5113	offset_count_2 = (n1_count + n2_count) % sampleDistance;
	5114	data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
	5115	offset_count = (sampleDistance - offset_count_2);
	5116	if(offset_count == 0) offset_count ++;
	5117	}
	5118	else data_pos += sampleDistance;
	5119	sample_count ++;
	5120	}
	5121	max_freq = freq_count 1.0/ sample_count;
	5122
	5123	//compute the appropriate number
	5124	size_t targetCount = sample_count*predThreshold;
	5125	size_t sum = 0;
	5126	for(i=0;i<maxRangeRadius;i++)
	5127	{
	5128	sum += intervals[i];
	5129	if(sum>targetCount)
	5130	break;
	5131	}
	5132	if(i>=maxRangeRadius)
	5133	i = maxRangeRadius-1;
	5134	unsigned int accIntervals = 2*(i+1);
	5135	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	5136
	5137	if(powerOf2<32)
	5138	powerOf2 = 32;
	5139	// collect frequency
	5140	size_t max_sum = 0;
	5141	size_t max_index = 0;
	5142	size_t tmp_sum;
	5143	size_t * freq_pos = freq_intervals + 1;
	5144	for(size_t i=1; i<range-2; i++){
	5145	tmp_sum = freq_pos[0] + freq_pos[1];
	5146	if(tmp_sum > max_sum){
	5147	max_sum = tmp_sum;
	5148	max_index = i;
	5149	}
	5150	freq_pos ++;
	5151	}
	5152	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
	5153	mean_freq = max_sum 1.0 / sample_count;
	5154
	5155	free(freq_intervals);
	5156	free(intervals);
	5157	return powerOf2;
	5158	}
	5159
	5160
	5161	// 3D: modified for higher performance
	5162	unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
	5163
	5164	#ifdef HAVE_TIMECMPR
	5165	float* decData = NULL;
	5166	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5167	decData = (float*)(multisteps->hist_data);
	5168	#endif
	5169
	5170	unsigned int quantization_intervals;
	5171	float sz_sample_correct_freq = -1;//0.5; //-1
	5172	float dense_pos;
	5173	float mean_flush_freq;
	5174	unsigned char use_mean = 0;
	5175
	5176	// calculate block dims
	5177	size_t num_x, num_y, num_z;
	5178	size_t block_size = 6;
	5179	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
	5180	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
	5181	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
	5182
	5183	size_t split_index_x, split_index_y, split_index_z;
	5184	size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
	5185	size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
	5186	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
	5187	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
	5188	SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
	5189
	5190	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
	5191	size_t num_blocks = num_x * num_y * num_z;
	5192	size_t num_elements = r1 * r2 * r3;
	5193
	5194	size_t dim0_offset = r2 * r3;
	5195	size_t dim1_offset = r3;
	5196
	5197	int * result_type = (int ) malloc(num_elements sizeof(int));
	5198	size_t unpred_data_max_size = max_num_block_elements;
	5199	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
	5200	size_t total_unpred = 0;
	5201	size_t unpredictable_count;
	5202	size_t max_unpred_count = 0;
	5203	float * data_pos = oriData;
	5204	int * type = result_type;
	5205	size_t type_offset;
	5206	size_t offset_x, offset_y, offset_z;
	5207	size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
	5208
	5209	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
	5210	float * reg_params_pos = reg_params;
	5211	// move regression part out
	5212	size_t params_offset_b = num_blocks;
	5213	size_t params_offset_c = 2*num_blocks;
	5214	size_t params_offset_d = 3*num_blocks;
	5215	for(size_t i=0; i<num_x; i++){
	5216	for(size_t j=0; j<num_y; j++){
	5217	for(size_t k=0; k<num_z; k++){
	5218	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	5219	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	5220	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	5221	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	5222	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	5223	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
	5224
	5225	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
	5226	/Calculate regression coefficients/
	5227	{
	5228	float * cur_data_pos = data_pos;
	5229	float fx = 0.0;
	5230	float fy = 0.0;
	5231	float fz = 0.0;
	5232	float f = 0;
	5233	float sum_x, sum_y;
	5234	float curData;
	5235	for(size_t i=0; i<current_blockcount_x; i++){
	5236	sum_x = 0;
	5237	for(size_t j=0; j<current_blockcount_y; j++){
	5238	sum_y = 0;
	5239	for(size_t k=0; k<current_blockcount_z; k++){
	5240	curData = *cur_data_pos;
	5241	// f += curData;
	5242	// fx += curData * i;
	5243	// fy += curData * j;
	5244	// fz += curData * k;
	5245	sum_y += curData;
	5246	fz += curData * k;
	5247	cur_data_pos ++;
	5248	}
	5249	fy += sum_y * j;
	5250	sum_x += sum_y;
	5251	cur_data_pos += dim1_offset - current_blockcount_z;
	5252	}
	5253	fx += sum_x * i;
	5254	f += sum_x;
	5255	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5256	}
	5257	float coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
	5258	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
	5259	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
	5260	reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
	5261	reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
	5262	}
	5263	reg_params_pos ++;
	5264	}
	5265	}
	5266	}
	5267
	5268	//Compress coefficient arrays
	5269	double precision_a, precision_b, precision_c, precision_d;
	5270	float rel_param_err = 0.025;
	5271	precision_a = rel_param_err * realPrecision / late_blockcount_x;
	5272	precision_b = rel_param_err * realPrecision / late_blockcount_y;
	5273	precision_c = rel_param_err * realPrecision / late_blockcount_z;
	5274	precision_d = rel_param_err * realPrecision;
	5275
	5276	if(exe_params->optQuantMode==1)
	5277	{
	5278	quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
	5279	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
	5280	updateQuantizationInfo(quantization_intervals);
	5281	}
	5282	else{
	5283	quantization_intervals = exe_params->intvCapacity;
	5284	}
	5285
	5286	float mean = 0;
	5287	if(use_mean){
	5288	// compute mean
	5289	double sum = 0.0;
	5290	size_t mean_count = 0;
	5291	for(size_t i=0; i<num_elements; i++){
	5292	if(fabs(oriData[i] - dense_pos) < realPrecision){
	5293	sum += oriData[i];
	5294	mean_count ++;
	5295	}
	5296	}
	5297	if(mean_count > 0) mean = sum / mean_count;
	5298	}
	5299
	5300	double tmp_realPrecision = realPrecision;
	5301
	5302	// use two prediction buffers for higher performance
	5303	float * unpredictable_data = result_unpredictable_data;
	5304	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
	5305	memset(indicator, 0, num_blocks * sizeof(unsigned char));
	5306	size_t reg_count = 0;
	5307	size_t strip_dim_0 = early_blockcount_x + 1;
	5308	size_t strip_dim_1 = r2 + 1;
	5309	size_t strip_dim_2 = r3 + 1;
	5310	size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
	5311	size_t strip_dim1_offset = strip_dim_2;
	5312	unsigned char * indicator_pos = indicator;
	5313
	5314	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(float);
	5315	float * prediction_buffer_1 = (float *) malloc(prediction_buffer_size);
	5316	memset(prediction_buffer_1, 0, prediction_buffer_size);
	5317	float * prediction_buffer_2 = (float *) malloc(prediction_buffer_size);
	5318	memset(prediction_buffer_2, 0, prediction_buffer_size);
	5319	float * cur_pb_buf = prediction_buffer_1;
	5320	float * next_pb_buf = prediction_buffer_2;
	5321	float * cur_pb_buf_pos;
	5322	float * next_pb_buf_pos;
	5323	int intvCapacity = exe_params->intvCapacity;
	5324	int intvRadius = exe_params->intvRadius;
	5325	int use_reg = 0;
	5326	float noise = realPrecision * 1.22;
	5327
	5328	reg_params_pos = reg_params;
	5329	// compress the regression coefficients on the fly
	5330	float last_coeffcients[4] = {0.0};
	5331	int coeff_intvCapacity_sz = 65536;
	5332	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
	5333	int * coeff_type[4];
	5334	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
	5335	float * coeff_unpred_data[4];
	5336	float * coeff_unpredictable_data = (float ) malloc(num_blocks4*sizeof(float));
	5337	double precision[4];
	5338	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
	5339	for(int i=0; i<4; i++){
	5340	coeff_type[i] = coeff_result_type + i * num_blocks;
	5341	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
	5342	}
	5343	int coeff_index = 0;
	5344	unsigned int coeff_unpredictable_count[4] = {0};
	5345
	5346	if(use_mean){
	5347	int intvCapacity_sz = intvCapacity - 2;
	5348	for(size_t i=0; i<num_x; i++){
	5349	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	5350	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	5351	for(size_t j=0; j<num_y; j++){
	5352	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	5353	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	5354	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
	5355	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
	5356	type = result_type + type_offset;
	5357
	5358	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
	5359	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
	5360	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
	5361
	5362	size_t current_blockcount_z;
	5363	float * pb_pos = cur_pb_buf_pos;
	5364	float * next_pb_pos = next_pb_buf_pos;
	5365	size_t strip_unpredictable_count = 0;
	5366	for(size_t k=0; k<num_z; k++){
	5367	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	5368	#ifdef HAVE_TIMECMPR
	5369	size_t offset_z = 0;
	5370	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
	5371	size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
	5372	#endif
	5373	/*sampling and decide which predictor*/
	5374	{
	5375	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	5376	float * cur_data_pos;
	5377	float curData;
	5378	float pred_reg, pred_sz;
	5379	float err_sz = 0.0, err_reg = 0.0;
	5380	int bmi = 0;
	5381	if(i>0 && j>0 && k>0){
	5382	for(int i=0; i<block_size; i++){
	5383	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	5384	curData = *cur_data_pos;
	5385	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5386	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5387	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5388	err_reg += fabs(pred_reg - curData);
	5389
	5390	bmi = block_size - i;
	5391	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	5392	curData = *cur_data_pos;
	5393	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5394	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5395	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5396	err_reg += fabs(pred_reg - curData);
	5397
	5398	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	5399	curData = *cur_data_pos;
	5400	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5401	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5402	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5403	err_reg += fabs(pred_reg - curData);
	5404
	5405	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	5406	curData = *cur_data_pos;
	5407	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5408	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5409	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5410	err_reg += fabs(pred_reg - curData);
	5411	}
	5412	}
	5413	else{
	5414	for(int i=1; i<block_size; i++){
	5415	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	5416	curData = *cur_data_pos;
	5417	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5418	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5419	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5420	err_reg += fabs(pred_reg - curData);
	5421
	5422	bmi = block_size - i;
	5423	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	5424	curData = *cur_data_pos;
	5425	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5426	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5427	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5428	err_reg += fabs(pred_reg - curData);
	5429
	5430	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	5431	curData = *cur_data_pos;
	5432	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5433	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5434	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5435	err_reg += fabs(pred_reg - curData);
	5436
	5437	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	5438	curData = *cur_data_pos;
	5439	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5440	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5441	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	5442	err_reg += fabs(pred_reg - curData);
	5443
	5444	}
	5445	}
	5446	use_reg = (err_reg < err_sz);
	5447	}
	5448	if(use_reg){
	5449	{
	5450	/*predict coefficients in current block via previous reg_block*/
	5451	float cur_coeff;
	5452	double diff, itvNum;
	5453	for(int e=0; e<4; e++){
	5454	cur_coeff = reg_params_pos[e*num_blocks];
	5455	diff = cur_coeff - last_coeffcients[e];
	5456	itvNum = fabs(diff)/precision[e] + 1;
	5457	if (itvNum < coeff_intvCapacity_sz){
	5458	if (diff < 0) itvNum = -itvNum;
	5459	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	5460	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	5461	//guarantee compression error against the case of machine-epsilon
	5462	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	5463	coeff_type[e][coeff_index] = 0;
	5464	last_coeffcients[e] = cur_coeff;
	5465	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5466	}
	5467	}
	5468	else{
	5469	coeff_type[e][coeff_index] = 0;
	5470	last_coeffcients[e] = cur_coeff;
	5471	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5472	}
	5473	}
	5474	coeff_index ++;
	5475	}
	5476	float curData;
	5477	float pred;
	5478	double itvNum;
	5479	double diff;
	5480	size_t index = 0;
	5481	size_t block_unpredictable_count = 0;
	5482	float * cur_data_pos = data_pos;
	5483	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5484	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5485	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5486	curData = *cur_data_pos;
	5487	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5488	diff = curData - pred;
	5489	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5490	if (itvNum < intvCapacity){
	5491	if (diff < 0) itvNum = -itvNum;
	5492	type[index] = (int) (itvNum/2) + intvRadius;
	5493	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5494	//guarantee compression error against the case of machine-epsilon
	5495	if(fabs(curData - pred)>tmp_realPrecision){
	5496	type[index] = 0;
	5497	pred = curData;
	5498	unpredictable_data[block_unpredictable_count ++] = curData;
	5499	}
	5500	}
	5501	else{
	5502	type[index] = 0;
	5503	pred = curData;
	5504	unpredictable_data[block_unpredictable_count ++] = curData;
	5505	}
	5506
	5507	#ifdef HAVE_TIMECMPR
	5508	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	5509	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5510	decData[block_offset + point_offset] = pred;
	5511	#endif
	5512
	5513	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5514	// assign value to block surfaces
	5515	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5516	}
	5517	index ++;
	5518	cur_data_pos ++;
	5519	}
	5520	cur_data_pos += dim1_offset - current_blockcount_z;
	5521	}
	5522	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5523	}
	5524	/*dealing with the last ii (boundary)*/
	5525	{
	5526	// ii == current_blockcount_x - 1
	5527	size_t ii = current_blockcount_x - 1;
	5528	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5529	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5530	curData = *cur_data_pos;
	5531	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5532	diff = curData - pred;
	5533	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5534	if (itvNum < intvCapacity){
	5535	if (diff < 0) itvNum = -itvNum;
	5536	type[index] = (int) (itvNum/2) + intvRadius;
	5537	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5538	//guarantee compression error against the case of machine-epsilon
	5539	if(fabs(curData - pred)>tmp_realPrecision){
	5540	type[index] = 0;
	5541	pred = curData;
	5542	unpredictable_data[block_unpredictable_count ++] = curData;
	5543	}
	5544	}
	5545	else{
	5546	type[index] = 0;
	5547	pred = curData;
	5548	unpredictable_data[block_unpredictable_count ++] = curData;
	5549	}
	5550
	5551	#ifdef HAVE_TIMECMPR
	5552	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	5553	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5554	decData[block_offset + point_offset] = pred;
	5555	#endif
	5556
	5557	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5558	// assign value to block surfaces
	5559	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5560	}
	5561	// assign value to next prediction buffer
	5562	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
	5563	index ++;
	5564	cur_data_pos ++;
	5565	}
	5566	cur_data_pos += dim1_offset - current_blockcount_z;
	5567	}
	5568	}
	5569	unpredictable_count = block_unpredictable_count;
	5570	strip_unpredictable_count += unpredictable_count;
	5571	unpredictable_data += unpredictable_count;
	5572
	5573	reg_count ++;
	5574	}
	5575	else{
	5576	// use SZ
	5577	// SZ prediction
	5578	unpredictable_count = 0;
	5579	float * cur_pb_pos = pb_pos;
	5580	float * cur_data_pos = data_pos;
	5581	float curData;
	5582	float pred3D;
	5583	double itvNum, diff;
	5584	size_t index = 0;
	5585	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5586	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5587	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5588
	5589	curData = *cur_data_pos;
	5590	if(fabs(curData - mean) <= realPrecision){
	5591	// adjust type[index] to intvRadius for coherence with freq in reg
	5592	type[index] = intvRadius;
	5593	*cur_pb_pos = mean;
	5594	}
	5595	else
	5596	{
	5597	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	5598	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	5599	diff = curData - pred3D;
	5600	itvNum = fabs(diff)/realPrecision + 1;
	5601	if (itvNum < intvCapacity_sz){
	5602	if (diff < 0) itvNum = -itvNum;
	5603	type[index] = (int) (itvNum/2) + intvRadius;
	5604	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	5605	if(type[index] <= intvRadius) type[index] -= 1;
	5606	//guarantee compression error against the case of machine-epsilon
	5607	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	5608	type[index] = 0;
	5609	*cur_pb_pos = curData;
	5610	unpredictable_data[unpredictable_count ++] = curData;
	5611	}
	5612	}
	5613	else{
	5614	type[index] = 0;
	5615	*cur_pb_pos = curData;
	5616	unpredictable_data[unpredictable_count ++] = curData;
	5617	}
	5618	}
	5619	#ifdef HAVE_TIMECMPR
	5620	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	5621	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5622	decData[block_offset + point_offset] = *cur_pb_pos;
	5623	#endif
	5624
	5625	index ++;
	5626	cur_pb_pos ++;
	5627	cur_data_pos ++;
	5628	}
	5629	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	5630	cur_data_pos += dim1_offset - current_blockcount_z;
	5631	}
	5632	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
	5633	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5634	}
	5635	/*dealing with the last ii (boundary)*/
	5636	{
	5637	// ii == current_blockcount_x - 1
	5638	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5639	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5640
	5641	curData = *cur_data_pos;
	5642	if(fabs(curData - mean) <= realPrecision){
	5643	// adjust type[index] to intvRadius for coherence with freq in reg
	5644	type[index] = intvRadius;
	5645	*cur_pb_pos = mean;
	5646	}
	5647	else
	5648	{
	5649	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	5650	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	5651	diff = curData - pred3D;
	5652	itvNum = fabs(diff)/realPrecision + 1;
	5653	if (itvNum < intvCapacity_sz){
	5654	if (diff < 0) itvNum = -itvNum;
	5655	type[index] = (int) (itvNum/2) + intvRadius;
	5656	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	5657	if(type[index] <= intvRadius) type[index] -= 1;
	5658	//guarantee compression error against the case of machine-epsilon
	5659	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	5660	type[index] = 0;
	5661	*cur_pb_pos = curData;
	5662	unpredictable_data[unpredictable_count ++] = curData;
	5663	}
	5664	}
	5665	else{
	5666	type[index] = 0;
	5667	*cur_pb_pos = curData;
	5668	unpredictable_data[unpredictable_count ++] = curData;
	5669	}
	5670	}
	5671	#ifdef HAVE_TIMECMPR
	5672	size_t ii = current_blockcount_x - 1;
	5673	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	5674	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5675	decData[block_offset + point_offset] = *cur_pb_pos;
	5676	#endif
	5677
	5678	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
	5679	index ++;
	5680	cur_pb_pos ++;
	5681	cur_data_pos ++;
	5682	}
	5683	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	5684	cur_data_pos += dim1_offset - current_blockcount_z;
	5685	}
	5686	}
	5687	strip_unpredictable_count += unpredictable_count;
	5688	unpredictable_data += unpredictable_count;
	5689	// change indicator
	5690	indicator_pos[k] = 1;
	5691	}// end SZ
	5692
	5693	reg_params_pos ++;
	5694	data_pos += current_blockcount_z;
	5695	pb_pos += current_blockcount_z;
	5696	next_pb_pos += current_blockcount_z;
	5697	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
	5698
	5699	} // end k
	5700
	5701	if(strip_unpredictable_count > max_unpred_count){
	5702	max_unpred_count = strip_unpredictable_count;
	5703	}
	5704	total_unpred += strip_unpredictable_count;
	5705	indicator_pos += num_z;
	5706	}// end j
	5707	float * tmp;
	5708	tmp = cur_pb_buf;
	5709	cur_pb_buf = next_pb_buf;
	5710	next_pb_buf = tmp;
	5711	}// end i
	5712	}
	5713	else{
	5714	int intvCapacity_sz = intvCapacity - 2;
	5715	for(size_t i=0; i<num_x; i++){
	5716	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	5717	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	5718
	5719	for(size_t j=0; j<num_y; j++){
	5720	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	5721	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	5722	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
	5723	// copy bottom plane from plane buffer
	5724	// memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(float));
	5725	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
	5726	type = result_type + type_offset;
	5727
	5728	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
	5729	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
	5730	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
	5731
	5732	size_t current_blockcount_z;
	5733	float * pb_pos = cur_pb_buf_pos;
	5734	float * next_pb_pos = next_pb_buf_pos;
	5735	size_t strip_unpredictable_count = 0;
	5736	for(size_t k=0; k<num_z; k++){
	5737	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	5738	#ifdef HAVE_TIMECMPR
	5739	size_t offset_z = 0;
	5740	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
	5741	size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
	5742	#endif
	5743	/*sampling*/
	5744	{
	5745	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	5746	float * cur_data_pos;
	5747	float curData;
	5748	float pred_reg, pred_sz;
	5749	float err_sz = 0.0, err_reg = 0.0;
	5750	int bmi;
	5751	if(i>0 && j>0 && k>0){
	5752	for(int i=0; i<block_size; i++){
	5753	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	5754	curData = *cur_data_pos;
	5755	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5756	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5757	err_sz += fabs(pred_sz - curData) + noise;
	5758	err_reg += fabs(pred_reg - curData);
	5759
	5760	bmi = block_size - i;
	5761	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	5762	curData = *cur_data_pos;
	5763	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5764	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5765	err_sz += fabs(pred_sz - curData) + noise;
	5766	err_reg += fabs(pred_reg - curData);
	5767
	5768	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	5769	curData = *cur_data_pos;
	5770	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5771	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5772	err_sz += fabs(pred_sz - curData) + noise;
	5773	err_reg += fabs(pred_reg - curData);
	5774
	5775	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	5776	curData = *cur_data_pos;
	5777	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5778	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5779	err_sz += fabs(pred_sz - curData) + noise;
	5780	err_reg += fabs(pred_reg - curData);
	5781	}
	5782	}
	5783	else{
	5784	for(int i=1; i<block_size; i++){
	5785	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	5786	curData = *cur_data_pos;
	5787	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5788	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5789	err_sz += fabs(pred_sz - curData) + noise;
	5790	err_reg += fabs(pred_reg - curData);
	5791
	5792	bmi = block_size - i;
	5793	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	5794	curData = *cur_data_pos;
	5795	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5796	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5797	err_sz += fabs(pred_sz - curData) + noise;
	5798	err_reg += fabs(pred_reg - curData);
	5799
	5800	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	5801	curData = *cur_data_pos;
	5802	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5803	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	5804	err_sz += fabs(pred_sz - curData) + noise;
	5805	err_reg += fabs(pred_reg - curData);
	5806
	5807	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	5808	curData = *cur_data_pos;
	5809	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	5810	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	5811	err_sz += fabs(pred_sz - curData) + noise;
	5812	err_reg += fabs(pred_reg - curData);
	5813	}
	5814	}
	5815	use_reg = (err_reg < err_sz);
	5816
	5817	}
	5818	if(use_reg)
	5819	{
	5820	{
	5821	/*predict coefficients in current block via previous reg_block*/
	5822	float cur_coeff;
	5823	double diff, itvNum;
	5824	for(int e=0; e<4; e++){
	5825	cur_coeff = reg_params_pos[e*num_blocks];
	5826	diff = cur_coeff - last_coeffcients[e];
	5827	itvNum = fabs(diff)/precision[e] + 1;
	5828	if (itvNum < coeff_intvCapacity_sz){
	5829	if (diff < 0) itvNum = -itvNum;
	5830	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	5831	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	5832	//guarantee compression error against the case of machine-epsilon
	5833	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	5834	coeff_type[e][coeff_index] = 0;
	5835	last_coeffcients[e] = cur_coeff;
	5836	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5837	}
	5838	}
	5839	else{
	5840	coeff_type[e][coeff_index] = 0;
	5841	last_coeffcients[e] = cur_coeff;
	5842	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5843	}
	5844	}
	5845	coeff_index ++;
	5846	}
	5847	float curData;
	5848	float pred;
	5849	double itvNum;
	5850	double diff;
	5851	size_t index = 0;
	5852	size_t block_unpredictable_count = 0;
	5853	float * cur_data_pos = data_pos;
	5854	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5855	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5856	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5857
	5858	curData = *cur_data_pos;
	5859	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5860	diff = curData - pred;
	5861	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5862	if (itvNum < intvCapacity){
	5863	if (diff < 0) itvNum = -itvNum;
	5864	type[index] = (int) (itvNum/2) + intvRadius;
	5865	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5866	//guarantee compression error against the case of machine-epsilon
	5867	if(fabs(curData - pred)>tmp_realPrecision){
	5868	type[index] = 0;
	5869	pred = curData;
	5870	unpredictable_data[block_unpredictable_count ++] = curData;
	5871	}
	5872	}
	5873	else{
	5874	type[index] = 0;
	5875	pred = curData;
	5876	unpredictable_data[block_unpredictable_count ++] = curData;
	5877	}
	5878
	5879	#ifdef HAVE_TIMECMPR
	5880	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	5881	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5882	decData[block_offset + point_offset] = pred;
	5883	#endif
	5884
	5885
	5886	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5887	// assign value to block surfaces
	5888	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5889	}
	5890	index ++;
	5891	cur_data_pos ++;
	5892	}
	5893	cur_data_pos += dim1_offset - current_blockcount_z;
	5894	}
	5895	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5896	}
	5897	/*dealing with the last ii (boundary)*/
	5898	{
	5899	// ii == current_blockcount_x - 1
	5900	size_t ii = current_blockcount_x - 1;
	5901	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5902	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5903	curData = *cur_data_pos;
	5904	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5905	diff = curData - pred;
	5906	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5907	if (itvNum < intvCapacity){
	5908	if (diff < 0) itvNum = -itvNum;
	5909	type[index] = (int) (itvNum/2) + intvRadius;
	5910	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5911	//guarantee compression error against the case of machine-epsilon
	5912	if(fabs(curData - pred)>tmp_realPrecision){
	5913	type[index] = 0;
	5914	pred = curData;
	5915	unpredictable_data[block_unpredictable_count ++] = curData;
	5916	}
	5917	}
	5918	else{
	5919	type[index] = 0;
	5920	pred = curData;
	5921	unpredictable_data[block_unpredictable_count ++] = curData;
	5922	}
	5923
	5924	#ifdef HAVE_TIMECMPR
	5925	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	5926	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5927	decData[block_offset + point_offset] = pred;
	5928	#endif
	5929
	5930	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5931	// assign value to block surfaces
	5932	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5933	}
	5934	// assign value to next prediction buffer
	5935	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
	5936	index ++;
	5937	cur_data_pos ++;
	5938	}
	5939	cur_data_pos += dim1_offset - current_blockcount_z;
	5940	}
	5941	}
	5942	unpredictable_count = block_unpredictable_count;
	5943	strip_unpredictable_count += unpredictable_count;
	5944	unpredictable_data += unpredictable_count;
	5945	reg_count ++;
	5946	}
	5947	else{
	5948	// use SZ
	5949	// SZ prediction
	5950	unpredictable_count = 0;
	5951	float * cur_pb_pos = pb_pos;
	5952	float * cur_data_pos = data_pos;
	5953	float curData;
	5954	float pred3D;
	5955	double itvNum, diff;
	5956	size_t index = 0;
	5957	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5958	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5959	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5960
	5961	curData = *cur_data_pos;
	5962	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	5963	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	5964	diff = curData - pred3D;
	5965	itvNum = fabs(diff)/realPrecision + 1;
	5966	if (itvNum < intvCapacity_sz){
	5967	if (diff < 0) itvNum = -itvNum;
	5968	type[index] = (int) (itvNum/2) + intvRadius;
	5969	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	5970	//guarantee the compression error bound against the case of machine-epsilon
	5971	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	5972	type[index] = 0;
	5973	*cur_pb_pos = curData;
	5974	unpredictable_data[unpredictable_count ++] = curData;
	5975	}
	5976	}
	5977	else{
	5978	type[index] = 0;
	5979	*cur_pb_pos = curData;
	5980	unpredictable_data[unpredictable_count ++] = curData;
	5981	}
	5982
	5983	#ifdef HAVE_TIMECMPR
	5984	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	5985	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	5986	decData[block_offset + point_offset] = *cur_pb_pos;
	5987	#endif
	5988	index ++;
	5989	cur_pb_pos ++;
	5990	cur_data_pos ++;
	5991	}
	5992	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	5993	cur_data_pos += dim1_offset - current_blockcount_z;
	5994	}
	5995	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
	5996	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5997	}
	5998	/*dealing with the last ii (boundary)*/
	5999	{
	6000	// ii == current_blockcount_x - 1
	6001	for(size_t jj=0; jj<current_blockcount_y; jj++){
	6002	for(size_t kk=0; kk<current_blockcount_z; kk++){
	6003
	6004	curData = *cur_data_pos;
	6005	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	6006	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6007	diff = curData - pred3D;
	6008	itvNum = fabs(diff)/realPrecision + 1;
	6009	if (itvNum < intvCapacity_sz){
	6010	if (diff < 0) itvNum = -itvNum;
	6011	type[index] = (int) (itvNum/2) + intvRadius;
	6012	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	6013	//guarantee the compression error bound against the case of machine-epsilon
	6014	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	6015	type[index] = 0;
	6016	*cur_pb_pos = curData;
	6017	unpredictable_data[unpredictable_count ++] = curData;
	6018	}
	6019	}
	6020	else{
	6021	type[index] = 0;
	6022	*cur_pb_pos = curData;
	6023	unpredictable_data[unpredictable_count ++] = curData;
	6024	}
	6025
	6026	#ifdef HAVE_TIMECMPR
	6027	size_t ii = current_blockcount_x - 1;
	6028	size_t point_offset = iidim0_offset + jjdim1_offset + kk;
	6029	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	6030	decData[block_offset + point_offset] = *cur_pb_pos;
	6031	#endif
	6032
	6033	// assign value to next prediction buffer
	6034	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
	6035	index ++;
	6036	cur_pb_pos ++;
	6037	cur_data_pos ++;
	6038	}
	6039	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	6040	cur_data_pos += dim1_offset - current_blockcount_z;
	6041	}
	6042	}
	6043	strip_unpredictable_count += unpredictable_count;
	6044	unpredictable_data += unpredictable_count;
	6045	// change indicator
	6046	indicator_pos[k] = 1;
	6047	}// end SZ
	6048
	6049	reg_params_pos ++;
	6050	data_pos += current_blockcount_z;
	6051	pb_pos += current_blockcount_z;
	6052	next_pb_pos += current_blockcount_z;
	6053	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
	6054
	6055	}
	6056
	6057	if(strip_unpredictable_count > max_unpred_count){
	6058	max_unpred_count = strip_unpredictable_count;
	6059	}
	6060	total_unpred += strip_unpredictable_count;
	6061	indicator_pos += num_z;
	6062	}
	6063	float * tmp;
	6064	tmp = cur_pb_buf;
	6065	cur_pb_buf = next_pb_buf;
	6066	next_pb_buf = tmp;
	6067	}
	6068	}
	6069
	6070	free(prediction_buffer_1);
	6071	free(prediction_buffer_2);
	6072
	6073	int stateNum = 2*quantization_intervals;
	6074	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	6075
	6076	size_t nodeCount = 0;
	6077	init(huffmanTree, result_type, num_elements);
	6078	size_t i = 0;
	6079	for (i = 0; i < huffmanTree->stateNum; i++)
	6080	if (huffmanTree->code[i]) nodeCount++;
	6081	nodeCount = nodeCount*2-1;
	6082
	6083	unsigned char *treeBytes;
	6084	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	6085
	6086	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
	6087	// total size: metadata, # elements, real precision, intervals, nodeCount, huffman, block index, unpredictable count, mean, unpred size, elements
	6088	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
	6089	unsigned char * result_pos = result;
	6090	initRandomAccessBytes(result_pos);
	6091
	6092	result_pos += meta_data_offset;
	6093
	6094	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
	6095	result_pos += exe_params->SZ_SIZE_TYPE;
	6096
	6097	intToBytes_bigEndian(result_pos, block_size);
	6098	result_pos += sizeof(int);
	6099	doubleToBytes(result_pos, realPrecision);
	6100	result_pos += sizeof(double);
	6101	intToBytes_bigEndian(result_pos, quantization_intervals);
	6102	result_pos += sizeof(int);
	6103	intToBytes_bigEndian(result_pos, treeByteSize);
	6104	result_pos += sizeof(int);
	6105	intToBytes_bigEndian(result_pos, nodeCount);
	6106	result_pos += sizeof(int);
	6107	memcpy(result_pos, treeBytes, treeByteSize);
	6108	result_pos += treeByteSize;
	6109	free(treeBytes);
	6110
	6111	memcpy(result_pos, &use_mean, sizeof(unsigned char));
	6112	result_pos += sizeof(unsigned char);
	6113	memcpy(result_pos, &mean, sizeof(float));
	6114	result_pos += sizeof(float);
	6115	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
	6116	result_pos += indicator_size;
	6117
	6118	//convert the lead/mid/resi to byte stream
	6119	if(reg_count > 0){
	6120	for(int e=0; e<4; e++){
	6121	int stateNum = 2*coeff_intvCapacity_sz;
	6122	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	6123	size_t nodeCount = 0;
	6124	init(huffmanTree, coeff_type[e], reg_count);
	6125	size_t i = 0;
	6126	for (i = 0; i < huffmanTree->stateNum; i++)
	6127	if (huffmanTree->code[i]) nodeCount++;
	6128	nodeCount = nodeCount*2-1;
	6129	unsigned char *treeBytes;
	6130	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	6131	doubleToBytes(result_pos, precision[e]);
	6132	result_pos += sizeof(double);
	6133	intToBytes_bigEndian(result_pos, coeff_intvRadius);
	6134	result_pos += sizeof(int);
	6135	intToBytes_bigEndian(result_pos, treeByteSize);
	6136	result_pos += sizeof(int);
	6137	intToBytes_bigEndian(result_pos, nodeCount);
	6138	result_pos += sizeof(int);
	6139	memcpy(result_pos, treeBytes, treeByteSize);
	6140	result_pos += treeByteSize;
	6141	free(treeBytes);
	6142	size_t typeArray_size = 0;
	6143	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
	6144	sizeToBytes(result_pos, typeArray_size);
	6145	result_pos += sizeof(size_t) + typeArray_size;
	6146	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
	6147	result_pos += sizeof(int);
	6148	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
	6149	result_pos += coeff_unpredictable_count[e]*sizeof(float);
	6150	SZ_ReleaseHuffman(huffmanTree);
	6151	}
	6152	}
	6153	free(coeff_result_type);
	6154	free(coeff_unpredictable_data);
	6155
	6156	//record the number of unpredictable data and also store them
	6157	memcpy(result_pos, &total_unpred, sizeof(size_t));
	6158	result_pos += sizeof(size_t);
	6159	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
	6160	result_pos += total_unpred * sizeof(float);
	6161	size_t typeArray_size = 0;
	6162	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
	6163	result_pos += typeArray_size;
	6164	size_t totalEncodeSize = result_pos - result;
	6165	free(indicator);
	6166	free(result_unpredictable_data);
	6167	free(result_type);
	6168	free(reg_params);
	6169
	6170
	6171	SZ_ReleaseHuffman(huffmanTree);
	6172	*comp_size = totalEncodeSize;
	6173	return result;
	6174	}
	6175
	6176	unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(float oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
	6177
	6178	unsigned int quantization_intervals;
	6179	float sz_sample_correct_freq = -1;//0.5; //-1
	6180	float dense_pos;
	6181	float mean_flush_freq;
	6182	unsigned char use_mean = 0;
	6183
	6184	// calculate block dims
	6185	size_t num_x, num_y, num_z;
	6186	size_t block_size = 6;
	6187	num_x = (r1 - 1) / block_size + 1;
	6188	num_y = (r2 - 1) / block_size + 1;
	6189	num_z = (r3 - 1) / block_size + 1;
	6190
	6191	size_t max_num_block_elements = block_size * block_size * block_size;
	6192	size_t num_blocks = num_x * num_y * num_z;
	6193	size_t num_elements = r1 * r2 * r3;
	6194
	6195	size_t dim0_offset = r2 * r3;
	6196	size_t dim1_offset = r3;
	6197
	6198	int * result_type = (int ) malloc(num_blocksmax_num_block_elements * sizeof(int));
	6199	size_t unpred_data_max_size = max_num_block_elements;
	6200	float * result_unpredictable_data = (float ) malloc(unpred_data_max_size sizeof(float) * num_blocks);
	6201	size_t total_unpred = 0;
	6202	size_t unpredictable_count;
	6203	float * data_pos = oriData;
	6204	int * type = result_type;
	6205	float * reg_params = (float ) malloc(num_blocks 4 * sizeof(float));
	6206	float * reg_params_pos = reg_params;
	6207	// move regression part out
	6208	size_t params_offset_b = num_blocks;
	6209	size_t params_offset_c = 2*num_blocks;
	6210	size_t params_offset_d = 3*num_blocks;
	6211	float * pred_buffer = (float ) malloc((block_size+1)(block_size+1)(block_size+1)sizeof(float));
	6212	float * pred_buffer_pos = NULL;
	6213	float * block_data_pos_x = NULL;
	6214	float * block_data_pos_y = NULL;
	6215	float * block_data_pos_z = NULL;
	6216	for(size_t i=0; i<num_x; i++){
	6217	for(size_t j=0; j<num_y; j++){
	6218	for(size_t k=0; k<num_z; k++){
	6219	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
	6220	pred_buffer_pos = pred_buffer;
	6221	block_data_pos_x = data_pos;
	6222	// use the buffer as block_size*block_size*block_size
	6223	for(int ii=0; ii<block_size; ii++){
	6224	block_data_pos_y = block_data_pos_x;
	6225	for(int jj=0; jj<block_size; jj++){
	6226	block_data_pos_z = block_data_pos_y;
	6227	for(int kk=0; kk<block_size; kk++){
	6228	pred_buffer_pos = block_data_pos_z;
	6229	if(k*block_size + kk + 1 < r3) block_data_pos_z ++;
	6230	pred_buffer_pos ++;
	6231	}
	6232	if(j*block_size + jj + 1 < r2) block_data_pos_y += dim1_offset;
	6233	}
	6234	if(i*block_size + ii + 1 < r1) block_data_pos_x += dim0_offset;
	6235	}
	6236	/*Calculate regression coefficients*/
	6237	{
	6238	float * cur_data_pos = pred_buffer;
	6239	float fx = 0.0;
	6240	float fy = 0.0;
	6241	float fz = 0.0;
	6242	float f = 0;
	6243	float sum_x, sum_y;
	6244	float curData;
	6245	for(size_t i=0; i<block_size; i++){
	6246	sum_x = 0;
	6247	for(size_t j=0; j<block_size; j++){
	6248	sum_y = 0;
	6249	for(size_t k=0; k<block_size; k++){
	6250	curData = *cur_data_pos;
	6251	sum_y += curData;
	6252	fz += curData * k;
	6253	cur_data_pos ++;
	6254	}
	6255	fy += sum_y * j;
	6256	sum_x += sum_y;
	6257	}
	6258	fx += sum_x * i;
	6259	f += sum_x;
	6260	}
	6261	float coeff = 1.0 / (block_size * block_size * block_size);
	6262	reg_params_pos[0] = (2 * fx / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
	6263	reg_params_pos[params_offset_b] = (2 * fy / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
	6264	reg_params_pos[params_offset_c] = (2 * fz / (block_size - 1) - f) * 6 * coeff / (block_size + 1);
	6265	reg_params_pos[params_offset_d] = f * coeff - ((block_size - 1) * reg_params_pos[0] / 2 + (block_size - 1) * reg_params_pos[params_offset_b] / 2 + (block_size - 1) * reg_params_pos[params_offset_c] / 2);
	6266	}
	6267	reg_params_pos ++;
	6268	}
	6269	}
	6270	}
	6271
	6272	//Compress coefficient arrays
	6273	double precision_a, precision_b, precision_c, precision_d;
	6274	float rel_param_err = 0.025;
	6275	precision_a = rel_param_err * realPrecision / block_size;
	6276	precision_b = rel_param_err * realPrecision / block_size;
	6277	precision_c = rel_param_err * realPrecision / block_size;
	6278	precision_d = rel_param_err * realPrecision;
	6279
	6280	if(exe_params->optQuantMode==1)
	6281	{
	6282	quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
	6283	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
	6284	updateQuantizationInfo(quantization_intervals);
	6285	}
	6286	else{
	6287	quantization_intervals = exe_params->intvCapacity;
	6288	}
	6289
	6290	float mean = 0;
	6291	if(use_mean){
	6292	// compute mean
	6293	double sum = 0.0;
	6294	size_t mean_count = 0;
	6295	for(size_t i=0; i<num_elements; i++){
	6296	if(fabs(oriData[i] - dense_pos) < realPrecision){
	6297	sum += oriData[i];
	6298	mean_count ++;
	6299	}
	6300	}
	6301	if(mean_count > 0) mean = sum / mean_count;
	6302	}
	6303
	6304	double tmp_realPrecision = realPrecision;
	6305
	6306	// use two prediction buffers for higher performance
	6307	float * unpredictable_data = result_unpredictable_data;
	6308	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
	6309	memset(indicator, 0, num_blocks * sizeof(unsigned char));
	6310	size_t reg_count = 0;
	6311	unsigned char * indicator_pos = indicator;
	6312
	6313	int intvCapacity = exe_params->intvCapacity;
	6314	int intvRadius = exe_params->intvRadius;
	6315	int use_reg = 0;
	6316	float noise = realPrecision * 1.22;
	6317
	6318	reg_params_pos = reg_params;
	6319	// compress the regression coefficients on the fly
	6320	float last_coeffcients[4] = {0.0};
	6321	int coeff_intvCapacity_sz = 65536;
	6322	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
	6323	int * coeff_type[4];
	6324	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
	6325	float * coeff_unpred_data[4];
	6326	float * coeff_unpredictable_data = (float ) malloc(num_blocks4*sizeof(float));
	6327	double precision[4];
	6328	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
	6329	for(int i=0; i<4; i++){
	6330	coeff_type[i] = coeff_result_type + i * num_blocks;
	6331	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
	6332	}
	6333	int coeff_index = 0;
	6334	unsigned int coeff_unpredictable_count[4] = {0};
	6335
	6336	memset(pred_buffer, 0, (block_size+1)(block_size+1)(block_size+1)*sizeof(float));
	6337	int pred_buffer_block_size = block_size + 1;
	6338	int strip_dim0_offset = pred_buffer_block_size * pred_buffer_block_size;
	6339	int strip_dim1_offset = pred_buffer_block_size;
	6340
	6341	if(use_mean){
	6342	int intvCapacity_sz = intvCapacity - 2;
	6343	type = result_type;
	6344	for(size_t i=0; i<num_x; i++){
	6345	for(size_t j=0; j<num_y; j++){
	6346	for(size_t k=0; k<num_z; k++){
	6347	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
	6348	// add 1 in x, y, z offset
	6349	pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
	6350	block_data_pos_x = data_pos;
	6351	for(int ii=0; ii<block_size; ii++){
	6352	block_data_pos_y = block_data_pos_x;
	6353	for(int jj=0; jj<block_size; jj++){
	6354	block_data_pos_z = block_data_pos_y;
	6355	for(int kk=0; kk<block_size; kk++){
	6356	pred_buffer_pos = block_data_pos_z;
	6357	if(k*block_size + kk + 1< r3) block_data_pos_z ++;
	6358	pred_buffer_pos ++;
	6359	}
	6360	// add 1 in z offset
	6361	pred_buffer_pos ++;
	6362	if(j*block_size + jj + 1< r2) block_data_pos_y += dim1_offset;
	6363	}
	6364	// add 1 in y offset
	6365	pred_buffer_pos += pred_buffer_block_size;
	6366	if(i*block_size + ii + 1< r1) block_data_pos_x += dim0_offset;
	6367	}
	6368	/*sampling and deciding which predictor to use*/
	6369	{
	6370	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	6371	float * cur_data_pos;
	6372	float curData;
	6373	float pred_reg, pred_sz;
	6374	float err_sz = 0.0, err_reg = 0.0;
	6375	int bmi = 0;
	6376	for(int i=2; i<=block_size; i++){
	6377	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + i;
	6378	curData = *cur_data_pos;
	6379	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6380	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	6381	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	6382	err_reg += fabs(pred_reg - curData);
	6383
	6384	bmi = block_size - i;
	6385	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + bmi;
	6386	curData = *cur_data_pos;
	6387	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6388	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	6389	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	6390	err_reg += fabs(pred_reg - curData);
	6391
	6392	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + i;
	6393	curData = *cur_data_pos;
	6394	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6395	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	6396	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	6397	err_reg += fabs(pred_reg - curData);
	6398
	6399	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
	6400	curData = *cur_data_pos;
	6401	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6402	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	6403	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	6404	err_reg += fabs(pred_reg - curData);
	6405	}
	6406
	6407	use_reg = (err_reg < err_sz);
	6408	}
	6409	if(use_reg){
	6410	{
	6411	/*predict coefficients in current block via previous reg_block*/
	6412	float cur_coeff;
	6413	double diff, itvNum;
	6414	for(int e=0; e<4; e++){
	6415	cur_coeff = reg_params_pos[e*num_blocks];
	6416	diff = cur_coeff - last_coeffcients[e];
	6417	itvNum = fabs(diff)/precision[e] + 1;
	6418	if (itvNum < coeff_intvCapacity_sz){
	6419	if (diff < 0) itvNum = -itvNum;
	6420	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	6421	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	6422	//guarantee the compression error bound against the case of machine-epsilon
	6423	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	6424	coeff_type[e][coeff_index] = 0;
	6425	last_coeffcients[e] = cur_coeff;
	6426	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	6427	}
	6428	}
	6429	else{
	6430	coeff_type[e][coeff_index] = 0;
	6431	last_coeffcients[e] = cur_coeff;
	6432	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	6433	}
	6434	}
	6435	coeff_index ++;
	6436	}
	6437	float curData;
	6438	float pred;
	6439	double itvNum;
	6440	double diff;
	6441	size_t index = 0;
	6442	size_t block_unpredictable_count = 0;
	6443	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
	6444	for(size_t ii=0; ii<block_size; ii++){
	6445	for(size_t jj=0; jj<block_size; jj++){
	6446	for(size_t kk=0; kk<block_size; kk++){
	6447	curData = *cur_data_pos;
	6448	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	6449	diff = curData - pred;
	6450	itvNum = fabs(diff)/tmp_realPrecision + 1;
	6451	if (itvNum < intvCapacity){
	6452	if (diff < 0) itvNum = -itvNum;
	6453	type[index] = (int) (itvNum/2) + intvRadius;
	6454	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	6455	//guarantee the compression error bound against the case of machine-epsilon
	6456	if(fabs(curData - pred)>tmp_realPrecision){
	6457	type[index] = 0;
	6458	pred = curData;
	6459	unpredictable_data[block_unpredictable_count ++] = curData;
	6460	}
	6461	}
	6462	else{
	6463	type[index] = 0;
	6464	pred = curData;
	6465	unpredictable_data[block_unpredictable_count ++] = curData;
	6466	}
	6467	index ++;
	6468	cur_data_pos ++;
	6469	}
	6470	cur_data_pos ++;
	6471	}
	6472	cur_data_pos += pred_buffer_block_size;
	6473	}
	6474
	6475	total_unpred += block_unpredictable_count;
	6476	unpredictable_data += block_unpredictable_count;
	6477	reg_count ++;
	6478	}
	6479	else{
	6480	// use SZ
	6481	// SZ prediction
	6482	unpredictable_count = 0;
	6483	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
	6484	float curData;
	6485	float pred3D;
	6486	double itvNum, diff;
	6487	size_t index = 0;
	6488	for(size_t ii=0; ii<block_size; ii++){
	6489	for(size_t jj=0; jj<block_size; jj++){
	6490	for(size_t kk=0; kk<block_size; kk++){
	6491
	6492	curData = *cur_data_pos;
	6493	if(fabs(curData - mean) <= realPrecision){
	6494	type[index] = 1;
	6495	*cur_data_pos = mean;
	6496	}
	6497	else
	6498	{
	6499	pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
	6500	- cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6501	diff = curData - pred3D;
	6502	itvNum = fabs(diff)/realPrecision + 1;
	6503	if (itvNum < intvCapacity_sz){
	6504	if (diff < 0) itvNum = -itvNum;
	6505	type[index] = (int) (itvNum/2) + intvRadius;
	6506	cur_data_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	6507	//guarantee the compression error bound against the case of machine-epsilon
	6508	if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
	6509	type[index] = 0;
	6510	*cur_data_pos = curData;
	6511	unpredictable_data[unpredictable_count ++] = curData;
	6512	}
	6513	}
	6514	else{
	6515	type[index] = 0;
	6516	*cur_data_pos = curData;
	6517	unpredictable_data[unpredictable_count ++] = curData;
	6518	}
	6519	}
	6520	index ++;
	6521	cur_data_pos ++;
	6522	}
	6523	cur_data_pos ++;
	6524	}
	6525	cur_data_pos += pred_buffer_block_size;
	6526	}
	6527	total_unpred += unpredictable_count;
	6528	unpredictable_data += unpredictable_count;
	6529	// change indicator
	6530	indicator_pos[k] = 1;
	6531	}// end SZ
	6532	reg_params_pos ++;
	6533	type += block_size * block_size * block_size;
	6534	} // end k
	6535	indicator_pos += num_z;
	6536	}// end j
	6537	}// end i
	6538	}
	6539	else{
	6540	int intvCapacity_sz = intvCapacity - 2;
	6541	type = result_type;
	6542	for(size_t i=0; i<num_x; i++){
	6543	for(size_t j=0; j<num_y; j++){
	6544	for(size_t k=0; k<num_z; k++){
	6545	data_pos = oriData + iblock_size dim0_offset + jblock_size dim1_offset + k*block_size;
	6546	// add 1 in x, y, z offset
	6547	pred_buffer_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
	6548	block_data_pos_x = data_pos;
	6549	for(int ii=0; ii<block_size; ii++){
	6550	block_data_pos_y = block_data_pos_x;
	6551	for(int jj=0; jj<block_size; jj++){
	6552	block_data_pos_z = block_data_pos_y;
	6553	for(int kk=0; kk<block_size; kk++){
	6554	pred_buffer_pos = block_data_pos_z;
	6555	if(k*block_size + kk < r3) block_data_pos_z ++;
	6556	pred_buffer_pos ++;
	6557	}
	6558	// add 1 in z offset
	6559	pred_buffer_pos ++;
	6560	if(j*block_size + jj < r2) block_data_pos_y += dim1_offset;
	6561	}
	6562	// add 1 in y offset
	6563	pred_buffer_pos += pred_buffer_block_size;
	6564	if(i*block_size + ii < r1) block_data_pos_x += dim0_offset;
	6565	}
	6566	/*sampling*/
	6567	{
	6568	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	6569	float * cur_data_pos;
	6570	float curData;
	6571	float pred_reg, pred_sz;
	6572	float err_sz = 0.0, err_reg = 0.0;
	6573	int bmi;
	6574	for(int i=2; i<=block_size; i++){
	6575	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + i;
	6576	curData = *cur_data_pos;
	6577	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6578	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	6579	err_sz += fabs(pred_sz - curData) + noise;
	6580	err_reg += fabs(pred_reg - curData);
	6581
	6582	bmi = block_size - i;
	6583	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + i*pred_buffer_block_size + bmi;
	6584	curData = *cur_data_pos;
	6585	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6586	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	6587	err_sz += fabs(pred_sz - curData) + noise;
	6588	err_reg += fabs(pred_reg - curData);
	6589
	6590	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + i;
	6591	curData = *cur_data_pos;
	6592	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6593	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	6594	err_sz += fabs(pred_sz - curData) + noise;
	6595	err_reg += fabs(pred_reg - curData);
	6596
	6597	cur_data_pos = pred_buffer + ipred_buffer_block_sizepred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
	6598	curData = *cur_data_pos;
	6599	pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6600	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	6601	err_sz += fabs(pred_sz - curData) + noise;
	6602	err_reg += fabs(pred_reg - curData);
	6603	}
	6604
	6605	use_reg = (err_reg < err_sz);
	6606
	6607	}
	6608	if(use_reg)
	6609	{
	6610	{
	6611	/*predict coefficients in current block via previous reg_block*/
	6612	float cur_coeff;
	6613	double diff, itvNum;
	6614	for(int e=0; e<4; e++){
	6615	cur_coeff = reg_params_pos[e*num_blocks];
	6616	diff = cur_coeff - last_coeffcients[e];
	6617	itvNum = fabs(diff)/precision[e] + 1;
	6618	if (itvNum < coeff_intvCapacity_sz){
	6619	if (diff < 0) itvNum = -itvNum;
	6620	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	6621	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	6622	//guarantee the compression error bound against the case of machine-epsilon
	6623	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	6624	coeff_type[e][coeff_index] = 0;
	6625	last_coeffcients[e] = cur_coeff;
	6626	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	6627	}
	6628	}
	6629	else{
	6630	coeff_type[e][coeff_index] = 0;
	6631	last_coeffcients[e] = cur_coeff;
	6632	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	6633	}
	6634	}
	6635	coeff_index ++;
	6636	}
	6637	float curData;
	6638	float pred;
	6639	double itvNum;
	6640	double diff;
	6641	size_t index = 0;
	6642	size_t block_unpredictable_count = 0;
	6643	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
	6644	for(size_t ii=0; ii<block_size; ii++){
	6645	for(size_t jj=0; jj<block_size; jj++){
	6646	for(size_t kk=0; kk<block_size; kk++){
	6647	curData = *cur_data_pos;
	6648	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	6649	diff = curData - pred;
	6650	itvNum = fabs(diff)/tmp_realPrecision + 1;
	6651	if (itvNum < intvCapacity){
	6652	if (diff < 0) itvNum = -itvNum;
	6653	type[index] = (int) (itvNum/2) + intvRadius;
	6654	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	6655	//guarantee the compression error bound against the case of machine-epsilon
	6656	if(fabs(curData - pred)>tmp_realPrecision){
	6657	type[index] = 0;
	6658	pred = curData;
	6659	unpredictable_data[block_unpredictable_count ++] = curData;
	6660	}
	6661	}
	6662	else{
	6663	type[index] = 0;
	6664	pred = curData;
	6665	unpredictable_data[block_unpredictable_count ++] = curData;
	6666	}
	6667	index ++;
	6668	cur_data_pos ++;
	6669	}
	6670	cur_data_pos ++;
	6671	}
	6672	cur_data_pos += pred_buffer_block_size;
	6673	}
	6674	total_unpred += block_unpredictable_count;
	6675	unpredictable_data += block_unpredictable_count;
	6676	reg_count ++;
	6677	}
	6678	else{
	6679	// use SZ
	6680	// SZ prediction
	6681	unpredictable_count = 0;
	6682	float * cur_data_pos = pred_buffer + pred_buffer_block_size*pred_buffer_block_size + pred_buffer_block_size + 1;
	6683	float curData;
	6684	float pred3D;
	6685	double itvNum, diff;
	6686	size_t index = 0;
	6687	for(size_t ii=0; ii<block_size; ii++){
	6688	for(size_t jj=0; jj<block_size; jj++){
	6689	for(size_t kk=0; kk<block_size; kk++){
	6690	curData = *cur_data_pos;
	6691	pred3D = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1]
	6692	- cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	6693	diff = curData - pred3D;
	6694	itvNum = fabs(diff)/realPrecision + 1;
	6695	if (itvNum < intvCapacity_sz){
	6696	if (diff < 0) itvNum = -itvNum;
	6697	type[index] = (int) (itvNum/2) + intvRadius;
	6698	cur_data_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	6699	//guarantee the compression error bound against the case of machine-epsilon
	6700	if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
	6701	type[index] = 0;
	6702	*cur_data_pos = curData;
	6703	unpredictable_data[unpredictable_count ++] = curData;
	6704	}
	6705	}
	6706	else{
	6707	type[index] = 0;
	6708	*cur_data_pos = curData;
	6709	unpredictable_data[unpredictable_count ++] = curData;
	6710	}
	6711	index ++;
	6712	cur_data_pos ++;
	6713	}
	6714	cur_data_pos ++;
	6715	}
	6716	cur_data_pos += pred_buffer_block_size;
	6717	}
	6718	total_unpred += unpredictable_count;
	6719	unpredictable_data += unpredictable_count;
	6720	// change indicator
	6721	indicator_pos[k] = 1;
	6722	}// end SZ
	6723	reg_params_pos ++;
	6724	type += block_size * block_size * block_size;
	6725	}
	6726	indicator_pos += num_z;
	6727	}
	6728	}
	6729	}
	6730	free(pred_buffer);
	6731	int stateNum = 2*quantization_intervals;
	6732	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	6733
	6734	size_t nodeCount = 0;
	6735	init(huffmanTree, result_type, num_blocks*max_num_block_elements);
	6736	size_t i = 0;
	6737	for (i = 0; i < huffmanTree->stateNum; i++)
	6738	if (huffmanTree->code[i]) nodeCount++;
	6739	nodeCount = nodeCount*2-1;
	6740
	6741	unsigned char *treeBytes;
	6742	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	6743
	6744	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
	6745	// total size metadata # elements real precision intervals nodeCount huffman block index unpredictable count mean unpred size elements
	6746	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
	6747	unsigned char * result_pos = result;
	6748	initRandomAccessBytes(result_pos);
	6749
	6750	result_pos += meta_data_offset;
	6751
	6752	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
	6753	result_pos += exe_params->SZ_SIZE_TYPE;
	6754
	6755	intToBytes_bigEndian(result_pos, block_size);
	6756	result_pos += sizeof(int);
	6757	doubleToBytes(result_pos, realPrecision);
	6758	result_pos += sizeof(double);
	6759	intToBytes_bigEndian(result_pos, quantization_intervals);
	6760	result_pos += sizeof(int);
	6761	intToBytes_bigEndian(result_pos, treeByteSize);
	6762	result_pos += sizeof(int);
	6763	intToBytes_bigEndian(result_pos, nodeCount);
	6764	result_pos += sizeof(int);
	6765	memcpy(result_pos, treeBytes, treeByteSize);
	6766	result_pos += treeByteSize;
	6767	free(treeBytes);
	6768
	6769	memcpy(result_pos, &use_mean, sizeof(unsigned char));
	6770	result_pos += sizeof(unsigned char);
	6771	memcpy(result_pos, &mean, sizeof(float));
	6772	result_pos += sizeof(float);
	6773	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
	6774	result_pos += indicator_size;
	6775
	6776	//convert the lead/mid/resi to byte stream
	6777	if(reg_count > 0){
	6778	for(int e=0; e<4; e++){
	6779	int stateNum = 2*coeff_intvCapacity_sz;
	6780	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	6781	size_t nodeCount = 0;
	6782	init(huffmanTree, coeff_type[e], reg_count);
	6783	size_t i = 0;
	6784	for (i = 0; i < huffmanTree->stateNum; i++)
	6785	if (huffmanTree->code[i]) nodeCount++;
	6786	nodeCount = nodeCount*2-1;
	6787	unsigned char *treeBytes;
	6788	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	6789	doubleToBytes(result_pos, precision[e]);
	6790	result_pos += sizeof(double);
	6791	intToBytes_bigEndian(result_pos, coeff_intvRadius);
	6792	result_pos += sizeof(int);
	6793	intToBytes_bigEndian(result_pos, treeByteSize);
	6794	result_pos += sizeof(int);
	6795	intToBytes_bigEndian(result_pos, nodeCount);
	6796	result_pos += sizeof(int);
	6797	memcpy(result_pos, treeBytes, treeByteSize);
	6798	result_pos += treeByteSize;
	6799	free(treeBytes);
	6800	size_t typeArray_size = 0;
	6801	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
	6802	sizeToBytes(result_pos, typeArray_size);
	6803	result_pos += sizeof(size_t) + typeArray_size;
	6804	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
	6805	result_pos += sizeof(int);
	6806	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(float));
	6807	result_pos += coeff_unpredictable_count[e]*sizeof(float);
	6808	SZ_ReleaseHuffman(huffmanTree);
	6809	}
	6810	}
	6811	free(coeff_result_type);
	6812	free(coeff_unpredictable_data);
	6813
	6814	//record the number of unpredictable data and also store them
	6815	memcpy(result_pos, &total_unpred, sizeof(size_t));
	6816	result_pos += sizeof(size_t);
	6817	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(float));
	6818	result_pos += total_unpred * sizeof(float);
	6819	size_t typeArray_size = 0;
	6820	encode(huffmanTree, result_type, num_blocks*max_num_block_elements, result_pos, &typeArray_size);
	6821	result_pos += typeArray_size;
	6822	size_t totalEncodeSize = result_pos - result;
	6823	free(indicator);
	6824	free(result_unpredictable_data);
	6825	free(result_type);
	6826	free(reg_params);
	6827
	6828
	6829	SZ_ReleaseHuffman(huffmanTree);
	6830	*comp_size = totalEncodeSize;
	6831	return result;
	6832	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: