Context Navigation

source: thirdparty/SZ/sz/src/sz_double.c @ e6aa0eb

Revision e6aa0eb, 186.1 KB checked in by Hal Finkel <hfinkel@…>, 6 years ago (diff)
add stddef.h for ptrdiff_t
Property mode set to `100644`

Rev	Line
[2c47b73]	1	/**
	2	* @file sz_double.c
	3	* @author Sheng Di and Dingwen Tao
	4	* @date Aug, 2016
	5	* @brief SZ_Init, Compression and Decompression functions
	6	* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
	7	* See COPYRIGHT in top-level directory.
	8	*/
	9
	10
	11	#include <stdio.h>
	12	#include <stdlib.h>
[e6aa0eb]	13	#include <stddef.h>
[2c47b73]	14	#include <string.h>
	15	#include <unistd.h>
	16	#include <math.h>
	17	#include "sz.h"
	18	#include "CompressElement.h"
	19	#include "DynamicByteArray.h"
	20	#include "DynamicIntArray.h"
	21	#include "TightDataPointStorageD.h"
	22	#include "sz_double.h"
	23	#include "sz_double_pwr.h"
	24	#include "szd_double.h"
	25	#include "szd_double_pwr.h"
	26	#include "zlib.h"
	27	#include "rw.h"
	28	#include "sz_double_ts.h"
[9ee2ce3]	29	#include "utility.h"
[2c47b73]	30
	31	unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize)
	32	{
	33	outSize = dataLengthsizeof(double);
	34	unsigned char* out = (unsigned char)malloc(dataLengthsizeof(double));
	35	memcpy(out, data, dataLength*sizeof(double));
	36	return out;
	37	}
	38
	39	void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue)
	40	{
	41	short reqExpo = getPrecisionReqLength_double(realPrecision);
	42	*reqLength = 12+radExpo - reqExpo; //radExpo-reqExpo == reqMantiLength
	43	if(*reqLength<12)
	44	*reqLength = 12;
	45	if(*reqLength>64)
	46	{
	47	*reqLength = 64;
	48	*medianValue = 0;
	49	}
	50	}
	51
	52	unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision)
	53	{
	54	size_t i = 0, radiusIndex;
	55	double pred_value = 0, pred_err;
	56	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	57	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	58	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
	59	for(i=2;i<dataLength;i++)
	60	{
	61	if(i%confparams_cpr->sampleDistance==0)
	62	{
	63	//pred_value = 2*oriData[i-1] - oriData[i-2];
	64	pred_value = oriData[i-1];
	65	pred_err = fabs(pred_value - oriData[i]);
	66	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	67	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	68	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	69	intervals[radiusIndex]++;
	70	}
	71	}
	72	//compute the appropriate number
	73	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	74	size_t sum = 0;
	75	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	76	{
	77	sum += intervals[i];
	78	if(sum>targetCount)
	79	break;
	80	}
	81
	82	if(i>=confparams_cpr->maxRangeRadius)
	83	i = confparams_cpr->maxRangeRadius-1;
	84	unsigned int accIntervals = 2*(i+1);
	85	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	86
	87	if(powerOf2<32)
	88	powerOf2 = 32;
	89
	90	free(intervals);
	91	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
	92	return powerOf2;
	93	}
	94
	95	unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision)
	96	{
	97	size_t i,j, index;
	98	size_t radiusIndex;
	99	double pred_value = 0, pred_err;
	100	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	101	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	102	size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance;
	103	for(i=1;i<r1;i++)
	104	{
	105	for(j=1;j<r2;j++)
	106	{
	107	if((i+j)%confparams_cpr->sampleDistance==0)
	108	{
	109	index = i*r2+j;
	110	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
	111	pred_err = fabs(pred_value - oriData[index]);
	112	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	113	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	114	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	115	intervals[radiusIndex]++;
	116	}
	117	}
	118	}
	119	//compute the appropriate number
	120	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	121	size_t sum = 0;
	122	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	123	{
	124	sum += intervals[i];
	125	if(sum>targetCount)
	126	break;
	127	}
	128	if(i>=confparams_cpr->maxRangeRadius)
	129	i = confparams_cpr->maxRangeRadius-1;
	130	unsigned int accIntervals = 2*(i+1);
	131	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	132	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
	133
	134	if(powerOf2<32)
	135	powerOf2 = 32;
	136
	137	free(intervals);
	138	return powerOf2;
	139	}
	140
	141	unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
	142	{
	143	size_t i,j,k, index;
	144	size_t radiusIndex;
	145	size_t r23=r2*r3;
	146	double pred_value = 0, pred_err;
	147	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	148	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	149	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)/confparams_cpr->sampleDistance;
	150	for(i=1;i<r1;i++)
	151	{
	152	for(j=1;j<r2;j++)
	153	{
	154	for(k=1;k<r3;k++)
	155	{
	156	if((i+j+k)%confparams_cpr->sampleDistance==0)
	157	{
	158	index = ir23+jr3+k;
	159	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
	160	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
	161	pred_err = fabs(pred_value - oriData[index]);
	162	radiusIndex = (pred_err/realPrecision+1)/2;
	163	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	164	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	165	intervals[radiusIndex]++;
	166	}
	167	}
	168
	169	}
	170	}
	171	//compute the appropriate number
	172	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	173	size_t sum = 0;
	174	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	175	{
	176	sum += intervals[i];
	177	if(sum>targetCount)
	178	break;
	179	}
	180	if(i>=confparams_cpr->maxRangeRadius)
	181	i = confparams_cpr->maxRangeRadius-1;
	182
	183	unsigned int accIntervals = 2*(i+1);
	184	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	185
	186	if(powerOf2<32)
	187	powerOf2 = 32;
	188
	189	free(intervals);
	190	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
	191	return powerOf2;
	192	}
	193
	194	unsigned int optimize_intervals_double_4D(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision)
	195	{
	196	size_t i,j,k,l, index;
	197	size_t radiusIndex;
	198	size_t r234=r2r3r4;
	199	size_t r34=r3*r4;
	200	double pred_value = 0, pred_err;
	201	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	202	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	203	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)*(r4-1)/confparams_cpr->sampleDistance;
	204	for(i=1;i<r1;i++)
	205	{
	206	for(j=1;j<r2;j++)
	207	{
	208	for(k=1;k<r3;k++)
	209	{
	210	for (l=1;l<r4;l++)
	211	{
	212	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
	213	{
	214	index = ir234+jr34+k*r4+l;
	215	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34]
	216	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
	217	pred_err = fabs(pred_value - oriData[index]);
	218	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	219	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	220	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	221	intervals[radiusIndex]++;
	222	}
	223	}
	224	}
	225	}
	226	}
	227	//compute the appropriate number
	228	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	229	size_t sum = 0;
	230	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	231	{
	232	sum += intervals[i];
	233	if(sum>targetCount)
	234	break;
	235	}
	236	if(i>=confparams_cpr->maxRangeRadius)
	237	i = confparams_cpr->maxRangeRadius-1;
	238
	239	unsigned int accIntervals = 2*(i+1);
	240	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	241
	242	if(powerOf2<32)
	243	powerOf2 = 32;
	244
	245	free(intervals);
	246	return powerOf2;
	247	}
	248
	249	TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData,
	250	size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d)
	251	{
	252	#ifdef HAVE_TIMECMPR
	253	double* decData = NULL;
	254	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	255	decData = (double*)(multisteps->hist_data);
	256	#endif
	257
	258	unsigned int quantization_intervals;
	259	if(exe_params->optQuantMode==1)
	260	quantization_intervals = optimize_intervals_double_1D_opt(oriData, dataLength, realPrecision);
	261	else
	262	quantization_intervals = exe_params->intvCapacity;
	263	updateQuantizationInfo(quantization_intervals);
	264
	265	size_t i;
	266	int reqLength;
	267	double medianValue = medianValue_d;
	268	short radExpo = getExponent_double(valueRangeSize/2);
	269
	270	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	271
	272	int* type = (int) malloc(dataLengthsizeof(int));
	273
	274	double* spaceFillingValue = oriData; //
	275
	276	DynamicIntArray *exactLeadNumArray;
	277	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	278
	279	DynamicByteArray *exactMidByteArray;
	280	new_DBA(&exactMidByteArray, DynArrayInitLen);
	281
	282	DynamicIntArray *resiBitArray;
	283	new_DIA(&resiBitArray, DynArrayInitLen);
	284
	285	unsigned char preDataBytes[8];
	286	longToBytes_bigEndian(preDataBytes, 0);
	287
	288	int reqBytesLength = reqLength/8;
	289	int resiBitsLength = reqLength%8;
	290	double last3CmprsData[3] = {0};
	291
	292	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	293	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	294
	295	//add the first data
	296	type[0] = 0;
	297	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	298	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	299	memcpy(preDataBytes,vce->curBytes,8);
	300	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	301	listAdd_double(last3CmprsData, vce->data);
	302	#ifdef HAVE_TIMECMPR
	303	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	304	decData[0] = vce->data;
	305	#endif
	306
	307	//add the second data
	308	type[1] = 0;
	309	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	310	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	311	memcpy(preDataBytes,vce->curBytes,8);
	312	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	313	listAdd_double(last3CmprsData, vce->data);
	314	#ifdef HAVE_TIMECMPR
	315	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	316	decData[1] = vce->data;
	317	#endif
	318	int state;
	319	double checkRadius;
	320	double curData;
	321	double pred;
	322	double predAbsErr;
	323	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
	324	double interval = 2*realPrecision;
	325
	326	for(i=2;i<dataLength;i++)
	327	{
	328	//printf("%.30G\n",last3CmprsData[0]);
	329	curData = spaceFillingValue[i];
	330	//pred = 2*last3CmprsData[0] - last3CmprsData[1];
	331	pred = last3CmprsData[0];
	332	predAbsErr = fabs(curData - pred);
[9ee2ce3]	333	if(predAbsErr<checkRadius)
[2c47b73]	334	{
	335	state = (predAbsErr/realPrecision+1)/2;
	336	if(curData>=pred)
	337	{
	338	type[i] = exe_params->intvRadius+state;
	339	pred = pred + state*interval;
	340	}
	341	else //curData<pred
	342	{
	343	type[i] = exe_params->intvRadius-state;
	344	pred = pred - state*interval;
	345	}
	346	listAdd_double(last3CmprsData, pred);
	347	#ifdef HAVE_TIMECMPR
	348	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	349	decData[i] = pred;
	350	#endif
	351	continue;
	352	}
	353
	354	//unpredictable data processing
	355	type[i] = 0;
	356	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	357	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	358	memcpy(preDataBytes,vce->curBytes,8);
	359	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	360
	361	listAdd_double(last3CmprsData, vce->data);
	362	#ifdef HAVE_TIMECMPR
	363	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	364	decData[i] = vce->data;
	365	#endif
	366
	367	}//end of for
	368
	369	int exactDataNum = exactLeadNumArray->size;
	370
	371	TightDataPointStorageD* tdps;
	372
	373	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	374	type, exactMidByteArray->array, exactMidByteArray->size,
	375	exactLeadNumArray->array,
	376	resiBitArray->array, resiBitArray->size,
	377	resiBitsLength,
	378	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	379
	380	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	381	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	382
	383	//free memory
	384	free_DIA(exactLeadNumArray);
	385	free_DIA(resiBitArray);
	386	free(type);
	387	free(vce);
	388	free(lce);
	389	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	390
	391	return tdps;
	392	}
	393
	394	void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, TightDataPointStorageD* tdps,
	395	unsigned char** newByteData, size_t *outSize)
	396	{
	397	int doubleSize = sizeof(double);
	398	size_t k = 0, i;
	399	tdps->isLossless = 1;
	400	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength;
	401	newByteData = (unsigned char)malloc(totalByteLength);
	402
	403	unsigned char dsLengthBytes[8];
	404	for (i = 0; i < 3; i++)//3
	405	(*newByteData)[k++] = versionNumber[i];
	406
	407	if(exe_params->SZ_SIZE_TYPE==4)//1
	408	(*newByteData)[k++] = 16; //00010000
	409	else
	410	(*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8
	411
	412	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
	413	k = k + MetaDataByteLength;
	414
	415	sizeToBytes(dsLengthBytes,dataLength);
	416	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//ST: 4 or 8
	417	(*newByteData)[k++] = dsLengthBytes[i];
	418
	419	if(sysEndianType==BIG_ENDIAN_SYSTEM)
	420	memcpy((newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLengthdoubleSize);
	421	else
	422	{
	423	unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
	424	for(i=0;i<dataLength;i++,p+=doubleSize)
	425	doubleToBytes(p, oriData[i]);
	426	}
	427	*outSize = totalByteLength;
	428	}
	429
	430
	431	char SZ_compress_args_double_NoCkRngeNoGzip_1D(unsigned char** newByteData, double *oriData,
	432	size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d)
	433	{
	434	char compressionType = 0;
	435	TightDataPointStorageD* tdps = NULL;
	436	#ifdef HAVE_TIMECMPR
	437	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	438	{
	439	int timestep = sz_tsc->currentStep;
	440	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	441	{
	442	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
	443	compressionType = 1; //time-series based compression
	444	}
	445	else
	446	{
	447	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
	448	compressionType = 0; //snapshot-based compression
	449	multisteps->lastSnapshotStep = timestep;
	450	}
	451	}
	452	else
	453	#endif
	454	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
	455
	456	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	457
	458	if(outSize>dataLengthsizeof(double))
	459	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	460
	461	free_TightDataPointStorageD(tdps);
	462	return compressionType;
	463	}
	464
	465	TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_d)
	466	{
	467	#ifdef HAVE_TIMECMPR
	468	double* decData = NULL;
	469	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	470	decData = (double*)(multisteps->hist_data);
	471	#endif
	472
	473	unsigned int quantization_intervals;
	474	if(exe_params->optQuantMode==1)
	475	{
	476	quantization_intervals = optimize_intervals_double_2D_opt(oriData, r1, r2, realPrecision);
	477	updateQuantizationInfo(quantization_intervals);
	478	}
	479	else
	480	quantization_intervals = exe_params->intvCapacity;
	481	size_t i,j;
	482	int reqLength;
	483	double pred1D, pred2D;
	484	double diff = 0.0;
	485	double itvNum = 0;
	486	double P0, P1;
	487
	488	size_t dataLength = r1*r2;
	489
	490	P0 = (double)malloc(r2sizeof(double));
	491	memset(P0, 0, r2*sizeof(double));
	492	P1 = (double)malloc(r2sizeof(double));
	493	memset(P1, 0, r2*sizeof(double));
	494
	495	double medianValue = medianValue_d;
	496	short radExpo = getExponent_double(valueRangeSize/2);
	497	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	498
	499	int* type = (int) malloc(dataLengthsizeof(int));
	500	//type[dataLength]=0;
	501
	502	double* spaceFillingValue = oriData; //
	503
	504	DynamicIntArray *exactLeadNumArray;
	505	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	506
	507	DynamicByteArray *exactMidByteArray;
	508	new_DBA(&exactMidByteArray, DynArrayInitLen);
	509
	510	DynamicIntArray *resiBitArray;
	511	new_DIA(&resiBitArray, DynArrayInitLen);
	512
	513	type[0] = 0;
	514
	515	unsigned char preDataBytes[8];
	516	longToBytes_bigEndian(preDataBytes, 0);
	517
	518	int reqBytesLength = reqLength/8;
	519	int resiBitsLength = reqLength%8;
	520
	521	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	522	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	523
	524	/* Process Row-0 data 0*/
	525	type[0] = 0;
	526	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	527	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	528	memcpy(preDataBytes,vce->curBytes,8);
	529	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	530	P1[0] = vce->data;
	531	#ifdef HAVE_TIMECMPR
	532	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	533	decData[0] = vce->data;
	534	#endif
	535
	536	/* Process Row-0 data 1*/
	537	pred1D = P1[0];
	538	diff = spaceFillingValue[1] - pred1D;
	539
	540	itvNum = fabs(diff)/realPrecision + 1;
	541
	542	if (itvNum < exe_params->intvCapacity)
	543	{
	544	if (diff < 0) itvNum = -itvNum;
	545	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	546	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	547	}
	548	else
	549	{
	550	type[1] = 0;
	551	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	552	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	553	memcpy(preDataBytes,vce->curBytes,8);
	554	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	555	P1[1] = vce->data;
	556	}
	557	#ifdef HAVE_TIMECMPR
	558	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	559	decData[1] = P1[1];
	560	#endif
	561
	562	/* Process Row-0 data 2 --> data r2-1 */
	563	for (j = 2; j < r2; j++)
	564	{
	565	pred1D = 2*P1[j-1] - P1[j-2];
	566	diff = spaceFillingValue[j] - pred1D;
	567
	568	itvNum = fabs(diff)/realPrecision + 1;
	569
	570	if (itvNum < exe_params->intvCapacity)
	571	{
	572	if (diff < 0) itvNum = -itvNum;
	573	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	574	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	575	}
	576	else
	577	{
	578	type[j] = 0;
	579	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	580	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	581	memcpy(preDataBytes,vce->curBytes,8);
	582	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	583	P1[j] = vce->data;
	584	}
	585	#ifdef HAVE_TIMECMPR
	586	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	587	decData[j] = P1[j];
	588	#endif
	589	}
	590
	591	/* Process Row-1 --> Row-r1-1 */
	592	size_t index;
	593	for (i = 1; i < r1; i++)
	594	{
	595	/* Process row-i data 0 */
	596	index = i*r2;
	597	pred1D = P1[0];
	598	diff = spaceFillingValue[index] - pred1D;
	599
	600	itvNum = fabs(diff)/realPrecision + 1;
	601
	602	if (itvNum < exe_params->intvCapacity)
	603	{
	604	if (diff < 0) itvNum = -itvNum;
	605	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	606	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	607	}
	608	else
	609	{
	610	type[index] = 0;
	611	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	612	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	613	memcpy(preDataBytes,vce->curBytes,8);
	614	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	615	P0[0] = vce->data;
	616	}
	617	#ifdef HAVE_TIMECMPR
	618	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	619	decData[index] = P0[0];
	620	#endif
	621
	622	/* Process row-i data 1 --> r2-1*/
	623	for (j = 1; j < r2; j++)
	624	{
	625	index = i*r2+j;
	626	pred2D = P0[j-1] + P1[j] - P1[j-1];
	627
	628	diff = spaceFillingValue[index] - pred2D;
	629
	630	itvNum = fabs(diff)/realPrecision + 1;
	631
	632	if (itvNum < exe_params->intvCapacity)
	633	{
	634	if (diff < 0) itvNum = -itvNum;
	635	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	636	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	637	}
	638	else
	639	{
	640	type[index] = 0;
	641	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	642	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	643	memcpy(preDataBytes,vce->curBytes,8);
	644	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	645	P0[j] = vce->data;
	646	}
	647	#ifdef HAVE_TIMECMPR
	648	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	649	decData[index] = P0[j];
	650	#endif
	651	}
	652
	653	double *Pt;
	654	Pt = P1;
	655	P1 = P0;
	656	P0 = Pt;
	657	}
	658
	659	if(r2!=1)
	660	free(P0);
	661	free(P1);
	662	size_t exactDataNum = exactLeadNumArray->size;
	663
	664	TightDataPointStorageD* tdps;
	665
	666	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	667	type, exactMidByteArray->array, exactMidByteArray->size,
	668	exactLeadNumArray->array,
	669	resiBitArray->array, resiBitArray->size,
	670	resiBitsLength,
	671	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	672
	673	/* int sum =0;
	674	for(i=0;i<dataLength;i++)
	675	if(type[i]==0) sum++;
	676	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);
	677
	678	for(i=0;i<dataLength;i++)
	679	printf("%d ", type[i]);
	680	printf("\n");*/
	681
	682	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	683	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	684
	685	// for(i = 3800;i<3844;i++)
	686	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
	687
	688	//free memory
	689	free_DIA(exactLeadNumArray);
	690	free_DIA(resiBitArray);
	691	free(type);
	692	free(vce);
	693	free(lce);
	694	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	695
	696	return tdps;
	697	}
	698
	699	/**
	700	*
	701	* Note: @r1 is high dimension
	702	* @r2 is low dimension
	703	* */
	704	char SZ_compress_args_double_NoCkRngeNoGzip_2D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
	705	{
	706	size_t dataLength = r1*r2;
	707	char compressionType = 0;
	708	TightDataPointStorageD* tdps = NULL;
	709	#ifdef HAVE_TIMECMPR
	710	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	711	{
	712	int timestep = sz_tsc->currentStep;
	713	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	714	{
	715	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
	716	compressionType = 1; //time-series based compression
	717	}
	718	else
	719	{
	720	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
	721	compressionType = 0; //snapshot-based compression
	722	multisteps->lastSnapshotStep = timestep;
	723	}
	724	}
	725	else
	726	#endif
	727	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
	728
	729	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	730
	731	if(outSize>dataLengthsizeof(double))
	732	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	733
	734	free_TightDataPointStorageD(tdps);
	735	return compressionType;
	736	}
	737
	738	TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_d)
	739	{
	740	#ifdef HAVE_TIMECMPR
	741	double* decData = NULL;
	742	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	743	decData = (double*)(multisteps->hist_data);
	744	#endif
	745
	746	unsigned int quantization_intervals;
	747	if(exe_params->optQuantMode==1)
	748	{
	749	quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision);
	750	updateQuantizationInfo(quantization_intervals);
	751	}
	752	else
	753	quantization_intervals = exe_params->intvCapacity;
	754	size_t i,j,k;
	755	int reqLength;
	756	double pred1D, pred2D, pred3D;
	757	double diff = 0.0;
	758	double itvNum = 0;
	759	double P0, P1;
	760
	761	size_t dataLength = r1r2r3;
	762
	763	size_t r23 = r2*r3;
	764
	765	P0 = (double)malloc(r23sizeof(double));
	766	P1 = (double)malloc(r23sizeof(double));
	767
	768	double medianValue = medianValue_d;
	769	short radExpo = getExponent_double(valueRangeSize/2);
	770	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	771
	772	int* type = (int) malloc(dataLengthsizeof(int));
	773	//type[dataLength]=0;
	774
	775	double* spaceFillingValue = oriData; //
	776
	777	DynamicIntArray *exactLeadNumArray;
	778	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	779
	780	DynamicByteArray *exactMidByteArray;
	781	new_DBA(&exactMidByteArray, DynArrayInitLen);
	782
	783	DynamicIntArray *resiBitArray;
	784	new_DIA(&resiBitArray, DynArrayInitLen);
	785
	786	type[0] = 0;
	787
	788	unsigned char preDataBytes[8];
	789	longToBytes_bigEndian(preDataBytes, 0);
	790
	791	int reqBytesLength = reqLength/8;
	792	int resiBitsLength = reqLength%8;
	793
	794	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	795	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	796
	797
	798	/////////////////////////// Process layer-0 ///////////////////////////
	799	/* Process Row-0 data 0*/
	800	type[0] = 0;
	801	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	802	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	803	memcpy(preDataBytes,vce->curBytes,8);
	804	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	805	P1[0] = vce->data;
	806	#ifdef HAVE_TIMECMPR
	807	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	808	decData[0] = P1[0];
	809	#endif
	810
	811	/* Process Row-0 data 1*/
	812	pred1D = P1[0];
	813	diff = spaceFillingValue[1] - pred1D;
	814
	815	itvNum = fabs(diff)/realPrecision + 1;
	816
	817	if (itvNum < exe_params->intvCapacity)
	818	{
	819	if (diff < 0) itvNum = -itvNum;
	820	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	821	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	822	}
	823	else
	824	{
	825	type[1] = 0;
	826	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	827	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	828	memcpy(preDataBytes,vce->curBytes,8);
	829	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	830	P1[1] = vce->data;
	831	}
	832	#ifdef HAVE_TIMECMPR
	833	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	834	decData[1] = P1[1];
	835	#endif
	836
	837	/* Process Row-0 data 2 --> data r3-1 */
	838	for (j = 2; j < r3; j++)
	839	{
	840	pred1D = 2*P1[j-1] - P1[j-2];
	841	diff = spaceFillingValue[j] - pred1D;
	842
	843	itvNum = fabs(diff)/realPrecision + 1;
	844
	845	if (itvNum < exe_params->intvCapacity)
	846	{
	847	if (diff < 0) itvNum = -itvNum;
	848	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	849	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	850	}
	851	else
	852	{
	853	type[j] = 0;
	854	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	855	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	856	memcpy(preDataBytes,vce->curBytes,8);
	857	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	858	P1[j] = vce->data;
	859	}
	860	#ifdef HAVE_TIMECMPR
	861	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	862	decData[j] = P1[j];
	863	#endif
	864	}
	865
	866	/* Process Row-1 --> Row-r2-1 */
	867	size_t index;
	868	for (i = 1; i < r2; i++)
	869	{
	870	/* Process row-i data 0 */
	871	index = i*r3;
	872	pred1D = P1[index-r3];
	873	diff = spaceFillingValue[index] - pred1D;
	874
	875	itvNum = fabs(diff)/realPrecision + 1;
	876
	877	if (itvNum < exe_params->intvCapacity)
	878	{
	879	if (diff < 0) itvNum = -itvNum;
	880	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	881	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	882	}
	883	else
	884	{
	885	type[index] = 0;
	886	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	887	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	888	memcpy(preDataBytes,vce->curBytes,8);
	889	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	890	P1[index] = vce->data;
	891	}
	892	#ifdef HAVE_TIMECMPR
	893	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	894	decData[index] = P1[index];
	895	#endif
	896
	897	/* Process row-i data 1 --> data r3-1*/
	898	for (j = 1; j < r3; j++)
	899	{
	900	index = i*r3+j;
	901	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
	902
	903	diff = spaceFillingValue[index] - pred2D;
	904
	905	itvNum = fabs(diff)/realPrecision + 1;
	906
	907	if (itvNum < exe_params->intvCapacity)
	908	{
	909	if (diff < 0) itvNum = -itvNum;
	910	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	911	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	912	}
	913	else
	914	{
	915	type[index] = 0;
	916	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	917	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	918	memcpy(preDataBytes,vce->curBytes,8);
	919	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	920	P1[index] = vce->data;
	921	}
	922	#ifdef HAVE_TIMECMPR
	923	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	924	decData[index] = P1[index];
	925	#endif
	926	}
	927	}
	928
	929
	930	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
	931
	932	for (k = 1; k < r1; k++)
	933	{
	934	/* Process Row-0 data 0*/
	935	index = k*r23;
	936	pred1D = P1[0];
	937	diff = spaceFillingValue[index] - pred1D;
	938
	939	itvNum = fabs(diff)/realPrecision + 1;
	940
	941	if (itvNum < exe_params->intvCapacity)
	942	{
	943	if (diff < 0) itvNum = -itvNum;
	944	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	945	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	946	}
	947	else
	948	{
	949	type[index] = 0;
	950	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	951	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	952	memcpy(preDataBytes,vce->curBytes,8);
	953	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	954	P0[0] = vce->data;
	955	}
	956	#ifdef HAVE_TIMECMPR
	957	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	958	decData[index] = P0[0];
	959	#endif
	960
	961	/* Process Row-0 data 1 --> data r3-1 */
	962	for (j = 1; j < r3; j++)
	963	{
	964	//index = kr2r3+j;
	965	index ++;
	966	pred2D = P0[j-1] + P1[j] - P1[j-1];
	967	diff = spaceFillingValue[index] - pred2D;
	968
	969	itvNum = fabs(diff)/realPrecision + 1;
	970
	971	if (itvNum < exe_params->intvCapacity)
	972	{
	973	if (diff < 0) itvNum = -itvNum;
	974	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	975	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	976	}
	977	else
	978	{
	979	type[index] = 0;
	980	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	981	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	982	memcpy(preDataBytes,vce->curBytes,8);
	983	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	984	P0[j] = vce->data;
	985	}
	986	#ifdef HAVE_TIMECMPR
	987	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	988	decData[index] = P0[j];
	989	#endif
	990	}
	991
	992	/* Process Row-1 --> Row-r2-1 */
	993	size_t index2D;
	994	for (i = 1; i < r2; i++)
	995	{
	996	/* Process Row-i data 0 */
	997	index = kr23 + ir3;
	998	index2D = i*r3;
	999	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
	1000	diff = spaceFillingValue[index] - pred2D;
	1001
	1002	itvNum = fabs(diff)/realPrecision + 1;
	1003
	1004	if (itvNum < exe_params->intvCapacity)
	1005	{
	1006	if (diff < 0) itvNum = -itvNum;
	1007	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1008	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1009	}
	1010	else
	1011	{
	1012	type[index] = 0;
	1013	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1014	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1015	memcpy(preDataBytes,vce->curBytes,8);
	1016	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1017	P0[index2D] = vce->data;
	1018	}
	1019	#ifdef HAVE_TIMECMPR
	1020	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1021	decData[index] = P0[index2D];
	1022	#endif
	1023
	1024	/* Process Row-i data 1 --> data r3-1 */
	1025	for (j = 1; j < r3; j++)
	1026	{
	1027	//index = kr2r3 + i*r3 + j;
	1028	index ++;
	1029	index2D = i*r3 + j;
	1030	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
	1031	diff = spaceFillingValue[index] - pred3D;
	1032
	1033	itvNum = fabs(diff)/realPrecision + 1;
	1034
	1035	if (itvNum < exe_params->intvCapacity)
	1036	{
	1037	if (diff < 0) itvNum = -itvNum;
	1038	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1039	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1040	}
	1041	else
	1042	{
	1043	type[index] = 0;
	1044	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1045	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1046	memcpy(preDataBytes,vce->curBytes,8);
	1047	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1048	P0[index2D] = vce->data;
	1049	}
	1050	#ifdef HAVE_TIMECMPR
	1051	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1052	decData[index] = P0[index2D];
	1053	#endif
	1054	}
	1055	}
	1056
	1057	double *Pt;
	1058	Pt = P1;
	1059	P1 = P0;
	1060	P0 = Pt;
	1061	}
	1062	if(r23!=1)
	1063	free(P0);
	1064	free(P1);
	1065	size_t exactDataNum = exactLeadNumArray->size;
	1066
	1067	TightDataPointStorageD* tdps;
	1068
	1069	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	1070	type, exactMidByteArray->array, exactMidByteArray->size,
	1071	exactLeadNumArray->array,
	1072	resiBitArray->array, resiBitArray->size,
	1073	resiBitsLength,
	1074	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	1075
	1076	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	1077	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	1078
	1079	// for(i = 3800;i<3844;i++)
	1080	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
	1081
	1082	//free memory
	1083	free_DIA(exactLeadNumArray);
	1084	free_DIA(resiBitArray);
	1085	free(type);
	1086	free(vce);
	1087	free(lce);
	1088	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	1089
	1090	return tdps;
	1091	}
	1092
	1093
	1094	char SZ_compress_args_double_NoCkRngeNoGzip_3D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
	1095	{
	1096	size_t dataLength = r1r2r3;
	1097	char compressionType = 0;
	1098	TightDataPointStorageD* tdps = NULL;
	1099	#ifdef HAVE_TIMECMPR
	1100	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1101	{
	1102	int timestep = sz_tsc->currentStep;
	1103	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	1104	{
	1105	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
	1106	compressionType = 1; //time-series based compression
	1107	}
	1108	else
	1109	{
	1110	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
	1111	compressionType = 0; //snapshot-based compression
	1112	multisteps->lastSnapshotStep = timestep;
	1113	}
	1114	}
	1115	else
	1116	#endif
	1117	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
	1118
	1119	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	1120
	1121	if(outSize>dataLengthsizeof(double))
	1122	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1123
	1124	free_TightDataPointStorageD(tdps);
	1125	return compressionType;
	1126	}
	1127
	1128	TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, double valueRangeSize, double medianValue_d)
	1129	{
	1130	unsigned int quantization_intervals;
	1131	if(exe_params->optQuantMode==1)
	1132	{
	1133	quantization_intervals = optimize_intervals_double_4D(oriData, r1, r2, r3, r4, realPrecision);
	1134	updateQuantizationInfo(quantization_intervals);
	1135	}
	1136	else
	1137	quantization_intervals = exe_params->intvCapacity;
	1138
	1139	size_t i,j,k;
	1140	int reqLength;
	1141	double pred1D, pred2D, pred3D;
	1142	double diff = 0.0;
	1143	double itvNum = 0;
	1144	double P0, P1;
	1145
	1146	size_t dataLength = r1r2r3*r4;
	1147
	1148	size_t r234 = r2r3r4;
	1149	size_t r34 = r3*r4;
	1150
	1151	P0 = (double)malloc(r34sizeof(double));
	1152	P1 = (double)malloc(r34sizeof(double));
	1153
	1154	double medianValue = medianValue_d;
	1155	short radExpo = getExponent_double(valueRangeSize/2);
	1156	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	1157
	1158	int* type = (int) malloc(dataLengthsizeof(int));
	1159
	1160	double* spaceFillingValue = oriData; //
	1161
	1162	DynamicIntArray *exactLeadNumArray;
	1163	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	1164
	1165	DynamicByteArray *exactMidByteArray;
	1166	new_DBA(&exactMidByteArray, DynArrayInitLen);
	1167
	1168	DynamicIntArray *resiBitArray;
	1169	new_DIA(&resiBitArray, DynArrayInitLen);
	1170
	1171	unsigned char preDataBytes[8];
	1172	longToBytes_bigEndian(preDataBytes, 0);
	1173
	1174	int reqBytesLength = reqLength/8;
	1175	int resiBitsLength = reqLength%8;
	1176
	1177	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	1178	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	1179
	1180
	1181	size_t l;
	1182	for (l = 0; l < r1; l++)
	1183	{
	1184
	1185	/////////////////////////// Process layer-0 ///////////////////////////
	1186	/* Process Row-0 data 0*/
	1187	size_t index = l*r234;
	1188	size_t index2D = 0;
	1189
	1190	type[index] = 0;
	1191	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1192	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1193	memcpy(preDataBytes,vce->curBytes,8);
	1194	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1195	P1[index2D] = vce->data;
	1196
	1197	/* Process Row-0 data 1*/
	1198	index = l*r234+1;
	1199	index2D = 1;
	1200
	1201	pred1D = P1[index2D-1];
	1202	diff = spaceFillingValue[index] - pred1D;
	1203
	1204	itvNum = fabs(diff)/realPrecision + 1;
	1205
	1206	if (itvNum < exe_params->intvCapacity)
	1207	{
	1208	if (diff < 0) itvNum = -itvNum;
	1209	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1210	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1211	}
	1212	else
	1213	{
	1214	type[index] = 0;
	1215	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1216	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1217	memcpy(preDataBytes,vce->curBytes,8);
	1218	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1219	P1[index2D] = vce->data;
	1220	}
	1221
	1222	/* Process Row-0 data 2 --> data r4-1 */
	1223	for (j = 2; j < r4; j++)
	1224	{
	1225	index = l*r234+j;
	1226	index2D = j;
	1227
	1228	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	1229	diff = spaceFillingValue[index] - pred1D;
	1230
	1231	itvNum = fabs(diff)/realPrecision + 1;
	1232
	1233	if (itvNum < exe_params->intvCapacity)
	1234	{
	1235	if (diff < 0) itvNum = -itvNum;
	1236	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1237	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1238	}
	1239	else
	1240	{
	1241	type[index] = 0;
	1242	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1243	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1244	memcpy(preDataBytes,vce->curBytes,8);
	1245	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1246	P1[index2D] = vce->data;
	1247	}
	1248	}
	1249
	1250	/* Process Row-1 --> Row-r3-1 */
	1251	for (i = 1; i < r3; i++)
	1252	{
	1253	/* Process row-i data 0 */
	1254	index = lr234+ir4;
	1255	index2D = i*r4;
	1256
	1257	pred1D = P1[index2D-r4];
	1258	diff = spaceFillingValue[index] - pred1D;
	1259
	1260	itvNum = fabs(diff)/realPrecision + 1;
	1261
	1262	if (itvNum < exe_params->intvCapacity)
	1263	{
	1264	if (diff < 0) itvNum = -itvNum;
	1265	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1266	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1267	}
	1268	else
	1269	{
	1270	type[index] = 0;
	1271	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1272	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1273	memcpy(preDataBytes,vce->curBytes,8);
	1274	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1275	P1[index2D] = vce->data;
	1276	}
	1277
	1278	/* Process row-i data 1 --> data r4-1*/
	1279	for (j = 1; j < r4; j++)
	1280	{
	1281	index = lr234+ir4+j;
	1282	index2D = i*r4+j;
	1283
	1284	pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1];
	1285
	1286	diff = spaceFillingValue[index] - pred2D;
	1287
	1288	itvNum = fabs(diff)/realPrecision + 1;
	1289
	1290	if (itvNum < exe_params->intvCapacity)
	1291	{
	1292	if (diff < 0) itvNum = -itvNum;
	1293	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1294	P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1295	}
	1296	else
	1297	{
	1298	type[index] = 0;
	1299	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1300	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1301	memcpy(preDataBytes,vce->curBytes,8);
	1302	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1303	P1[index2D] = vce->data;
	1304	}
	1305	}
	1306	}
	1307
	1308
	1309	/////////////////////////// Process layer-1 --> layer-r2-1 ///////////////////////////
	1310
	1311	for (k = 1; k < r2; k++)
	1312	{
	1313	/* Process Row-0 data 0*/
	1314	index = lr234+kr34;
	1315	index2D = 0;
	1316
	1317	pred1D = P1[index2D];
	1318	diff = spaceFillingValue[index] - pred1D;
	1319
	1320	itvNum = fabs(diff)/realPrecision + 1;
	1321
	1322	if (itvNum < exe_params->intvCapacity)
	1323	{
	1324	if (diff < 0) itvNum = -itvNum;
	1325	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1326	P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1327	}
	1328	else
	1329	{
	1330	type[index] = 0;
	1331	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1332	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1333	memcpy(preDataBytes,vce->curBytes,8);
	1334	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1335	P0[index2D] = vce->data;
	1336	}
	1337
	1338
	1339	/* Process Row-0 data 1 --> data r4-1 */
	1340	for (j = 1; j < r4; j++)
	1341	{
	1342	index = lr234+kr34+j;
	1343	index2D = j;
	1344
	1345	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	1346	diff = spaceFillingValue[index] - pred2D;
	1347
	1348	itvNum = fabs(diff)/realPrecision + 1;
	1349
	1350	if (itvNum < exe_params->intvCapacity)
	1351	{
	1352	if (diff < 0) itvNum = -itvNum;
	1353	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1354	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1355	}
	1356	else
	1357	{
	1358	type[index] = 0;
	1359	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1360	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1361	memcpy(preDataBytes,vce->curBytes,8);
	1362	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1363	P0[index2D] = vce->data;
	1364	}
	1365	}
	1366
	1367	/* Process Row-1 --> Row-r3-1 */
	1368	for (i = 1; i < r3; i++)
	1369	{
	1370	/* Process Row-i data 0 */
	1371	index = lr234+kr34+i*r4;
	1372	index2D = i*r4;
	1373
	1374	pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4];
	1375	diff = spaceFillingValue[index] - pred2D;
	1376
	1377	itvNum = fabs(diff)/realPrecision + 1;
	1378
	1379	if (itvNum < exe_params->intvCapacity)
	1380	{
	1381	if (diff < 0) itvNum = -itvNum;
	1382	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1383	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1384	}
	1385	else
	1386	{
	1387	type[index] = 0;
	1388	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1389	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1390	memcpy(preDataBytes,vce->curBytes,8);
	1391	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1392	P0[index2D] = vce->data;
	1393	}
	1394
	1395	/* Process Row-i data 1 --> data r4-1 */
	1396	for (j = 1; j < r4; j++)
	1397	{
	1398	index = lr234+kr34+i*r4+j;
	1399	index2D = i*r4+j;
	1400
	1401	pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1];
	1402	diff = spaceFillingValue[index] - pred3D;
	1403
	1404
	1405	itvNum = fabs(diff)/realPrecision + 1;
	1406
	1407	if (itvNum < exe_params->intvCapacity)
	1408	{
	1409	if (diff < 0) itvNum = -itvNum;
	1410	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1411	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1412	}
	1413	else
	1414	{
	1415	type[index] = 0;
	1416	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1417	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1418	memcpy(preDataBytes,vce->curBytes,8);
	1419	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1420	P0[index2D] = vce->data;
	1421	}
	1422	}
	1423	}
	1424
	1425	double *Pt;
	1426	Pt = P1;
	1427	P1 = P0;
	1428	P0 = Pt;
	1429	}
	1430	}
	1431
	1432	free(P0);
	1433	free(P1);
	1434	size_t exactDataNum = exactLeadNumArray->size;
	1435
	1436	TightDataPointStorageD* tdps;
	1437
	1438	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	1439	type, exactMidByteArray->array, exactMidByteArray->size,
	1440	exactLeadNumArray->array,
	1441	resiBitArray->array, resiBitArray->size,
	1442	resiBitsLength,
	1443	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	1444
	1445	//free memory
	1446	free_DIA(exactLeadNumArray);
	1447	free_DIA(resiBitArray);
	1448	free(type);
	1449	free(vce);
	1450	free(lce);
	1451	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	1452
	1453	return tdps;
	1454	}
	1455
	1456
	1457	char SZ_compress_args_double_NoCkRngeNoGzip_4D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
	1458	{
	1459	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_d);
	1460
	1461	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	1462
	1463	size_t dataLength = r1r2r3*r4;
	1464	if(outSize>dataLengthsizeof(double))
	1465	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1466
	1467	free_TightDataPointStorageD(tdps);
	1468	return 0;
	1469	}
	1470
	1471	void SZ_compress_args_double_withinRange(unsigned char** newByteData, double oriData, size_t dataLength, size_t outSize)
	1472	{
	1473	TightDataPointStorageD* tdps = (TightDataPointStorageD*) malloc(sizeof(TightDataPointStorageD));
	1474	tdps->rtypeArray = NULL;
	1475	tdps->typeArray = NULL;
	1476	tdps->leadNumArray = NULL;
	1477	tdps->residualMidBits = NULL;
	1478
	1479	tdps->allSameData = 1;
	1480	tdps->dataSeriesLength = dataLength;
	1481	tdps->exactMidBytes = (unsigned char)malloc(sizeof(unsigned char)8);
	1482	tdps->pwrErrBoundBytes = NULL;
	1483	tdps->isLossless = 0;
	1484	double value = oriData[0];
	1485	doubleToBytes(tdps->exactMidBytes, value);
	1486	tdps->exactMidBytes_size = 8;
	1487
	1488	size_t tmpOutSize;
	1489	//unsigned char *tmpByteData;
	1490	convertTDPStoFlatBytes_double(tdps, newByteData, &tmpOutSize);
	1491	//convertTDPStoFlatBytes_double(tdps, &tmpByteData, &tmpOutSize);
	1492
	1493	//newByteData = (unsigned char)malloc(sizeof(unsigned char)*16); //for floating-point data (1+3+4+4)
	1494	//memcpy(*newByteData, tmpByteData, 16);
	1495	*outSize = tmpOutSize;//12==3+1+8(double_size)+MetaDataByteLength
	1496	free_TightDataPointStorageD(tdps);
	1497	}
	1498
	1499	int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData,
	1500	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
	1501	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
	1502	{
	1503	int status = SZ_SCES;
	1504	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
	1505	double valueRangeSize = 0, medianValue = 0;
	1506
	1507	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
	1508	double max = min+valueRangeSize;
	1509	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1510
	1511	if(valueRangeSize <= realPrecision)
	1512	{
	1513	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
	1514	}
	1515	else
	1516	{
	1517	if(r5==0&&r4==0&&r3==0&&r2==0)
	1518	{
	1519	if(errBoundMode>=PW_REL)
	1520	{
[9ee2ce3]	1521	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
	1522	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
[2c47b73]	1523	}
	1524	else
	1525	SZ_compress_args_double_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1526	}
	1527	else if(r5==0&&r4==0&&r3==0)
	1528	{
	1529	if(errBoundMode>=PW_REL)
	1530	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(newByteData, oriData, realPrecision, r2, r1, outSize, min, max);
	1531	else
	1532	SZ_compress_args_double_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1533	}
	1534	else if(r5==0&&r4==0)
	1535	{
	1536	if(errBoundMode>=PW_REL)
	1537	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max);
	1538	else
	1539	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1540	}
	1541	else if(r5==0)
	1542	{
	1543	if(errBoundMode>=PW_REL)
	1544	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max);
	1545	else
	1546	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1547	}
	1548	}
	1549	return status;
	1550	}
	1551
	1552	int SZ_compress_args_double(unsigned char** newByteData, double *oriData,
	1553	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
	1554	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
	1555	{
	1556	confparams_cpr->errorBoundMode = errBoundMode;
	1557	if(errBoundMode==PW_REL)
	1558	{
	1559	confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
	1560	//confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
	1561	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE && r3 != 0 )
	1562	{
	1563	printf("Error: Current version doesn't support 3D data compression with point-wise relative error bound being based on pwrType=AVG\n");
	1564	exit(0);
	1565	return SZ_NSCS;
	1566	}
[9ee2ce3]	1567	}
[2c47b73]	1568
	1569	int status = SZ_SCES;
	1570	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
	1571
	1572	if(dataLength <= MIN_NUM_OF_ELEMENTS)
	1573	{
	1574	*newByteData = SZ_skip_compress_double(oriData, dataLength, outSize);
	1575	return status;
	1576	}
	1577
	1578	double valueRangeSize = 0, medianValue = 0;
	1579
	1580	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
	1581	double max = min+valueRangeSize;
	1582
	1583	double realPrecision = 0;
	1584
	1585	if(confparams_cpr->errorBoundMode==PSNR)
	1586	{
	1587	confparams_cpr->errorBoundMode = ABS;
	1588	realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, valueRangeSize);
	1589	}
	1590	else
	1591	realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1592
	1593	if(valueRangeSize <= realPrecision)
	1594	{
	1595	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
	1596	}
	1597	else
	1598	{
	1599	size_t tmpOutSize = 0;
	1600	unsigned char* tmpByteData;
	1601	if (r2==0)
	1602	{
	1603	if(confparams_cpr->errorBoundMode>=PW_REL)
	1604	{
[9ee2ce3]	1605	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
	1606	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
[2c47b73]	1607	}
	1608	else
	1609	#ifdef HAVE_TIMECMPR
[9ee2ce3]	1610	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
[2c47b73]	1611	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1612	else
	1613	#endif
	1614	SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1615	}
	1616	else
	1617	if (r3==0)
	1618	{
	1619	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1620	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1621	else
	1622	#ifdef HAVE_TIMECMPR
	1623	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1624	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1625	else
	1626	#endif
[9ee2ce3]	1627	{
	1628	if(sz_with_regression == SZ_NO_REGRESSION)
	1629	SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1630	else
	1631	tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
	1632	}
[2c47b73]	1633	}
	1634	else
	1635	if (r4==0)
	1636	{
	1637	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1638	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1639	else
	1640	#ifdef HAVE_TIMECMPR
	1641	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1642	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1643	else
	1644	#endif
[9ee2ce3]	1645	{
	1646	if(sz_with_regression == SZ_NO_REGRESSION)
	1647	SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1648	else
	1649	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
	1650	}
	1651
	1652
[2c47b73]	1653	}
	1654	else
	1655	if (r5==0)
	1656	{
	1657	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1658	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1659	else
	1660	#ifdef HAVE_TIMECMPR
	1661	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1662	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1663	else
[9ee2ce3]	1664	#endif
	1665	{
	1666	if(sz_with_regression == SZ_NO_REGRESSION)
	1667	SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1668	else
	1669	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
	1670	}
	1671
[2c47b73]	1672	}
	1673	else
	1674	{
	1675	printf("Error: doesn't support 5 dimensions for now.\n");
	1676	status = SZ_DERR;
	1677	}
	1678
	1679	//Call Gzip to do the further compression.
	1680	if(confparams_cpr->szMode==SZ_BEST_SPEED)
	1681	{
	1682	*outSize = tmpOutSize;
	1683	*newByteData = tmpByteData;
	1684	}
	1685	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1686	{
[9ee2ce3]	1687	*outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
[2c47b73]	1688	free(tmpByteData);
	1689	}
	1690	else
	1691	{
	1692	printf("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1693	status = SZ_MERR;
	1694	}
	1695	}
	1696
	1697	return status;
	1698	}
	1699
	1700	//TODO
	1701	int SZ_compress_args_double_subblock(unsigned char* compressedBytes, double *oriData,
	1702	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
	1703	size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
	1704	size_t e5, size_t e4, size_t e3, size_t e2, size_t e1,
	1705	size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio)
	1706	{
	1707	int status = SZ_SCES;
	1708	double valueRangeSize = 0, medianValue = 0;
	1709	computeRangeSize_double_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1);
	1710
	1711	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1712
	1713	if(valueRangeSize <= realPrecision)
	1714	{
	1715	//TODO
	1716	//SZ_compress_args_double_withinRange_subblock();
	1717	}
	1718	else
	1719	{
	1720	if (r2==0)
	1721	{
	1722	//TODO
	1723	if(errBoundMode==PW_REL)
	1724	{
	1725	//TODO
	1726	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_subblock();
	1727	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1728	}
	1729	else
	1730	SZ_compress_args_double_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1);
	1731	}
	1732	else
	1733	if (r3==0)
	1734	{
	1735	if(errBoundMode==PW_REL)
	1736	{
	1737	//TODO
	1738	//SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_subblock();
	1739	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1740	}
	1741	else
	1742	SZ_compress_args_double_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1);
	1743	}
	1744	else
	1745	if (r4==0)
	1746	{
	1747	if(errBoundMode==PW_REL)
	1748	{
	1749	//TODO
	1750	//SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_subblock();
	1751	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1752	}
	1753	else
	1754	SZ_compress_args_double_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1);
	1755	}
	1756	else
	1757	if (r5==0)
	1758	{
	1759	if(errBoundMode==PW_REL)
	1760	{
	1761	//TODO
	1762	//SZ_compress_args_double_NoCkRngeNoGzip_4D_pwr_subblock();
	1763	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1764	}
	1765	else
	1766	SZ_compress_args_double_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
	1767	}
	1768	else
	1769	{
	1770	printf("Error: doesn't support 5 dimensions for now.\n");
	1771	status = SZ_DERR; //dimension error
	1772	}
	1773	}
	1774	return status;
	1775	}
	1776
	1777	void SZ_compress_args_double_NoCkRnge_1D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1778	size_t r1, size_t s1, size_t e1)
	1779	{
	1780	TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r1, s1, e1);
	1781
	1782	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1783	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1784	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1785	{
	1786	unsigned char *tmpCompBytes;
	1787	size_t tmpOutSize;
	1788	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1789	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1790	free(tmpCompBytes);
	1791	}
	1792	else
	1793	{
	1794	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1795	}
	1796
	1797	//TODO
	1798	// if(outSize>dataLengthsizeof(double))
	1799	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1800
	1801	free_TightDataPointStorageD(tdps);
	1802	}
	1803
	1804	void SZ_compress_args_double_NoCkRnge_2D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1805	size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1)
	1806	{
	1807	TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r2, r1, s2, s1, e2, e1);
	1808
	1809	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1810	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1811	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1812	{
	1813	unsigned char *tmpCompBytes;
	1814	size_t tmpOutSize;
	1815	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1816	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1817	free(tmpCompBytes);
	1818	}
	1819	else
	1820	{
	1821	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1822	}
	1823
	1824	//TODO
	1825	// if(outSize>dataLengthsizeof(double))
	1826	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1827
	1828	free_TightDataPointStorageD(tdps);
	1829	}
	1830
	1831	void SZ_compress_args_double_NoCkRnge_3D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1832	size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1)
	1833	{
	1834	TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r3, r2, r1, s3, s2, s1, e3, e2, e1);
	1835
	1836	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1837	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1838	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1839	{
	1840	unsigned char *tmpCompBytes;
	1841	size_t tmpOutSize;
	1842	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1843	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1844	free(tmpCompBytes);
	1845	}
	1846	else
	1847	{
	1848	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1849	}
	1850
	1851	//TODO
	1852	// if(outSize>dataLengthsizeof(double))
	1853	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1854
	1855	free_TightDataPointStorageD(tdps);
	1856	}
	1857
	1858	void SZ_compress_args_double_NoCkRnge_4D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1859	size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1)
	1860	{
	1861	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
	1862
	1863	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1864	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1865	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1866	{
	1867	unsigned char *tmpCompBytes;
	1868	size_t tmpOutSize;
	1869	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1870	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1871	free(tmpCompBytes);
	1872	}
	1873	else
	1874	{
	1875	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1876	}
	1877
	1878	//TODO
	1879	// if(outSize>dataLengthsizeof(double))
	1880	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1881
	1882	free_TightDataPointStorageD(tdps);
	1883	}
	1884
	1885
	1886	unsigned int optimize_intervals_double_1D_subblock(double *oriData, double realPrecision, size_t r1, size_t s1, size_t e1)
	1887	{
	1888	size_t dataLength = e1 - s1 + 1;
	1889	oriData = oriData + s1;
	1890
	1891	size_t i = 0;
	1892	unsigned long radiusIndex;
	1893	double pred_value = 0, pred_err;
	1894	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	1895	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	1896	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
	1897	for(i=2;i<dataLength;i++)
	1898	{
	1899	if(i%confparams_cpr->sampleDistance==0)
	1900	{
	1901	pred_value = 2*oriData[i-1] - oriData[i-2];
	1902	//pred_value = oriData[i-1];
	1903	pred_err = fabs(pred_value - oriData[i]);
	1904	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	1905	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	1906	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	1907	intervals[radiusIndex]++;
	1908	}
	1909	}
	1910	//compute the appropriate number
	1911	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	1912	size_t sum = 0;
	1913	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	1914	{
	1915	sum += intervals[i];
	1916	if(sum>targetCount)
	1917	break;
	1918	}
	1919
	1920	if(i>=confparams_cpr->maxRangeRadius)
	1921	i = confparams_cpr->maxRangeRadius-1;
	1922	unsigned int accIntervals = 2*(i+1);
	1923	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	1924
	1925	if(powerOf2<32)
	1926	powerOf2 = 32;
	1927
	1928	free(intervals);
	1929	return powerOf2;
	1930	}
	1931
	1932	unsigned int optimize_intervals_double_2D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
	1933	{
	1934	size_t R1 = e1 - s1 + 1;
	1935	size_t R2 = e2 - s2 + 1;
	1936
	1937	size_t i,j, index;
	1938	unsigned long radiusIndex;
	1939	double pred_value = 0, pred_err;
	1940	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	1941	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	1942	size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance;
	1943	for(i=s1+1;i<=e1;i++)
	1944	{
	1945	for(j=s2+1;j<=e2;j++)
	1946	{
	1947	if((i+j)%confparams_cpr->sampleDistance==0)
	1948	{
	1949	index = i*r2+j;
	1950	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
	1951	pred_err = fabs(pred_value - oriData[index]);
	1952	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	1953	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	1954	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	1955	intervals[radiusIndex]++;
	1956	}
	1957	}
	1958	}
	1959	//compute the appropriate number
	1960	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	1961	size_t sum = 0;
	1962	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	1963	{
	1964	sum += intervals[i];
	1965	if(sum>targetCount)
	1966	break;
	1967	}
	1968	if(i>=confparams_cpr->maxRangeRadius)
	1969	i = confparams_cpr->maxRangeRadius-1;
	1970	unsigned int accIntervals = 2*(i+1);
	1971	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	1972
	1973	if(powerOf2<32)
	1974	powerOf2 = 32;
	1975
	1976	free(intervals);
	1977	return powerOf2;
	1978	}
	1979
	1980	unsigned int optimize_intervals_double_3D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
	1981	{
	1982	size_t R1 = e1 - s1 + 1;
	1983	size_t R2 = e2 - s2 + 1;
	1984	size_t R3 = e3 - s3 + 1;
	1985
	1986	size_t r23 = r2*r3;
	1987
	1988	size_t i,j,k, index;
	1989	unsigned long radiusIndex;
	1990	double pred_value = 0, pred_err;
	1991	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	1992	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	1993	size_t totalSampleSize = R1R2R3/confparams_cpr->sampleDistance;
	1994	for(i=s1+1;i<=e1;i++)
	1995	{
	1996	for(j=s2+1;j<=e2;j++)
	1997	{
	1998	for(k=s3+1;k<=e3;k++)
	1999	{
	2000	if((i+j+k)%confparams_cpr->sampleDistance==0)
	2001	{
	2002	index = ir23+jr3+k;
	2003	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
	2004	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
	2005	pred_err = fabs(pred_value - oriData[index]);
	2006	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2007	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2008	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2009	intervals[radiusIndex]++;
	2010	}
	2011	}
	2012
	2013	}
	2014	}
	2015	//compute the appropriate number
	2016	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2017	size_t sum = 0;
	2018	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2019	{
	2020	sum += intervals[i];
	2021	if(sum>targetCount)
	2022	break;
	2023	}
	2024	if(i>=confparams_cpr->maxRangeRadius)
	2025	i = confparams_cpr->maxRangeRadius-1;
	2026
	2027	unsigned int accIntervals = 2*(i+1);
	2028	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2029
	2030	if(powerOf2<32)
	2031	powerOf2 = 32;
	2032
	2033	free(intervals);
	2034	return powerOf2;
	2035	}
	2036
	2037	unsigned int optimize_intervals_double_4D_subblock(double *oriData, double realPrecision,
	2038	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
	2039	{
	2040	size_t R1 = e1 - s1 + 1;
	2041	size_t R2 = e2 - s2 + 1;
	2042	size_t R3 = e3 - s3 + 1;
	2043	size_t R4 = e4 - s4 + 1;
	2044
	2045	size_t r34 = r3*r4;
	2046	size_t r234 = r2r3r4;
	2047
	2048	size_t i,j,k,l, index;
	2049	unsigned long radiusIndex;
	2050	double pred_value = 0, pred_err;
	2051	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	2052	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	2053	size_t totalSampleSize = R1R2R3*R4/confparams_cpr->sampleDistance;
	2054	for(i=s1+1;i<=e1;i++)
	2055	{
	2056	for(j=s2+1;j<=e2;j++)
	2057	{
	2058	for(k=s3+1;k<=e3;k++)
	2059	{
	2060	for(l=s4+1;l<=e4;l++)
	2061	{
	2062	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
	2063	{
	2064	index = ir234+jr34+k*r4+l;
	2065	pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34]
	2066	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
	2067	pred_err = fabs(pred_value - oriData[index]);
	2068	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2069	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2070	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2071	intervals[radiusIndex]++;
	2072	}
	2073	}
	2074	}
	2075
	2076	}
	2077	}
	2078	//compute the appropriate number
	2079	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2080	size_t sum = 0;
	2081	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2082	{
	2083	sum += intervals[i];
	2084	if(sum>targetCount)
	2085	break;
	2086	}
	2087	if(i>=confparams_cpr->maxRangeRadius)
	2088	i = confparams_cpr->maxRangeRadius-1;
	2089
	2090	unsigned int accIntervals = 2*(i+1);
	2091	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2092
	2093	if(powerOf2<32)
	2094	powerOf2 = 32;
	2095
	2096	free(intervals);
	2097	return powerOf2;
	2098	}
	2099
	2100	TightDataPointStorageD* SZ_compress_double_1D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2101	size_t r1, size_t s1, size_t e1)
	2102	{
	2103	size_t dataLength = e1 - s1 + 1;
	2104
	2105	unsigned int quantization_intervals;
	2106	if(exe_params->optQuantMode==1)
	2107	quantization_intervals = optimize_intervals_double_1D_subblock(oriData, realPrecision, r1, s1, e1);
	2108	else
	2109	quantization_intervals = exe_params->intvCapacity;
	2110	updateQuantizationInfo(quantization_intervals);
	2111
	2112	size_t i;
	2113	int reqLength;
	2114	double medianValue = medianValue_d;
	2115	short radExpo = getExponent_double(valueRangeSize/2);
	2116
	2117	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2118
	2119	int* type = (int) malloc(dataLengthsizeof(int));
	2120
	2121	double* spaceFillingValue = oriData + s1; //
	2122
	2123	DynamicIntArray *exactLeadNumArray;
	2124	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2125
	2126	DynamicByteArray *exactMidByteArray;
	2127	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2128
	2129	DynamicIntArray *resiBitArray;
	2130	new_DIA(&resiBitArray, DynArrayInitLen);
	2131
	2132	type[0] = 0;
	2133
	2134	unsigned char preDataBytes[8];
	2135	longToBytes_bigEndian(preDataBytes, 0);
	2136
	2137	int reqBytesLength = reqLength/8;
	2138	int resiBitsLength = reqLength%8;
	2139	double last3CmprsData[3] = {0};
	2140
	2141	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2142	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2143
	2144	//add the first data
	2145	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2146	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2147	memcpy(preDataBytes,vce->curBytes,8);
	2148	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2149	listAdd_double(last3CmprsData, vce->data);
	2150
	2151	//add the second data
	2152	type[1] = 0;
	2153	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2154	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2155	memcpy(preDataBytes,vce->curBytes,8);
	2156	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2157	listAdd_double(last3CmprsData, vce->data);
	2158
	2159	int state;
	2160	double checkRadius;
	2161	double curData;
	2162	double pred;
	2163	double predAbsErr;
	2164	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
	2165	double interval = 2*realPrecision;
	2166
	2167	for(i=2;i<dataLength;i++)
	2168	{
	2169	//printf("%.30G\n",last3CmprsData[0]);
	2170	curData = spaceFillingValue[i];
	2171	pred = 2*last3CmprsData[0] - last3CmprsData[1];
	2172	//pred = last3CmprsData[0];
	2173	predAbsErr = fabs(curData - pred);
	2174	if(predAbsErr<=checkRadius)
	2175	{
	2176	state = (predAbsErr/realPrecision+1)/2;
	2177	if(curData>=pred)
	2178	{
	2179	type[i] = exe_params->intvRadius+state;
	2180	pred = pred + state*interval;
	2181	}
	2182	else //curData<pred
	2183	{
	2184	type[i] = exe_params->intvRadius-state;
	2185	pred = pred - state*interval;
	2186	}
	2187	listAdd_double(last3CmprsData, pred);
	2188	continue;
	2189	}
	2190
	2191	//unpredictable data processing
	2192	type[i] = 0;
	2193	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2194	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2195	memcpy(preDataBytes,vce->curBytes,8);
	2196	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2197
	2198	listAdd_double(last3CmprsData, vce->data);
	2199	}//end of for
	2200
	2201	size_t exactDataNum = exactLeadNumArray->size;
	2202
	2203	TightDataPointStorageD* tdps;
	2204
	2205	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	2206	type, exactMidByteArray->array, exactMidByteArray->size,
	2207	exactLeadNumArray->array,
	2208	resiBitArray->array, resiBitArray->size,
	2209	resiBitsLength,
	2210	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2211
	2212	//free memory
	2213	free_DIA(exactLeadNumArray);
	2214	free_DIA(resiBitArray);
	2215	free(type);
	2216	free(vce);
	2217	free(lce);
	2218	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2219
	2220	return tdps;
	2221	}
	2222
	2223
	2224	TightDataPointStorageD* SZ_compress_double_2D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2225	size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
	2226	{
	2227	unsigned int quantization_intervals;
	2228	if(exe_params->optQuantMode==1)
	2229	{
	2230	quantization_intervals = optimize_intervals_double_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2);
	2231	updateQuantizationInfo(quantization_intervals);
	2232	}
	2233	else
	2234	quantization_intervals = exe_params->intvCapacity;
	2235
	2236	size_t i,j;
	2237	int reqLength;
	2238	double pred1D, pred2D;
	2239	double diff = 0.0;
	2240	double itvNum = 0;
	2241	double P0, P1;
	2242
	2243	size_t R1 = e1 - s1 + 1;
	2244	size_t R2 = e2 - s2 + 1;
	2245	size_t dataLength = R1*R2;
	2246
	2247	P0 = (double)malloc(R2sizeof(double));
	2248	memset(P0, 0, R2*sizeof(double));
	2249	P1 = (double)malloc(R2sizeof(double));
	2250	memset(P1, 0, R2*sizeof(double));
	2251
	2252	double medianValue = medianValue_d;
	2253	short radExpo = getExponent_double(valueRangeSize/2);
	2254	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2255
	2256	int* type = (int) malloc(dataLengthsizeof(int));
	2257
	2258	double* spaceFillingValue = oriData; //
	2259
	2260	DynamicIntArray *exactLeadNumArray;
	2261	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2262
	2263	DynamicByteArray *exactMidByteArray;
	2264	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2265
	2266	DynamicIntArray *resiBitArray;
	2267	new_DIA(&resiBitArray, DynArrayInitLen);
	2268
	2269	unsigned char preDataBytes[8];
	2270	longToBytes_bigEndian(preDataBytes, 0);
	2271
	2272	int reqBytesLength = reqLength/8;
	2273	int resiBitsLength = reqLength%8;
	2274
	2275	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2276	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2277
	2278	/* Process Row-s1 data s2*/
	2279	size_t gIndex;
	2280	size_t lIndex;
	2281
	2282	gIndex = s1*r2+s2;
	2283	lIndex = 0;
	2284
	2285	type[lIndex] = 0;
	2286	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2287	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2288	memcpy(preDataBytes,vce->curBytes,8);
	2289	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2290	P1[0] = vce->data;
	2291
	2292	/* Process Row-s1 data s2+1*/
	2293	gIndex = s1*r2+(s2+1);
	2294	lIndex = 1;
	2295
	2296	pred1D = P1[0];
	2297	diff = spaceFillingValue[gIndex] - pred1D;
	2298
	2299	itvNum = fabs(diff)/realPrecision + 1;
	2300
	2301	if (itvNum < exe_params->intvCapacity)
	2302	{
	2303	if (diff < 0) itvNum = -itvNum;
	2304	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2305	P1[1] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2306	}
	2307	else
	2308	{
	2309	type[lIndex] = 0;
	2310	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2311	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2312	memcpy(preDataBytes,vce->curBytes,8);
	2313	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2314	P1[1] = vce->data;
	2315	}
	2316
	2317	/* Process Row-s1 data s2+2 --> data e2 */
	2318	for (j = 2; j < R2; j++)
	2319	{
	2320	gIndex = s1*r2+(s2+j);
	2321	lIndex = j;
	2322
	2323	pred1D = 2*P1[j-1] - P1[j-2];
	2324	diff = spaceFillingValue[gIndex] - pred1D;
	2325
	2326	itvNum = fabs(diff)/realPrecision + 1;
	2327
	2328	if (itvNum < exe_params->intvCapacity)
	2329	{
	2330	if (diff < 0) itvNum = -itvNum;
	2331	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2332	P1[j] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2333	}
	2334	else
	2335	{
	2336	type[lIndex] = 0;
	2337	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2338	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2339	memcpy(preDataBytes,vce->curBytes,8);
	2340	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2341	P1[j] = vce->data;
	2342	}
	2343	}
	2344
	2345	/* Process Row-s1+1 --> Row-e1 */
	2346	for (i = 1; i < R1; i++)
	2347	{
	2348	/* Process row-s1+i data s2 */
	2349	gIndex = (s1+i)*r2+s2;
	2350	lIndex = i*R2;
	2351
	2352	pred1D = P1[0];
	2353	diff = spaceFillingValue[gIndex] - pred1D;
	2354
	2355	itvNum = fabs(diff)/realPrecision + 1;
	2356
	2357	if (itvNum < exe_params->intvCapacity)
	2358	{
	2359	if (diff < 0) itvNum = -itvNum;
	2360	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2361	P0[0] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2362	}
	2363	else
	2364	{
	2365	type[lIndex] = 0;
	2366	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2367	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2368	memcpy(preDataBytes,vce->curBytes,8);
	2369	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2370	P0[0] = vce->data;
	2371	}
	2372
	2373	/* Process row-s1+i data s2+1 --> e2 */
	2374	for (j = 1; j < R2; j++)
	2375	{
	2376	gIndex = (s1+i)*r2+(s2+j);
	2377	lIndex = i*R2+j;
	2378
	2379	pred2D = P0[j-1] + P1[j] - P1[j-1];
	2380	diff = spaceFillingValue[gIndex] - pred2D;
	2381
	2382	itvNum = fabs(diff)/realPrecision + 1;
	2383
	2384	if (itvNum < exe_params->intvCapacity)
	2385	{
	2386	if (diff < 0) itvNum = -itvNum;
	2387	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2388	P0[j] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2389	}
	2390	else
	2391	{
	2392	type[lIndex] = 0;
	2393	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2394	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2395	memcpy(preDataBytes,vce->curBytes,8);
	2396	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2397	P0[j] = vce->data;
	2398	}
	2399	}
	2400
	2401	double *Pt;
	2402	Pt = P1;
	2403	P1 = P0;
	2404	P0 = Pt;
	2405	}
	2406
	2407	free(P0);
	2408	free(P1);
	2409	size_t exactDataNum = exactLeadNumArray->size;
	2410
	2411	TightDataPointStorageD* tdps;
	2412
	2413	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	2414	type, exactMidByteArray->array, exactMidByteArray->size,
	2415	exactLeadNumArray->array,
	2416	resiBitArray->array, resiBitArray->size,
	2417	resiBitsLength,
	2418	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2419
	2420	//free memory
	2421	free_DIA(exactLeadNumArray);
	2422	free_DIA(resiBitArray);
	2423	free(type);
	2424	free(vce);
	2425	free(lce);
	2426	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2427
	2428	return tdps;
	2429	}
	2430
	2431	TightDataPointStorageD* SZ_compress_double_3D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2432	size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
	2433	{
	2434	unsigned int quantization_intervals;
	2435	if(exe_params->optQuantMode==1)
	2436	{
	2437	quantization_intervals = optimize_intervals_double_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3);
	2438	updateQuantizationInfo(quantization_intervals);
	2439	}
	2440	else
	2441	quantization_intervals = exe_params->intvCapacity;
	2442
	2443	size_t i,j,k;
	2444	int reqLength;
	2445	double pred1D, pred2D, pred3D;
	2446	double diff = 0.0;
	2447	double itvNum = 0;
	2448	double P0, P1;
	2449
	2450	size_t R1 = e1 - s1 + 1;
	2451	size_t R2 = e2 - s2 + 1;
	2452	size_t R3 = e3 - s3 + 1;
	2453	size_t dataLength = R1R2R3;
	2454
	2455	size_t r23 = r2*r3;
	2456	size_t R23 = R2*R3;
	2457
	2458	P0 = (double)malloc(R23sizeof(double));
	2459	P1 = (double)malloc(R23sizeof(double));
	2460
	2461	double medianValue = medianValue_d;
	2462	short radExpo = getExponent_double(valueRangeSize/2);
	2463	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2464
	2465	int* type = (int) malloc(dataLengthsizeof(int));
	2466
	2467	double* spaceFillingValue = oriData; //
	2468
	2469	DynamicIntArray *exactLeadNumArray;
	2470	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2471
	2472	DynamicByteArray *exactMidByteArray;
	2473	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2474
	2475	DynamicIntArray *resiBitArray;
	2476	new_DIA(&resiBitArray, DynArrayInitLen);
	2477
	2478	unsigned char preDataBytes[8];
	2479	longToBytes_bigEndian(preDataBytes, 0);
	2480
	2481	int reqBytesLength = reqLength/8;
	2482	int resiBitsLength = reqLength%8;
	2483
	2484	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2485	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2486
	2487
	2488	/////////////////////////// Process layer-s1 ///////////////////////////
	2489	/* Process Row-s2 data s3*/
	2490	size_t gIndex; //global index
	2491	size_t lIndex; //local index
	2492	size_t index2D; //local 2D index
	2493
	2494	gIndex = s1r23+s2r3+s3;
	2495	lIndex = 0;
	2496	index2D = 0;
	2497
	2498	type[lIndex] = 0;
	2499	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2500	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2501	memcpy(preDataBytes,vce->curBytes,8);
	2502	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2503	P1[index2D] = vce->data;
	2504
	2505	/* Process Row-s2 data s3+1*/
	2506	gIndex = s1r23+s2r3+s3+1;
	2507	lIndex = 1;
	2508	index2D = 1;
	2509
	2510	pred1D = P1[index2D-1];
	2511	diff = spaceFillingValue[gIndex] - pred1D;
	2512
	2513	itvNum = fabs(diff)/realPrecision + 1;
	2514
	2515	if (itvNum < exe_params->intvCapacity)
	2516	{
	2517	if (diff < 0) itvNum = -itvNum;
	2518	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2519	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2520	}
	2521	else
	2522	{
	2523	type[lIndex] = 0;
	2524	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2525	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2526	memcpy(preDataBytes,vce->curBytes,8);
	2527	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2528	P1[index2D] = vce->data;
	2529	}
	2530
	2531	/* Process Row-s2 data s3+2 --> data e3 */
	2532	for (j = 2; j < R3; j++)
	2533	{
	2534	gIndex = s1r23+s2r3+s3+j;
	2535	lIndex = j;
	2536	index2D = j;
	2537
	2538	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	2539	diff = spaceFillingValue[gIndex] - pred1D;
	2540
	2541	itvNum = fabs(diff)/realPrecision + 1;
	2542
	2543	if (itvNum < exe_params->intvCapacity)
	2544	{
	2545	if (diff < 0) itvNum = -itvNum;
	2546	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2547	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2548	}
	2549	else
	2550	{
	2551	type[lIndex] = 0;
	2552	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2553	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2554	memcpy(preDataBytes,vce->curBytes,8);
	2555	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2556	P1[index2D] = vce->data;
	2557	}
	2558	}
	2559
	2560	/* Process Row-s2+1 --> Row-e2 */
	2561	for (i = 1; i < R2; i++)
	2562	{
	2563	/* Process row-s2+i data s3 */
	2564	gIndex = s1r23+(s2+i)r3+s3;
	2565	lIndex = i*R3;
	2566	index2D = i*R3;
	2567
	2568	pred1D = P1[index2D-R3];
	2569	diff = spaceFillingValue[gIndex] - pred1D;
	2570
	2571	itvNum = fabs(diff)/realPrecision + 1;
	2572
	2573	if (itvNum < exe_params->intvCapacity)
	2574	{
	2575	if (diff < 0) itvNum = -itvNum;
	2576	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2577	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2578	}
	2579	else
	2580	{
	2581	type[lIndex] = 0;
	2582	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2583	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2584	memcpy(preDataBytes,vce->curBytes,8);
	2585	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2586	P1[index2D] = vce->data;
	2587	}
	2588
	2589	/* Process row-s2+i data s3+1 --> data e3*/
	2590	for (j = 1; j < R3; j++)
	2591	{
	2592	gIndex = s1r23+(s2+i)r3+s3+j;
	2593	lIndex = i*R3+j;
	2594	index2D = i*R3+j;
	2595
	2596	pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1];
	2597	diff = spaceFillingValue[gIndex] - pred2D;
	2598
	2599	itvNum = fabs(diff)/realPrecision + 1;
	2600
	2601	if (itvNum < exe_params->intvCapacity)
	2602	{
	2603	if (diff < 0) itvNum = -itvNum;
	2604	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2605	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2606	}
	2607	else
	2608	{
	2609	type[lIndex] = 0;
	2610	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2611	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2612	memcpy(preDataBytes,vce->curBytes,8);
	2613	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2614	P1[index2D] = vce->data;
	2615	}
	2616	}
	2617	}
	2618
	2619
	2620	/////////////////////////// Process layer-s1+1 --> layer-e1 ///////////////////////////
	2621
	2622	for (k = 1; k < R1; k++)
	2623	{
	2624	/* Process Row-s2 data s3*/
	2625	gIndex = (s1+k)r23+s2r3+s3;
	2626	lIndex = k*R23;
	2627	index2D = 0;
	2628
	2629	pred1D = P1[index2D];
	2630	diff = spaceFillingValue[gIndex] - pred1D;
	2631
	2632	itvNum = fabs(diff)/realPrecision + 1;
	2633
	2634	if (itvNum < exe_params->intvCapacity)
	2635	{
	2636	if (diff < 0) itvNum = -itvNum;
	2637	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2638	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2639	}
	2640	else
	2641	{
	2642	type[lIndex] = 0;
	2643	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2644	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2645	memcpy(preDataBytes,vce->curBytes,8);
	2646	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2647	P0[index2D] = vce->data;
	2648	}
	2649
	2650
	2651	/* Process Row-s2 data s3+1 --> data e3 */
	2652	for (j = 1; j < R3; j++)
	2653	{
	2654	gIndex = (s1+k)r23+s2r3+s3+j;
	2655	lIndex = k*R23+j;
	2656	index2D = j;
	2657
	2658	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	2659	diff = spaceFillingValue[gIndex] - pred2D;
	2660
	2661	itvNum = fabs(diff)/realPrecision + 1;
	2662
	2663	if (itvNum < exe_params->intvCapacity)
	2664	{
	2665	if (diff < 0) itvNum = -itvNum;
	2666	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2667	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2668	}
	2669	else
	2670	{
	2671	type[lIndex] = 0;
	2672	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2673	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2674	memcpy(preDataBytes,vce->curBytes,8);
	2675	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2676	P0[index2D] = vce->data;
	2677	}
	2678	}
	2679
	2680	/* Process Row-s2+1 --> Row-e2 */
	2681	for (i = 1; i < R2; i++)
	2682	{
	2683	/* Process Row-s2+i data s3 */
	2684	gIndex = (s1+k)r23+(s2+i)r3+s3;
	2685	lIndex = kR23+iR3;
	2686	index2D = i*R3;
	2687
	2688	pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3];
	2689	diff = spaceFillingValue[gIndex] - pred2D;
	2690
	2691	itvNum = fabs(diff)/realPrecision + 1;
	2692
	2693	if (itvNum < exe_params->intvCapacity)
	2694	{
	2695	if (diff < 0) itvNum = -itvNum;
	2696	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2697	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2698	}
	2699	else
	2700	{
	2701	type[lIndex] = 0;
	2702	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2703	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2704	memcpy(preDataBytes,vce->curBytes,8);
	2705	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2706	P0[index2D] = vce->data;
	2707	}
	2708
	2709	/* Process Row-s2+i data s3+1 --> data e3 */
	2710	for (j = 1; j < R3; j++)
	2711	{
	2712	gIndex = (s1+k)r23+(s2+i)r3+s3+j;
	2713	lIndex = kR23+iR3+j;
	2714	index2D = i*R3+j;
	2715
	2716	pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1];
	2717	diff = spaceFillingValue[gIndex] - pred3D;
	2718
	2719	itvNum = fabs(diff)/realPrecision + 1;
	2720
	2721	if (itvNum < exe_params->intvCapacity)
	2722	{
	2723	if (diff < 0) itvNum = -itvNum;
	2724	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2725	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2726	}
	2727	else
	2728	{
	2729	type[lIndex] = 0;
	2730	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2731	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2732	memcpy(preDataBytes,vce->curBytes,8);
	2733	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2734	P0[index2D] = vce->data;
	2735	}
	2736	}
	2737	}
	2738
	2739	double *Pt;
	2740	Pt = P1;
	2741	P1 = P0;
	2742	P0 = Pt;
	2743	}
	2744
	2745	free(P0);
	2746	free(P1);
	2747	size_t exactDataNum = exactLeadNumArray->size;
	2748
	2749	TightDataPointStorageD* tdps;
	2750
	2751	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	2752	type, exactMidByteArray->array, exactMidByteArray->size,
	2753	exactLeadNumArray->array,
	2754	resiBitArray->array, resiBitArray->size,
	2755	resiBitsLength,
	2756	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2757
	2758	//free memory
	2759	free_DIA(exactLeadNumArray);
	2760	free_DIA(resiBitArray);
	2761	free(type);
	2762	free(vce);
	2763	free(lce);
	2764	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2765
	2766	return tdps;
	2767	}
	2768
	2769	TightDataPointStorageD* SZ_compress_double_4D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2770	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
	2771	{
	2772	unsigned int quantization_intervals;
	2773	if(exe_params->optQuantMode==1)
	2774	{
	2775	quantization_intervals = optimize_intervals_double_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4);
	2776	updateQuantizationInfo(quantization_intervals);
	2777	}
	2778	else
	2779	quantization_intervals = exe_params->intvCapacity;
	2780
	2781	size_t i,j,k;
	2782	int reqLength;
	2783	double pred1D, pred2D, pred3D;
	2784	double diff = 0.0;
	2785	double itvNum = 0;
	2786	double P0, P1;
	2787
	2788	size_t R1 = e1 - s1 + 1;
	2789	size_t R2 = e2 - s2 + 1;
	2790	size_t R3 = e3 - s3 + 1;
	2791	size_t R4 = e4 - s4 + 1;
	2792
	2793	size_t dataLength = R1R2R3*R4;
	2794
	2795	size_t r34 = r3*r4;
	2796	size_t r234 = r2r3r4;
	2797	size_t R34 = R3*R4;
	2798	size_t R234 = R2R3R4;
	2799
	2800	P0 = (double)malloc(R34sizeof(double));
	2801	P1 = (double)malloc(R34sizeof(double));
	2802
	2803	double medianValue = medianValue_d;
	2804	short radExpo = getExponent_double(valueRangeSize/2);
	2805	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2806
	2807	int* type = (int) malloc(dataLengthsizeof(int));
	2808
	2809	double* spaceFillingValue = oriData; //
	2810
	2811	DynamicIntArray *exactLeadNumArray;
	2812	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2813
	2814	DynamicByteArray *exactMidByteArray;
	2815	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2816
	2817	DynamicIntArray *resiBitArray;
	2818	new_DIA(&resiBitArray, DynArrayInitLen);
	2819
	2820	unsigned char preDataBytes[8];
	2821	longToBytes_bigEndian(preDataBytes, 0);
	2822
	2823	int reqBytesLength = reqLength/8;
	2824	int resiBitsLength = reqLength%8;
	2825
	2826	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2827	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2828
	2829	size_t l;
	2830	for (l = 0; l < R1; l++)
	2831	{
	2832
	2833	/////////////////////////// Process layer-s2 ///////////////////////////
	2834	/* Process Row-s3 data s4*/
	2835	size_t gIndex; //global index
	2836	size_t lIndex; //local index
	2837	size_t index2D; //local 2D index
	2838
	2839	gIndex = (s1+l)r234+s2r34+s3*r4+s4;
	2840	lIndex = l*R234;
	2841	index2D = 0;
	2842
	2843	type[lIndex] = 0;
	2844	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2845	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2846	memcpy(preDataBytes,vce->curBytes,8);
	2847	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2848	P1[index2D] = vce->data;
	2849
	2850	/* Process Row-s3 data s4+1*/
	2851	gIndex = (s1+l)r234+s2r34+s3*r4+s4+1;
	2852	lIndex = l*R234+1;
	2853	index2D = 1;
	2854
	2855	pred1D = P1[index2D-1];
	2856	diff = spaceFillingValue[gIndex] - pred1D;
	2857
	2858	itvNum = fabs(diff)/realPrecision + 1;
	2859
	2860	if (itvNum < exe_params->intvCapacity)
	2861	{
	2862	if (diff < 0) itvNum = -itvNum;
	2863	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2864	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2865	}
	2866	else
	2867	{
	2868	type[lIndex] = 0;
	2869	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2870	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2871	memcpy(preDataBytes,vce->curBytes,8);
	2872	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2873	P1[index2D] = vce->data;
	2874	}
	2875
	2876	/* Process Row-s3 data s4+2 --> data e4 */
	2877	for (j = 2; j < R4; j++)
	2878	{
	2879	gIndex = (s1+l)r234+s2r34+s3*r4+s4+j;
	2880	lIndex = l*R234+j;
	2881	index2D = j;
	2882
	2883	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	2884	diff = spaceFillingValue[gIndex] - pred1D;
	2885
	2886	itvNum = fabs(diff)/realPrecision + 1;
	2887
	2888	if (itvNum < exe_params->intvCapacity)
	2889	{
	2890	if (diff < 0) itvNum = -itvNum;
	2891	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2892	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2893	}
	2894	else
	2895	{
	2896	type[lIndex] = 0;
	2897	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2898	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2899	memcpy(preDataBytes,vce->curBytes,8);
	2900	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2901	P1[index2D] = vce->data;
	2902	}
	2903	}
	2904
	2905	/* Process Row-s3+1 --> Row-e3 */
	2906	for (i = 1; i < R3; i++)
	2907	{
	2908	/* Process row-s2+i data s3 */
	2909	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4;
	2910	lIndex = lR234+iR4;
	2911	index2D = i*R4;
	2912
	2913	pred1D = P1[index2D-R4];
	2914	diff = spaceFillingValue[gIndex] - pred1D;
	2915
	2916	itvNum = fabs(diff)/realPrecision + 1;
	2917
	2918	if (itvNum < exe_params->intvCapacity)
	2919	{
	2920	if (diff < 0) itvNum = -itvNum;
	2921	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2922	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2923	}
	2924	else
	2925	{
	2926	type[lIndex] = 0;
	2927	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2928	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2929	memcpy(preDataBytes,vce->curBytes,8);
	2930	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2931	P1[index2D] = vce->data;
	2932	}
	2933
	2934	/* Process row-s3+i data s4+1 --> data e4*/
	2935	for (j = 1; j < R4; j++)
	2936	{
	2937	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4+j;
	2938	lIndex = lR234+iR4+j;
	2939	index2D = i*R4+j;
	2940
	2941	pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1];
	2942	diff = spaceFillingValue[gIndex] - pred2D;
	2943
	2944	itvNum = fabs(diff)/realPrecision + 1;
	2945
	2946	if (itvNum < exe_params->intvCapacity)
	2947	{
	2948	if (diff < 0) itvNum = -itvNum;
	2949	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2950	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2951	}
	2952	else
	2953	{
	2954	type[lIndex] = 0;
	2955	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2956	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2957	memcpy(preDataBytes,vce->curBytes,8);
	2958	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2959	P1[index2D] = vce->data;
	2960	}
	2961	}
	2962	}
	2963
	2964
	2965	/////////////////////////// Process layer-s2+1 --> layer-e2 ///////////////////////////
	2966
	2967	for (k = 1; k < R2; k++)
	2968	{
	2969	/* Process Row-s3 data s4*/
	2970	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4;
	2971	lIndex = lR234+kR34;
	2972	index2D = 0;
	2973
	2974	pred1D = P1[index2D];
	2975	diff = spaceFillingValue[gIndex] - pred1D;
	2976
	2977	itvNum = fabs(diff)/realPrecision + 1;
	2978
	2979	if (itvNum < exe_params->intvCapacity)
	2980	{
	2981	if (diff < 0) itvNum = -itvNum;
	2982	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2983	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2984	}
	2985	else
	2986	{
	2987	type[lIndex] = 0;
	2988	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2989	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2990	memcpy(preDataBytes,vce->curBytes,8);
	2991	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2992	P0[index2D] = vce->data;
	2993	}
	2994
	2995
	2996	/* Process Row-s3 data s4+1 --> data e4 */
	2997	for (j = 1; j < R4; j++)
	2998	{
	2999	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4+j;
	3000	lIndex = lR234+kR34+j;
	3001	index2D = j;
	3002
	3003	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	3004	diff = spaceFillingValue[gIndex] - pred2D;
	3005
	3006	itvNum = fabs(diff)/realPrecision + 1;
	3007
	3008	if (itvNum < exe_params->intvCapacity)
	3009	{
	3010	if (diff < 0) itvNum = -itvNum;
	3011	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3012	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3013	}
	3014	else
	3015	{
	3016	type[lIndex] = 0;
	3017	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3018	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3019	memcpy(preDataBytes,vce->curBytes,8);
	3020	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3021	P0[index2D] = vce->data;
	3022	}
	3023	}
	3024
	3025	/* Process Row-s3+1 --> Row-e3 */
	3026	for (i = 1; i < R3; i++)
	3027	{
	3028	/* Process Row-s3+i data s4 */
	3029	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4;
	3030	lIndex = lR234+kR34+i*R4;
	3031	index2D = i*R4;
	3032
	3033	pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4];
	3034	diff = spaceFillingValue[gIndex] - pred2D;
	3035
	3036	itvNum = fabs(diff)/realPrecision + 1;
	3037
	3038	if (itvNum < exe_params->intvCapacity)
	3039	{
	3040	if (diff < 0) itvNum = -itvNum;
	3041	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3042	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3043	}
	3044	else
	3045	{
	3046	type[lIndex] = 0;
	3047	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3048	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3049	memcpy(preDataBytes,vce->curBytes,8);
	3050	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3051	P0[index2D] = vce->data;
	3052	}
	3053
	3054	/* Process Row-s3+i data s4+1 --> data e4 */
	3055	for (j = 1; j < R4; j++)
	3056	{
	3057	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4+j;
	3058	lIndex = lR234+kR34+i*R4+j;
	3059	index2D = i*R4+j;
	3060
	3061	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
	3062
	3063	pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1];
	3064	diff = spaceFillingValue[gIndex] - pred3D;
	3065
	3066	itvNum = fabs(diff)/realPrecision + 1;
	3067
	3068	if (itvNum < exe_params->intvCapacity)
	3069	{
	3070	if (diff < 0) itvNum = -itvNum;
	3071	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3072	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3073	}
	3074	else
	3075	{
	3076	type[lIndex] = 0;
	3077	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3078	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3079	memcpy(preDataBytes,vce->curBytes,8);
	3080	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3081	P0[index2D] = vce->data;
	3082	}
	3083	}
	3084	}
	3085
	3086	double *Pt;
	3087	Pt = P1;
	3088	P1 = P0;
	3089	P0 = Pt;
	3090	}
	3091	}
	3092
	3093	free(P0);
	3094	free(P1);
	3095	size_t exactDataNum = exactLeadNumArray->size;
	3096
	3097	TightDataPointStorageD* tdps;
	3098
	3099	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	3100	type, exactMidByteArray->array, exactMidByteArray->size,
	3101	exactLeadNumArray->array,
	3102	resiBitArray->array, resiBitArray->size,
	3103	resiBitsLength,
	3104	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	3105
	3106	//free memory
	3107	free_DIA(exactLeadNumArray);
	3108	free_DIA(resiBitArray);
	3109	free(type);
	3110	free(vce);
	3111	free(lce);
	3112	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	3113
	3114	return tdps;
	3115	}
	3116
	3117	/**
	3118	*
	3119	* This is a fast implementation for optimize_intervals_double_3D()
	3120	* */
	3121	unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision){
	3122	size_t i;
	3123	size_t radiusIndex;
	3124	size_t r23=r2*r3;
	3125	double pred_value = 0, pred_err;
	3126	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3127	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	3128	size_t totalSampleSize = 0;
	3129
	3130	size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
	3131	size_t offset_count_2;
	3132	double * data_pos = oriData + r23 + r3 + offset_count;
	3133	size_t n1_count = 1, n2_count = 1; // count i,j sum
	3134	size_t len = r1 * r2 * r3;
	3135	while(data_pos - oriData < len){
	3136	totalSampleSize++;
	3137	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
	3138	pred_err = fabs(pred_value - *data_pos);
	3139	radiusIndex = (pred_err/realPrecision+1)/2;
	3140	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3141	{
	3142	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3143	}
	3144	intervals[radiusIndex]++;
	3145	offset_count += confparams_cpr->sampleDistance;
	3146	if(offset_count >= r3){
	3147	n2_count ++;
	3148	if(n2_count == r2){
	3149	n1_count ++;
	3150	n2_count = 1;
	3151	data_pos += r3;
	3152	}
	3153	offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance;
	3154	data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
	3155	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
	3156	if(offset_count == 0) offset_count ++;
	3157	}
	3158	else data_pos += confparams_cpr->sampleDistance;
	3159	}
	3160	//compute the appropriate number
	3161	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3162	size_t sum = 0;
	3163	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3164	{
	3165	sum += intervals[i];
	3166	if(sum>targetCount)
	3167	break;
	3168	}
	3169	if(i>=confparams_cpr->maxRangeRadius)
	3170	i = confparams_cpr->maxRangeRadius-1;
	3171	unsigned int accIntervals = 2*(i+1);
	3172	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3173
	3174	if(powerOf2<32)
	3175	powerOf2 = 32;
	3176	free(intervals);
	3177	return powerOf2;
	3178	}
	3179
	3180	unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision)
	3181	{
	3182	size_t i;
	3183	size_t radiusIndex;
	3184	double pred_value = 0, pred_err;
	3185	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3186	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
[9ee2ce3]	3187	size_t totalSampleSize = 0;
[2c47b73]	3188
	3189	size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
	3190	size_t offset_count_2;
	3191	double * data_pos = oriData + r2 + offset_count;
	3192	size_t n1_count = 1; // count i sum
	3193	size_t len = r1 * r2;
	3194	while(data_pos - oriData < len){
	3195	totalSampleSize++;
	3196	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
	3197	pred_err = fabs(pred_value - *data_pos);
	3198	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3199	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3200	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3201	intervals[radiusIndex]++;
	3202
	3203	offset_count += confparams_cpr->sampleDistance;
	3204	if(offset_count >= r2){
	3205	n1_count ++;
	3206	offset_count_2 = n1_count % confparams_cpr->sampleDistance;
	3207	data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
	3208	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
	3209	if(offset_count == 0) offset_count ++;
	3210	}
	3211	else data_pos += confparams_cpr->sampleDistance;
	3212	}
	3213
	3214	//compute the appropriate number
	3215	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3216	size_t sum = 0;
	3217	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3218	{
	3219	sum += intervals[i];
	3220	if(sum>targetCount)
	3221	break;
	3222	}
	3223	if(i>=confparams_cpr->maxRangeRadius)
	3224	i = confparams_cpr->maxRangeRadius-1;
	3225	unsigned int accIntervals = 2*(i+1);
	3226	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3227
	3228	if(powerOf2<32)
	3229	powerOf2 = 32;
	3230
	3231	free(intervals);
	3232	return powerOf2;
	3233	}
	3234
	3235	unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision)
	3236	{
	3237	size_t i = 0, radiusIndex;
	3238	double pred_value = 0, pred_err;
	3239	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3240	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
[9ee2ce3]	3241	size_t totalSampleSize = 0;
[2c47b73]	3242
	3243	double * data_pos = oriData + 2;
	3244	while(data_pos - oriData < dataLength){
	3245	totalSampleSize++;
	3246	pred_value = data_pos[-1];
	3247	pred_err = fabs(pred_value - *data_pos);
	3248	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3249	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3250	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3251	intervals[radiusIndex]++;
	3252
	3253	data_pos += confparams_cpr->sampleDistance;
	3254	}
	3255	//compute the appropriate number
	3256	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3257	size_t sum = 0;
	3258	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3259	{
	3260	sum += intervals[i];
	3261	if(sum>targetCount)
	3262	break;
	3263	}
	3264	if(i>=confparams_cpr->maxRangeRadius)
	3265	i = confparams_cpr->maxRangeRadius-1;
	3266
	3267	unsigned int accIntervals = 2*(i+1);
	3268	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3269
	3270	if(powerOf2<32)
	3271	powerOf2 = 32;
	3272
	3273	free(intervals);
	3274	return powerOf2;
	3275	}
[9ee2ce3]	3276
/* The above code is for sz 1.4.13; the following code is for sz 2.0 */
	3278	unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
	3279	{
	3280	double mean = 0.0;
	3281	size_t len = r1 * r2;
	3282	size_t mean_distance = (int) (sqrt(len));
	3283
	3284	double * data_pos = oriData;
	3285	size_t mean_count = 0;
	3286	while(data_pos - oriData < len){
	3287	mean += *data_pos;
	3288	mean_count ++;
	3289	data_pos += mean_distance;
	3290	}
	3291	if(mean_count > 0) mean /= mean_count;
	3292	size_t range = 8192;
	3293	size_t radius = 4096;
	3294	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
	3295	memset(freq_intervals, 0, range*sizeof(size_t));
	3296
	3297	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
	3298	int sampleDistance = confparams_cpr->sampleDistance;
	3299	double predThreshold = confparams_cpr->predThreshold;
	3300
	3301	size_t i;
	3302	size_t radiusIndex;
	3303	double pred_value = 0, pred_err;
	3304	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
	3305	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
	3306
	3307	double mean_diff;
	3308	ptrdiff_t freq_index;
	3309	size_t freq_count = 0;
	3310	size_t n1_count = 1;
	3311	size_t offset_count = sampleDistance - 1;
	3312	size_t offset_count_2 = 0;
	3313	size_t sample_count = 0;
	3314	data_pos = oriData + r2 + offset_count;
	3315	while(data_pos - oriData < len){
	3316	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
	3317	pred_err = fabs(pred_value - *data_pos);
	3318	if(pred_err < realPrecision) freq_count ++;
	3319	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3320	if(radiusIndex>=maxRangeRadius)
	3321	radiusIndex = maxRangeRadius - 1;
	3322	intervals[radiusIndex]++;
	3323
	3324	mean_diff = *data_pos - mean;
	3325	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
	3326	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
	3327	if(freq_index <= 0){
	3328	freq_intervals[0] ++;
	3329	}
	3330	else if(freq_index >= range){
	3331	freq_intervals[range - 1] ++;
	3332	}
	3333	else{
	3334	freq_intervals[freq_index] ++;
	3335	}
	3336	offset_count += sampleDistance;
	3337	if(offset_count >= r2){
	3338	n1_count ++;
	3339	offset_count_2 = n1_count % sampleDistance;
	3340	data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
	3341	offset_count = (sampleDistance - offset_count_2);
	3342	if(offset_count == 0) offset_count ++;
	3343	}
	3344	else data_pos += sampleDistance;
	3345	sample_count ++;
	3346	}
	3347	max_freq = freq_count 1.0/ sample_count;
	3348
	3349	//compute the appropriate number
	3350	size_t targetCount = sample_count*predThreshold;
	3351	size_t sum = 0;
	3352	for(i=0;i<maxRangeRadius;i++)
	3353	{
	3354	sum += intervals[i];
	3355	if(sum>targetCount)
	3356	break;
	3357	}
	3358	if(i>=maxRangeRadius)
	3359	i = maxRangeRadius-1;
	3360	unsigned int accIntervals = 2*(i+1);
	3361	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3362
	3363	if(powerOf2<32)
	3364	powerOf2 = 32;
	3365
	3366	// collect frequency
	3367	size_t max_sum = 0;
	3368	size_t max_index = 0;
	3369	size_t tmp_sum;
	3370	size_t * freq_pos = freq_intervals + 1;
	3371	for(size_t i=1; i<range-2; i++){
	3372	tmp_sum = freq_pos[0] + freq_pos[1];
	3373	if(tmp_sum > max_sum){
	3374	max_sum = tmp_sum;
	3375	max_index = i;
	3376	}
	3377	freq_pos ++;
	3378	}
	3379	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
	3380	mean_freq = max_sum 1.0 / sample_count;
	3381
	3382	free(freq_intervals);
	3383	free(intervals);
	3384	return powerOf2;
	3385	}
	3386
	3387	unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
	3388	{
	3389	double mean = 0.0;
	3390	size_t len = r1 * r2 * r3;
	3391	size_t mean_distance = (int) (sqrt(len));
	3392	double * data_pos = oriData;
	3393	size_t offset_count = 0;
	3394	size_t offset_count_2 = 0;
	3395	size_t mean_count = 0;
	3396	while(data_pos - oriData < len){
	3397	mean += *data_pos;
	3398	mean_count ++;
	3399	data_pos += mean_distance;
	3400	offset_count += mean_distance;
	3401	offset_count_2 += mean_distance;
	3402	if(offset_count >= r3){
	3403	offset_count = 0;
	3404	data_pos -= 1;
	3405	}
	3406	if(offset_count_2 >= r2 * r3){
	3407	offset_count_2 = 0;
	3408	data_pos -= 1;
	3409	}
	3410	}
	3411	if(mean_count > 0) mean /= mean_count;
	3412	size_t range = 8192;
	3413	size_t radius = 4096;
	3414	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
	3415	memset(freq_intervals, 0, range*sizeof(size_t));
	3416
	3417	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
	3418	int sampleDistance = confparams_cpr->sampleDistance;
	3419	double predThreshold = confparams_cpr->predThreshold;
	3420
	3421	size_t i;
	3422	size_t radiusIndex;
	3423	size_t r23=r2*r3;
	3424	double pred_value = 0, pred_err;
	3425	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
	3426	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
	3427
	3428	double mean_diff;
	3429	ptrdiff_t freq_index;
	3430	size_t freq_count = 0;
	3431	size_t sample_count = 0;
	3432
	3433	offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
	3434	data_pos = oriData + r23 + r3 + offset_count;
	3435	size_t n1_count = 1, n2_count = 1; // count i,j sum
	3436
	3437	while(data_pos - oriData < len){
	3438
	3439	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
	3440	pred_err = fabs(pred_value - *data_pos);
	3441	if(pred_err < realPrecision) freq_count ++;
	3442	radiusIndex = (pred_err/realPrecision+1)/2;
	3443	if(radiusIndex>=maxRangeRadius)
	3444	{
	3445	radiusIndex = maxRangeRadius - 1;
	3446	}
	3447	intervals[radiusIndex]++;
	3448
	3449	mean_diff = *data_pos - mean;
	3450	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
	3451	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
	3452	if(freq_index <= 0){
	3453	freq_intervals[0] ++;
	3454	}
	3455	else if(freq_index >= range){
	3456	freq_intervals[range - 1] ++;
	3457	}
	3458	else{
	3459	freq_intervals[freq_index] ++;
	3460	}
	3461	offset_count += sampleDistance;
	3462	if(offset_count >= r3){
	3463	n2_count ++;
	3464	if(n2_count == r2){
	3465	n1_count ++;
	3466	n2_count = 1;
	3467	data_pos += r3;
	3468	}
	3469	offset_count_2 = (n1_count + n2_count) % sampleDistance;
	3470	data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
	3471	offset_count = (sampleDistance - offset_count_2);
	3472	if(offset_count == 0) offset_count ++;
	3473	}
	3474	else data_pos += sampleDistance;
	3475	sample_count ++;
	3476	}
	3477	max_freq = freq_count 1.0/ sample_count;
	3478
	3479	//compute the appropriate number
	3480	size_t targetCount = sample_count*predThreshold;
	3481	size_t sum = 0;
	3482	for(i=0;i<maxRangeRadius;i++)
	3483	{
	3484	sum += intervals[i];
	3485	if(sum>targetCount)
	3486	break;
	3487	}
	3488	if(i>=maxRangeRadius)
	3489	i = maxRangeRadius-1;
	3490	unsigned int accIntervals = 2*(i+1);
	3491	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3492
	3493	if(powerOf2<32)
	3494	powerOf2 = 32;
	3495	// collect frequency
	3496	size_t max_sum = 0;
	3497	size_t max_index = 0;
	3498	size_t tmp_sum;
	3499	size_t * freq_pos = freq_intervals + 1;
	3500	for(size_t i=1; i<range-2; i++){
	3501	tmp_sum = freq_pos[0] + freq_pos[1];
	3502	if(tmp_sum > max_sum){
	3503	max_sum = tmp_sum;
	3504	max_index = i;
	3505	}
	3506	freq_pos ++;
	3507	}
	3508	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
	3509	mean_freq = max_sum 1.0 / sample_count;
	3510
	3511	free(freq_intervals);
	3512	free(intervals);
	3513	return powerOf2;
	3514	}
	3515
	3516	#define MIN(a, b) a<b? a : b
	3517	unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, double realPrecision, size_t comp_size){
	3518
	3519	unsigned int quantization_intervals;
	3520	double sz_sample_correct_freq = -1;//0.5; //-1
	3521	double dense_pos;
	3522	double mean_flush_freq;
	3523	unsigned char use_mean = 0;
	3524
	3525	if(exe_params->optQuantMode==1)
	3526	{
	3527	quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
	3528	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
	3529	updateQuantizationInfo(quantization_intervals);
	3530	}
	3531	else{
	3532	quantization_intervals = exe_params->intvCapacity;
	3533	}
	3534
	3535	// calculate block dims
	3536	size_t num_x, num_y;
	3537	size_t block_size = 16;
	3538
	3539	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
	3540	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
	3541
	3542	size_t split_index_x, split_index_y;
	3543	size_t early_blockcount_x, early_blockcount_y;
	3544	size_t late_blockcount_x, late_blockcount_y;
	3545	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
	3546	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
	3547
	3548	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
	3549	size_t num_blocks = num_x * num_y;
	3550	size_t num_elements = r1 * r2;
	3551
	3552	size_t dim0_offset = r2;
	3553
	3554	int * result_type = (int ) malloc(num_elements sizeof(int));
	3555	size_t unpred_data_max_size = max_num_block_elements;
	3556	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
	3557	size_t total_unpred = 0;
	3558	size_t unpredictable_count;
	3559	double * data_pos = oriData;
	3560	int * type = result_type;
	3561	size_t offset_x, offset_y;
	3562	size_t current_blockcount_x, current_blockcount_y;
	3563
	3564	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
	3565	double * reg_params_pos = reg_params;
	3566	// move regression part out
	3567	size_t params_offset_b = num_blocks;
	3568	size_t params_offset_c = 2*num_blocks;
	3569	for(size_t i=0; i<num_x; i++){
	3570	for(size_t j=0; j<num_y; j++){
	3571	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	3572	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	3573	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	3574	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	3575
	3576	data_pos = oriData + offset_x * dim0_offset + offset_y;
	3577
	3578	{
	3579	double * cur_data_pos = data_pos;
	3580	double fx = 0.0;
	3581	double fy = 0.0;
	3582	double f = 0;
	3583	double sum_x;
	3584	double curData;
	3585	for(size_t i=0; i<current_blockcount_x; i++){
	3586	sum_x = 0;
	3587	for(size_t j=0; j<current_blockcount_y; j++){
	3588	curData = *cur_data_pos;
	3589	sum_x += curData;
	3590	fy += curData * j;
	3591	cur_data_pos ++;
	3592	}
	3593	fx += sum_x * i;
	3594	f += sum_x;
	3595	cur_data_pos += dim0_offset - current_blockcount_y;
	3596	}
	3597	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
	3598	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
	3599	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
	3600	reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
	3601	}
	3602
	3603	reg_params_pos ++;
	3604	}
	3605	}
	3606
	3607	//Compress coefficient arrays
	3608	double precision_a, precision_b, precision_c;
	3609	double rel_param_err = 0.15/3;
	3610	precision_a = rel_param_err * realPrecision / late_blockcount_x;
	3611	precision_b = rel_param_err * realPrecision / late_blockcount_y;
	3612	precision_c = rel_param_err * realPrecision;
	3613
	3614	double mean = 0;
	3615	use_mean = 0;
	3616	if(use_mean){
	3617	// compute mean
	3618	double sum = 0.0;
	3619	size_t mean_count = 0;
	3620	for(size_t i=0; i<num_elements; i++){
	3621	if(fabs(oriData[i] - dense_pos) < realPrecision){
	3622	sum += oriData[i];
	3623	mean_count ++;
	3624	}
	3625	}
	3626	if(mean_count > 0) mean = sum / mean_count;
	3627	}
	3628
	3629
	3630	double tmp_realPrecision = realPrecision;
	3631
	3632	// use two prediction buffers for higher performance
	3633	double * unpredictable_data = result_unpredictable_data;
	3634	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
	3635	memset(indicator, 0, num_blocks * sizeof(unsigned char));
	3636	size_t reg_count = 0;
	3637	size_t strip_dim_0 = early_blockcount_x + 1;
	3638	size_t strip_dim_1 = r2 + 1;
	3639	size_t strip_dim0_offset = strip_dim_1;
	3640	unsigned char * indicator_pos = indicator;
	3641	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
	3642	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
	3643	memset(prediction_buffer_1, 0, prediction_buffer_size);
	3644	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
	3645	memset(prediction_buffer_2, 0, prediction_buffer_size);
	3646	double * cur_pb_buf = prediction_buffer_1;
	3647	double * next_pb_buf = prediction_buffer_2;
	3648	double * cur_pb_buf_pos;
	3649	double * next_pb_buf_pos;
	3650	int intvCapacity = exe_params->intvCapacity;
	3651	int intvRadius = exe_params->intvRadius;
	3652	int use_reg = 0;
	3653
	3654	reg_params_pos = reg_params;
	3655	// compress the regression coefficients on the fly
	3656	double last_coeffcients[3] = {0.0};
	3657	int coeff_intvCapacity_sz = 65536;
	3658	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
	3659	int * coeff_type[3];
	3660	int * coeff_result_type = (int ) malloc(num_blocks3*sizeof(int));
	3661	double * coeff_unpred_data[3];
	3662	double * coeff_unpredictable_data = (double ) malloc(num_blocks3*sizeof(double));
	3663	double precision[3];
	3664	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
	3665	for(int i=0; i<3; i++){
	3666	coeff_type[i] = coeff_result_type + i * num_blocks;
	3667	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
	3668	}
	3669	int coeff_index = 0;
	3670	unsigned int coeff_unpredictable_count[3] = {0};
	3671	if(use_mean){
	3672	type = result_type;
	3673	int intvCapacity_sz = intvCapacity - 2;
	3674	for(size_t i=0; i<num_x; i++){
	3675	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	3676	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	3677	data_pos = oriData + offset_x * dim0_offset;
	3678
	3679	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
	3680	next_pb_buf_pos = next_pb_buf + 1;
	3681	double * pb_pos = cur_pb_buf_pos;
	3682	double * next_pb_pos = next_pb_buf_pos;
	3683
	3684	for(size_t j=0; j<num_y; j++){
	3685	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	3686	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	3687
	3688	/sampling: decide which predictor to use (regression or lorenzo)/
	3689	{
	3690	double * cur_data_pos;
	3691	double curData;
	3692	double pred_reg, pred_sz;
	3693	double err_sz = 0.0, err_reg = 0.0;
	3694	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
	3695	// [1, 9] [3, 7] [7, 3] [9, 1]
	3696	int count = 0;
	3697	for(int i=1; i<current_blockcount_x; i+=2){
	3698	cur_data_pos = data_pos + i * dim0_offset + i;
	3699	curData = *cur_data_pos;
	3700	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	3701	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
	3702
	3703	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
	3704
	3705	err_reg += fabs(pred_reg - curData);
	3706
	3707	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
	3708	curData = *cur_data_pos;
	3709	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	3710	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
	3711	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
	3712
	3713	err_reg += fabs(pred_reg - curData);
	3714
	3715	count += 2;
	3716	}
	3717
	3718	use_reg = (err_reg < err_sz);
	3719	}
	3720	if(use_reg)
	3721	{
	3722	{
	3723	/predict coefficients in current block via previous reg_block/
	3724	double cur_coeff;
	3725	double diff, itvNum;
	3726	for(int e=0; e<3; e++){
	3727	cur_coeff = reg_params_pos[e*num_blocks];
	3728	diff = cur_coeff - last_coeffcients[e];
	3729	itvNum = fabs(diff)/precision[e] + 1;
	3730	if (itvNum < coeff_intvCapacity_sz){
	3731	if (diff < 0) itvNum = -itvNum;
	3732	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	3733	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	3734	//ganrantee comporession error against the case of machine-epsilon
	3735	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	3736	coeff_type[e][coeff_index] = 0;
	3737	last_coeffcients[e] = cur_coeff;
	3738	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	3739	}
	3740	}
	3741	else{
	3742	coeff_type[e][coeff_index] = 0;
	3743	last_coeffcients[e] = cur_coeff;
	3744	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	3745	}
	3746	}
	3747	coeff_index ++;
	3748	}
	3749	double curData;
	3750	double pred;
	3751	double itvNum;
	3752	double diff;
	3753	size_t index = 0;
	3754	size_t block_unpredictable_count = 0;
	3755	double * cur_data_pos = data_pos;
	3756	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	3757	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	3758	curData = *cur_data_pos;
	3759	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3760	diff = curData - pred;
	3761	itvNum = fabs(diff)/realPrecision + 1;
	3762	if (itvNum < intvCapacity){
	3763	if (diff < 0) itvNum = -itvNum;
	3764	type[index] = (int) (itvNum/2) + intvRadius;
	3765	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3766	//ganrantee comporession error against the case of machine-epsilon
	3767	if(fabs(curData - pred)>realPrecision){
	3768	type[index] = 0;
	3769	pred = curData;
	3770	unpredictable_data[block_unpredictable_count ++] = curData;
	3771	}
	3772	}
	3773	else{
	3774	type[index] = 0;
	3775	pred = curData;
	3776	unpredictable_data[block_unpredictable_count ++] = curData;
	3777	}
	3778	index ++;
	3779	cur_data_pos ++;
	3780	}
	3781	/dealing with the last jj (boundary)/
	3782	{
	3783	size_t jj = current_blockcount_y - 1;
	3784	curData = *cur_data_pos;
	3785	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3786	diff = curData - pred;
	3787	itvNum = fabs(diff)/realPrecision + 1;
	3788	if (itvNum < intvCapacity){
	3789	if (diff < 0) itvNum = -itvNum;
	3790	type[index] = (int) (itvNum/2) + intvRadius;
	3791	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3792	//ganrantee comporession error against the case of machine-epsilon
	3793	if(fabs(curData - pred)>realPrecision){
	3794	type[index] = 0;
	3795	pred = curData;
	3796	unpredictable_data[block_unpredictable_count ++] = curData;
	3797	}
	3798	}
	3799	else{
	3800	type[index] = 0;
	3801	pred = curData;
	3802	unpredictable_data[block_unpredictable_count ++] = curData;
	3803	}
	3804
	3805	// assign value to block surfaces
	3806	pb_pos[ii * strip_dim0_offset + jj] = pred;
	3807	index ++;
	3808	cur_data_pos ++;
	3809	}
	3810	cur_data_pos += dim0_offset - current_blockcount_y;
	3811	}
	3812	/dealing with the last ii (boundary)/
	3813	{
	3814	size_t ii = current_blockcount_x - 1;
	3815	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	3816	curData = *cur_data_pos;
	3817	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3818	diff = curData - pred;
	3819	itvNum = fabs(diff)/realPrecision + 1;
	3820	if (itvNum < intvCapacity){
	3821	if (diff < 0) itvNum = -itvNum;
	3822	type[index] = (int) (itvNum/2) + intvRadius;
	3823	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3824	//ganrantee comporession error against the case of machine-epsilon
	3825	if(fabs(curData - pred)>realPrecision){
	3826	type[index] = 0;
	3827	pred = curData;
	3828	unpredictable_data[block_unpredictable_count ++] = curData;
	3829	}
	3830	}
	3831	else{
	3832	type[index] = 0;
	3833	pred = curData;
	3834	unpredictable_data[block_unpredictable_count ++] = curData;
	3835	}
	3836	// assign value to next prediction buffer
	3837	next_pb_pos[jj] = pred;
	3838	index ++;
	3839	cur_data_pos ++;
	3840	}
	3841	/dealing with the last jj (boundary)/
	3842	{
	3843	size_t jj = current_blockcount_y - 1;
	3844	curData = *cur_data_pos;
	3845	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3846	diff = curData - pred;
	3847	itvNum = fabs(diff)/realPrecision + 1;
	3848	if (itvNum < intvCapacity){
	3849	if (diff < 0) itvNum = -itvNum;
	3850	type[index] = (int) (itvNum/2) + intvRadius;
	3851	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3852	//ganrantee comporession error against the case of machine-epsilon
	3853	if(fabs(curData - pred)>realPrecision){
	3854	type[index] = 0;
	3855	pred = curData;
	3856	unpredictable_data[block_unpredictable_count ++] = curData;
	3857	}
	3858	}
	3859	else{
	3860	type[index] = 0;
	3861	pred = curData;
	3862	unpredictable_data[block_unpredictable_count ++] = curData;
	3863	}
	3864
	3865	// assign value to block surfaces
	3866	pb_pos[ii * strip_dim0_offset + jj] = pred;
	3867	// assign value to next prediction buffer
	3868	next_pb_pos[jj] = pred;
	3869
	3870	index ++;
	3871	cur_data_pos ++;
	3872	}
	3873	} // end ii == -1
	3874	unpredictable_count = block_unpredictable_count;
	3875	total_unpred += unpredictable_count;
	3876	unpredictable_data += unpredictable_count;
	3877	reg_count ++;
	3878	}// end use_reg
	3879	else{
	3880	// use SZ
	3881	// SZ predication
	3882	unpredictable_count = 0;
	3883	double * cur_pb_pos = pb_pos;
	3884	double * cur_data_pos = data_pos;
	3885	double curData;
	3886	double pred2D;
	3887	double itvNum, diff;
	3888	size_t index = 0;
	3889	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	3890	for(size_t jj=0; jj<current_blockcount_y; jj++){
	3891	curData = *cur_data_pos;
	3892	if(fabs(curData - mean) <= realPrecision){
	3893	// adjust type[index] to intvRadius for coherence with freq in reg
	3894	type[index] = intvRadius;
	3895	*cur_pb_pos = mean;
	3896	}
	3897	else
	3898	{
	3899	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	3900	diff = curData - pred2D;
	3901	itvNum = fabs(diff)/realPrecision + 1;
	3902	if (itvNum < intvCapacity_sz){
	3903	if (diff < 0) itvNum = -itvNum;
	3904	type[index] = (int) (itvNum/2) + intvRadius;
	3905	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	3906	if(type[index] <= intvRadius) type[index] -= 1;
	3907	//ganrantee comporession error against the case of machine-epsilon
	3908	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	3909	type[index] = 0;
	3910	*cur_pb_pos = curData;
	3911	unpredictable_data[unpredictable_count ++] = curData;
	3912	}
	3913	}
	3914	else{
	3915	type[index] = 0;
	3916	*cur_pb_pos = curData;
	3917	unpredictable_data[unpredictable_count ++] = curData;
	3918	}
	3919	}
	3920	index ++;
	3921	cur_pb_pos ++;
	3922	cur_data_pos ++;
	3923	}
	3924	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
	3925	cur_data_pos += dim0_offset - current_blockcount_y;
	3926	}
	3927	/dealing with the last ii (boundary)/
	3928	{
	3929	// ii == current_blockcount_x - 1
	3930	for(size_t jj=0; jj<current_blockcount_y; jj++){
	3931	curData = *cur_data_pos;
	3932	if(fabs(curData - mean) <= realPrecision){
	3933	// adjust type[index] to intvRadius for coherence with freq in reg
	3934	type[index] = intvRadius;
	3935	*cur_pb_pos = mean;
	3936	}
	3937	else
	3938	{
	3939	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	3940	diff = curData - pred2D;
	3941	itvNum = fabs(diff)/realPrecision + 1;
	3942	if (itvNum < intvCapacity_sz){
	3943	if (diff < 0) itvNum = -itvNum;
	3944	type[index] = (int) (itvNum/2) + intvRadius;
	3945	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	3946	if(type[index] <= intvRadius) type[index] -= 1;
	3947	//ganrantee comporession error against the case of machine-epsilon
	3948	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	3949	type[index] = 0;
	3950	*cur_pb_pos = curData;
	3951	unpredictable_data[unpredictable_count ++] = curData;
	3952	}
	3953	}
	3954	else{
	3955	type[index] = 0;
	3956	*cur_pb_pos = curData;
	3957	unpredictable_data[unpredictable_count ++] = curData;
	3958	}
	3959	}
	3960	next_pb_pos[jj] = *cur_pb_pos;
	3961	index ++;
	3962	cur_pb_pos ++;
	3963	cur_data_pos ++;
	3964	}
	3965	}
	3966	total_unpred += unpredictable_count;
	3967	unpredictable_data += unpredictable_count;
	3968	// change indicator
	3969	indicator_pos[j] = 1;
	3970	}// end SZ
	3971	reg_params_pos ++;
	3972	data_pos += current_blockcount_y;
	3973	pb_pos += current_blockcount_y;
	3974	next_pb_pos += current_blockcount_y;
	3975	type += current_blockcount_x * current_blockcount_y;
	3976	}// end j
	3977	indicator_pos += num_y;
	3978	double * tmp;
	3979	tmp = cur_pb_buf;
	3980	cur_pb_buf = next_pb_buf;
	3981	next_pb_buf = tmp;
	3982	}// end i
	3983	}// end use mean
	3984	else{
	3985	type = result_type;
	3986	int intvCapacity_sz = intvCapacity - 2;
	3987	for(size_t i=0; i<num_x; i++){
	3988	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	3989	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	3990	data_pos = oriData + offset_x * dim0_offset;
	3991
	3992	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
	3993	next_pb_buf_pos = next_pb_buf + 1;
	3994	double * pb_pos = cur_pb_buf_pos;
	3995	double * next_pb_pos = next_pb_buf_pos;
	3996
	3997	for(size_t j=0; j<num_y; j++){
	3998	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	3999	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4000	/sampling/
	4001	{
	4002	// sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
	4003	double * cur_data_pos;
	4004	double curData;
	4005	double pred_reg, pred_sz;
	4006	double err_sz = 0.0, err_reg = 0.0;
	4007	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
	4008	// [1, 9] [3, 7] [7, 3] [9, 1]
	4009	int count = 0;
	4010	for(int i=1; i<current_blockcount_x; i+=2){
	4011	cur_data_pos = data_pos + i * dim0_offset + i;
	4012	curData = *cur_data_pos;
	4013	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4014	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
	4015	err_sz += fabs(pred_sz - curData);
	4016	err_reg += fabs(pred_reg - curData);
	4017
	4018	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
	4019	curData = *cur_data_pos;
	4020	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4021	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
	4022	err_sz += fabs(pred_sz - curData);
	4023	err_reg += fabs(pred_reg - curData);
	4024
	4025	count += 2;
	4026	}
	4027	err_sz += realPrecision * count * 0.81;
	4028	use_reg = (err_reg < err_sz);
	4029
	4030	}
	4031	if(use_reg)
	4032	{
	4033	{
	4034	/predict coefficients in current block via previous reg_block/
	4035	double cur_coeff;
	4036	double diff, itvNum;
	4037	for(int e=0; e<3; e++){
	4038	cur_coeff = reg_params_pos[e*num_blocks];
	4039	diff = cur_coeff - last_coeffcients[e];
	4040	itvNum = fabs(diff)/precision[e] + 1;
	4041	if (itvNum < coeff_intvCapacity_sz){
	4042	if (diff < 0) itvNum = -itvNum;
	4043	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	4044	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	4045	//ganrantee comporession error against the case of machine-epsilon
	4046	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	4047	coeff_type[e][coeff_index] = 0;
	4048	last_coeffcients[e] = cur_coeff;
	4049	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4050	}
	4051	}
	4052	else{
	4053	coeff_type[e][coeff_index] = 0;
	4054	last_coeffcients[e] = cur_coeff;
	4055	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4056	}
	4057	}
	4058	coeff_index ++;
	4059	}
	4060	double curData;
	4061	double pred;
	4062	double itvNum;
	4063	double diff;
	4064	size_t index = 0;
	4065	size_t block_unpredictable_count = 0;
	4066	double * cur_data_pos = data_pos;
	4067	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4068	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4069	curData = *cur_data_pos;
	4070	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4071	diff = curData - pred;
	4072	itvNum = fabs(diff)/realPrecision + 1;
	4073	if (itvNum < intvCapacity){
	4074	if (diff < 0) itvNum = -itvNum;
	4075	type[index] = (int) (itvNum/2) + intvRadius;
	4076	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4077	//ganrantee comporession error against the case of machine-epsilon
	4078	if(fabs(curData - pred)>realPrecision){
	4079	type[index] = 0;
	4080	pred = curData;
	4081	unpredictable_data[block_unpredictable_count ++] = curData;
	4082	}
	4083	}
	4084	else{
	4085	type[index] = 0;
	4086	pred = curData;
	4087	unpredictable_data[block_unpredictable_count ++] = curData;
	4088	}
	4089	index ++;
	4090	cur_data_pos ++;
	4091	}
	4092	/dealing with the last jj (boundary)/
	4093	{
	4094	// jj == current_blockcount_y - 1
	4095	size_t jj = current_blockcount_y - 1;
	4096	curData = *cur_data_pos;
	4097	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4098	diff = curData - pred;
	4099	itvNum = fabs(diff)/realPrecision + 1;
	4100	if (itvNum < intvCapacity){
	4101	if (diff < 0) itvNum = -itvNum;
	4102	type[index] = (int) (itvNum/2) + intvRadius;
	4103	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4104	//ganrantee comporession error against the case of machine-epsilon
	4105	if(fabs(curData - pred)>realPrecision){
	4106	type[index] = 0;
	4107	pred = curData;
	4108	unpredictable_data[block_unpredictable_count ++] = curData;
	4109	}
	4110	}
	4111	else{
	4112	type[index] = 0;
	4113	pred = curData;
	4114	unpredictable_data[block_unpredictable_count ++] = curData;
	4115	}
	4116
	4117	// assign value to block surfaces
	4118	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4119	index ++;
	4120	cur_data_pos ++;
	4121	}
	4122	cur_data_pos += dim0_offset - current_blockcount_y;
	4123	}
	4124	/dealing with the last ii (boundary)/
	4125	{
	4126	size_t ii = current_blockcount_x - 1;
	4127	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4128	curData = *cur_data_pos;
	4129	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4130	diff = curData - pred;
	4131	itvNum = fabs(diff)/realPrecision + 1;
	4132	if (itvNum < intvCapacity){
	4133	if (diff < 0) itvNum = -itvNum;
	4134	type[index] = (int) (itvNum/2) + intvRadius;
	4135	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4136	//ganrantee comporession error against the case of machine-epsilon
	4137	if(fabs(curData - pred)>realPrecision){
	4138	type[index] = 0;
	4139	pred = curData;
	4140	unpredictable_data[block_unpredictable_count ++] = curData;
	4141	}
	4142	}
	4143	else{
	4144	type[index] = 0;
	4145	pred = curData;
	4146	unpredictable_data[block_unpredictable_count ++] = curData;
	4147	}
	4148	// assign value to next prediction buffer
	4149	next_pb_pos[jj] = pred;
	4150	index ++;
	4151	cur_data_pos ++;
	4152	}
	4153	/dealing with the last jj (boundary)/
	4154	{
	4155	// jj == current_blockcount_y - 1
	4156	size_t jj = current_blockcount_y - 1;
	4157	curData = *cur_data_pos;
	4158	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4159	diff = curData - pred;
	4160	itvNum = fabs(diff)/realPrecision + 1;
	4161	if (itvNum < intvCapacity){
	4162	if (diff < 0) itvNum = -itvNum;
	4163	type[index] = (int) (itvNum/2) + intvRadius;
	4164	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4165	//ganrantee comporession error against the case of machine-epsilon
	4166	if(fabs(curData - pred)>realPrecision){
	4167	type[index] = 0;
	4168	pred = curData;
	4169	unpredictable_data[block_unpredictable_count ++] = curData;
	4170	}
	4171	}
	4172	else{
	4173	type[index] = 0;
	4174	pred = curData;
	4175	unpredictable_data[block_unpredictable_count ++] = curData;
	4176	}
	4177
	4178	// assign value to block surfaces
	4179	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4180	// assign value to next prediction buffer
	4181	next_pb_pos[jj] = pred;
	4182
	4183	index ++;
	4184	cur_data_pos ++;
	4185	}
	4186	} // end ii == -1
	4187	unpredictable_count = block_unpredictable_count;
	4188	total_unpred += unpredictable_count;
	4189	unpredictable_data += unpredictable_count;
	4190	reg_count ++;
	4191	}// end use_reg
	4192	else{
	4193	// use SZ
	4194	// SZ predication
	4195	unpredictable_count = 0;
	4196	double * cur_pb_pos = pb_pos;
	4197	double * cur_data_pos = data_pos;
	4198	double curData;
	4199	double pred2D;
	4200	double itvNum, diff;
	4201	size_t index = 0;
	4202	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4203	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4204	curData = *cur_data_pos;
	4205
	4206	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4207	diff = curData - pred2D;
	4208	itvNum = fabs(diff)/realPrecision + 1;
	4209	if (itvNum < intvCapacity_sz){
	4210	if (diff < 0) itvNum = -itvNum;
	4211	type[index] = (int) (itvNum/2) + intvRadius;
	4212	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4213	//ganrantee comporession error against the case of machine-epsilon
	4214	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4215	type[index] = 0;
	4216	*cur_pb_pos = curData;
	4217	unpredictable_data[unpredictable_count ++] = curData;
	4218	}
	4219	}
	4220	else{
	4221	type[index] = 0;
	4222	*cur_pb_pos = curData;
	4223	unpredictable_data[unpredictable_count ++] = curData;
	4224	}
	4225
	4226	index ++;
	4227	cur_pb_pos ++;
	4228	cur_data_pos ++;
	4229	}
	4230	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
	4231	cur_data_pos += dim0_offset - current_blockcount_y;
	4232	}
	4233	/dealing with the last ii (boundary)/
	4234	{
	4235	// ii == current_blockcount_x - 1
	4236	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4237	curData = *cur_data_pos;
	4238
	4239	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4240	diff = curData - pred2D;
	4241	itvNum = fabs(diff)/realPrecision + 1;
	4242	if (itvNum < intvCapacity_sz){
	4243	if (diff < 0) itvNum = -itvNum;
	4244	type[index] = (int) (itvNum/2) + intvRadius;
	4245	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4246	//ganrantee comporession error against the case of machine-epsilon
	4247	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4248	type[index] = 0;
	4249	*cur_pb_pos = curData;
	4250	unpredictable_data[unpredictable_count ++] = curData;
	4251	}
	4252	}
	4253	else{
	4254	type[index] = 0;
	4255	*cur_pb_pos = curData;
	4256	unpredictable_data[unpredictable_count ++] = curData;
	4257	}
	4258	next_pb_pos[jj] = *cur_pb_pos;
	4259	index ++;
	4260	cur_pb_pos ++;
	4261	cur_data_pos ++;
	4262	}
	4263	}
	4264	total_unpred += unpredictable_count;
	4265	unpredictable_data += unpredictable_count;
	4266	// change indicator
	4267	indicator_pos[j] = 1;
	4268	}// end SZ
	4269	reg_params_pos ++;
	4270	data_pos += current_blockcount_y;
	4271	pb_pos += current_blockcount_y;
	4272	next_pb_pos += current_blockcount_y;
	4273	type += current_blockcount_x * current_blockcount_y;
	4274	}// end j
	4275	indicator_pos += num_y;
	4276	double * tmp;
	4277	tmp = cur_pb_buf;
	4278	cur_pb_buf = next_pb_buf;
	4279	next_pb_buf = tmp;
	4280	}// end i
	4281	}
	4282	free(prediction_buffer_1);
	4283	free(prediction_buffer_2);
	4284
	4285	int stateNum = 2*quantization_intervals;
	4286	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	4287
	4288	size_t nodeCount = 0;
	4289	size_t i = 0;
	4290	init(huffmanTree, result_type, num_elements);
	4291	for (i = 0; i < stateNum; i++)
	4292	if (huffmanTree->code[i]) nodeCount++;
	4293	nodeCount = nodeCount*2-1;
	4294
	4295	unsigned char *treeBytes;
	4296	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	4297
	4298	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
	4299	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
	4300	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
	4301	unsigned char * result_pos = result;
	4302	initRandomAccessBytes(result_pos);
	4303	result_pos += meta_data_offset;
	4304
	4305	sizeToBytes(result_pos, num_elements);
	4306	result_pos += exe_params->SZ_SIZE_TYPE;
	4307
	4308	intToBytes_bigEndian(result_pos, block_size);
	4309	result_pos += sizeof(int);
	4310	doubleToBytes(result_pos, realPrecision);
	4311	result_pos += sizeof(double);
	4312	intToBytes_bigEndian(result_pos, quantization_intervals);
	4313	result_pos += sizeof(int);
	4314	intToBytes_bigEndian(result_pos, treeByteSize);
	4315	result_pos += sizeof(int);
	4316	intToBytes_bigEndian(result_pos, nodeCount);
	4317	result_pos += sizeof(int);
	4318	memcpy(result_pos, treeBytes, treeByteSize);
	4319	result_pos += treeByteSize;
	4320	free(treeBytes);
	4321
	4322	memcpy(result_pos, &use_mean, sizeof(unsigned char));
	4323	result_pos += sizeof(unsigned char);
	4324	memcpy(result_pos, &mean, sizeof(double));
	4325	result_pos += sizeof(double);
	4326
	4327	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
	4328	result_pos += indicator_size;
	4329
	4330	//convert the lead/mid/resi to byte stream
	4331	if(reg_count>0){
	4332	for(int e=0; e<3; e++){
	4333	int stateNum = 2*coeff_intvCapacity_sz;
	4334	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	4335	size_t nodeCount = 0;
	4336	init(huffmanTree, coeff_type[e], reg_count);
	4337	size_t i = 0;
	4338	for (i = 0; i < huffmanTree->stateNum; i++)
	4339	if (huffmanTree->code[i]) nodeCount++;
	4340	nodeCount = nodeCount*2-1;
	4341	unsigned char *treeBytes;
	4342	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	4343	doubleToBytes(result_pos, precision[e]);
	4344	result_pos += sizeof(double);
	4345	intToBytes_bigEndian(result_pos, coeff_intvRadius);
	4346	result_pos += sizeof(int);
	4347	intToBytes_bigEndian(result_pos, treeByteSize);
	4348	result_pos += sizeof(int);
	4349	intToBytes_bigEndian(result_pos, nodeCount);
	4350	result_pos += sizeof(int);
	4351	memcpy(result_pos, treeBytes, treeByteSize);
	4352	result_pos += treeByteSize;
	4353	free(treeBytes);
	4354	size_t typeArray_size = 0;
	4355	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
	4356	sizeToBytes(result_pos, typeArray_size);
	4357	result_pos += sizeof(size_t) + typeArray_size;
	4358	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
	4359	result_pos += sizeof(int);
	4360	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
	4361	result_pos += coeff_unpredictable_count[e]*sizeof(double);
	4362	SZ_ReleaseHuffman(huffmanTree);
	4363	}
	4364	}
	4365	free(coeff_result_type);
	4366	free(coeff_unpredictable_data);
	4367
	4368	//record the number of unpredictable data and also store them
	4369	memcpy(result_pos, &total_unpred, sizeof(size_t));
	4370	result_pos += sizeof(size_t);
	4371	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
	4372	result_pos += total_unpred * sizeof(double);
	4373	size_t typeArray_size = 0;
	4374	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
	4375	result_pos += typeArray_size;
	4376
	4377	size_t totalEncodeSize = result_pos - result;
	4378	free(indicator);
	4379	free(result_unpredictable_data);
	4380	free(result_type);
	4381	free(reg_params);
	4382
	4383	SZ_ReleaseHuffman(huffmanTree);
	4384	*comp_size = totalEncodeSize;
	4385
	4386	return result;
	4387	}
	4388
	4389	unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
	4390
	4391	unsigned int quantization_intervals;
	4392	double sz_sample_correct_freq = -1;//0.5; //-1
	4393	double dense_pos;
	4394	double mean_flush_freq;
	4395	unsigned char use_mean = 0;
	4396
	4397	// calculate block dims
	4398	size_t num_x, num_y, num_z;
	4399	size_t block_size = 6;
	4400	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
	4401	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
	4402	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
	4403
	4404	size_t split_index_x, split_index_y, split_index_z;
	4405	size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
	4406	size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
	4407	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
	4408	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
	4409	SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
	4410
	4411	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
	4412	size_t num_blocks = num_x * num_y * num_z;
	4413	size_t num_elements = r1 * r2 * r3;
	4414
	4415	size_t dim0_offset = r2 * r3;
	4416	size_t dim1_offset = r3;
	4417
	4418	int * result_type = (int ) malloc(num_elements sizeof(int));
	4419	size_t unpred_data_max_size = max_num_block_elements;
	4420	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
	4421	size_t total_unpred = 0;
	4422	size_t unpredictable_count;
	4423	size_t max_unpred_count = 0;
	4424	double * data_pos = oriData;
	4425	int * type = result_type;
	4426	size_t type_offset;
	4427	size_t offset_x, offset_y, offset_z;
	4428	size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
	4429
	4430	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
	4431	double * reg_params_pos = reg_params;
	4432	// move regression part out
	4433	size_t params_offset_b = num_blocks;
	4434	size_t params_offset_c = 2*num_blocks;
	4435	size_t params_offset_d = 3*num_blocks;
	4436	for(size_t i=0; i<num_x; i++){
	4437	for(size_t j=0; j<num_y; j++){
	4438	for(size_t k=0; k<num_z; k++){
	4439	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4440	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4441	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	4442	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4443	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4444	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
	4445
	4446	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
	4447	/Calculate regression coefficients/
	4448	{
	4449	double * cur_data_pos = data_pos;
	4450	double fx = 0.0;
	4451	double fy = 0.0;
	4452	double fz = 0.0;
	4453	double f = 0;
	4454	double sum_x, sum_y;
	4455	double curData;
	4456	for(size_t i=0; i<current_blockcount_x; i++){
	4457	sum_x = 0;
	4458	for(size_t j=0; j<current_blockcount_y; j++){
	4459	sum_y = 0;
	4460	for(size_t k=0; k<current_blockcount_z; k++){
	4461	curData = *cur_data_pos;
	4462	// f += curData;
	4463	// fx += curData * i;
	4464	// fy += curData * j;
	4465	// fz += curData * k;
	4466	sum_y += curData;
	4467	fz += curData * k;
	4468	cur_data_pos ++;
	4469	}
	4470	fy += sum_y * j;
	4471	sum_x += sum_y;
	4472	cur_data_pos += dim1_offset - current_blockcount_z;
	4473	}
	4474	fx += sum_x * i;
	4475	f += sum_x;
	4476	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	4477	}
	4478	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
	4479	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
	4480	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
	4481	reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
	4482	reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
	4483	}
	4484	reg_params_pos ++;
	4485	}
	4486	}
	4487	}
	4488
	4489	//Compress coefficient arrays
	4490	double precision_a, precision_b, precision_c, precision_d;
	4491	double rel_param_err = 0.025;
	4492	precision_a = rel_param_err * realPrecision / late_blockcount_x;
	4493	precision_b = rel_param_err * realPrecision / late_blockcount_y;
	4494	precision_c = rel_param_err * realPrecision / late_blockcount_z;
	4495	precision_d = rel_param_err * realPrecision;
	4496
	4497	if(exe_params->optQuantMode==1)
	4498	{
	4499	quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
	4500	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
	4501	updateQuantizationInfo(quantization_intervals);
	4502	}
	4503	else{
	4504	quantization_intervals = exe_params->intvCapacity;
	4505	}
	4506
	4507	double mean = 0;
	4508	if(use_mean){
	4509	// compute mean
	4510	double sum = 0.0;
	4511	size_t mean_count = 0;
	4512	for(size_t i=0; i<num_elements; i++){
	4513	if(fabs(oriData[i] - dense_pos) < realPrecision){
	4514	sum += oriData[i];
	4515	mean_count ++;
	4516	}
	4517	}
	4518	if(mean_count > 0) mean = sum / mean_count;
	4519	}
	4520
	4521	double tmp_realPrecision = realPrecision;
	4522
	4523	// use two prediction buffers for higher performance
	4524	double * unpredictable_data = result_unpredictable_data;
	4525	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
	4526	memset(indicator, 0, num_blocks * sizeof(unsigned char));
	4527	size_t reg_count = 0;
	4528	size_t strip_dim_0 = early_blockcount_x + 1;
	4529	size_t strip_dim_1 = r2 + 1;
	4530	size_t strip_dim_2 = r3 + 1;
	4531	size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
	4532	size_t strip_dim1_offset = strip_dim_2;
	4533	unsigned char * indicator_pos = indicator;
	4534
	4535	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
	4536	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
	4537	memset(prediction_buffer_1, 0, prediction_buffer_size);
	4538	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
	4539	memset(prediction_buffer_2, 0, prediction_buffer_size);
	4540	double * cur_pb_buf = prediction_buffer_1;
	4541	double * next_pb_buf = prediction_buffer_2;
	4542	double * cur_pb_buf_pos;
	4543	double * next_pb_buf_pos;
	4544	int intvCapacity = exe_params->intvCapacity;
	4545	int intvRadius = exe_params->intvRadius;
	4546	int use_reg = 0;
	4547	double noise = realPrecision * 1.22;
	4548
	4549	reg_params_pos = reg_params;
	4550	// compress the regression coefficients on the fly
	4551	double last_coeffcients[4] = {0.0};
	4552	int coeff_intvCapacity_sz = 65536;
	4553	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
	4554	int * coeff_type[4];
	4555	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
	4556	double * coeff_unpred_data[4];
	4557	double * coeff_unpredictable_data = (double ) malloc(num_blocks4*sizeof(double));
	4558	double precision[4];
	4559	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
	4560	for(int i=0; i<4; i++){
	4561	coeff_type[i] = coeff_result_type + i * num_blocks;
	4562	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
	4563	}
	4564	int coeff_index = 0;
	4565	unsigned int coeff_unpredictable_count[4] = {0};
	4566
	4567	if(use_mean){
	4568	int intvCapacity_sz = intvCapacity - 2;
	4569	for(size_t i=0; i<num_x; i++){
	4570	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4571	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4572	for(size_t j=0; j<num_y; j++){
	4573	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4574	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4575	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
	4576	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
	4577	type = result_type + type_offset;
	4578
	4579	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
	4580	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
	4581	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
	4582
	4583	size_t current_blockcount_z;
	4584	double * pb_pos = cur_pb_buf_pos;
	4585	double * next_pb_pos = next_pb_buf_pos;
	4586	size_t strip_unpredictable_count = 0;
	4587	for(size_t k=0; k<num_z; k++){
	4588	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	4589
	4590	/sampling and decide which predictor/
	4591	{
	4592	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	4593	double * cur_data_pos;
	4594	double curData;
	4595	double pred_reg, pred_sz;
	4596	double err_sz = 0.0, err_reg = 0.0;
	4597	int bmi = 0;
	4598	if(i>0 && j>0 && k>0){
	4599	for(int i=0; i<block_size; i++){
	4600	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4601	curData = *cur_data_pos;
	4602	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4603	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4604	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4605	err_reg += fabs(pred_reg - curData);
	4606
	4607	bmi = block_size - i;
	4608	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4609	curData = *cur_data_pos;
	4610	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4611	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4612	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4613	err_reg += fabs(pred_reg - curData);
	4614
	4615	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4616	curData = *cur_data_pos;
	4617	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4618	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4619	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4620	err_reg += fabs(pred_reg - curData);
	4621
	4622	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4623	curData = *cur_data_pos;
	4624	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4625	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4626	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4627	err_reg += fabs(pred_reg - curData);
	4628	}
	4629	}
	4630	else{
	4631	for(int i=1; i<block_size; i++){
	4632	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4633	curData = *cur_data_pos;
	4634	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4635	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4636	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4637	err_reg += fabs(pred_reg - curData);
	4638
	4639	bmi = block_size - i;
	4640	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4641	curData = *cur_data_pos;
	4642	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4643	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4644	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4645	err_reg += fabs(pred_reg - curData);
	4646
	4647	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4648	curData = *cur_data_pos;
	4649	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4650	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4651	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4652	err_reg += fabs(pred_reg - curData);
	4653
	4654	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4655	curData = *cur_data_pos;
	4656	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4657	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4658	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4659	err_reg += fabs(pred_reg - curData);
	4660
	4661	}
	4662	}
	4663	use_reg = (err_reg < err_sz);
	4664	}
	4665	if(use_reg){
	4666	{
	4667	/predict coefficients in current block via previous reg_block/
	4668	double cur_coeff;
	4669	double diff, itvNum;
	4670	for(int e=0; e<4; e++){
	4671	cur_coeff = reg_params_pos[e*num_blocks];
	4672	diff = cur_coeff - last_coeffcients[e];
	4673	itvNum = fabs(diff)/precision[e] + 1;
	4674	if (itvNum < coeff_intvCapacity_sz){
	4675	if (diff < 0) itvNum = -itvNum;
	4676	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	4677	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	4678	//ganrantee comporession error against the case of machine-epsilon
	4679	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	4680	coeff_type[e][coeff_index] = 0;
	4681	last_coeffcients[e] = cur_coeff;
	4682	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4683	}
	4684	}
	4685	else{
	4686	coeff_type[e][coeff_index] = 0;
	4687	last_coeffcients[e] = cur_coeff;
	4688	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4689	}
	4690	}
	4691	coeff_index ++;
	4692	}
	4693	double curData;
	4694	double pred;
	4695	double itvNum;
	4696	double diff;
	4697	size_t index = 0;
	4698	size_t block_unpredictable_count = 0;
	4699	double * cur_data_pos = data_pos;
	4700	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4701	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4702	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4703	curData = *cur_data_pos;
	4704	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	4705	diff = curData - pred;
	4706	itvNum = fabs(diff)/tmp_realPrecision + 1;
	4707	if (itvNum < intvCapacity){
	4708	if (diff < 0) itvNum = -itvNum;
	4709	type[index] = (int) (itvNum/2) + intvRadius;
	4710	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	4711	//ganrantee comporession error against the case of machine-epsilon
	4712	if(fabs(curData - pred)>tmp_realPrecision){
	4713	type[index] = 0;
	4714	pred = curData;
	4715	unpredictable_data[block_unpredictable_count ++] = curData;
	4716	}
	4717	}
	4718	else{
	4719	type[index] = 0;
	4720	pred = curData;
	4721	unpredictable_data[block_unpredictable_count ++] = curData;
	4722	}
	4723	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	4724	// assign value to block surfaces
	4725	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	4726	}
	4727	index ++;
	4728	cur_data_pos ++;
	4729	}
	4730	cur_data_pos += dim1_offset - current_blockcount_z;
	4731	}
	4732	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	4733	}
	4734	/dealing with the last ii (boundary)/
	4735	{
	4736	// ii == current_blockcount_x - 1
	4737	size_t ii = current_blockcount_x - 1;
	4738	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4739	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4740	curData = *cur_data_pos;
	4741	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	4742	diff = curData - pred;
	4743	itvNum = fabs(diff)/tmp_realPrecision + 1;
	4744	if (itvNum < intvCapacity){
	4745	if (diff < 0) itvNum = -itvNum;
	4746	type[index] = (int) (itvNum/2) + intvRadius;
	4747	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	4748	//ganrantee comporession error against the case of machine-epsilon
	4749	if(fabs(curData - pred)>tmp_realPrecision){
	4750	type[index] = 0;
	4751	pred = curData;
	4752	unpredictable_data[block_unpredictable_count ++] = curData;
	4753	}
	4754	}
	4755	else{
	4756	type[index] = 0;
	4757	pred = curData;
	4758	unpredictable_data[block_unpredictable_count ++] = curData;
	4759	}
	4760
	4761	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	4762	// assign value to block surfaces
	4763	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	4764	}
	4765	// assign value to next prediction buffer
	4766	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
	4767	index ++;
	4768	cur_data_pos ++;
	4769	}
	4770	cur_data_pos += dim1_offset - current_blockcount_z;
	4771	}
	4772	}
	4773	unpredictable_count = block_unpredictable_count;
	4774	strip_unpredictable_count += unpredictable_count;
	4775	unpredictable_data += unpredictable_count;
	4776
	4777	reg_count ++;
	4778	}
	4779	else{
	4780	// use SZ
	4781	// SZ predication
	4782	unpredictable_count = 0;
	4783	double * cur_pb_pos = pb_pos;
	4784	double * cur_data_pos = data_pos;
	4785	double curData;
	4786	double pred3D;
	4787	double itvNum, diff;
	4788	size_t index = 0;
	4789	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4790	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4791	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4792
	4793	curData = *cur_data_pos;
	4794	if(fabs(curData - mean) <= realPrecision){
	4795	// adjust type[index] to intvRadius for coherence with freq in reg
	4796	type[index] = intvRadius;
	4797	*cur_pb_pos = mean;
	4798	}
	4799	else
	4800	{
	4801	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	4802	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	4803	diff = curData - pred3D;
	4804	itvNum = fabs(diff)/realPrecision + 1;
	4805	if (itvNum < intvCapacity_sz){
	4806	if (diff < 0) itvNum = -itvNum;
	4807	type[index] = (int) (itvNum/2) + intvRadius;
	4808	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4809	if(type[index] <= intvRadius) type[index] -= 1;
	4810	//ganrantee comporession error against the case of machine-epsilon
	4811	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4812	type[index] = 0;
	4813	*cur_pb_pos = curData;
	4814	unpredictable_data[unpredictable_count ++] = curData;
	4815	}
	4816	}
	4817	else{
	4818	type[index] = 0;
	4819	*cur_pb_pos = curData;
	4820	unpredictable_data[unpredictable_count ++] = curData;
	4821	}
	4822	}
	4823	index ++;
	4824	cur_pb_pos ++;
	4825	cur_data_pos ++;
	4826	}
	4827	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	4828	cur_data_pos += dim1_offset - current_blockcount_z;
	4829	}
	4830	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
	4831	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	4832	}
	4833	/dealing with the last ii (boundary)/
	4834	{
	4835	// ii == current_blockcount_x - 1
	4836	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4837	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4838
	4839	curData = *cur_data_pos;
	4840	if(fabs(curData - mean) <= realPrecision){
	4841	// adjust type[index] to intvRadius for coherence with freq in reg
	4842	type[index] = intvRadius;
	4843	*cur_pb_pos = mean;
	4844	}
	4845	else
	4846	{
	4847	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	4848	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	4849	diff = curData - pred3D;
	4850	itvNum = fabs(diff)/realPrecision + 1;
	4851	if (itvNum < intvCapacity_sz){
	4852	if (diff < 0) itvNum = -itvNum;
	4853	type[index] = (int) (itvNum/2) + intvRadius;
	4854	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4855	if(type[index] <= intvRadius) type[index] -= 1;
	4856	//ganrantee comporession error against the case of machine-epsilon
	4857	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4858	type[index] = 0;
	4859	*cur_pb_pos = curData;
	4860	unpredictable_data[unpredictable_count ++] = curData;
	4861	}
	4862	}
	4863	else{
	4864	type[index] = 0;
	4865	*cur_pb_pos = curData;
	4866	unpredictable_data[unpredictable_count ++] = curData;
	4867	}
	4868	}
	4869	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
	4870	index ++;
	4871	cur_pb_pos ++;
	4872	cur_data_pos ++;
	4873	}
	4874	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	4875	cur_data_pos += dim1_offset - current_blockcount_z;
	4876	}
	4877	}
	4878	strip_unpredictable_count += unpredictable_count;
	4879	unpredictable_data += unpredictable_count;
	4880	// change indicator
	4881	indicator_pos[k] = 1;
	4882	}// end SZ
	4883
	4884	reg_params_pos ++;
	4885	data_pos += current_blockcount_z;
	4886	pb_pos += current_blockcount_z;
	4887	next_pb_pos += current_blockcount_z;
	4888	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
	4889
	4890	} // end k
	4891
	4892	if(strip_unpredictable_count > max_unpred_count){
	4893	max_unpred_count = strip_unpredictable_count;
	4894	}
	4895	total_unpred += strip_unpredictable_count;
	4896	indicator_pos += num_z;
	4897	}// end j
	4898	double * tmp;
	4899	tmp = cur_pb_buf;
	4900	cur_pb_buf = next_pb_buf;
	4901	next_pb_buf = tmp;
	4902	}// end i
	4903	}
	4904	else{
	4905	int intvCapacity_sz = intvCapacity - 2;
	4906	for(size_t i=0; i<num_x; i++){
	4907	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4908	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4909
	4910	for(size_t j=0; j<num_y; j++){
	4911	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4912	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4913	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
	4914	// copy bottom plane from plane buffer
	4915	// memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double));
	4916	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
	4917	type = result_type + type_offset;
	4918
	4919	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
	4920	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
	4921	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
	4922
	4923	size_t current_blockcount_z;
	4924	double * pb_pos = cur_pb_buf_pos;
	4925	double * next_pb_pos = next_pb_buf_pos;
	4926	size_t strip_unpredictable_count = 0;
	4927	for(size_t k=0; k<num_z; k++){
	4928	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	4929	/sampling/
	4930	{
	4931	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	4932	double * cur_data_pos;
	4933	double curData;
	4934	double pred_reg, pred_sz;
	4935	double err_sz = 0.0, err_reg = 0.0;
	4936	int bmi;
	4937	if(i>0 && j>0 && k>0){
	4938	for(int i=0; i<block_size; i++){
	4939	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4940	curData = *cur_data_pos;
	4941	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4942	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4943	err_sz += fabs(pred_sz - curData) + noise;
	4944	err_reg += fabs(pred_reg - curData);
	4945
	4946	bmi = block_size - i;
	4947	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4948	curData = *cur_data_pos;
	4949	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4950	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4951	err_sz += fabs(pred_sz - curData) + noise;
	4952	err_reg += fabs(pred_reg - curData);
	4953
	4954	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4955	curData = *cur_data_pos;
	4956	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4957	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4958	err_sz += fabs(pred_sz - curData) + noise;
	4959	err_reg += fabs(pred_reg - curData);
	4960
	4961	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4962	curData = *cur_data_pos;
	4963	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4964	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4965	err_sz += fabs(pred_sz - curData) + noise;
	4966	err_reg += fabs(pred_reg - curData);
	4967	}
	4968	}
	4969	else{
	4970	for(int i=1; i<block_size; i++){
	4971	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4972	curData = *cur_data_pos;
	4973	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4974	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4975	err_sz += fabs(pred_sz - curData) + noise;
	4976	err_reg += fabs(pred_reg - curData);
	4977
	4978	bmi = block_size - i;
	4979	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4980	curData = *cur_data_pos;
	4981	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4982	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4983	err_sz += fabs(pred_sz - curData) + noise;
	4984	err_reg += fabs(pred_reg - curData);
	4985
	4986	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4987	curData = *cur_data_pos;
	4988	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4989	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4990	err_sz += fabs(pred_sz - curData) + noise;
	4991	err_reg += fabs(pred_reg - curData);
	4992
	4993	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4994	curData = *cur_data_pos;
	4995	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4996	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4997	err_sz += fabs(pred_sz - curData) + noise;
	4998	err_reg += fabs(pred_reg - curData);
	4999	}
	5000	}
	5001	use_reg = (err_reg < err_sz);
	5002
	5003	}
	5004	if(use_reg)
	5005	{
	5006	{
	5007	/predict coefficients in current block via previous reg_block/
	5008	double cur_coeff;
	5009	double diff, itvNum;
	5010	for(int e=0; e<4; e++){
	5011	cur_coeff = reg_params_pos[e*num_blocks];
	5012	diff = cur_coeff - last_coeffcients[e];
	5013	itvNum = fabs(diff)/precision[e] + 1;
	5014	if (itvNum < coeff_intvCapacity_sz){
	5015	if (diff < 0) itvNum = -itvNum;
	5016	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	5017	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	5018	//ganrantee comporession error against the case of machine-epsilon
	5019	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	5020	coeff_type[e][coeff_index] = 0;
	5021	last_coeffcients[e] = cur_coeff;
	5022	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5023	}
	5024	}
	5025	else{
	5026	coeff_type[e][coeff_index] = 0;
	5027	last_coeffcients[e] = cur_coeff;
	5028	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5029	}
	5030	}
	5031	coeff_index ++;
	5032	}
	5033	double curData;
	5034	double pred;
	5035	double itvNum;
	5036	double diff;
	5037	size_t index = 0;
	5038	size_t block_unpredictable_count = 0;
	5039	double * cur_data_pos = data_pos;
	5040	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5041	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5042	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5043
	5044	curData = *cur_data_pos;
	5045	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5046	diff = curData - pred;
	5047	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5048	if (itvNum < intvCapacity){
	5049	if (diff < 0) itvNum = -itvNum;
	5050	type[index] = (int) (itvNum/2) + intvRadius;
	5051	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5052	//guarantee the compression error bound against the case of machine-epsilon
	5053	if(fabs(curData - pred)>tmp_realPrecision){
	5054	type[index] = 0;
	5055	pred = curData;
	5056	unpredictable_data[block_unpredictable_count ++] = curData;
	5057	}
	5058	}
	5059	else{
	5060	type[index] = 0;
	5061	pred = curData;
	5062	unpredictable_data[block_unpredictable_count ++] = curData;
	5063	}
	5064
	5065	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5066	// assign value to block surfaces
	5067	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5068	}
	5069	index ++;
	5070	cur_data_pos ++;
	5071	}
	5072	cur_data_pos += dim1_offset - current_blockcount_z;
	5073	}
	5074	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5075	}
	5076	/*dealing with the last ii (boundary)*/
	5077	{
	5078	// ii == current_blockcount_x - 1
	5079	size_t ii = current_blockcount_x - 1;
	5080	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5081	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5082	curData = *cur_data_pos;
	5083	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5084	diff = curData - pred;
	5085	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5086	if (itvNum < intvCapacity){
	5087	if (diff < 0) itvNum = -itvNum;
	5088	type[index] = (int) (itvNum/2) + intvRadius;
	5089	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5090	//guarantee the compression error bound against the case of machine-epsilon
	5091	if(fabs(curData - pred)>tmp_realPrecision){
	5092	type[index] = 0;
	5093	pred = curData;
	5094	unpredictable_data[block_unpredictable_count ++] = curData;
	5095	}
	5096	}
	5097	else{
	5098	type[index] = 0;
	5099	pred = curData;
	5100	unpredictable_data[block_unpredictable_count ++] = curData;
	5101	}
	5102
	5103	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5104	// assign value to block surfaces
	5105	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5106	}
	5107	// assign value to next prediction buffer
	5108	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
	5109	index ++;
	5110	cur_data_pos ++;
	5111	}
	5112	cur_data_pos += dim1_offset - current_blockcount_z;
	5113	}
	5114	}
	5115	unpredictable_count = block_unpredictable_count;
	5116	strip_unpredictable_count += unpredictable_count;
	5117	unpredictable_data += unpredictable_count;
	5118	reg_count ++;
	5119	}
	5120	else{
	5121	// use SZ
	5122	// SZ prediction
	5123	unpredictable_count = 0;
	5124	double * cur_pb_pos = pb_pos;
	5125	double * cur_data_pos = data_pos;
	5126	double curData;
	5127	double pred3D;
	5128	double itvNum, diff;
	5129	size_t index = 0;
	5130	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5131	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5132	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5133
	5134	curData = *cur_data_pos;
	5135	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	5136	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	5137	diff = curData - pred3D;
	5138	itvNum = fabs(diff)/realPrecision + 1;
	5139	if (itvNum < intvCapacity_sz){
	5140	if (diff < 0) itvNum = -itvNum;
	5141	type[index] = (int) (itvNum/2) + intvRadius;
	5142	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	5143	//guarantee the compression error bound against the case of machine-epsilon
	5144	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	5145	type[index] = 0;
	5146	*cur_pb_pos = curData;
	5147	unpredictable_data[unpredictable_count ++] = curData;
	5148	}
	5149	}
	5150	else{
	5151	type[index] = 0;
	5152	*cur_pb_pos = curData;
	5153	unpredictable_data[unpredictable_count ++] = curData;
	5154	}
	5155	index ++;
	5156	cur_pb_pos ++;
	5157	cur_data_pos ++;
	5158	}
	5159	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	5160	cur_data_pos += dim1_offset - current_blockcount_z;
	5161	}
	5162	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
	5163	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5164	}
	5165	/*dealing with the last ii (boundary)*/
	5166	{
	5167	// ii == current_blockcount_x - 1
	5168	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5169	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5170
	5171	curData = *cur_data_pos;
	5172	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	5173	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	5174	diff = curData - pred3D;
	5175	itvNum = fabs(diff)/realPrecision + 1;
	5176	if (itvNum < intvCapacity_sz){
	5177	if (diff < 0) itvNum = -itvNum;
	5178	type[index] = (int) (itvNum/2) + intvRadius;
	5179	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	5180	//guarantee the compression error bound against the case of machine-epsilon
	5181	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	5182	type[index] = 0;
	5183	*cur_pb_pos = curData;
	5184	unpredictable_data[unpredictable_count ++] = curData;
	5185	}
	5186	}
	5187	else{
	5188	type[index] = 0;
	5189	*cur_pb_pos = curData;
	5190	unpredictable_data[unpredictable_count ++] = curData;
	5191	}
	5192	// assign value to next prediction buffer
	5193	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
	5194	index ++;
	5195	cur_pb_pos ++;
	5196	cur_data_pos ++;
	5197	}
	5198	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	5199	cur_data_pos += dim1_offset - current_blockcount_z;
	5200	}
	5201	}
	5202	strip_unpredictable_count += unpredictable_count;
	5203	unpredictable_data += unpredictable_count;
	5204	// change indicator
	5205	indicator_pos[k] = 1;
	5206	}// end SZ
	5207
	5208	reg_params_pos ++;
	5209	data_pos += current_blockcount_z;
	5210	pb_pos += current_blockcount_z;
	5211	next_pb_pos += current_blockcount_z;
	5212	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
	5213
	5214	}
	5215
	5216	if(strip_unpredictable_count > max_unpred_count){
	5217	max_unpred_count = strip_unpredictable_count;
	5218	}
	5219	total_unpred += strip_unpredictable_count;
	5220	indicator_pos += num_z;
	5221	}
	5222	double * tmp;
	5223	tmp = cur_pb_buf;
	5224	cur_pb_buf = next_pb_buf;
	5225	next_pb_buf = tmp;
	5226	}
	5227	}
	5228
	5229	free(prediction_buffer_1);
	5230	free(prediction_buffer_2);
	5231
	5232	int stateNum = 2*quantization_intervals;
	5233	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	5234
	5235	size_t nodeCount = 0;
	5236	init(huffmanTree, result_type, num_elements);
	5237	size_t i = 0;
	5238	for (i = 0; i < huffmanTree->stateNum; i++)
	5239	if (huffmanTree->code[i]) nodeCount++;
	5240	nodeCount = nodeCount*2-1;
	5241
	5242	unsigned char *treeBytes;
	5243	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	5244
	5245	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
	5246	// total size metadata # elements real precision intervals nodeCount huffman block index unpredictable count mean unpred size elements
	5247	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
	5248	unsigned char * result_pos = result;
	5249	initRandomAccessBytes(result_pos);
	5250
	5251	result_pos += meta_data_offset;
	5252
	5253	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
	5254	result_pos += exe_params->SZ_SIZE_TYPE;
	5255
	5256	intToBytes_bigEndian(result_pos, block_size);
	5257	result_pos += sizeof(int);
	5258	doubleToBytes(result_pos, realPrecision);
	5259	result_pos += sizeof(double);
	5260	intToBytes_bigEndian(result_pos, quantization_intervals);
	5261	result_pos += sizeof(int);
	5262	intToBytes_bigEndian(result_pos, treeByteSize);
	5263	result_pos += sizeof(int);
	5264	intToBytes_bigEndian(result_pos, nodeCount);
	5265	result_pos += sizeof(int);
	5266	memcpy(result_pos, treeBytes, treeByteSize);
	5267	result_pos += treeByteSize;
	5268	free(treeBytes);
	5269
	5270	memcpy(result_pos, &use_mean, sizeof(unsigned char));
	5271	result_pos += sizeof(unsigned char);
	5272	memcpy(result_pos, &mean, sizeof(double));
	5273	result_pos += sizeof(double);
	5274	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
	5275	result_pos += indicator_size;
	5276
	5277	//convert the lead/mid/resi to byte stream
	5278	if(reg_count > 0){
	5279	for(int e=0; e<4; e++){
	5280	int stateNum = 2*coeff_intvCapacity_sz;
	5281	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	5282	size_t nodeCount = 0;
	5283	init(huffmanTree, coeff_type[e], reg_count);
	5284	size_t i = 0;
	5285	for (i = 0; i < huffmanTree->stateNum; i++)
	5286	if (huffmanTree->code[i]) nodeCount++;
	5287	nodeCount = nodeCount*2-1;
	5288	unsigned char *treeBytes;
	5289	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	5290	doubleToBytes(result_pos, precision[e]);
	5291	result_pos += sizeof(double);
	5292	intToBytes_bigEndian(result_pos, coeff_intvRadius);
	5293	result_pos += sizeof(int);
	5294	intToBytes_bigEndian(result_pos, treeByteSize);
	5295	result_pos += sizeof(int);
	5296	intToBytes_bigEndian(result_pos, nodeCount);
	5297	result_pos += sizeof(int);
	5298	memcpy(result_pos, treeBytes, treeByteSize);
	5299	result_pos += treeByteSize;
	5300	free(treeBytes);
	5301	size_t typeArray_size = 0;
	5302	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
	5303	sizeToBytes(result_pos, typeArray_size);
	5304	result_pos += sizeof(size_t) + typeArray_size;
	5305	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
	5306	result_pos += sizeof(int);
	5307	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
	5308	result_pos += coeff_unpredictable_count[e]*sizeof(double);
	5309	SZ_ReleaseHuffman(huffmanTree);
	5310	}
	5311	}
	5312	free(coeff_result_type);
	5313	free(coeff_unpredictable_data);
	5314
	5315	//record the number of unpredictable data and also store them
	5316	memcpy(result_pos, &total_unpred, sizeof(size_t));
	5317	result_pos += sizeof(size_t);
	5318	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
	5319	result_pos += total_unpred * sizeof(double);
	5320	size_t typeArray_size = 0;
	5321	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
	5322	result_pos += typeArray_size;
	5323	size_t totalEncodeSize = result_pos - result;
	5324	free(indicator);
	5325	free(result_unpredictable_data);
	5326	free(result_type);
	5327	free(reg_params);
	5328
	5329
	5330	SZ_ReleaseHuffman(huffmanTree);
	5331	*comp_size = totalEncodeSize;
	5332	return result;
	5333	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: