Context Navigation

source: thirdparty/SZ/sz/src/sz_double.c @ 9ee2ce3

Revision 9ee2ce3, 186.1 KB checked in by Hal Finkel <hfinkel@…>, 6 years ago (diff)
importing new SZ files
Property mode set to `100644`

Rev	Line
[2c47b73]	1	/**
	2	* @file sz_double.c
	3	* @author Sheng Di and Dingwen Tao
	4	* @date Aug, 2016
	5	* @brief SZ_Init, Compression and Decompression functions
	6	* (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
	7	* See COPYRIGHT in top-level directory.
	8	*/
	9
	10
	11	#include <stdio.h>
	12	#include <stdlib.h>
	13	#include <string.h>
	14	#include <unistd.h>
	15	#include <math.h>
	16	#include "sz.h"
	17	#include "CompressElement.h"
	18	#include "DynamicByteArray.h"
	19	#include "DynamicIntArray.h"
	20	#include "TightDataPointStorageD.h"
	21	#include "sz_double.h"
	22	#include "sz_double_pwr.h"
	23	#include "szd_double.h"
	24	#include "szd_double_pwr.h"
	25	#include "zlib.h"
	26	#include "rw.h"
	27	#include "sz_double_ts.h"
[9ee2ce3]	28	#include "utility.h"
[2c47b73]	29
	30	unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize)
	31	{
	32	outSize = dataLengthsizeof(double);
	33	unsigned char* out = (unsigned char)malloc(dataLengthsizeof(double));
	34	memcpy(out, data, dataLength*sizeof(double));
	35	return out;
	36	}
	37
	38	void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue)
	39	{
	40	short reqExpo = getPrecisionReqLength_double(realPrecision);
	41	*reqLength = 12+radExpo - reqExpo; //radExpo-reqExpo == reqMantiLength
	42	if(*reqLength<12)
	43	*reqLength = 12;
	44	if(*reqLength>64)
	45	{
	46	*reqLength = 64;
	47	*medianValue = 0;
	48	}
	49	}
	50
	51	unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision)
	52	{
	53	size_t i = 0, radiusIndex;
	54	double pred_value = 0, pred_err;
	55	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	56	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	57	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
	58	for(i=2;i<dataLength;i++)
	59	{
	60	if(i%confparams_cpr->sampleDistance==0)
	61	{
	62	//pred_value = 2*oriData[i-1] - oriData[i-2];
	63	pred_value = oriData[i-1];
	64	pred_err = fabs(pred_value - oriData[i]);
	65	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	66	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	67	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	68	intervals[radiusIndex]++;
	69	}
	70	}
	71	//compute the appropriate number
	72	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	73	size_t sum = 0;
	74	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	75	{
	76	sum += intervals[i];
	77	if(sum>targetCount)
	78	break;
	79	}
	80
	81	if(i>=confparams_cpr->maxRangeRadius)
	82	i = confparams_cpr->maxRangeRadius-1;
	83	unsigned int accIntervals = 2*(i+1);
	84	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	85
	86	if(powerOf2<32)
	87	powerOf2 = 32;
	88
	89	free(intervals);
	90	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
	91	return powerOf2;
	92	}
	93
	94	unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision)
	95	{
	96	size_t i,j, index;
	97	size_t radiusIndex;
	98	double pred_value = 0, pred_err;
	99	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	100	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	101	size_t totalSampleSize = (r1-1)*(r2-1)/confparams_cpr->sampleDistance;
	102	for(i=1;i<r1;i++)
	103	{
	104	for(j=1;j<r2;j++)
	105	{
	106	if((i+j)%confparams_cpr->sampleDistance==0)
	107	{
	108	index = i*r2+j;
	109	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
	110	pred_err = fabs(pred_value - oriData[index]);
	111	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	112	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	113	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	114	intervals[radiusIndex]++;
	115	}
	116	}
	117	}
	118	//compute the appropriate number
	119	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	120	size_t sum = 0;
	121	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	122	{
	123	sum += intervals[i];
	124	if(sum>targetCount)
	125	break;
	126	}
	127	if(i>=confparams_cpr->maxRangeRadius)
	128	i = confparams_cpr->maxRangeRadius-1;
	129	unsigned int accIntervals = 2*(i+1);
	130	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	131	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
	132
	133	if(powerOf2<32)
	134	powerOf2 = 32;
	135
	136	free(intervals);
	137	return powerOf2;
	138	}
	139
	140	unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
	141	{
	142	size_t i,j,k, index;
	143	size_t radiusIndex;
	144	size_t r23=r2*r3;
	145	double pred_value = 0, pred_err;
	146	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	147	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	148	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)/confparams_cpr->sampleDistance;
	149	for(i=1;i<r1;i++)
	150	{
	151	for(j=1;j<r2;j++)
	152	{
	153	for(k=1;k<r3;k++)
	154	{
	155	if((i+j+k)%confparams_cpr->sampleDistance==0)
	156	{
	157	index = ir23+jr3+k;
	158	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
	159	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
	160	pred_err = fabs(pred_value - oriData[index]);
	161	radiusIndex = (pred_err/realPrecision+1)/2;
	162	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	163	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	164	intervals[radiusIndex]++;
	165	}
	166	}
	167
	168	}
	169	}
	170	//compute the appropriate number
	171	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	172	size_t sum = 0;
	173	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	174	{
	175	sum += intervals[i];
	176	if(sum>targetCount)
	177	break;
	178	}
	179	if(i>=confparams_cpr->maxRangeRadius)
	180	i = confparams_cpr->maxRangeRadius-1;
	181
	182	unsigned int accIntervals = 2*(i+1);
	183	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	184
	185	if(powerOf2<32)
	186	powerOf2 = 32;
	187
	188	free(intervals);
	189	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
	190	return powerOf2;
	191	}
	192
	193	unsigned int optimize_intervals_double_4D(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision)
	194	{
	195	size_t i,j,k,l, index;
	196	size_t radiusIndex;
	197	size_t r234=r2r3r4;
	198	size_t r34=r3*r4;
	199	double pred_value = 0, pred_err;
	200	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	201	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	202	size_t totalSampleSize = (r1-1)(r2-1)(r3-1)*(r4-1)/confparams_cpr->sampleDistance;
	203	for(i=1;i<r1;i++)
	204	{
	205	for(j=1;j<r2;j++)
	206	{
	207	for(k=1;k<r3;k++)
	208	{
	209	for (l=1;l<r4;l++)
	210	{
	211	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
	212	{
	213	index = ir234+jr34+k*r4+l;
	214	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r34]
	215	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
	216	pred_err = fabs(pred_value - oriData[index]);
	217	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	218	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	219	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	220	intervals[radiusIndex]++;
	221	}
	222	}
	223	}
	224	}
	225	}
	226	//compute the appropriate number
	227	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	228	size_t sum = 0;
	229	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	230	{
	231	sum += intervals[i];
	232	if(sum>targetCount)
	233	break;
	234	}
	235	if(i>=confparams_cpr->maxRangeRadius)
	236	i = confparams_cpr->maxRangeRadius-1;
	237
	238	unsigned int accIntervals = 2*(i+1);
	239	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	240
	241	if(powerOf2<32)
	242	powerOf2 = 32;
	243
	244	free(intervals);
	245	return powerOf2;
	246	}
	247
	248	TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData,
	249	size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d)
	250	{
	251	#ifdef HAVE_TIMECMPR
	252	double* decData = NULL;
	253	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	254	decData = (double*)(multisteps->hist_data);
	255	#endif
	256
	257	unsigned int quantization_intervals;
	258	if(exe_params->optQuantMode==1)
	259	quantization_intervals = optimize_intervals_double_1D_opt(oriData, dataLength, realPrecision);
	260	else
	261	quantization_intervals = exe_params->intvCapacity;
	262	updateQuantizationInfo(quantization_intervals);
	263
	264	size_t i;
	265	int reqLength;
	266	double medianValue = medianValue_d;
	267	short radExpo = getExponent_double(valueRangeSize/2);
	268
	269	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	270
	271	int* type = (int) malloc(dataLengthsizeof(int));
	272
	273	double* spaceFillingValue = oriData; //
	274
	275	DynamicIntArray *exactLeadNumArray;
	276	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	277
	278	DynamicByteArray *exactMidByteArray;
	279	new_DBA(&exactMidByteArray, DynArrayInitLen);
	280
	281	DynamicIntArray *resiBitArray;
	282	new_DIA(&resiBitArray, DynArrayInitLen);
	283
	284	unsigned char preDataBytes[8];
	285	longToBytes_bigEndian(preDataBytes, 0);
	286
	287	int reqBytesLength = reqLength/8;
	288	int resiBitsLength = reqLength%8;
	289	double last3CmprsData[3] = {0};
	290
	291	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	292	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	293
	294	//add the first data
	295	type[0] = 0;
	296	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	297	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	298	memcpy(preDataBytes,vce->curBytes,8);
	299	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	300	listAdd_double(last3CmprsData, vce->data);
	301	#ifdef HAVE_TIMECMPR
	302	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	303	decData[0] = vce->data;
	304	#endif
	305
	306	//add the second data
	307	type[1] = 0;
	308	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	309	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	310	memcpy(preDataBytes,vce->curBytes,8);
	311	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	312	listAdd_double(last3CmprsData, vce->data);
	313	#ifdef HAVE_TIMECMPR
	314	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	315	decData[1] = vce->data;
	316	#endif
	317	int state;
	318	double checkRadius;
	319	double curData;
	320	double pred;
	321	double predAbsErr;
	322	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
	323	double interval = 2*realPrecision;
	324
	325	for(i=2;i<dataLength;i++)
	326	{
	327	//printf("%.30G\n",last3CmprsData[0]);
	328	curData = spaceFillingValue[i];
	329	//pred = 2*last3CmprsData[0] - last3CmprsData[1];
	330	pred = last3CmprsData[0];
	331	predAbsErr = fabs(curData - pred);
[9ee2ce3]	332	if(predAbsErr<checkRadius)
[2c47b73]	333	{
	334	state = (predAbsErr/realPrecision+1)/2;
	335	if(curData>=pred)
	336	{
	337	type[i] = exe_params->intvRadius+state;
	338	pred = pred + state*interval;
	339	}
	340	else //curData<pred
	341	{
	342	type[i] = exe_params->intvRadius-state;
	343	pred = pred - state*interval;
	344	}
	345	listAdd_double(last3CmprsData, pred);
	346	#ifdef HAVE_TIMECMPR
	347	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	348	decData[i] = pred;
	349	#endif
	350	continue;
	351	}
	352
	353	//unpredictable data processing
	354	type[i] = 0;
	355	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	356	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	357	memcpy(preDataBytes,vce->curBytes,8);
	358	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	359
	360	listAdd_double(last3CmprsData, vce->data);
	361	#ifdef HAVE_TIMECMPR
	362	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	363	decData[i] = vce->data;
	364	#endif
	365
	366	}//end of for
	367
	368	int exactDataNum = exactLeadNumArray->size;
	369
	370	TightDataPointStorageD* tdps;
	371
	372	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	373	type, exactMidByteArray->array, exactMidByteArray->size,
	374	exactLeadNumArray->array,
	375	resiBitArray->array, resiBitArray->size,
	376	resiBitsLength,
	377	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	378
	379	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	380	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	381
	382	//free memory
	383	free_DIA(exactLeadNumArray);
	384	free_DIA(resiBitArray);
	385	free(type);
	386	free(vce);
	387	free(lce);
	388	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	389
	390	return tdps;
	391	}
	392
	393	void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, TightDataPointStorageD* tdps,
	394	unsigned char** newByteData, size_t *outSize)
	395	{
	396	int doubleSize = sizeof(double);
	397	size_t k = 0, i;
	398	tdps->isLossless = 1;
	399	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength;
	400	newByteData = (unsigned char)malloc(totalByteLength);
	401
	402	unsigned char dsLengthBytes[8];
	403	for (i = 0; i < 3; i++)//3
	404	(*newByteData)[k++] = versionNumber[i];
	405
	406	if(exe_params->SZ_SIZE_TYPE==4)//1
	407	(*newByteData)[k++] = 16; //00010000
	408	else
	409	(*newByteData)[k++] = 80; //01010000: 01000000 indicates the SZ_SIZE_TYPE=8
	410
	411	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
	412	k = k + MetaDataByteLength;
	413
	414	sizeToBytes(dsLengthBytes,dataLength);
	415	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//ST: 4 or 8
	416	(*newByteData)[k++] = dsLengthBytes[i];
	417
	418	if(sysEndianType==BIG_ENDIAN_SYSTEM)
	419	memcpy((newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLengthdoubleSize);
	420	else
	421	{
	422	unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
	423	for(i=0;i<dataLength;i++,p+=doubleSize)
	424	doubleToBytes(p, oriData[i]);
	425	}
	426	*outSize = totalByteLength;
	427	}
	428
	429
	430	char SZ_compress_args_double_NoCkRngeNoGzip_1D(unsigned char** newByteData, double *oriData,
	431	size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d)
	432	{
	433	char compressionType = 0;
	434	TightDataPointStorageD* tdps = NULL;
	435	#ifdef HAVE_TIMECMPR
	436	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	437	{
	438	int timestep = sz_tsc->currentStep;
	439	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	440	{
	441	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
	442	compressionType = 1; //time-series based compression
	443	}
	444	else
	445	{
	446	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
	447	compressionType = 0; //snapshot-based compression
	448	multisteps->lastSnapshotStep = timestep;
	449	}
	450	}
	451	else
	452	#endif
	453	tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
	454
	455	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	456
	457	if(outSize>dataLengthsizeof(double))
	458	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	459
	460	free_TightDataPointStorageD(tdps);
	461	return compressionType;
	462	}
	463
	464	TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_d)
	465	{
	466	#ifdef HAVE_TIMECMPR
	467	double* decData = NULL;
	468	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	469	decData = (double*)(multisteps->hist_data);
	470	#endif
	471
	472	unsigned int quantization_intervals;
	473	if(exe_params->optQuantMode==1)
	474	{
	475	quantization_intervals = optimize_intervals_double_2D_opt(oriData, r1, r2, realPrecision);
	476	updateQuantizationInfo(quantization_intervals);
	477	}
	478	else
	479	quantization_intervals = exe_params->intvCapacity;
	480	size_t i,j;
	481	int reqLength;
	482	double pred1D, pred2D;
	483	double diff = 0.0;
	484	double itvNum = 0;
	485	double P0, P1;
	486
	487	size_t dataLength = r1*r2;
	488
	489	P0 = (double)malloc(r2sizeof(double));
	490	memset(P0, 0, r2*sizeof(double));
	491	P1 = (double)malloc(r2sizeof(double));
	492	memset(P1, 0, r2*sizeof(double));
	493
	494	double medianValue = medianValue_d;
	495	short radExpo = getExponent_double(valueRangeSize/2);
	496	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	497
	498	int* type = (int) malloc(dataLengthsizeof(int));
	499	//type[dataLength]=0;
	500
	501	double* spaceFillingValue = oriData; //
	502
	503	DynamicIntArray *exactLeadNumArray;
	504	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	505
	506	DynamicByteArray *exactMidByteArray;
	507	new_DBA(&exactMidByteArray, DynArrayInitLen);
	508
	509	DynamicIntArray *resiBitArray;
	510	new_DIA(&resiBitArray, DynArrayInitLen);
	511
	512	type[0] = 0;
	513
	514	unsigned char preDataBytes[8];
	515	longToBytes_bigEndian(preDataBytes, 0);
	516
	517	int reqBytesLength = reqLength/8;
	518	int resiBitsLength = reqLength%8;
	519
	520	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	521	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	522
	523	/* Process Row-0 data 0*/
	524	type[0] = 0;
	525	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	526	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	527	memcpy(preDataBytes,vce->curBytes,8);
	528	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	529	P1[0] = vce->data;
	530	#ifdef HAVE_TIMECMPR
	531	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	532	decData[0] = vce->data;
	533	#endif
	534
	535	/* Process Row-0 data 1*/
	536	pred1D = P1[0];
	537	diff = spaceFillingValue[1] - pred1D;
	538
	539	itvNum = fabs(diff)/realPrecision + 1;
	540
	541	if (itvNum < exe_params->intvCapacity)
	542	{
	543	if (diff < 0) itvNum = -itvNum;
	544	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	545	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	546	}
	547	else
	548	{
	549	type[1] = 0;
	550	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	551	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	552	memcpy(preDataBytes,vce->curBytes,8);
	553	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	554	P1[1] = vce->data;
	555	}
	556	#ifdef HAVE_TIMECMPR
	557	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	558	decData[1] = P1[1];
	559	#endif
	560
	561	/* Process Row-0 data 2 --> data r2-1 */
	562	for (j = 2; j < r2; j++)
	563	{
	564	pred1D = 2*P1[j-1] - P1[j-2];
	565	diff = spaceFillingValue[j] - pred1D;
	566
	567	itvNum = fabs(diff)/realPrecision + 1;
	568
	569	if (itvNum < exe_params->intvCapacity)
	570	{
	571	if (diff < 0) itvNum = -itvNum;
	572	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	573	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	574	}
	575	else
	576	{
	577	type[j] = 0;
	578	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	579	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	580	memcpy(preDataBytes,vce->curBytes,8);
	581	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	582	P1[j] = vce->data;
	583	}
	584	#ifdef HAVE_TIMECMPR
	585	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	586	decData[j] = P1[j];
	587	#endif
	588	}
	589
	590	/* Process Row-1 --> Row-r1-1 */
	591	size_t index;
	592	for (i = 1; i < r1; i++)
	593	{
	594	/* Process row-i data 0 */
	595	index = i*r2;
	596	pred1D = P1[0];
	597	diff = spaceFillingValue[index] - pred1D;
	598
	599	itvNum = fabs(diff)/realPrecision + 1;
	600
	601	if (itvNum < exe_params->intvCapacity)
	602	{
	603	if (diff < 0) itvNum = -itvNum;
	604	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	605	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	606	}
	607	else
	608	{
	609	type[index] = 0;
	610	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	611	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	612	memcpy(preDataBytes,vce->curBytes,8);
	613	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	614	P0[0] = vce->data;
	615	}
	616	#ifdef HAVE_TIMECMPR
	617	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	618	decData[index] = P0[0];
	619	#endif
	620
	621	/* Process row-i data 1 --> r2-1*/
	622	for (j = 1; j < r2; j++)
	623	{
	624	index = i*r2+j;
	625	pred2D = P0[j-1] + P1[j] - P1[j-1];
	626
	627	diff = spaceFillingValue[index] - pred2D;
	628
	629	itvNum = fabs(diff)/realPrecision + 1;
	630
	631	if (itvNum < exe_params->intvCapacity)
	632	{
	633	if (diff < 0) itvNum = -itvNum;
	634	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	635	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	636	}
	637	else
	638	{
	639	type[index] = 0;
	640	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	641	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	642	memcpy(preDataBytes,vce->curBytes,8);
	643	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	644	P0[j] = vce->data;
	645	}
	646	#ifdef HAVE_TIMECMPR
	647	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	648	decData[index] = P0[j];
	649	#endif
	650	}
	651
	652	double *Pt;
	653	Pt = P1;
	654	P1 = P0;
	655	P0 = Pt;
	656	}
	657
	658	if(r2!=1)
	659	free(P0);
	660	free(P1);
	661	size_t exactDataNum = exactLeadNumArray->size;
	662
	663	TightDataPointStorageD* tdps;
	664
	665	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	666	type, exactMidByteArray->array, exactMidByteArray->size,
	667	exactLeadNumArray->array,
	668	resiBitArray->array, resiBitArray->size,
	669	resiBitsLength,
	670	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	671
	672	/* int sum =0;
	673	for(i=0;i<dataLength;i++)
	674	if(type[i]==0) sum++;
	675	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);
	676
	677	for(i=0;i<dataLength;i++)
	678	printf("%d ", type[i]);
	679	printf("\n");*/
	680
	681	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	682	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	683
	684	// for(i = 3800;i<3844;i++)
	685	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
	686
	687	//free memory
	688	free_DIA(exactLeadNumArray);
	689	free_DIA(resiBitArray);
	690	free(type);
	691	free(vce);
	692	free(lce);
	693	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	694
	695	return tdps;
	696	}
	697
	698	/**
	699	*
	700	* Note: @r1 is high dimension
	701	* @r2 is low dimension
	702	* */
	703	char SZ_compress_args_double_NoCkRngeNoGzip_2D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
	704	{
	705	size_t dataLength = r1*r2;
	706	char compressionType = 0;
	707	TightDataPointStorageD* tdps = NULL;
	708	#ifdef HAVE_TIMECMPR
	709	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	710	{
	711	int timestep = sz_tsc->currentStep;
	712	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	713	{
	714	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
	715	compressionType = 1; //time-series based compression
	716	}
	717	else
	718	{
	719	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
	720	compressionType = 0; //snapshot-based compression
	721	multisteps->lastSnapshotStep = timestep;
	722	}
	723	}
	724	else
	725	#endif
	726	tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
	727
	728	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	729
	730	if(outSize>dataLengthsizeof(double))
	731	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	732
	733	free_TightDataPointStorageD(tdps);
	734	return compressionType;
	735	}
	736
	737	TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_d)
	738	{
	739	#ifdef HAVE_TIMECMPR
	740	double* decData = NULL;
	741	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	742	decData = (double*)(multisteps->hist_data);
	743	#endif
	744
	745	unsigned int quantization_intervals;
	746	if(exe_params->optQuantMode==1)
	747	{
	748	quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision);
	749	updateQuantizationInfo(quantization_intervals);
	750	}
	751	else
	752	quantization_intervals = exe_params->intvCapacity;
	753	size_t i,j,k;
	754	int reqLength;
	755	double pred1D, pred2D, pred3D;
	756	double diff = 0.0;
	757	double itvNum = 0;
	758	double P0, P1;
	759
	760	size_t dataLength = r1r2r3;
	761
	762	size_t r23 = r2*r3;
	763
	764	P0 = (double)malloc(r23sizeof(double));
	765	P1 = (double)malloc(r23sizeof(double));
	766
	767	double medianValue = medianValue_d;
	768	short radExpo = getExponent_double(valueRangeSize/2);
	769	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	770
	771	int* type = (int) malloc(dataLengthsizeof(int));
	772	//type[dataLength]=0;
	773
	774	double* spaceFillingValue = oriData; //
	775
	776	DynamicIntArray *exactLeadNumArray;
	777	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	778
	779	DynamicByteArray *exactMidByteArray;
	780	new_DBA(&exactMidByteArray, DynArrayInitLen);
	781
	782	DynamicIntArray *resiBitArray;
	783	new_DIA(&resiBitArray, DynArrayInitLen);
	784
	785	type[0] = 0;
	786
	787	unsigned char preDataBytes[8];
	788	longToBytes_bigEndian(preDataBytes, 0);
	789
	790	int reqBytesLength = reqLength/8;
	791	int resiBitsLength = reqLength%8;
	792
	793	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	794	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	795
	796
	797	/////////////////////////// Process layer-0 ///////////////////////////
	798	/* Process Row-0 data 0*/
	799	type[0] = 0;
	800	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	801	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	802	memcpy(preDataBytes,vce->curBytes,8);
	803	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	804	P1[0] = vce->data;
	805	#ifdef HAVE_TIMECMPR
	806	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	807	decData[0] = P1[0];
	808	#endif
	809
	810	/* Process Row-0 data 1*/
	811	pred1D = P1[0];
	812	diff = spaceFillingValue[1] - pred1D;
	813
	814	itvNum = fabs(diff)/realPrecision + 1;
	815
	816	if (itvNum < exe_params->intvCapacity)
	817	{
	818	if (diff < 0) itvNum = -itvNum;
	819	type[1] = (int) (itvNum/2) + exe_params->intvRadius;
	820	P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
	821	}
	822	else
	823	{
	824	type[1] = 0;
	825	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	826	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	827	memcpy(preDataBytes,vce->curBytes,8);
	828	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	829	P1[1] = vce->data;
	830	}
	831	#ifdef HAVE_TIMECMPR
	832	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	833	decData[1] = P1[1];
	834	#endif
	835
	836	/* Process Row-0 data 2 --> data r3-1 */
	837	for (j = 2; j < r3; j++)
	838	{
	839	pred1D = 2*P1[j-1] - P1[j-2];
	840	diff = spaceFillingValue[j] - pred1D;
	841
	842	itvNum = fabs(diff)/realPrecision + 1;
	843
	844	if (itvNum < exe_params->intvCapacity)
	845	{
	846	if (diff < 0) itvNum = -itvNum;
	847	type[j] = (int) (itvNum/2) + exe_params->intvRadius;
	848	P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
	849	}
	850	else
	851	{
	852	type[j] = 0;
	853	compressSingleDoubleValue(vce, spaceFillingValue[j], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	854	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	855	memcpy(preDataBytes,vce->curBytes,8);
	856	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	857	P1[j] = vce->data;
	858	}
	859	#ifdef HAVE_TIMECMPR
	860	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	861	decData[j] = P1[j];
	862	#endif
	863	}
	864
	865	/* Process Row-1 --> Row-r2-1 */
	866	size_t index;
	867	for (i = 1; i < r2; i++)
	868	{
	869	/* Process row-i data 0 */
	870	index = i*r3;
	871	pred1D = P1[index-r3];
	872	diff = spaceFillingValue[index] - pred1D;
	873
	874	itvNum = fabs(diff)/realPrecision + 1;
	875
	876	if (itvNum < exe_params->intvCapacity)
	877	{
	878	if (diff < 0) itvNum = -itvNum;
	879	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	880	P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	881	}
	882	else
	883	{
	884	type[index] = 0;
	885	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	886	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	887	memcpy(preDataBytes,vce->curBytes,8);
	888	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	889	P1[index] = vce->data;
	890	}
	891	#ifdef HAVE_TIMECMPR
	892	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	893	decData[index] = P1[index];
	894	#endif
	895
	896	/* Process row-i data 1 --> data r3-1*/
	897	for (j = 1; j < r3; j++)
	898	{
	899	index = i*r3+j;
	900	pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
	901
	902	diff = spaceFillingValue[index] - pred2D;
	903
	904	itvNum = fabs(diff)/realPrecision + 1;
	905
	906	if (itvNum < exe_params->intvCapacity)
	907	{
	908	if (diff < 0) itvNum = -itvNum;
	909	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	910	P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	911	}
	912	else
	913	{
	914	type[index] = 0;
	915	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	916	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	917	memcpy(preDataBytes,vce->curBytes,8);
	918	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	919	P1[index] = vce->data;
	920	}
	921	#ifdef HAVE_TIMECMPR
	922	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	923	decData[index] = P1[index];
	924	#endif
	925	}
	926	}
	927
	928
	929	/////////////////////////// Process layer-1 --> layer-r1-1 ///////////////////////////
	930
	931	for (k = 1; k < r1; k++)
	932	{
	933	/* Process Row-0 data 0*/
	934	index = k*r23;
	935	pred1D = P1[0];
	936	diff = spaceFillingValue[index] - pred1D;
	937
	938	itvNum = fabs(diff)/realPrecision + 1;
	939
	940	if (itvNum < exe_params->intvCapacity)
	941	{
	942	if (diff < 0) itvNum = -itvNum;
	943	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	944	P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	945	}
	946	else
	947	{
	948	type[index] = 0;
	949	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	950	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	951	memcpy(preDataBytes,vce->curBytes,8);
	952	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	953	P0[0] = vce->data;
	954	}
	955	#ifdef HAVE_TIMECMPR
	956	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	957	decData[index] = P0[0];
	958	#endif
	959
	960	/* Process Row-0 data 1 --> data r3-1 */
	961	for (j = 1; j < r3; j++)
	962	{
	963	//index = kr2r3+j;
	964	index ++;
	965	pred2D = P0[j-1] + P1[j] - P1[j-1];
	966	diff = spaceFillingValue[index] - pred2D;
	967
	968	itvNum = fabs(diff)/realPrecision + 1;
	969
	970	if (itvNum < exe_params->intvCapacity)
	971	{
	972	if (diff < 0) itvNum = -itvNum;
	973	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	974	P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	975	}
	976	else
	977	{
	978	type[index] = 0;
	979	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	980	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	981	memcpy(preDataBytes,vce->curBytes,8);
	982	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	983	P0[j] = vce->data;
	984	}
	985	#ifdef HAVE_TIMECMPR
	986	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	987	decData[index] = P0[j];
	988	#endif
	989	}
	990
	991	/* Process Row-1 --> Row-r2-1 */
	992	size_t index2D;
	993	for (i = 1; i < r2; i++)
	994	{
	995	/* Process Row-i data 0 */
	996	index = kr23 + ir3;
	997	index2D = i*r3;
	998	pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
	999	diff = spaceFillingValue[index] - pred2D;
	1000
	1001	itvNum = fabs(diff)/realPrecision + 1;
	1002
	1003	if (itvNum < exe_params->intvCapacity)
	1004	{
	1005	if (diff < 0) itvNum = -itvNum;
	1006	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1007	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1008	}
	1009	else
	1010	{
	1011	type[index] = 0;
	1012	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1013	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1014	memcpy(preDataBytes,vce->curBytes,8);
	1015	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1016	P0[index2D] = vce->data;
	1017	}
	1018	#ifdef HAVE_TIMECMPR
	1019	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1020	decData[index] = P0[index2D];
	1021	#endif
	1022
	1023	/* Process Row-i data 1 --> data r3-1 */
	1024	for (j = 1; j < r3; j++)
	1025	{
	1026	//index = kr2r3 + i*r3 + j;
	1027	index ++;
	1028	index2D = i*r3 + j;
	1029	pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
	1030	diff = spaceFillingValue[index] - pred3D;
	1031
	1032	itvNum = fabs(diff)/realPrecision + 1;
	1033
	1034	if (itvNum < exe_params->intvCapacity)
	1035	{
	1036	if (diff < 0) itvNum = -itvNum;
	1037	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1038	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1039	}
	1040	else
	1041	{
	1042	type[index] = 0;
	1043	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1044	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1045	memcpy(preDataBytes,vce->curBytes,8);
	1046	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1047	P0[index2D] = vce->data;
	1048	}
	1049	#ifdef HAVE_TIMECMPR
	1050	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1051	decData[index] = P0[index2D];
	1052	#endif
	1053	}
	1054	}
	1055
	1056	double *Pt;
	1057	Pt = P1;
	1058	P1 = P0;
	1059	P0 = Pt;
	1060	}
	1061	if(r23!=1)
	1062	free(P0);
	1063	free(P1);
	1064	size_t exactDataNum = exactLeadNumArray->size;
	1065
	1066	TightDataPointStorageD* tdps;
	1067
	1068	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	1069	type, exactMidByteArray->array, exactMidByteArray->size,
	1070	exactLeadNumArray->array,
	1071	resiBitArray->array, resiBitArray->size,
	1072	resiBitsLength,
	1073	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	1074
	1075	// printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
	1076	// exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
	1077
	1078	// for(i = 3800;i<3844;i++)
	1079	// printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
	1080
	1081	//free memory
	1082	free_DIA(exactLeadNumArray);
	1083	free_DIA(resiBitArray);
	1084	free(type);
	1085	free(vce);
	1086	free(lce);
	1087	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	1088
	1089	return tdps;
	1090	}
	1091
	1092
	1093	char SZ_compress_args_double_NoCkRngeNoGzip_3D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
	1094	{
	1095	size_t dataLength = r1r2r3;
	1096	char compressionType = 0;
	1097	TightDataPointStorageD* tdps = NULL;
	1098	#ifdef HAVE_TIMECMPR
	1099	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1100	{
	1101	int timestep = sz_tsc->currentStep;
	1102	if(timestep % confparams_cpr->snapshotCmprStep != 0)
	1103	{
	1104	tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
	1105	compressionType = 1; //time-series based compression
	1106	}
	1107	else
	1108	{
	1109	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
	1110	compressionType = 0; //snapshot-based compression
	1111	multisteps->lastSnapshotStep = timestep;
	1112	}
	1113	}
	1114	else
	1115	#endif
	1116	tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
	1117
	1118	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	1119
	1120	if(outSize>dataLengthsizeof(double))
	1121	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1122
	1123	free_TightDataPointStorageD(tdps);
	1124	return compressionType;
	1125	}
	1126
	1127	TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, double valueRangeSize, double medianValue_d)
	1128	{
	1129	unsigned int quantization_intervals;
	1130	if(exe_params->optQuantMode==1)
	1131	{
	1132	quantization_intervals = optimize_intervals_double_4D(oriData, r1, r2, r3, r4, realPrecision);
	1133	updateQuantizationInfo(quantization_intervals);
	1134	}
	1135	else
	1136	quantization_intervals = exe_params->intvCapacity;
	1137
	1138	size_t i,j,k;
	1139	int reqLength;
	1140	double pred1D, pred2D, pred3D;
	1141	double diff = 0.0;
	1142	double itvNum = 0;
	1143	double P0, P1;
	1144
	1145	size_t dataLength = r1r2r3*r4;
	1146
	1147	size_t r234 = r2r3r4;
	1148	size_t r34 = r3*r4;
	1149
	1150	P0 = (double)malloc(r34sizeof(double));
	1151	P1 = (double)malloc(r34sizeof(double));
	1152
	1153	double medianValue = medianValue_d;
	1154	short radExpo = getExponent_double(valueRangeSize/2);
	1155	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	1156
	1157	int* type = (int) malloc(dataLengthsizeof(int));
	1158
	1159	double* spaceFillingValue = oriData; //
	1160
	1161	DynamicIntArray *exactLeadNumArray;
	1162	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	1163
	1164	DynamicByteArray *exactMidByteArray;
	1165	new_DBA(&exactMidByteArray, DynArrayInitLen);
	1166
	1167	DynamicIntArray *resiBitArray;
	1168	new_DIA(&resiBitArray, DynArrayInitLen);
	1169
	1170	unsigned char preDataBytes[8];
	1171	longToBytes_bigEndian(preDataBytes, 0);
	1172
	1173	int reqBytesLength = reqLength/8;
	1174	int resiBitsLength = reqLength%8;
	1175
	1176	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	1177	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	1178
	1179
	1180	size_t l;
	1181	for (l = 0; l < r1; l++)
	1182	{
	1183
	1184	/////////////////////////// Process layer-0 ///////////////////////////
	1185	/* Process Row-0 data 0*/
	1186	size_t index = l*r234;
	1187	size_t index2D = 0;
	1188
	1189	type[index] = 0;
	1190	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1191	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1192	memcpy(preDataBytes,vce->curBytes,8);
	1193	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1194	P1[index2D] = vce->data;
	1195
	1196	/* Process Row-0 data 1*/
	1197	index = l*r234+1;
	1198	index2D = 1;
	1199
	1200	pred1D = P1[index2D-1];
	1201	diff = spaceFillingValue[index] - pred1D;
	1202
	1203	itvNum = fabs(diff)/realPrecision + 1;
	1204
	1205	if (itvNum < exe_params->intvCapacity)
	1206	{
	1207	if (diff < 0) itvNum = -itvNum;
	1208	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1209	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1210	}
	1211	else
	1212	{
	1213	type[index] = 0;
	1214	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1215	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1216	memcpy(preDataBytes,vce->curBytes,8);
	1217	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1218	P1[index2D] = vce->data;
	1219	}
	1220
	1221	/* Process Row-0 data 2 --> data r4-1 */
	1222	for (j = 2; j < r4; j++)
	1223	{
	1224	index = l*r234+j;
	1225	index2D = j;
	1226
	1227	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	1228	diff = spaceFillingValue[index] - pred1D;
	1229
	1230	itvNum = fabs(diff)/realPrecision + 1;
	1231
	1232	if (itvNum < exe_params->intvCapacity)
	1233	{
	1234	if (diff < 0) itvNum = -itvNum;
	1235	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1236	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1237	}
	1238	else
	1239	{
	1240	type[index] = 0;
	1241	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1242	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1243	memcpy(preDataBytes,vce->curBytes,8);
	1244	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1245	P1[index2D] = vce->data;
	1246	}
	1247	}
	1248
	1249	/* Process Row-1 --> Row-r3-1 */
	1250	for (i = 1; i < r3; i++)
	1251	{
	1252	/* Process row-i data 0 */
	1253	index = lr234+ir4;
	1254	index2D = i*r4;
	1255
	1256	pred1D = P1[index2D-r4];
	1257	diff = spaceFillingValue[index] - pred1D;
	1258
	1259	itvNum = fabs(diff)/realPrecision + 1;
	1260
	1261	if (itvNum < exe_params->intvCapacity)
	1262	{
	1263	if (diff < 0) itvNum = -itvNum;
	1264	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1265	P1[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1266	}
	1267	else
	1268	{
	1269	type[index] = 0;
	1270	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1271	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1272	memcpy(preDataBytes,vce->curBytes,8);
	1273	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1274	P1[index2D] = vce->data;
	1275	}
	1276
	1277	/* Process row-i data 1 --> data r4-1*/
	1278	for (j = 1; j < r4; j++)
	1279	{
	1280	index = lr234+ir4+j;
	1281	index2D = i*r4+j;
	1282
	1283	pred2D = P1[index2D-1] + P1[index2D-r4] - P1[index2D-r4-1];
	1284
	1285	diff = spaceFillingValue[index] - pred2D;
	1286
	1287	itvNum = fabs(diff)/realPrecision + 1;
	1288
	1289	if (itvNum < exe_params->intvCapacity)
	1290	{
	1291	if (diff < 0) itvNum = -itvNum;
	1292	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1293	P1[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1294	}
	1295	else
	1296	{
	1297	type[index] = 0;
	1298	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1299	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1300	memcpy(preDataBytes,vce->curBytes,8);
	1301	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1302	P1[index2D] = vce->data;
	1303	}
	1304	}
	1305	}
	1306
	1307
	1308	/////////////////////////// Process layer-1 --> layer-r2-1 ///////////////////////////
	1309
	1310	for (k = 1; k < r2; k++)
	1311	{
	1312	/* Process Row-0 data 0*/
	1313	index = lr234+kr34;
	1314	index2D = 0;
	1315
	1316	pred1D = P1[index2D];
	1317	diff = spaceFillingValue[index] - pred1D;
	1318
	1319	itvNum = fabs(diff)/realPrecision + 1;
	1320
	1321	if (itvNum < exe_params->intvCapacity)
	1322	{
	1323	if (diff < 0) itvNum = -itvNum;
	1324	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1325	P0[index2D] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1326	}
	1327	else
	1328	{
	1329	type[index] = 0;
	1330	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1331	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1332	memcpy(preDataBytes,vce->curBytes,8);
	1333	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1334	P0[index2D] = vce->data;
	1335	}
	1336
	1337
	1338	/* Process Row-0 data 1 --> data r4-1 */
	1339	for (j = 1; j < r4; j++)
	1340	{
	1341	index = lr234+kr34+j;
	1342	index2D = j;
	1343
	1344	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	1345	diff = spaceFillingValue[index] - pred2D;
	1346
	1347	itvNum = fabs(diff)/realPrecision + 1;
	1348
	1349	if (itvNum < exe_params->intvCapacity)
	1350	{
	1351	if (diff < 0) itvNum = -itvNum;
	1352	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1353	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1354	}
	1355	else
	1356	{
	1357	type[index] = 0;
	1358	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1359	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1360	memcpy(preDataBytes,vce->curBytes,8);
	1361	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1362	P0[index2D] = vce->data;
	1363	}
	1364	}
	1365
	1366	/* Process Row-1 --> Row-r3-1 */
	1367	for (i = 1; i < r3; i++)
	1368	{
	1369	/* Process Row-i data 0 */
	1370	index = lr234+kr34+i*r4;
	1371	index2D = i*r4;
	1372
	1373	pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4];
	1374	diff = spaceFillingValue[index] - pred2D;
	1375
	1376	itvNum = fabs(diff)/realPrecision + 1;
	1377
	1378	if (itvNum < exe_params->intvCapacity)
	1379	{
	1380	if (diff < 0) itvNum = -itvNum;
	1381	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1382	P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1383	}
	1384	else
	1385	{
	1386	type[index] = 0;
	1387	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1388	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1389	memcpy(preDataBytes,vce->curBytes,8);
	1390	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1391	P0[index2D] = vce->data;
	1392	}
	1393
	1394	/* Process Row-i data 1 --> data r4-1 */
	1395	for (j = 1; j < r4; j++)
	1396	{
	1397	index = lr234+kr34+i*r4+j;
	1398	index2D = i*r4+j;
	1399
	1400	pred3D = P0[index2D-1] + P0[index2D-r4]+ P1[index2D] - P0[index2D-r4-1] - P1[index2D-r4] - P1[index2D-1] + P1[index2D-r4-1];
	1401	diff = spaceFillingValue[index] - pred3D;
	1402
	1403
	1404	itvNum = fabs(diff)/realPrecision + 1;
	1405
	1406	if (itvNum < exe_params->intvCapacity)
	1407	{
	1408	if (diff < 0) itvNum = -itvNum;
	1409	type[index] = (int) (itvNum/2) + exe_params->intvRadius;
	1410	P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
	1411	}
	1412	else
	1413	{
	1414	type[index] = 0;
	1415	compressSingleDoubleValue(vce, spaceFillingValue[index], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	1416	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	1417	memcpy(preDataBytes,vce->curBytes,8);
	1418	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	1419	P0[index2D] = vce->data;
	1420	}
	1421	}
	1422	}
	1423
	1424	double *Pt;
	1425	Pt = P1;
	1426	P1 = P0;
	1427	P0 = Pt;
	1428	}
	1429	}
	1430
	1431	free(P0);
	1432	free(P1);
	1433	size_t exactDataNum = exactLeadNumArray->size;
	1434
	1435	TightDataPointStorageD* tdps;
	1436
	1437	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	1438	type, exactMidByteArray->array, exactMidByteArray->size,
	1439	exactLeadNumArray->array,
	1440	resiBitArray->array, resiBitArray->size,
	1441	resiBitsLength,
	1442	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	1443
	1444	//free memory
	1445	free_DIA(exactLeadNumArray);
	1446	free_DIA(resiBitArray);
	1447	free(type);
	1448	free(vce);
	1449	free(lce);
	1450	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	1451
	1452	return tdps;
	1453	}
	1454
	1455
	1456	char SZ_compress_args_double_NoCkRngeNoGzip_4D(unsigned char** newByteData, double oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d)
	1457	{
	1458	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, medianValue_d);
	1459
	1460	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
	1461
	1462	size_t dataLength = r1r2r3*r4;
	1463	if(outSize>dataLengthsizeof(double))
	1464	SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1465
	1466	free_TightDataPointStorageD(tdps);
	1467	return 0;
	1468	}
	1469
	1470	void SZ_compress_args_double_withinRange(unsigned char** newByteData, double oriData, size_t dataLength, size_t outSize)
	1471	{
	1472	TightDataPointStorageD* tdps = (TightDataPointStorageD*) malloc(sizeof(TightDataPointStorageD));
	1473	tdps->rtypeArray = NULL;
	1474	tdps->typeArray = NULL;
	1475	tdps->leadNumArray = NULL;
	1476	tdps->residualMidBits = NULL;
	1477
	1478	tdps->allSameData = 1;
	1479	tdps->dataSeriesLength = dataLength;
	1480	tdps->exactMidBytes = (unsigned char)malloc(sizeof(unsigned char)8);
	1481	tdps->pwrErrBoundBytes = NULL;
	1482	tdps->isLossless = 0;
	1483	double value = oriData[0];
	1484	doubleToBytes(tdps->exactMidBytes, value);
	1485	tdps->exactMidBytes_size = 8;
	1486
	1487	size_t tmpOutSize;
	1488	//unsigned char *tmpByteData;
	1489	convertTDPStoFlatBytes_double(tdps, newByteData, &tmpOutSize);
	1490	//convertTDPStoFlatBytes_double(tdps, &tmpByteData, &tmpOutSize);
	1491
	1492	//newByteData = (unsigned char)malloc(sizeof(unsigned char)*16); //for floating-point data (1+3+4+4)
	1493	//memcpy(*newByteData, tmpByteData, 16);
	1494	*outSize = tmpOutSize;//12==3+1+8(double_size)+MetaDataByteLength
	1495	free_TightDataPointStorageD(tdps);
	1496	}
	1497
	1498	int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData,
	1499	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
	1500	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
	1501	{
	1502	int status = SZ_SCES;
	1503	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
	1504	double valueRangeSize = 0, medianValue = 0;
	1505
	1506	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
	1507	double max = min+valueRangeSize;
	1508	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1509
	1510	if(valueRangeSize <= realPrecision)
	1511	{
	1512	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
	1513	}
	1514	else
	1515	{
	1516	if(r5==0&&r4==0&&r3==0&&r2==0)
	1517	{
	1518	if(errBoundMode>=PW_REL)
	1519	{
[9ee2ce3]	1520	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
	1521	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
[2c47b73]	1522	}
	1523	else
	1524	SZ_compress_args_double_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1525	}
	1526	else if(r5==0&&r4==0&&r3==0)
	1527	{
	1528	if(errBoundMode>=PW_REL)
	1529	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(newByteData, oriData, realPrecision, r2, r1, outSize, min, max);
	1530	else
	1531	SZ_compress_args_double_NoCkRngeNoGzip_2D(newByteData, oriData, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1532	}
	1533	else if(r5==0&&r4==0)
	1534	{
	1535	if(errBoundMode>=PW_REL)
	1536	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r3, r2, r1, outSize, min, max);
	1537	else
	1538	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1539	}
	1540	else if(r5==0)
	1541	{
	1542	if(errBoundMode>=PW_REL)
	1543	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(newByteData, oriData, realPrecision, r4*r3, r2, r1, outSize, min, max);
	1544	else
	1545	SZ_compress_args_double_NoCkRngeNoGzip_3D(newByteData, oriData, r4*r3, r2, r1, realPrecision, outSize, valueRangeSize, medianValue);
	1546	}
	1547	}
	1548	return status;
	1549	}
	1550
	1551	int SZ_compress_args_double(unsigned char** newByteData, double *oriData,
	1552	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
	1553	int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
	1554	{
	1555	confparams_cpr->errorBoundMode = errBoundMode;
	1556	if(errBoundMode==PW_REL)
	1557	{
	1558	confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
	1559	//confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
	1560	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE && r3 != 0 )
	1561	{
	1562	printf("Error: Current version doesn't support 3D data compression with point-wise relative error bound being based on pwrType=AVG\n");
	1563	exit(0);
	1564	return SZ_NSCS;
	1565	}
[9ee2ce3]	1566	}
[2c47b73]	1567
	1568	int status = SZ_SCES;
	1569	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
	1570
	1571	if(dataLength <= MIN_NUM_OF_ELEMENTS)
	1572	{
	1573	*newByteData = SZ_skip_compress_double(oriData, dataLength, outSize);
	1574	return status;
	1575	}
	1576
	1577	double valueRangeSize = 0, medianValue = 0;
	1578
	1579	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
	1580	double max = min+valueRangeSize;
	1581
	1582	double realPrecision = 0;
	1583
	1584	if(confparams_cpr->errorBoundMode==PSNR)
	1585	{
	1586	confparams_cpr->errorBoundMode = ABS;
	1587	realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromPSNR(confparams_cpr->psnr, (double)confparams_cpr->predThreshold, valueRangeSize);
	1588	}
	1589	else
	1590	realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1591
	1592	if(valueRangeSize <= realPrecision)
	1593	{
	1594	SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
	1595	}
	1596	else
	1597	{
	1598	size_t tmpOutSize = 0;
	1599	unsigned char* tmpByteData;
	1600	if (r2==0)
	1601	{
	1602	if(confparams_cpr->errorBoundMode>=PW_REL)
	1603	{
[9ee2ce3]	1604	SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r1, &tmpOutSize, min, max);
	1605	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(&tmpByteData, oriData, r1, absErr_Bound, relBoundRatio, pwRelBoundRatio, valueRangeSize, medianValue, &tmpOutSize);
[2c47b73]	1606	}
	1607	else
	1608	#ifdef HAVE_TIMECMPR
[9ee2ce3]	1609	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
[2c47b73]	1610	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1611	else
	1612	#endif
	1613	SZ_compress_args_double_NoCkRngeNoGzip_1D(&tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1614	}
	1615	else
	1616	if (r3==0)
	1617	{
	1618	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1619	SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1620	else
	1621	#ifdef HAVE_TIMECMPR
	1622	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1623	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1624	else
	1625	#endif
[9ee2ce3]	1626	{
	1627	if(sz_with_regression == SZ_NO_REGRESSION)
	1628	SZ_compress_args_double_NoCkRngeNoGzip_2D(&tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1629	else
	1630	tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
	1631	}
[2c47b73]	1632	}
	1633	else
	1634	if (r4==0)
	1635	{
	1636	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1637	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r3, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1638	else
	1639	#ifdef HAVE_TIMECMPR
	1640	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1641	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1642	else
	1643	#endif
[9ee2ce3]	1644	{
	1645	if(sz_with_regression == SZ_NO_REGRESSION)
	1646	SZ_compress_args_double_NoCkRngeNoGzip_3D(&tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1647	else
	1648	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
	1649	}
	1650
	1651
[2c47b73]	1652	}
	1653	else
	1654	if (r5==0)
	1655	{
	1656	if(confparams_cpr->errorBoundMode>=PW_REL)
[9ee2ce3]	1657	SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
[2c47b73]	1658	else
	1659	#ifdef HAVE_TIMECMPR
	1660	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
	1661	multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1662	else
[9ee2ce3]	1663	#endif
	1664	{
	1665	if(sz_with_regression == SZ_NO_REGRESSION)
	1666	SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
	1667	else
	1668	tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
	1669	}
	1670
[2c47b73]	1671	}
	1672	else
	1673	{
	1674	printf("Error: doesn't support 5 dimensions for now.\n");
	1675	status = SZ_DERR;
	1676	}
	1677
	1678	//Call Gzip to do the further compression.
	1679	if(confparams_cpr->szMode==SZ_BEST_SPEED)
	1680	{
	1681	*outSize = tmpOutSize;
	1682	*newByteData = tmpByteData;
	1683	}
	1684	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1685	{
[9ee2ce3]	1686	*outSize = sz_lossless_compress(confparams_cpr->losslessCompressor, confparams_cpr->gzipMode, tmpByteData, tmpOutSize, newByteData);
[2c47b73]	1687	free(tmpByteData);
	1688	}
	1689	else
	1690	{
	1691	printf("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1692	status = SZ_MERR;
	1693	}
	1694	}
	1695
	1696	return status;
	1697	}
	1698
	1699	//TODO
	1700	int SZ_compress_args_double_subblock(unsigned char* compressedBytes, double *oriData,
	1701	size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
	1702	size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
	1703	size_t e5, size_t e4, size_t e3, size_t e2, size_t e1,
	1704	size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio)
	1705	{
	1706	int status = SZ_SCES;
	1707	double valueRangeSize = 0, medianValue = 0;
	1708	computeRangeSize_double_subblock(oriData, &valueRangeSize, &medianValue, r5, r4, r3, r2, r1, s5, s4, s3, s2, s1, e5, e4, e3, e2, e1);
	1709
	1710	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
	1711
	1712	if(valueRangeSize <= realPrecision)
	1713	{
	1714	//TODO
	1715	//SZ_compress_args_double_withinRange_subblock();
	1716	}
	1717	else
	1718	{
	1719	if (r2==0)
	1720	{
	1721	//TODO
	1722	if(errBoundMode==PW_REL)
	1723	{
	1724	//TODO
	1725	//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_subblock();
	1726	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1727	}
	1728	else
	1729	SZ_compress_args_double_NoCkRnge_1D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r1, s1, e1);
	1730	}
	1731	else
	1732	if (r3==0)
	1733	{
	1734	if(errBoundMode==PW_REL)
	1735	{
	1736	//TODO
	1737	//SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_subblock();
	1738	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1739	}
	1740	else
	1741	SZ_compress_args_double_NoCkRnge_2D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r2, r1, s2, s1, e2, e1);
	1742	}
	1743	else
	1744	if (r4==0)
	1745	{
	1746	if(errBoundMode==PW_REL)
	1747	{
	1748	//TODO
	1749	//SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_subblock();
	1750	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1751	}
	1752	else
	1753	SZ_compress_args_double_NoCkRnge_3D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r3, r2, r1, s3, s2, s1, e3, e2, e1);
	1754	}
	1755	else
	1756	if (r5==0)
	1757	{
	1758	if(errBoundMode==PW_REL)
	1759	{
	1760	//TODO
	1761	//SZ_compress_args_double_NoCkRngeNoGzip_4D_pwr_subblock();
	1762	printf ("Current subblock version does not support point-wise relative error bound.\n");
	1763	}
	1764	else
	1765	SZ_compress_args_double_NoCkRnge_4D_subblock(compressedBytes, oriData, realPrecision, outSize, valueRangeSize, medianValue, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
	1766	}
	1767	else
	1768	{
	1769	printf("Error: doesn't support 5 dimensions for now.\n");
	1770	status = SZ_DERR; //dimension error
	1771	}
	1772	}
	1773	return status;
	1774	}
	1775
	1776	void SZ_compress_args_double_NoCkRnge_1D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1777	size_t r1, size_t s1, size_t e1)
	1778	{
	1779	TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r1, s1, e1);
	1780
	1781	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1782	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1783	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1784	{
	1785	unsigned char *tmpCompBytes;
	1786	size_t tmpOutSize;
	1787	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1788	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1789	free(tmpCompBytes);
	1790	}
	1791	else
	1792	{
	1793	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1794	}
	1795
	1796	//TODO
	1797	// if(outSize>dataLengthsizeof(double))
	1798	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1799
	1800	free_TightDataPointStorageD(tdps);
	1801	}
	1802
	1803	void SZ_compress_args_double_NoCkRnge_2D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1804	size_t r2, size_t r1, size_t s2, size_t s1, size_t e2, size_t e1)
	1805	{
	1806	TightDataPointStorageD* tdps = SZ_compress_double_2D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r2, r1, s2, s1, e2, e1);
	1807
	1808	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1809	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1810	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1811	{
	1812	unsigned char *tmpCompBytes;
	1813	size_t tmpOutSize;
	1814	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1815	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1816	free(tmpCompBytes);
	1817	}
	1818	else
	1819	{
	1820	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1821	}
	1822
	1823	//TODO
	1824	// if(outSize>dataLengthsizeof(double))
	1825	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1826
	1827	free_TightDataPointStorageD(tdps);
	1828	}
	1829
	1830	void SZ_compress_args_double_NoCkRnge_3D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1831	size_t r3, size_t r2, size_t r1, size_t s3, size_t s2, size_t s1, size_t e3, size_t e2, size_t e1)
	1832	{
	1833	TightDataPointStorageD* tdps = SZ_compress_double_3D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r3, r2, r1, s3, s2, s1, e3, e2, e1);
	1834
	1835	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1836	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1837	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1838	{
	1839	unsigned char *tmpCompBytes;
	1840	size_t tmpOutSize;
	1841	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1842	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1843	free(tmpCompBytes);
	1844	}
	1845	else
	1846	{
	1847	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1848	}
	1849
	1850	//TODO
	1851	// if(outSize>dataLengthsizeof(double))
	1852	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1853
	1854	free_TightDataPointStorageD(tdps);
	1855	}
	1856
	1857	void SZ_compress_args_double_NoCkRnge_4D_subblock(unsigned char* compressedBytes, double oriData, double realPrecision, size_t outSize, double valueRangeSize, double medianValue_d,
	1858	size_t r4, size_t r3, size_t r2, size_t r1, size_t s4, size_t s3, size_t s2, size_t s1, size_t e4, size_t e3, size_t e2, size_t e1)
	1859	{
	1860	TightDataPointStorageD* tdps = SZ_compress_double_4D_MDQ_subblock(oriData, realPrecision, valueRangeSize, medianValue_d, r4, r3, r2, r1, s4, s3, s2, s1, e4, e3, e2, e1);
	1861
	1862	if (confparams_cpr->szMode==SZ_BEST_SPEED)
	1863	convertTDPStoFlatBytes_double_args(tdps, compressedBytes, outSize);
	1864	else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION \|\| confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION)
	1865	{
	1866	unsigned char *tmpCompBytes;
	1867	size_t tmpOutSize;
	1868	convertTDPStoFlatBytes_double(tdps, &tmpCompBytes, &tmpOutSize);
	1869	*outSize = zlib_compress3(tmpCompBytes, tmpOutSize, compressedBytes, confparams_cpr->gzipMode);
	1870	free(tmpCompBytes);
	1871	}
	1872	else
	1873	{
	1874	printf ("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
	1875	}
	1876
	1877	//TODO
	1878	// if(outSize>dataLengthsizeof(double))
	1879	// SZ_compress_args_double_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
	1880
	1881	free_TightDataPointStorageD(tdps);
	1882	}
	1883
	1884
	1885	unsigned int optimize_intervals_double_1D_subblock(double *oriData, double realPrecision, size_t r1, size_t s1, size_t e1)
	1886	{
	1887	size_t dataLength = e1 - s1 + 1;
	1888	oriData = oriData + s1;
	1889
	1890	size_t i = 0;
	1891	unsigned long radiusIndex;
	1892	double pred_value = 0, pred_err;
	1893	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	1894	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	1895	size_t totalSampleSize = dataLength/confparams_cpr->sampleDistance;
	1896	for(i=2;i<dataLength;i++)
	1897	{
	1898	if(i%confparams_cpr->sampleDistance==0)
	1899	{
	1900	pred_value = 2*oriData[i-1] - oriData[i-2];
	1901	//pred_value = oriData[i-1];
	1902	pred_err = fabs(pred_value - oriData[i]);
	1903	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	1904	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	1905	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	1906	intervals[radiusIndex]++;
	1907	}
	1908	}
	1909	//compute the appropriate number
	1910	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	1911	size_t sum = 0;
	1912	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	1913	{
	1914	sum += intervals[i];
	1915	if(sum>targetCount)
	1916	break;
	1917	}
	1918
	1919	if(i>=confparams_cpr->maxRangeRadius)
	1920	i = confparams_cpr->maxRangeRadius-1;
	1921	unsigned int accIntervals = 2*(i+1);
	1922	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	1923
	1924	if(powerOf2<32)
	1925	powerOf2 = 32;
	1926
	1927	free(intervals);
	1928	return powerOf2;
	1929	}
	1930
	1931	unsigned int optimize_intervals_double_2D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
	1932	{
	1933	size_t R1 = e1 - s1 + 1;
	1934	size_t R2 = e2 - s2 + 1;
	1935
	1936	size_t i,j, index;
	1937	unsigned long radiusIndex;
	1938	double pred_value = 0, pred_err;
	1939	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	1940	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	1941	size_t totalSampleSize = R1*R2/confparams_cpr->sampleDistance;
	1942	for(i=s1+1;i<=e1;i++)
	1943	{
	1944	for(j=s2+1;j<=e2;j++)
	1945	{
	1946	if((i+j)%confparams_cpr->sampleDistance==0)
	1947	{
	1948	index = i*r2+j;
	1949	pred_value = oriData[index-1] + oriData[index-r2] - oriData[index-r2-1];
	1950	pred_err = fabs(pred_value - oriData[index]);
	1951	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	1952	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	1953	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	1954	intervals[radiusIndex]++;
	1955	}
	1956	}
	1957	}
	1958	//compute the appropriate number
	1959	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	1960	size_t sum = 0;
	1961	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	1962	{
	1963	sum += intervals[i];
	1964	if(sum>targetCount)
	1965	break;
	1966	}
	1967	if(i>=confparams_cpr->maxRangeRadius)
	1968	i = confparams_cpr->maxRangeRadius-1;
	1969	unsigned int accIntervals = 2*(i+1);
	1970	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	1971
	1972	if(powerOf2<32)
	1973	powerOf2 = 32;
	1974
	1975	free(intervals);
	1976	return powerOf2;
	1977	}
	1978
	1979	unsigned int optimize_intervals_double_3D_subblock(double *oriData, double realPrecision, size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
	1980	{
	1981	size_t R1 = e1 - s1 + 1;
	1982	size_t R2 = e2 - s2 + 1;
	1983	size_t R3 = e3 - s3 + 1;
	1984
	1985	size_t r23 = r2*r3;
	1986
	1987	size_t i,j,k, index;
	1988	unsigned long radiusIndex;
	1989	double pred_value = 0, pred_err;
	1990	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	1991	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	1992	size_t totalSampleSize = R1R2R3/confparams_cpr->sampleDistance;
	1993	for(i=s1+1;i<=e1;i++)
	1994	{
	1995	for(j=s2+1;j<=e2;j++)
	1996	{
	1997	for(k=s3+1;k<=e3;k++)
	1998	{
	1999	if((i+j+k)%confparams_cpr->sampleDistance==0)
	2000	{
	2001	index = ir23+jr3+k;
	2002	pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
	2003	- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
	2004	pred_err = fabs(pred_value - oriData[index]);
	2005	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2006	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2007	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2008	intervals[radiusIndex]++;
	2009	}
	2010	}
	2011
	2012	}
	2013	}
	2014	//compute the appropriate number
	2015	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2016	size_t sum = 0;
	2017	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2018	{
	2019	sum += intervals[i];
	2020	if(sum>targetCount)
	2021	break;
	2022	}
	2023	if(i>=confparams_cpr->maxRangeRadius)
	2024	i = confparams_cpr->maxRangeRadius-1;
	2025
	2026	unsigned int accIntervals = 2*(i+1);
	2027	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2028
	2029	if(powerOf2<32)
	2030	powerOf2 = 32;
	2031
	2032	free(intervals);
	2033	return powerOf2;
	2034	}
	2035
	2036	unsigned int optimize_intervals_double_4D_subblock(double *oriData, double realPrecision,
	2037	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
	2038	{
	2039	size_t R1 = e1 - s1 + 1;
	2040	size_t R2 = e2 - s2 + 1;
	2041	size_t R3 = e3 - s3 + 1;
	2042	size_t R4 = e4 - s4 + 1;
	2043
	2044	size_t r34 = r3*r4;
	2045	size_t r234 = r2r3r4;
	2046
	2047	size_t i,j,k,l, index;
	2048	unsigned long radiusIndex;
	2049	double pred_value = 0, pred_err;
	2050	int intervals = (int)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
	2051	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(int));
	2052	size_t totalSampleSize = R1R2R3*R4/confparams_cpr->sampleDistance;
	2053	for(i=s1+1;i<=e1;i++)
	2054	{
	2055	for(j=s2+1;j<=e2;j++)
	2056	{
	2057	for(k=s3+1;k<=e3;k++)
	2058	{
	2059	for(l=s4+1;l<=e4;l++)
	2060	{
	2061	if((i+j+k+l)%confparams_cpr->sampleDistance==0)
	2062	{
	2063	index = ir234+jr34+k*r4+l;
	2064	pred_value = oriData[index-1] + oriData[index-r4] + oriData[index-r34]
	2065	- oriData[index-1-r34] - oriData[index-r4-1] - oriData[index-r4-r34] + oriData[index-r4-r34-1];
	2066	pred_err = fabs(pred_value - oriData[index]);
	2067	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	2068	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	2069	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	2070	intervals[radiusIndex]++;
	2071	}
	2072	}
	2073	}
	2074
	2075	}
	2076	}
	2077	//compute the appropriate number
	2078	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	2079	size_t sum = 0;
	2080	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	2081	{
	2082	sum += intervals[i];
	2083	if(sum>targetCount)
	2084	break;
	2085	}
	2086	if(i>=confparams_cpr->maxRangeRadius)
	2087	i = confparams_cpr->maxRangeRadius-1;
	2088
	2089	unsigned int accIntervals = 2*(i+1);
	2090	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	2091
	2092	if(powerOf2<32)
	2093	powerOf2 = 32;
	2094
	2095	free(intervals);
	2096	return powerOf2;
	2097	}
	2098
	2099	TightDataPointStorageD* SZ_compress_double_1D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2100	size_t r1, size_t s1, size_t e1)
	2101	{
	2102	size_t dataLength = e1 - s1 + 1;
	2103
	2104	unsigned int quantization_intervals;
	2105	if(exe_params->optQuantMode==1)
	2106	quantization_intervals = optimize_intervals_double_1D_subblock(oriData, realPrecision, r1, s1, e1);
	2107	else
	2108	quantization_intervals = exe_params->intvCapacity;
	2109	updateQuantizationInfo(quantization_intervals);
	2110
	2111	size_t i;
	2112	int reqLength;
	2113	double medianValue = medianValue_d;
	2114	short radExpo = getExponent_double(valueRangeSize/2);
	2115
	2116	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2117
	2118	int* type = (int) malloc(dataLengthsizeof(int));
	2119
	2120	double* spaceFillingValue = oriData + s1; //
	2121
	2122	DynamicIntArray *exactLeadNumArray;
	2123	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2124
	2125	DynamicByteArray *exactMidByteArray;
	2126	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2127
	2128	DynamicIntArray *resiBitArray;
	2129	new_DIA(&resiBitArray, DynArrayInitLen);
	2130
	2131	type[0] = 0;
	2132
	2133	unsigned char preDataBytes[8];
	2134	longToBytes_bigEndian(preDataBytes, 0);
	2135
	2136	int reqBytesLength = reqLength/8;
	2137	int resiBitsLength = reqLength%8;
	2138	double last3CmprsData[3] = {0};
	2139
	2140	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2141	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2142
	2143	//add the first data
	2144	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2145	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2146	memcpy(preDataBytes,vce->curBytes,8);
	2147	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2148	listAdd_double(last3CmprsData, vce->data);
	2149
	2150	//add the second data
	2151	type[1] = 0;
	2152	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2153	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2154	memcpy(preDataBytes,vce->curBytes,8);
	2155	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2156	listAdd_double(last3CmprsData, vce->data);
	2157
	2158	int state;
	2159	double checkRadius;
	2160	double curData;
	2161	double pred;
	2162	double predAbsErr;
	2163	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
	2164	double interval = 2*realPrecision;
	2165
	2166	for(i=2;i<dataLength;i++)
	2167	{
	2168	//printf("%.30G\n",last3CmprsData[0]);
	2169	curData = spaceFillingValue[i];
	2170	pred = 2*last3CmprsData[0] - last3CmprsData[1];
	2171	//pred = last3CmprsData[0];
	2172	predAbsErr = fabs(curData - pred);
	2173	if(predAbsErr<=checkRadius)
	2174	{
	2175	state = (predAbsErr/realPrecision+1)/2;
	2176	if(curData>=pred)
	2177	{
	2178	type[i] = exe_params->intvRadius+state;
	2179	pred = pred + state*interval;
	2180	}
	2181	else //curData<pred
	2182	{
	2183	type[i] = exe_params->intvRadius-state;
	2184	pred = pred - state*interval;
	2185	}
	2186	listAdd_double(last3CmprsData, pred);
	2187	continue;
	2188	}
	2189
	2190	//unpredictable data processing
	2191	type[i] = 0;
	2192	compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2193	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2194	memcpy(preDataBytes,vce->curBytes,8);
	2195	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2196
	2197	listAdd_double(last3CmprsData, vce->data);
	2198	}//end of for
	2199
	2200	size_t exactDataNum = exactLeadNumArray->size;
	2201
	2202	TightDataPointStorageD* tdps;
	2203
	2204	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	2205	type, exactMidByteArray->array, exactMidByteArray->size,
	2206	exactLeadNumArray->array,
	2207	resiBitArray->array, resiBitArray->size,
	2208	resiBitsLength,
	2209	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2210
	2211	//free memory
	2212	free_DIA(exactLeadNumArray);
	2213	free_DIA(resiBitArray);
	2214	free(type);
	2215	free(vce);
	2216	free(lce);
	2217	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2218
	2219	return tdps;
	2220	}
	2221
	2222
	2223	TightDataPointStorageD* SZ_compress_double_2D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2224	size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
	2225	{
	2226	unsigned int quantization_intervals;
	2227	if(exe_params->optQuantMode==1)
	2228	{
	2229	quantization_intervals = optimize_intervals_double_2D_subblock(oriData, realPrecision, r1, r2, s1, s2, e1, e2);
	2230	updateQuantizationInfo(quantization_intervals);
	2231	}
	2232	else
	2233	quantization_intervals = exe_params->intvCapacity;
	2234
	2235	size_t i,j;
	2236	int reqLength;
	2237	double pred1D, pred2D;
	2238	double diff = 0.0;
	2239	double itvNum = 0;
	2240	double P0, P1;
	2241
	2242	size_t R1 = e1 - s1 + 1;
	2243	size_t R2 = e2 - s2 + 1;
	2244	size_t dataLength = R1*R2;
	2245
	2246	P0 = (double)malloc(R2sizeof(double));
	2247	memset(P0, 0, R2*sizeof(double));
	2248	P1 = (double)malloc(R2sizeof(double));
	2249	memset(P1, 0, R2*sizeof(double));
	2250
	2251	double medianValue = medianValue_d;
	2252	short radExpo = getExponent_double(valueRangeSize/2);
	2253	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2254
	2255	int* type = (int) malloc(dataLengthsizeof(int));
	2256
	2257	double* spaceFillingValue = oriData; //
	2258
	2259	DynamicIntArray *exactLeadNumArray;
	2260	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2261
	2262	DynamicByteArray *exactMidByteArray;
	2263	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2264
	2265	DynamicIntArray *resiBitArray;
	2266	new_DIA(&resiBitArray, DynArrayInitLen);
	2267
	2268	unsigned char preDataBytes[8];
	2269	longToBytes_bigEndian(preDataBytes, 0);
	2270
	2271	int reqBytesLength = reqLength/8;
	2272	int resiBitsLength = reqLength%8;
	2273
	2274	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2275	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2276
	2277	/* Process Row-s1 data s2*/
	2278	size_t gIndex;
	2279	size_t lIndex;
	2280
	2281	gIndex = s1*r2+s2;
	2282	lIndex = 0;
	2283
	2284	type[lIndex] = 0;
	2285	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2286	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2287	memcpy(preDataBytes,vce->curBytes,8);
	2288	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2289	P1[0] = vce->data;
	2290
	2291	/* Process Row-s1 data s2+1*/
	2292	gIndex = s1*r2+(s2+1);
	2293	lIndex = 1;
	2294
	2295	pred1D = P1[0];
	2296	diff = spaceFillingValue[gIndex] - pred1D;
	2297
	2298	itvNum = fabs(diff)/realPrecision + 1;
	2299
	2300	if (itvNum < exe_params->intvCapacity)
	2301	{
	2302	if (diff < 0) itvNum = -itvNum;
	2303	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2304	P1[1] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2305	}
	2306	else
	2307	{
	2308	type[lIndex] = 0;
	2309	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2310	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2311	memcpy(preDataBytes,vce->curBytes,8);
	2312	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2313	P1[1] = vce->data;
	2314	}
	2315
	2316	/* Process Row-s1 data s2+2 --> data e2 */
	2317	for (j = 2; j < R2; j++)
	2318	{
	2319	gIndex = s1*r2+(s2+j);
	2320	lIndex = j;
	2321
	2322	pred1D = 2*P1[j-1] - P1[j-2];
	2323	diff = spaceFillingValue[gIndex] - pred1D;
	2324
	2325	itvNum = fabs(diff)/realPrecision + 1;
	2326
	2327	if (itvNum < exe_params->intvCapacity)
	2328	{
	2329	if (diff < 0) itvNum = -itvNum;
	2330	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2331	P1[j] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2332	}
	2333	else
	2334	{
	2335	type[lIndex] = 0;
	2336	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2337	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2338	memcpy(preDataBytes,vce->curBytes,8);
	2339	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2340	P1[j] = vce->data;
	2341	}
	2342	}
	2343
	2344	/* Process Row-s1+1 --> Row-e1 */
	2345	for (i = 1; i < R1; i++)
	2346	{
	2347	/* Process row-s1+i data s2 */
	2348	gIndex = (s1+i)*r2+s2;
	2349	lIndex = i*R2;
	2350
	2351	pred1D = P1[0];
	2352	diff = spaceFillingValue[gIndex] - pred1D;
	2353
	2354	itvNum = fabs(diff)/realPrecision + 1;
	2355
	2356	if (itvNum < exe_params->intvCapacity)
	2357	{
	2358	if (diff < 0) itvNum = -itvNum;
	2359	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2360	P0[0] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2361	}
	2362	else
	2363	{
	2364	type[lIndex] = 0;
	2365	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2366	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2367	memcpy(preDataBytes,vce->curBytes,8);
	2368	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2369	P0[0] = vce->data;
	2370	}
	2371
	2372	/* Process row-s1+i data s2+1 --> e2 */
	2373	for (j = 1; j < R2; j++)
	2374	{
	2375	gIndex = (s1+i)*r2+(s2+j);
	2376	lIndex = i*R2+j;
	2377
	2378	pred2D = P0[j-1] + P1[j] - P1[j-1];
	2379	diff = spaceFillingValue[gIndex] - pred2D;
	2380
	2381	itvNum = fabs(diff)/realPrecision + 1;
	2382
	2383	if (itvNum < exe_params->intvCapacity)
	2384	{
	2385	if (diff < 0) itvNum = -itvNum;
	2386	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2387	P0[j] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2388	}
	2389	else
	2390	{
	2391	type[lIndex] = 0;
	2392	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2393	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2394	memcpy(preDataBytes,vce->curBytes,8);
	2395	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2396	P0[j] = vce->data;
	2397	}
	2398	}
	2399
	2400	double *Pt;
	2401	Pt = P1;
	2402	P1 = P0;
	2403	P0 = Pt;
	2404	}
	2405
	2406	free(P0);
	2407	free(P1);
	2408	size_t exactDataNum = exactLeadNumArray->size;
	2409
	2410	TightDataPointStorageD* tdps;
	2411
	2412	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	2413	type, exactMidByteArray->array, exactMidByteArray->size,
	2414	exactLeadNumArray->array,
	2415	resiBitArray->array, resiBitArray->size,
	2416	resiBitsLength,
	2417	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2418
	2419	//free memory
	2420	free_DIA(exactLeadNumArray);
	2421	free_DIA(resiBitArray);
	2422	free(type);
	2423	free(vce);
	2424	free(lce);
	2425	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2426
	2427	return tdps;
	2428	}
	2429
	2430	TightDataPointStorageD* SZ_compress_double_3D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2431	size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, size_t e2, size_t e3)
	2432	{
	2433	unsigned int quantization_intervals;
	2434	if(exe_params->optQuantMode==1)
	2435	{
	2436	quantization_intervals = optimize_intervals_double_3D_subblock(oriData, realPrecision, r1, r2, r3, s1, s2, s3, e1, e2, e3);
	2437	updateQuantizationInfo(quantization_intervals);
	2438	}
	2439	else
	2440	quantization_intervals = exe_params->intvCapacity;
	2441
	2442	size_t i,j,k;
	2443	int reqLength;
	2444	double pred1D, pred2D, pred3D;
	2445	double diff = 0.0;
	2446	double itvNum = 0;
	2447	double P0, P1;
	2448
	2449	size_t R1 = e1 - s1 + 1;
	2450	size_t R2 = e2 - s2 + 1;
	2451	size_t R3 = e3 - s3 + 1;
	2452	size_t dataLength = R1R2R3;
	2453
	2454	size_t r23 = r2*r3;
	2455	size_t R23 = R2*R3;
	2456
	2457	P0 = (double)malloc(R23sizeof(double));
	2458	P1 = (double)malloc(R23sizeof(double));
	2459
	2460	double medianValue = medianValue_d;
	2461	short radExpo = getExponent_double(valueRangeSize/2);
	2462	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2463
	2464	int* type = (int) malloc(dataLengthsizeof(int));
	2465
	2466	double* spaceFillingValue = oriData; //
	2467
	2468	DynamicIntArray *exactLeadNumArray;
	2469	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2470
	2471	DynamicByteArray *exactMidByteArray;
	2472	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2473
	2474	DynamicIntArray *resiBitArray;
	2475	new_DIA(&resiBitArray, DynArrayInitLen);
	2476
	2477	unsigned char preDataBytes[8];
	2478	longToBytes_bigEndian(preDataBytes, 0);
	2479
	2480	int reqBytesLength = reqLength/8;
	2481	int resiBitsLength = reqLength%8;
	2482
	2483	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2484	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2485
	2486
	2487	/////////////////////////// Process layer-s1 ///////////////////////////
	2488	/* Process Row-s2 data s3*/
	2489	size_t gIndex; //global index
	2490	size_t lIndex; //local index
	2491	size_t index2D; //local 2D index
	2492
	2493	gIndex = s1r23+s2r3+s3;
	2494	lIndex = 0;
	2495	index2D = 0;
	2496
	2497	type[lIndex] = 0;
	2498	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2499	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2500	memcpy(preDataBytes,vce->curBytes,8);
	2501	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2502	P1[index2D] = vce->data;
	2503
	2504	/* Process Row-s2 data s3+1*/
	2505	gIndex = s1r23+s2r3+s3+1;
	2506	lIndex = 1;
	2507	index2D = 1;
	2508
	2509	pred1D = P1[index2D-1];
	2510	diff = spaceFillingValue[gIndex] - pred1D;
	2511
	2512	itvNum = fabs(diff)/realPrecision + 1;
	2513
	2514	if (itvNum < exe_params->intvCapacity)
	2515	{
	2516	if (diff < 0) itvNum = -itvNum;
	2517	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2518	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2519	}
	2520	else
	2521	{
	2522	type[lIndex] = 0;
	2523	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2524	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2525	memcpy(preDataBytes,vce->curBytes,8);
	2526	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2527	P1[index2D] = vce->data;
	2528	}
	2529
	2530	/* Process Row-s2 data s3+2 --> data e3 */
	2531	for (j = 2; j < R3; j++)
	2532	{
	2533	gIndex = s1r23+s2r3+s3+j;
	2534	lIndex = j;
	2535	index2D = j;
	2536
	2537	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	2538	diff = spaceFillingValue[gIndex] - pred1D;
	2539
	2540	itvNum = fabs(diff)/realPrecision + 1;
	2541
	2542	if (itvNum < exe_params->intvCapacity)
	2543	{
	2544	if (diff < 0) itvNum = -itvNum;
	2545	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2546	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2547	}
	2548	else
	2549	{
	2550	type[lIndex] = 0;
	2551	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2552	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2553	memcpy(preDataBytes,vce->curBytes,8);
	2554	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2555	P1[index2D] = vce->data;
	2556	}
	2557	}
	2558
	2559	/* Process Row-s2+1 --> Row-e2 */
	2560	for (i = 1; i < R2; i++)
	2561	{
	2562	/* Process row-s2+i data s3 */
	2563	gIndex = s1r23+(s2+i)r3+s3;
	2564	lIndex = i*R3;
	2565	index2D = i*R3;
	2566
	2567	pred1D = P1[index2D-R3];
	2568	diff = spaceFillingValue[gIndex] - pred1D;
	2569
	2570	itvNum = fabs(diff)/realPrecision + 1;
	2571
	2572	if (itvNum < exe_params->intvCapacity)
	2573	{
	2574	if (diff < 0) itvNum = -itvNum;
	2575	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2576	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2577	}
	2578	else
	2579	{
	2580	type[lIndex] = 0;
	2581	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2582	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2583	memcpy(preDataBytes,vce->curBytes,8);
	2584	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2585	P1[index2D] = vce->data;
	2586	}
	2587
	2588	/* Process row-s2+i data s3+1 --> data e3*/
	2589	for (j = 1; j < R3; j++)
	2590	{
	2591	gIndex = s1r23+(s2+i)r3+s3+j;
	2592	lIndex = i*R3+j;
	2593	index2D = i*R3+j;
	2594
	2595	pred2D = P1[index2D-1] + P1[index2D-R3] - P1[index2D-R3-1];
	2596	diff = spaceFillingValue[gIndex] - pred2D;
	2597
	2598	itvNum = fabs(diff)/realPrecision + 1;
	2599
	2600	if (itvNum < exe_params->intvCapacity)
	2601	{
	2602	if (diff < 0) itvNum = -itvNum;
	2603	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2604	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2605	}
	2606	else
	2607	{
	2608	type[lIndex] = 0;
	2609	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2610	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2611	memcpy(preDataBytes,vce->curBytes,8);
	2612	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2613	P1[index2D] = vce->data;
	2614	}
	2615	}
	2616	}
	2617
	2618
	2619	/////////////////////////// Process layer-s1+1 --> layer-e1 ///////////////////////////
	2620
	2621	for (k = 1; k < R1; k++)
	2622	{
	2623	/* Process Row-s2 data s3*/
	2624	gIndex = (s1+k)r23+s2r3+s3;
	2625	lIndex = k*R23;
	2626	index2D = 0;
	2627
	2628	pred1D = P1[index2D];
	2629	diff = spaceFillingValue[gIndex] - pred1D;
	2630
	2631	itvNum = fabs(diff)/realPrecision + 1;
	2632
	2633	if (itvNum < exe_params->intvCapacity)
	2634	{
	2635	if (diff < 0) itvNum = -itvNum;
	2636	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2637	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2638	}
	2639	else
	2640	{
	2641	type[lIndex] = 0;
	2642	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2643	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2644	memcpy(preDataBytes,vce->curBytes,8);
	2645	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2646	P0[index2D] = vce->data;
	2647	}
	2648
	2649
	2650	/* Process Row-s2 data s3+1 --> data e3 */
	2651	for (j = 1; j < R3; j++)
	2652	{
	2653	gIndex = (s1+k)r23+s2r3+s3+j;
	2654	lIndex = k*R23+j;
	2655	index2D = j;
	2656
	2657	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	2658	diff = spaceFillingValue[gIndex] - pred2D;
	2659
	2660	itvNum = fabs(diff)/realPrecision + 1;
	2661
	2662	if (itvNum < exe_params->intvCapacity)
	2663	{
	2664	if (diff < 0) itvNum = -itvNum;
	2665	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2666	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2667	}
	2668	else
	2669	{
	2670	type[lIndex] = 0;
	2671	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2672	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2673	memcpy(preDataBytes,vce->curBytes,8);
	2674	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2675	P0[index2D] = vce->data;
	2676	}
	2677	}
	2678
	2679	/* Process Row-s2+1 --> Row-e2 */
	2680	for (i = 1; i < R2; i++)
	2681	{
	2682	/* Process Row-s2+i data s3 */
	2683	gIndex = (s1+k)r23+(s2+i)r3+s3;
	2684	lIndex = kR23+iR3;
	2685	index2D = i*R3;
	2686
	2687	pred2D = P0[index2D-R3] + P1[index2D] - P1[index2D-R3];
	2688	diff = spaceFillingValue[gIndex] - pred2D;
	2689
	2690	itvNum = fabs(diff)/realPrecision + 1;
	2691
	2692	if (itvNum < exe_params->intvCapacity)
	2693	{
	2694	if (diff < 0) itvNum = -itvNum;
	2695	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2696	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2697	}
	2698	else
	2699	{
	2700	type[lIndex] = 0;
	2701	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2702	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2703	memcpy(preDataBytes,vce->curBytes,8);
	2704	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2705	P0[index2D] = vce->data;
	2706	}
	2707
	2708	/* Process Row-s2+i data s3+1 --> data e3 */
	2709	for (j = 1; j < R3; j++)
	2710	{
	2711	gIndex = (s1+k)r23+(s2+i)r3+s3+j;
	2712	lIndex = kR23+iR3+j;
	2713	index2D = i*R3+j;
	2714
	2715	pred3D = P0[index2D-1] + P0[index2D-R3]+ P1[index2D] - P0[index2D-R3-1] - P1[index2D-R3] - P1[index2D-1] + P1[index2D-R3-1];
	2716	diff = spaceFillingValue[gIndex] - pred3D;
	2717
	2718	itvNum = fabs(diff)/realPrecision + 1;
	2719
	2720	if (itvNum < exe_params->intvCapacity)
	2721	{
	2722	if (diff < 0) itvNum = -itvNum;
	2723	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2724	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2725	}
	2726	else
	2727	{
	2728	type[lIndex] = 0;
	2729	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2730	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2731	memcpy(preDataBytes,vce->curBytes,8);
	2732	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2733	P0[index2D] = vce->data;
	2734	}
	2735	}
	2736	}
	2737
	2738	double *Pt;
	2739	Pt = P1;
	2740	P1 = P0;
	2741	P0 = Pt;
	2742	}
	2743
	2744	free(P0);
	2745	free(P1);
	2746	size_t exactDataNum = exactLeadNumArray->size;
	2747
	2748	TightDataPointStorageD* tdps;
	2749
	2750	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	2751	type, exactMidByteArray->array, exactMidByteArray->size,
	2752	exactLeadNumArray->array,
	2753	resiBitArray->array, resiBitArray->size,
	2754	resiBitsLength,
	2755	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	2756
	2757	//free memory
	2758	free_DIA(exactLeadNumArray);
	2759	free_DIA(resiBitArray);
	2760	free(type);
	2761	free(vce);
	2762	free(lce);
	2763	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	2764
	2765	return tdps;
	2766	}
	2767
	2768	TightDataPointStorageD* SZ_compress_double_4D_MDQ_subblock(double *oriData, double realPrecision, double valueRangeSize, double medianValue_d,
	2769	size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, size_t s4, size_t e1, size_t e2, size_t e3, size_t e4)
	2770	{
	2771	unsigned int quantization_intervals;
	2772	if(exe_params->optQuantMode==1)
	2773	{
	2774	quantization_intervals = optimize_intervals_double_4D_subblock(oriData, realPrecision, r1, r2, r3, r4, s1, s2, s3, s4, e1, e2, e3, e4);
	2775	updateQuantizationInfo(quantization_intervals);
	2776	}
	2777	else
	2778	quantization_intervals = exe_params->intvCapacity;
	2779
	2780	size_t i,j,k;
	2781	int reqLength;
	2782	double pred1D, pred2D, pred3D;
	2783	double diff = 0.0;
	2784	double itvNum = 0;
	2785	double P0, P1;
	2786
	2787	size_t R1 = e1 - s1 + 1;
	2788	size_t R2 = e2 - s2 + 1;
	2789	size_t R3 = e3 - s3 + 1;
	2790	size_t R4 = e4 - s4 + 1;
	2791
	2792	size_t dataLength = R1R2R3*R4;
	2793
	2794	size_t r34 = r3*r4;
	2795	size_t r234 = r2r3r4;
	2796	size_t R34 = R3*R4;
	2797	size_t R234 = R2R3R4;
	2798
	2799	P0 = (double)malloc(R34sizeof(double));
	2800	P1 = (double)malloc(R34sizeof(double));
	2801
	2802	double medianValue = medianValue_d;
	2803	short radExpo = getExponent_double(valueRangeSize/2);
	2804	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
	2805
	2806	int* type = (int) malloc(dataLengthsizeof(int));
	2807
	2808	double* spaceFillingValue = oriData; //
	2809
	2810	DynamicIntArray *exactLeadNumArray;
	2811	new_DIA(&exactLeadNumArray, DynArrayInitLen);
	2812
	2813	DynamicByteArray *exactMidByteArray;
	2814	new_DBA(&exactMidByteArray, DynArrayInitLen);
	2815
	2816	DynamicIntArray *resiBitArray;
	2817	new_DIA(&resiBitArray, DynArrayInitLen);
	2818
	2819	unsigned char preDataBytes[8];
	2820	longToBytes_bigEndian(preDataBytes, 0);
	2821
	2822	int reqBytesLength = reqLength/8;
	2823	int resiBitsLength = reqLength%8;
	2824
	2825	DoubleValueCompressElement vce = (DoubleValueCompressElement)malloc(sizeof(DoubleValueCompressElement));
	2826	LossyCompressionElement lce = (LossyCompressionElement)malloc(sizeof(LossyCompressionElement));
	2827
	2828	size_t l;
	2829	for (l = 0; l < R1; l++)
	2830	{
	2831
	2832	/////////////////////////// Process layer-s2 ///////////////////////////
	2833	/* Process Row-s3 data s4*/
	2834	size_t gIndex; //global index
	2835	size_t lIndex; //local index
	2836	size_t index2D; //local 2D index
	2837
	2838	gIndex = (s1+l)r234+s2r34+s3*r4+s4;
	2839	lIndex = l*R234;
	2840	index2D = 0;
	2841
	2842	type[lIndex] = 0;
	2843	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2844	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2845	memcpy(preDataBytes,vce->curBytes,8);
	2846	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2847	P1[index2D] = vce->data;
	2848
	2849	/* Process Row-s3 data s4+1*/
	2850	gIndex = (s1+l)r234+s2r34+s3*r4+s4+1;
	2851	lIndex = l*R234+1;
	2852	index2D = 1;
	2853
	2854	pred1D = P1[index2D-1];
	2855	diff = spaceFillingValue[gIndex] - pred1D;
	2856
	2857	itvNum = fabs(diff)/realPrecision + 1;
	2858
	2859	if (itvNum < exe_params->intvCapacity)
	2860	{
	2861	if (diff < 0) itvNum = -itvNum;
	2862	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2863	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2864	}
	2865	else
	2866	{
	2867	type[lIndex] = 0;
	2868	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2869	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2870	memcpy(preDataBytes,vce->curBytes,8);
	2871	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2872	P1[index2D] = vce->data;
	2873	}
	2874
	2875	/* Process Row-s3 data s4+2 --> data e4 */
	2876	for (j = 2; j < R4; j++)
	2877	{
	2878	gIndex = (s1+l)r234+s2r34+s3*r4+s4+j;
	2879	lIndex = l*R234+j;
	2880	index2D = j;
	2881
	2882	pred1D = 2*P1[index2D-1] - P1[index2D-2];
	2883	diff = spaceFillingValue[gIndex] - pred1D;
	2884
	2885	itvNum = fabs(diff)/realPrecision + 1;
	2886
	2887	if (itvNum < exe_params->intvCapacity)
	2888	{
	2889	if (diff < 0) itvNum = -itvNum;
	2890	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2891	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2892	}
	2893	else
	2894	{
	2895	type[lIndex] = 0;
	2896	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2897	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2898	memcpy(preDataBytes,vce->curBytes,8);
	2899	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2900	P1[index2D] = vce->data;
	2901	}
	2902	}
	2903
	2904	/* Process Row-s3+1 --> Row-e3 */
	2905	for (i = 1; i < R3; i++)
	2906	{
	2907	/* Process row-s2+i data s3 */
	2908	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4;
	2909	lIndex = lR234+iR4;
	2910	index2D = i*R4;
	2911
	2912	pred1D = P1[index2D-R4];
	2913	diff = spaceFillingValue[gIndex] - pred1D;
	2914
	2915	itvNum = fabs(diff)/realPrecision + 1;
	2916
	2917	if (itvNum < exe_params->intvCapacity)
	2918	{
	2919	if (diff < 0) itvNum = -itvNum;
	2920	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2921	P1[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2922	}
	2923	else
	2924	{
	2925	type[lIndex] = 0;
	2926	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2927	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2928	memcpy(preDataBytes,vce->curBytes,8);
	2929	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2930	P1[index2D] = vce->data;
	2931	}
	2932
	2933	/* Process row-s3+i data s4+1 --> data e4*/
	2934	for (j = 1; j < R4; j++)
	2935	{
	2936	gIndex = (s1+l)r234+s2r34+(s3+i)*r4+s4+j;
	2937	lIndex = lR234+iR4+j;
	2938	index2D = i*R4+j;
	2939
	2940	pred2D = P1[index2D-1] + P1[index2D-R4] - P1[index2D-R4-1];
	2941	diff = spaceFillingValue[gIndex] - pred2D;
	2942
	2943	itvNum = fabs(diff)/realPrecision + 1;
	2944
	2945	if (itvNum < exe_params->intvCapacity)
	2946	{
	2947	if (diff < 0) itvNum = -itvNum;
	2948	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2949	P1[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2950	}
	2951	else
	2952	{
	2953	type[lIndex] = 0;
	2954	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2955	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2956	memcpy(preDataBytes,vce->curBytes,8);
	2957	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2958	P1[index2D] = vce->data;
	2959	}
	2960	}
	2961	}
	2962
	2963
	2964	/////////////////////////// Process layer-s2+1 --> layer-e2 ///////////////////////////
	2965
	2966	for (k = 1; k < R2; k++)
	2967	{
	2968	/* Process Row-s3 data s4*/
	2969	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4;
	2970	lIndex = lR234+kR34;
	2971	index2D = 0;
	2972
	2973	pred1D = P1[index2D];
	2974	diff = spaceFillingValue[gIndex] - pred1D;
	2975
	2976	itvNum = fabs(diff)/realPrecision + 1;
	2977
	2978	if (itvNum < exe_params->intvCapacity)
	2979	{
	2980	if (diff < 0) itvNum = -itvNum;
	2981	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	2982	P0[index2D] = pred1D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	2983	}
	2984	else
	2985	{
	2986	type[lIndex] = 0;
	2987	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	2988	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	2989	memcpy(preDataBytes,vce->curBytes,8);
	2990	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	2991	P0[index2D] = vce->data;
	2992	}
	2993
	2994
	2995	/* Process Row-s3 data s4+1 --> data e4 */
	2996	for (j = 1; j < R4; j++)
	2997	{
	2998	gIndex = (s1+l)r234+(s2+k)r34+s3*r4+s4+j;
	2999	lIndex = lR234+kR34+j;
	3000	index2D = j;
	3001
	3002	pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1];
	3003	diff = spaceFillingValue[gIndex] - pred2D;
	3004
	3005	itvNum = fabs(diff)/realPrecision + 1;
	3006
	3007	if (itvNum < exe_params->intvCapacity)
	3008	{
	3009	if (diff < 0) itvNum = -itvNum;
	3010	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3011	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3012	}
	3013	else
	3014	{
	3015	type[lIndex] = 0;
	3016	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3017	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3018	memcpy(preDataBytes,vce->curBytes,8);
	3019	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3020	P0[index2D] = vce->data;
	3021	}
	3022	}
	3023
	3024	/* Process Row-s3+1 --> Row-e3 */
	3025	for (i = 1; i < R3; i++)
	3026	{
	3027	/* Process Row-s3+i data s4 */
	3028	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4;
	3029	lIndex = lR234+kR34+i*R4;
	3030	index2D = i*R4;
	3031
	3032	pred2D = P0[index2D-R4] + P1[index2D] - P1[index2D-R4];
	3033	diff = spaceFillingValue[gIndex] - pred2D;
	3034
	3035	itvNum = fabs(diff)/realPrecision + 1;
	3036
	3037	if (itvNum < exe_params->intvCapacity)
	3038	{
	3039	if (diff < 0) itvNum = -itvNum;
	3040	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3041	P0[index2D] = pred2D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3042	}
	3043	else
	3044	{
	3045	type[lIndex] = 0;
	3046	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3047	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3048	memcpy(preDataBytes,vce->curBytes,8);
	3049	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3050	P0[index2D] = vce->data;
	3051	}
	3052
	3053	/* Process Row-s3+i data s4+1 --> data e4 */
	3054	for (j = 1; j < R4; j++)
	3055	{
	3056	gIndex = (s1+l)r234+(s2+k)r34+(s3+i)*r4+s4+j;
	3057	lIndex = lR234+kR34+i*R4+j;
	3058	index2D = i*R4+j;
	3059
	3060	// printf ("global index = %d, local index = %d\n", gIndex, lIndex);
	3061
	3062	pred3D = P0[index2D-1] + P0[index2D-R4]+ P1[index2D] - P0[index2D-R4-1] - P1[index2D-R4] - P1[index2D-1] + P1[index2D-R4-1];
	3063	diff = spaceFillingValue[gIndex] - pred3D;
	3064
	3065	itvNum = fabs(diff)/realPrecision + 1;
	3066
	3067	if (itvNum < exe_params->intvCapacity)
	3068	{
	3069	if (diff < 0) itvNum = -itvNum;
	3070	type[lIndex] = (int) (itvNum/2) + exe_params->intvRadius;
	3071	P0[index2D] = pred3D + 2 * (type[lIndex] - exe_params->intvRadius) * realPrecision;
	3072	}
	3073	else
	3074	{
	3075	type[lIndex] = 0;
	3076	compressSingleDoubleValue(vce, spaceFillingValue[gIndex], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
	3077	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
	3078	memcpy(preDataBytes,vce->curBytes,8);
	3079	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
	3080	P0[index2D] = vce->data;
	3081	}
	3082	}
	3083	}
	3084
	3085	double *Pt;
	3086	Pt = P1;
	3087	P1 = P0;
	3088	P0 = Pt;
	3089	}
	3090	}
	3091
	3092	free(P0);
	3093	free(P1);
	3094	size_t exactDataNum = exactLeadNumArray->size;
	3095
	3096	TightDataPointStorageD* tdps;
	3097
	3098	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
	3099	type, exactMidByteArray->array, exactMidByteArray->size,
	3100	exactLeadNumArray->array,
	3101	resiBitArray->array, resiBitArray->size,
	3102	resiBitsLength,
	3103	realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
	3104
	3105	//free memory
	3106	free_DIA(exactLeadNumArray);
	3107	free_DIA(resiBitArray);
	3108	free(type);
	3109	free(vce);
	3110	free(lce);
	3111	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
	3112
	3113	return tdps;
	3114	}
	3115
	3116	/**
	3117	*
	3118	* This is a fast implementation for optimize_intervals_double_3D()
	3119	* */
	3120	unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision){
	3121	size_t i;
	3122	size_t radiusIndex;
	3123	size_t r23=r2*r3;
	3124	double pred_value = 0, pred_err;
	3125	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3126	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
	3127	size_t totalSampleSize = 0;
	3128
	3129	size_t offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
	3130	size_t offset_count_2;
	3131	double * data_pos = oriData + r23 + r3 + offset_count;
	3132	size_t n1_count = 1, n2_count = 1; // count i,j sum
	3133	size_t len = r1 * r2 * r3;
	3134	while(data_pos - oriData < len){
	3135	totalSampleSize++;
	3136	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
	3137	pred_err = fabs(pred_value - *data_pos);
	3138	radiusIndex = (pred_err/realPrecision+1)/2;
	3139	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3140	{
	3141	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3142	}
	3143	intervals[radiusIndex]++;
	3144	offset_count += confparams_cpr->sampleDistance;
	3145	if(offset_count >= r3){
	3146	n2_count ++;
	3147	if(n2_count == r2){
	3148	n1_count ++;
	3149	n2_count = 1;
	3150	data_pos += r3;
	3151	}
	3152	offset_count_2 = (n1_count + n2_count) % confparams_cpr->sampleDistance;
	3153	data_pos += (r3 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
	3154	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
	3155	if(offset_count == 0) offset_count ++;
	3156	}
	3157	else data_pos += confparams_cpr->sampleDistance;
	3158	}
	3159	//compute the appropriate number
	3160	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3161	size_t sum = 0;
	3162	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3163	{
	3164	sum += intervals[i];
	3165	if(sum>targetCount)
	3166	break;
	3167	}
	3168	if(i>=confparams_cpr->maxRangeRadius)
	3169	i = confparams_cpr->maxRangeRadius-1;
	3170	unsigned int accIntervals = 2*(i+1);
	3171	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3172
	3173	if(powerOf2<32)
	3174	powerOf2 = 32;
	3175	free(intervals);
	3176	return powerOf2;
	3177	}
	3178
	3179	unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision)
	3180	{
	3181	size_t i;
	3182	size_t radiusIndex;
	3183	double pred_value = 0, pred_err;
	3184	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3185	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
[9ee2ce3]	3186	size_t totalSampleSize = 0;
[2c47b73]	3187
	3188	size_t offset_count = confparams_cpr->sampleDistance - 1; // count r2 offset
	3189	size_t offset_count_2;
	3190	double * data_pos = oriData + r2 + offset_count;
	3191	size_t n1_count = 1; // count i sum
	3192	size_t len = r1 * r2;
	3193	while(data_pos - oriData < len){
	3194	totalSampleSize++;
	3195	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
	3196	pred_err = fabs(pred_value - *data_pos);
	3197	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3198	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3199	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3200	intervals[radiusIndex]++;
	3201
	3202	offset_count += confparams_cpr->sampleDistance;
	3203	if(offset_count >= r2){
	3204	n1_count ++;
	3205	offset_count_2 = n1_count % confparams_cpr->sampleDistance;
	3206	data_pos += (r2 + confparams_cpr->sampleDistance - offset_count) + (confparams_cpr->sampleDistance - offset_count_2);
	3207	offset_count = (confparams_cpr->sampleDistance - offset_count_2);
	3208	if(offset_count == 0) offset_count ++;
	3209	}
	3210	else data_pos += confparams_cpr->sampleDistance;
	3211	}
	3212
	3213	//compute the appropriate number
	3214	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3215	size_t sum = 0;
	3216	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3217	{
	3218	sum += intervals[i];
	3219	if(sum>targetCount)
	3220	break;
	3221	}
	3222	if(i>=confparams_cpr->maxRangeRadius)
	3223	i = confparams_cpr->maxRangeRadius-1;
	3224	unsigned int accIntervals = 2*(i+1);
	3225	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3226
	3227	if(powerOf2<32)
	3228	powerOf2 = 32;
	3229
	3230	free(intervals);
	3231	return powerOf2;
	3232	}
	3233
	3234	unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision)
	3235	{
	3236	size_t i = 0, radiusIndex;
	3237	double pred_value = 0, pred_err;
	3238	size_t intervals = (size_t)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
	3239	memset(intervals, 0, confparams_cpr->maxRangeRadius*sizeof(size_t));
[9ee2ce3]	3240	size_t totalSampleSize = 0;
[2c47b73]	3241
	3242	double * data_pos = oriData + 2;
	3243	while(data_pos - oriData < dataLength){
	3244	totalSampleSize++;
	3245	pred_value = data_pos[-1];
	3246	pred_err = fabs(pred_value - *data_pos);
	3247	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3248	if(radiusIndex>=confparams_cpr->maxRangeRadius)
	3249	radiusIndex = confparams_cpr->maxRangeRadius - 1;
	3250	intervals[radiusIndex]++;
	3251
	3252	data_pos += confparams_cpr->sampleDistance;
	3253	}
	3254	//compute the appropriate number
	3255	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
	3256	size_t sum = 0;
	3257	for(i=0;i<confparams_cpr->maxRangeRadius;i++)
	3258	{
	3259	sum += intervals[i];
	3260	if(sum>targetCount)
	3261	break;
	3262	}
	3263	if(i>=confparams_cpr->maxRangeRadius)
	3264	i = confparams_cpr->maxRangeRadius-1;
	3265
	3266	unsigned int accIntervals = 2*(i+1);
	3267	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3268
	3269	if(powerOf2<32)
	3270	powerOf2 = 32;
	3271
	3272	free(intervals);
	3273	return powerOf2;
	3274	}
[9ee2ce3]	3275
	3276	/* The code above is for SZ 1.4.13; the code below is for SZ 2.0. */
	3277	unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
	3278	{
	3279	double mean = 0.0;
	3280	size_t len = r1 * r2;
	3281	size_t mean_distance = (int) (sqrt(len));
	3282
	3283	double * data_pos = oriData;
	3284	size_t mean_count = 0;
	3285	while(data_pos - oriData < len){
	3286	mean += *data_pos;
	3287	mean_count ++;
	3288	data_pos += mean_distance;
	3289	}
	3290	if(mean_count > 0) mean /= mean_count;
	3291	size_t range = 8192;
	3292	size_t radius = 4096;
	3293	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
	3294	memset(freq_intervals, 0, range*sizeof(size_t));
	3295
	3296	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
	3297	int sampleDistance = confparams_cpr->sampleDistance;
	3298	double predThreshold = confparams_cpr->predThreshold;
	3299
	3300	size_t i;
	3301	size_t radiusIndex;
	3302	double pred_value = 0, pred_err;
	3303	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
	3304	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
	3305
	3306	double mean_diff;
	3307	ptrdiff_t freq_index;
	3308	size_t freq_count = 0;
	3309	size_t n1_count = 1;
	3310	size_t offset_count = sampleDistance - 1;
	3311	size_t offset_count_2 = 0;
	3312	size_t sample_count = 0;
	3313	data_pos = oriData + r2 + offset_count;
	3314	while(data_pos - oriData < len){
	3315	pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
	3316	pred_err = fabs(pred_value - *data_pos);
	3317	if(pred_err < realPrecision) freq_count ++;
	3318	radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
	3319	if(radiusIndex>=maxRangeRadius)
	3320	radiusIndex = maxRangeRadius - 1;
	3321	intervals[radiusIndex]++;
	3322
	3323	mean_diff = *data_pos - mean;
	3324	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
	3325	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
	3326	if(freq_index <= 0){
	3327	freq_intervals[0] ++;
	3328	}
	3329	else if(freq_index >= range){
	3330	freq_intervals[range - 1] ++;
	3331	}
	3332	else{
	3333	freq_intervals[freq_index] ++;
	3334	}
	3335	offset_count += sampleDistance;
	3336	if(offset_count >= r2){
	3337	n1_count ++;
	3338	offset_count_2 = n1_count % sampleDistance;
	3339	data_pos += (r2 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
	3340	offset_count = (sampleDistance - offset_count_2);
	3341	if(offset_count == 0) offset_count ++;
	3342	}
	3343	else data_pos += sampleDistance;
	3344	sample_count ++;
	3345	}
	3346	max_freq = freq_count 1.0/ sample_count;
	3347
	3348	//compute the appropriate number
	3349	size_t targetCount = sample_count*predThreshold;
	3350	size_t sum = 0;
	3351	for(i=0;i<maxRangeRadius;i++)
	3352	{
	3353	sum += intervals[i];
	3354	if(sum>targetCount)
	3355	break;
	3356	}
	3357	if(i>=maxRangeRadius)
	3358	i = maxRangeRadius-1;
	3359	unsigned int accIntervals = 2*(i+1);
	3360	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3361
	3362	if(powerOf2<32)
	3363	powerOf2 = 32;
	3364
	3365	// collect frequency
	3366	size_t max_sum = 0;
	3367	size_t max_index = 0;
	3368	size_t tmp_sum;
	3369	size_t * freq_pos = freq_intervals + 1;
	3370	for(size_t i=1; i<range-2; i++){
	3371	tmp_sum = freq_pos[0] + freq_pos[1];
	3372	if(tmp_sum > max_sum){
	3373	max_sum = tmp_sum;
	3374	max_index = i;
	3375	}
	3376	freq_pos ++;
	3377	}
	3378	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
	3379	mean_freq = max_sum 1.0 / sample_count;
	3380
	3381	free(freq_intervals);
	3382	free(intervals);
	3383	return powerOf2;
	3384	}
	3385
	3386	unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double dense_pos, double * max_freq, double * mean_freq)
	3387	{
	3388	double mean = 0.0;
	3389	size_t len = r1 * r2 * r3;
	3390	size_t mean_distance = (int) (sqrt(len));
	3391	double * data_pos = oriData;
	3392	size_t offset_count = 0;
	3393	size_t offset_count_2 = 0;
	3394	size_t mean_count = 0;
	3395	while(data_pos - oriData < len){
	3396	mean += *data_pos;
	3397	mean_count ++;
	3398	data_pos += mean_distance;
	3399	offset_count += mean_distance;
	3400	offset_count_2 += mean_distance;
	3401	if(offset_count >= r3){
	3402	offset_count = 0;
	3403	data_pos -= 1;
	3404	}
	3405	if(offset_count_2 >= r2 * r3){
	3406	offset_count_2 = 0;
	3407	data_pos -= 1;
	3408	}
	3409	}
	3410	if(mean_count > 0) mean /= mean_count;
	3411	size_t range = 8192;
	3412	size_t radius = 4096;
	3413	size_t * freq_intervals = (size_t ) malloc(rangesizeof(size_t));
	3414	memset(freq_intervals, 0, range*sizeof(size_t));
	3415
	3416	unsigned int maxRangeRadius = confparams_cpr->maxRangeRadius;
	3417	int sampleDistance = confparams_cpr->sampleDistance;
	3418	double predThreshold = confparams_cpr->predThreshold;
	3419
	3420	size_t i;
	3421	size_t radiusIndex;
	3422	size_t r23=r2*r3;
	3423	double pred_value = 0, pred_err;
	3424	size_t intervals = (size_t)malloc(maxRangeRadius*sizeof(size_t));
	3425	memset(intervals, 0, maxRangeRadius*sizeof(size_t));
	3426
	3427	double mean_diff;
	3428	ptrdiff_t freq_index;
	3429	size_t freq_count = 0;
	3430	size_t sample_count = 0;
	3431
	3432	offset_count = confparams_cpr->sampleDistance - 2; // count r3 offset
	3433	data_pos = oriData + r23 + r3 + offset_count;
	3434	size_t n1_count = 1, n2_count = 1; // count i,j sum
	3435
	3436	while(data_pos - oriData < len){
	3437
	3438	pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
	3439	pred_err = fabs(pred_value - *data_pos);
	3440	if(pred_err < realPrecision) freq_count ++;
	3441	radiusIndex = (pred_err/realPrecision+1)/2;
	3442	if(radiusIndex>=maxRangeRadius)
	3443	{
	3444	radiusIndex = maxRangeRadius - 1;
	3445	}
	3446	intervals[radiusIndex]++;
	3447
	3448	mean_diff = *data_pos - mean;
	3449	if(mean_diff > 0) freq_index = (ptrdiff_t)(mean_diff/realPrecision) + radius;
	3450	else freq_index = (ptrdiff_t)(mean_diff/realPrecision) - 1 + radius;
	3451	if(freq_index <= 0){
	3452	freq_intervals[0] ++;
	3453	}
	3454	else if(freq_index >= range){
	3455	freq_intervals[range - 1] ++;
	3456	}
	3457	else{
	3458	freq_intervals[freq_index] ++;
	3459	}
	3460	offset_count += sampleDistance;
	3461	if(offset_count >= r3){
	3462	n2_count ++;
	3463	if(n2_count == r2){
	3464	n1_count ++;
	3465	n2_count = 1;
	3466	data_pos += r3;
	3467	}
	3468	offset_count_2 = (n1_count + n2_count) % sampleDistance;
	3469	data_pos += (r3 + sampleDistance - offset_count) + (sampleDistance - offset_count_2);
	3470	offset_count = (sampleDistance - offset_count_2);
	3471	if(offset_count == 0) offset_count ++;
	3472	}
	3473	else data_pos += sampleDistance;
	3474	sample_count ++;
	3475	}
	3476	max_freq = freq_count 1.0/ sample_count;
	3477
	3478	//compute the appropriate number
	3479	size_t targetCount = sample_count*predThreshold;
	3480	size_t sum = 0;
	3481	for(i=0;i<maxRangeRadius;i++)
	3482	{
	3483	sum += intervals[i];
	3484	if(sum>targetCount)
	3485	break;
	3486	}
	3487	if(i>=maxRangeRadius)
	3488	i = maxRangeRadius-1;
	3489	unsigned int accIntervals = 2*(i+1);
	3490	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
	3491
	3492	if(powerOf2<32)
	3493	powerOf2 = 32;
	3494	// collect frequency
	3495	size_t max_sum = 0;
	3496	size_t max_index = 0;
	3497	size_t tmp_sum;
	3498	size_t * freq_pos = freq_intervals + 1;
	3499	for(size_t i=1; i<range-2; i++){
	3500	tmp_sum = freq_pos[0] + freq_pos[1];
	3501	if(tmp_sum > max_sum){
	3502	max_sum = tmp_sum;
	3503	max_index = i;
	3504	}
	3505	freq_pos ++;
	3506	}
	3507	dense_pos = mean + realPrecision (ptrdiff_t)(max_index + 1 - radius);
	3508	mean_freq = max_sum 1.0 / sample_count;
	3509
	3510	free(freq_intervals);
	3511	free(intervals);
	3512	return powerOf2;
	3513	}
	3514
	/* Smaller of a and b. Arguments and the whole expansion are parenthesized so the macro
	 * behaves as a single operand inside larger expressions; each argument may still be
	 * evaluated twice, so do not pass expressions with side effects. */
	#define MIN(a, b) ((a) < (b) ? (a) : (b))
	3516	unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, double realPrecision, size_t comp_size){
	3517
	3518	unsigned int quantization_intervals;
	3519	double sz_sample_correct_freq = -1;//0.5; //-1
	3520	double dense_pos;
	3521	double mean_flush_freq;
	3522	unsigned char use_mean = 0;
	3523
	3524	if(exe_params->optQuantMode==1)
	3525	{
	3526	quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
	3527	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
	3528	updateQuantizationInfo(quantization_intervals);
	3529	}
	3530	else{
	3531	quantization_intervals = exe_params->intvCapacity;
	3532	}
	3533
	3534	// calculate block dims
	3535	size_t num_x, num_y;
	3536	size_t block_size = 16;
	3537
	3538	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
	3539	SZ_COMPUTE_2D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
	3540
	3541	size_t split_index_x, split_index_y;
	3542	size_t early_blockcount_x, early_blockcount_y;
	3543	size_t late_blockcount_x, late_blockcount_y;
	3544	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
	3545	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
	3546
	3547	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y;
	3548	size_t num_blocks = num_x * num_y;
	3549	size_t num_elements = r1 * r2;
	3550
	3551	size_t dim0_offset = r2;
	3552
	3553	int * result_type = (int ) malloc(num_elements sizeof(int));
	3554	size_t unpred_data_max_size = max_num_block_elements;
	3555	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
	3556	size_t total_unpred = 0;
	3557	size_t unpredictable_count;
	3558	double * data_pos = oriData;
	3559	int * type = result_type;
	3560	size_t offset_x, offset_y;
	3561	size_t current_blockcount_x, current_blockcount_y;
	3562
	3563	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
	3564	double * reg_params_pos = reg_params;
	3565	// move regression part out
	3566	size_t params_offset_b = num_blocks;
	3567	size_t params_offset_c = 2*num_blocks;
	3568	for(size_t i=0; i<num_x; i++){
	3569	for(size_t j=0; j<num_y; j++){
	3570	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	3571	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	3572	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	3573	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	3574
	3575	data_pos = oriData + offset_x * dim0_offset + offset_y;
	3576
	3577	{
	3578	double * cur_data_pos = data_pos;
	3579	double fx = 0.0;
	3580	double fy = 0.0;
	3581	double f = 0;
	3582	double sum_x;
	3583	double curData;
	3584	for(size_t i=0; i<current_blockcount_x; i++){
	3585	sum_x = 0;
	3586	for(size_t j=0; j<current_blockcount_y; j++){
	3587	curData = *cur_data_pos;
	3588	sum_x += curData;
	3589	fy += curData * j;
	3590	cur_data_pos ++;
	3591	}
	3592	fx += sum_x * i;
	3593	f += sum_x;
	3594	cur_data_pos += dim0_offset - current_blockcount_y;
	3595	}
	3596	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y);
	3597	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
	3598	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
	3599	reg_params_pos[params_offset_c] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2);
	3600	}
	3601
	3602	reg_params_pos ++;
	3603	}
	3604	}
	3605
	3606	//Compress coefficient arrays
	3607	double precision_a, precision_b, precision_c;
	3608	double rel_param_err = 0.15/3;
	3609	precision_a = rel_param_err * realPrecision / late_blockcount_x;
	3610	precision_b = rel_param_err * realPrecision / late_blockcount_y;
	3611	precision_c = rel_param_err * realPrecision;
	3612
	3613	double mean = 0;
	3614	use_mean = 0;
	3615	if(use_mean){
	3616	// compute mean
	3617	double sum = 0.0;
	3618	size_t mean_count = 0;
	3619	for(size_t i=0; i<num_elements; i++){
	3620	if(fabs(oriData[i] - dense_pos) < realPrecision){
	3621	sum += oriData[i];
	3622	mean_count ++;
	3623	}
	3624	}
	3625	if(mean_count > 0) mean = sum / mean_count;
	3626	}
	3627
	3628
	3629	double tmp_realPrecision = realPrecision;
	3630
	3631	// use two prediction buffers for higher performance
	3632	double * unpredictable_data = result_unpredictable_data;
	3633	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
	3634	memset(indicator, 0, num_blocks * sizeof(unsigned char));
	3635	size_t reg_count = 0;
	3636	size_t strip_dim_0 = early_blockcount_x + 1;
	3637	size_t strip_dim_1 = r2 + 1;
	3638	size_t strip_dim0_offset = strip_dim_1;
	3639	unsigned char * indicator_pos = indicator;
	3640	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
	3641	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
	3642	memset(prediction_buffer_1, 0, prediction_buffer_size);
	3643	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
	3644	memset(prediction_buffer_2, 0, prediction_buffer_size);
	3645	double * cur_pb_buf = prediction_buffer_1;
	3646	double * next_pb_buf = prediction_buffer_2;
	3647	double * cur_pb_buf_pos;
	3648	double * next_pb_buf_pos;
	3649	int intvCapacity = exe_params->intvCapacity;
	3650	int intvRadius = exe_params->intvRadius;
	3651	int use_reg = 0;
	3652
	3653	reg_params_pos = reg_params;
	3654	// compress the regression coefficients on the fly
	3655	double last_coeffcients[3] = {0.0};
	3656	int coeff_intvCapacity_sz = 65536;
	3657	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
	3658	int * coeff_type[3];
	3659	int * coeff_result_type = (int ) malloc(num_blocks3*sizeof(int));
	3660	double * coeff_unpred_data[3];
	3661	double * coeff_unpredictable_data = (double ) malloc(num_blocks3*sizeof(double));
	3662	double precision[3];
	3663	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
	3664	for(int i=0; i<3; i++){
	3665	coeff_type[i] = coeff_result_type + i * num_blocks;
	3666	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
	3667	}
	3668	int coeff_index = 0;
	3669	unsigned int coeff_unpredictable_count[3] = {0};
	3670	if(use_mean){
	3671	type = result_type;
	3672	int intvCapacity_sz = intvCapacity - 2;
	3673	for(size_t i=0; i<num_x; i++){
	3674	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	3675	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	3676	data_pos = oriData + offset_x * dim0_offset;
	3677
	3678	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
	3679	next_pb_buf_pos = next_pb_buf + 1;
	3680	double * pb_pos = cur_pb_buf_pos;
	3681	double * next_pb_pos = next_pb_buf_pos;
	3682
	3683	for(size_t j=0; j<num_y; j++){
	3684	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	3685	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	3686
	3687	/sampling: decide which predictor to use (regression or lorenzo)/
	3688	{
	3689	double * cur_data_pos;
	3690	double curData;
	3691	double pred_reg, pred_sz;
	3692	double err_sz = 0.0, err_reg = 0.0;
	3693	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
	3694	// [1, 9] [3, 7] [7, 3] [9, 1]
	3695	int count = 0;
	3696	for(int i=1; i<current_blockcount_x; i+=2){
	3697	cur_data_pos = data_pos + i * dim0_offset + i;
	3698	curData = *cur_data_pos;
	3699	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	3700	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
	3701
	3702	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
	3703
	3704	err_reg += fabs(pred_reg - curData);
	3705
	3706	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
	3707	curData = *cur_data_pos;
	3708	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	3709	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
	3710	err_sz += MIN(fabs(pred_sz - curData) + realPrecision*0.81, fabs(mean - curData));
	3711
	3712	err_reg += fabs(pred_reg - curData);
	3713
	3714	count += 2;
	3715	}
	3716
	3717	use_reg = (err_reg < err_sz);
	3718	}
	3719	if(use_reg)
	3720	{
	3721	{
	3722	/predict coefficients in current block via previous reg_block/
	3723	double cur_coeff;
	3724	double diff, itvNum;
	3725	for(int e=0; e<3; e++){
	3726	cur_coeff = reg_params_pos[e*num_blocks];
	3727	diff = cur_coeff - last_coeffcients[e];
	3728	itvNum = fabs(diff)/precision[e] + 1;
	3729	if (itvNum < coeff_intvCapacity_sz){
	3730	if (diff < 0) itvNum = -itvNum;
	3731	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	3732	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	3733	//ganrantee comporession error against the case of machine-epsilon
	3734	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	3735	coeff_type[e][coeff_index] = 0;
	3736	last_coeffcients[e] = cur_coeff;
	3737	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	3738	}
	3739	}
	3740	else{
	3741	coeff_type[e][coeff_index] = 0;
	3742	last_coeffcients[e] = cur_coeff;
	3743	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	3744	}
	3745	}
	3746	coeff_index ++;
	3747	}
	3748	double curData;
	3749	double pred;
	3750	double itvNum;
	3751	double diff;
	3752	size_t index = 0;
	3753	size_t block_unpredictable_count = 0;
	3754	double * cur_data_pos = data_pos;
	3755	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	3756	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	3757	curData = *cur_data_pos;
	3758	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3759	diff = curData - pred;
	3760	itvNum = fabs(diff)/realPrecision + 1;
	3761	if (itvNum < intvCapacity){
	3762	if (diff < 0) itvNum = -itvNum;
	3763	type[index] = (int) (itvNum/2) + intvRadius;
	3764	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3765	//ganrantee comporession error against the case of machine-epsilon
	3766	if(fabs(curData - pred)>realPrecision){
	3767	type[index] = 0;
	3768	pred = curData;
	3769	unpredictable_data[block_unpredictable_count ++] = curData;
	3770	}
	3771	}
	3772	else{
	3773	type[index] = 0;
	3774	pred = curData;
	3775	unpredictable_data[block_unpredictable_count ++] = curData;
	3776	}
	3777	index ++;
	3778	cur_data_pos ++;
	3779	}
	3780	/dealing with the last jj (boundary)/
	3781	{
	3782	size_t jj = current_blockcount_y - 1;
	3783	curData = *cur_data_pos;
	3784	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3785	diff = curData - pred;
	3786	itvNum = fabs(diff)/realPrecision + 1;
	3787	if (itvNum < intvCapacity){
	3788	if (diff < 0) itvNum = -itvNum;
	3789	type[index] = (int) (itvNum/2) + intvRadius;
	3790	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3791	//ganrantee comporession error against the case of machine-epsilon
	3792	if(fabs(curData - pred)>realPrecision){
	3793	type[index] = 0;
	3794	pred = curData;
	3795	unpredictable_data[block_unpredictable_count ++] = curData;
	3796	}
	3797	}
	3798	else{
	3799	type[index] = 0;
	3800	pred = curData;
	3801	unpredictable_data[block_unpredictable_count ++] = curData;
	3802	}
	3803
	3804	// assign value to block surfaces
	3805	pb_pos[ii * strip_dim0_offset + jj] = pred;
	3806	index ++;
	3807	cur_data_pos ++;
	3808	}
	3809	cur_data_pos += dim0_offset - current_blockcount_y;
	3810	}
	3811	/dealing with the last ii (boundary)/
	3812	{
	3813	size_t ii = current_blockcount_x - 1;
	3814	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	3815	curData = *cur_data_pos;
	3816	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3817	diff = curData - pred;
	3818	itvNum = fabs(diff)/realPrecision + 1;
	3819	if (itvNum < intvCapacity){
	3820	if (diff < 0) itvNum = -itvNum;
	3821	type[index] = (int) (itvNum/2) + intvRadius;
	3822	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3823	//ganrantee comporession error against the case of machine-epsilon
	3824	if(fabs(curData - pred)>realPrecision){
	3825	type[index] = 0;
	3826	pred = curData;
	3827	unpredictable_data[block_unpredictable_count ++] = curData;
	3828	}
	3829	}
	3830	else{
	3831	type[index] = 0;
	3832	pred = curData;
	3833	unpredictable_data[block_unpredictable_count ++] = curData;
	3834	}
	3835	// assign value to next prediction buffer
	3836	next_pb_pos[jj] = pred;
	3837	index ++;
	3838	cur_data_pos ++;
	3839	}
	3840	/dealing with the last jj (boundary)/
	3841	{
	3842	size_t jj = current_blockcount_y - 1;
	3843	curData = *cur_data_pos;
	3844	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	3845	diff = curData - pred;
	3846	itvNum = fabs(diff)/realPrecision + 1;
	3847	if (itvNum < intvCapacity){
	3848	if (diff < 0) itvNum = -itvNum;
	3849	type[index] = (int) (itvNum/2) + intvRadius;
	3850	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	3851	//ganrantee comporession error against the case of machine-epsilon
	3852	if(fabs(curData - pred)>realPrecision){
	3853	type[index] = 0;
	3854	pred = curData;
	3855	unpredictable_data[block_unpredictable_count ++] = curData;
	3856	}
	3857	}
	3858	else{
	3859	type[index] = 0;
	3860	pred = curData;
	3861	unpredictable_data[block_unpredictable_count ++] = curData;
	3862	}
	3863
	3864	// assign value to block surfaces
	3865	pb_pos[ii * strip_dim0_offset + jj] = pred;
	3866	// assign value to next prediction buffer
	3867	next_pb_pos[jj] = pred;
	3868
	3869	index ++;
	3870	cur_data_pos ++;
	3871	}
	3872	} // end ii == -1
	3873	unpredictable_count = block_unpredictable_count;
	3874	total_unpred += unpredictable_count;
	3875	unpredictable_data += unpredictable_count;
	3876	reg_count ++;
	3877	}// end use_reg
	3878	else{
	3879	// use SZ
	3880	// SZ predication
	3881	unpredictable_count = 0;
	3882	double * cur_pb_pos = pb_pos;
	3883	double * cur_data_pos = data_pos;
	3884	double curData;
	3885	double pred2D;
	3886	double itvNum, diff;
	3887	size_t index = 0;
	3888	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	3889	for(size_t jj=0; jj<current_blockcount_y; jj++){
	3890	curData = *cur_data_pos;
	3891	if(fabs(curData - mean) <= realPrecision){
	3892	// adjust type[index] to intvRadius for coherence with freq in reg
	3893	type[index] = intvRadius;
	3894	*cur_pb_pos = mean;
	3895	}
	3896	else
	3897	{
	3898	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	3899	diff = curData - pred2D;
	3900	itvNum = fabs(diff)/realPrecision + 1;
	3901	if (itvNum < intvCapacity_sz){
	3902	if (diff < 0) itvNum = -itvNum;
	3903	type[index] = (int) (itvNum/2) + intvRadius;
	3904	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	3905	if(type[index] <= intvRadius) type[index] -= 1;
	3906	//ganrantee comporession error against the case of machine-epsilon
	3907	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	3908	type[index] = 0;
	3909	*cur_pb_pos = curData;
	3910	unpredictable_data[unpredictable_count ++] = curData;
	3911	}
	3912	}
	3913	else{
	3914	type[index] = 0;
	3915	*cur_pb_pos = curData;
	3916	unpredictable_data[unpredictable_count ++] = curData;
	3917	}
	3918	}
	3919	index ++;
	3920	cur_pb_pos ++;
	3921	cur_data_pos ++;
	3922	}
	3923	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
	3924	cur_data_pos += dim0_offset - current_blockcount_y;
	3925	}
	3926	/dealing with the last ii (boundary)/
	3927	{
	3928	// ii == current_blockcount_x - 1
	3929	for(size_t jj=0; jj<current_blockcount_y; jj++){
	3930	curData = *cur_data_pos;
	3931	if(fabs(curData - mean) <= realPrecision){
	3932	// adjust type[index] to intvRadius for coherence with freq in reg
	3933	type[index] = intvRadius;
	3934	*cur_pb_pos = mean;
	3935	}
	3936	else
	3937	{
	3938	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	3939	diff = curData - pred2D;
	3940	itvNum = fabs(diff)/realPrecision + 1;
	3941	if (itvNum < intvCapacity_sz){
	3942	if (diff < 0) itvNum = -itvNum;
	3943	type[index] = (int) (itvNum/2) + intvRadius;
	3944	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	3945	if(type[index] <= intvRadius) type[index] -= 1;
	3946	//ganrantee comporession error against the case of machine-epsilon
	3947	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	3948	type[index] = 0;
	3949	*cur_pb_pos = curData;
	3950	unpredictable_data[unpredictable_count ++] = curData;
	3951	}
	3952	}
	3953	else{
	3954	type[index] = 0;
	3955	*cur_pb_pos = curData;
	3956	unpredictable_data[unpredictable_count ++] = curData;
	3957	}
	3958	}
	3959	next_pb_pos[jj] = *cur_pb_pos;
	3960	index ++;
	3961	cur_pb_pos ++;
	3962	cur_data_pos ++;
	3963	}
	3964	}
	3965	total_unpred += unpredictable_count;
	3966	unpredictable_data += unpredictable_count;
	3967	// change indicator
	3968	indicator_pos[j] = 1;
	3969	}// end SZ
	3970	reg_params_pos ++;
	3971	data_pos += current_blockcount_y;
	3972	pb_pos += current_blockcount_y;
	3973	next_pb_pos += current_blockcount_y;
	3974	type += current_blockcount_x * current_blockcount_y;
	3975	}// end j
	3976	indicator_pos += num_y;
	3977	double * tmp;
	3978	tmp = cur_pb_buf;
	3979	cur_pb_buf = next_pb_buf;
	3980	next_pb_buf = tmp;
	3981	}// end i
	3982	}// end use mean
	3983	else{
	3984	type = result_type;
	3985	int intvCapacity_sz = intvCapacity - 2;
	3986	for(size_t i=0; i<num_x; i++){
	3987	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	3988	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	3989	data_pos = oriData + offset_x * dim0_offset;
	3990
	3991	cur_pb_buf_pos = cur_pb_buf + strip_dim0_offset + 1;
	3992	next_pb_buf_pos = next_pb_buf + 1;
	3993	double * pb_pos = cur_pb_buf_pos;
	3994	double * next_pb_pos = next_pb_buf_pos;
	3995
	3996	for(size_t j=0; j<num_y; j++){
	3997	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	3998	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	3999	/sampling/
	4000	{
	4001	// sample [2i + 1, 2i + 1] [2i + 1, bs - 2i]
	4002	double * cur_data_pos;
	4003	double curData;
	4004	double pred_reg, pred_sz;
	4005	double err_sz = 0.0, err_reg = 0.0;
	4006	// [1, 1] [3, 3] [5, 5] [7, 7] [9, 9]
	4007	// [1, 9] [3, 7] [7, 3] [9, 1]
	4008	int count = 0;
	4009	for(int i=1; i<current_blockcount_x; i+=2){
	4010	cur_data_pos = data_pos + i * dim0_offset + i;
	4011	curData = *cur_data_pos;
	4012	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4013	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
	4014	err_sz += fabs(pred_sz - curData);
	4015	err_reg += fabs(pred_reg - curData);
	4016
	4017	cur_data_pos = data_pos + i * dim0_offset + (block_size - i);
	4018	curData = *cur_data_pos;
	4019	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
	4020	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * (block_size - i) + reg_params_pos[params_offset_c];
	4021	err_sz += fabs(pred_sz - curData);
	4022	err_reg += fabs(pred_reg - curData);
	4023
	4024	count += 2;
	4025	}
	4026	err_sz += realPrecision * count * 0.81;
	4027	use_reg = (err_reg < err_sz);
	4028
	4029	}
	4030	if(use_reg)
	4031	{
	4032	{
	4033	/predict coefficients in current block via previous reg_block/
	4034	double cur_coeff;
	4035	double diff, itvNum;
	4036	for(int e=0; e<3; e++){
	4037	cur_coeff = reg_params_pos[e*num_blocks];
	4038	diff = cur_coeff - last_coeffcients[e];
	4039	itvNum = fabs(diff)/precision[e] + 1;
	4040	if (itvNum < coeff_intvCapacity_sz){
	4041	if (diff < 0) itvNum = -itvNum;
	4042	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	4043	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	4044	//ganrantee comporession error against the case of machine-epsilon
	4045	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	4046	coeff_type[e][coeff_index] = 0;
	4047	last_coeffcients[e] = cur_coeff;
	4048	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4049	}
	4050	}
	4051	else{
	4052	coeff_type[e][coeff_index] = 0;
	4053	last_coeffcients[e] = cur_coeff;
	4054	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4055	}
	4056	}
	4057	coeff_index ++;
	4058	}
	4059	double curData;
	4060	double pred;
	4061	double itvNum;
	4062	double diff;
	4063	size_t index = 0;
	4064	size_t block_unpredictable_count = 0;
	4065	double * cur_data_pos = data_pos;
	4066	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4067	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4068	curData = *cur_data_pos;
	4069	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4070	diff = curData - pred;
	4071	itvNum = fabs(diff)/realPrecision + 1;
	4072	if (itvNum < intvCapacity){
	4073	if (diff < 0) itvNum = -itvNum;
	4074	type[index] = (int) (itvNum/2) + intvRadius;
	4075	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4076	//ganrantee comporession error against the case of machine-epsilon
	4077	if(fabs(curData - pred)>realPrecision){
	4078	type[index] = 0;
	4079	pred = curData;
	4080	unpredictable_data[block_unpredictable_count ++] = curData;
	4081	}
	4082	}
	4083	else{
	4084	type[index] = 0;
	4085	pred = curData;
	4086	unpredictable_data[block_unpredictable_count ++] = curData;
	4087	}
	4088	index ++;
	4089	cur_data_pos ++;
	4090	}
	4091	/dealing with the last jj (boundary)/
	4092	{
	4093	// jj == current_blockcount_y - 1
	4094	size_t jj = current_blockcount_y - 1;
	4095	curData = *cur_data_pos;
	4096	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4097	diff = curData - pred;
	4098	itvNum = fabs(diff)/realPrecision + 1;
	4099	if (itvNum < intvCapacity){
	4100	if (diff < 0) itvNum = -itvNum;
	4101	type[index] = (int) (itvNum/2) + intvRadius;
	4102	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4103	//ganrantee comporession error against the case of machine-epsilon
	4104	if(fabs(curData - pred)>realPrecision){
	4105	type[index] = 0;
	4106	pred = curData;
	4107	unpredictable_data[block_unpredictable_count ++] = curData;
	4108	}
	4109	}
	4110	else{
	4111	type[index] = 0;
	4112	pred = curData;
	4113	unpredictable_data[block_unpredictable_count ++] = curData;
	4114	}
	4115
	4116	// assign value to block surfaces
	4117	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4118	index ++;
	4119	cur_data_pos ++;
	4120	}
	4121	cur_data_pos += dim0_offset - current_blockcount_y;
	4122	}
	4123	/dealing with the last ii (boundary)/
	4124	{
	4125	size_t ii = current_blockcount_x - 1;
	4126	for(size_t jj=0; jj<current_blockcount_y - 1; jj++){
	4127	curData = *cur_data_pos;
	4128	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4129	diff = curData - pred;
	4130	itvNum = fabs(diff)/realPrecision + 1;
	4131	if (itvNum < intvCapacity){
	4132	if (diff < 0) itvNum = -itvNum;
	4133	type[index] = (int) (itvNum/2) + intvRadius;
	4134	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4135	//ganrantee comporession error against the case of machine-epsilon
	4136	if(fabs(curData - pred)>realPrecision){
	4137	type[index] = 0;
	4138	pred = curData;
	4139	unpredictable_data[block_unpredictable_count ++] = curData;
	4140	}
	4141	}
	4142	else{
	4143	type[index] = 0;
	4144	pred = curData;
	4145	unpredictable_data[block_unpredictable_count ++] = curData;
	4146	}
	4147	// assign value to next prediction buffer
	4148	next_pb_pos[jj] = pred;
	4149	index ++;
	4150	cur_data_pos ++;
	4151	}
	4152	/dealing with the last jj (boundary)/
	4153	{
	4154	// jj == current_blockcount_y - 1
	4155	size_t jj = current_blockcount_y - 1;
	4156	curData = *cur_data_pos;
	4157	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2];
	4158	diff = curData - pred;
	4159	itvNum = fabs(diff)/realPrecision + 1;
	4160	if (itvNum < intvCapacity){
	4161	if (diff < 0) itvNum = -itvNum;
	4162	type[index] = (int) (itvNum/2) + intvRadius;
	4163	pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
	4164	//ganrantee comporession error against the case of machine-epsilon
	4165	if(fabs(curData - pred)>realPrecision){
	4166	type[index] = 0;
	4167	pred = curData;
	4168	unpredictable_data[block_unpredictable_count ++] = curData;
	4169	}
	4170	}
	4171	else{
	4172	type[index] = 0;
	4173	pred = curData;
	4174	unpredictable_data[block_unpredictable_count ++] = curData;
	4175	}
	4176
	4177	// assign value to block surfaces
	4178	pb_pos[ii * strip_dim0_offset + jj] = pred;
	4179	// assign value to next prediction buffer
	4180	next_pb_pos[jj] = pred;
	4181
	4182	index ++;
	4183	cur_data_pos ++;
	4184	}
	4185	} // end ii == -1
	4186	unpredictable_count = block_unpredictable_count;
	4187	total_unpred += unpredictable_count;
	4188	unpredictable_data += unpredictable_count;
	4189	reg_count ++;
	4190	}// end use_reg
	4191	else{
	4192	// use SZ
	4193	// SZ predication
	4194	unpredictable_count = 0;
	4195	double * cur_pb_pos = pb_pos;
	4196	double * cur_data_pos = data_pos;
	4197	double curData;
	4198	double pred2D;
	4199	double itvNum, diff;
	4200	size_t index = 0;
	4201	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4202	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4203	curData = *cur_data_pos;
	4204
	4205	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4206	diff = curData - pred2D;
	4207	itvNum = fabs(diff)/realPrecision + 1;
	4208	if (itvNum < intvCapacity_sz){
	4209	if (diff < 0) itvNum = -itvNum;
	4210	type[index] = (int) (itvNum/2) + intvRadius;
	4211	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4212	//ganrantee comporession error against the case of machine-epsilon
	4213	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4214	type[index] = 0;
	4215	*cur_pb_pos = curData;
	4216	unpredictable_data[unpredictable_count ++] = curData;
	4217	}
	4218	}
	4219	else{
	4220	type[index] = 0;
	4221	*cur_pb_pos = curData;
	4222	unpredictable_data[unpredictable_count ++] = curData;
	4223	}
	4224
	4225	index ++;
	4226	cur_pb_pos ++;
	4227	cur_data_pos ++;
	4228	}
	4229	cur_pb_pos += strip_dim0_offset - current_blockcount_y;
	4230	cur_data_pos += dim0_offset - current_blockcount_y;
	4231	}
	4232	/dealing with the last ii (boundary)/
	4233	{
	4234	// ii == current_blockcount_x - 1
	4235	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4236	curData = *cur_data_pos;
	4237
	4238	pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1];
	4239	diff = curData - pred2D;
	4240	itvNum = fabs(diff)/realPrecision + 1;
	4241	if (itvNum < intvCapacity_sz){
	4242	if (diff < 0) itvNum = -itvNum;
	4243	type[index] = (int) (itvNum/2) + intvRadius;
	4244	cur_pb_pos = pred2D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4245	//ganrantee comporession error against the case of machine-epsilon
	4246	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4247	type[index] = 0;
	4248	*cur_pb_pos = curData;
	4249	unpredictable_data[unpredictable_count ++] = curData;
	4250	}
	4251	}
	4252	else{
	4253	type[index] = 0;
	4254	*cur_pb_pos = curData;
	4255	unpredictable_data[unpredictable_count ++] = curData;
	4256	}
	4257	next_pb_pos[jj] = *cur_pb_pos;
	4258	index ++;
	4259	cur_pb_pos ++;
	4260	cur_data_pos ++;
	4261	}
	4262	}
	4263	total_unpred += unpredictable_count;
	4264	unpredictable_data += unpredictable_count;
	4265	// change indicator
	4266	indicator_pos[j] = 1;
	4267	}// end SZ
	4268	reg_params_pos ++;
	4269	data_pos += current_blockcount_y;
	4270	pb_pos += current_blockcount_y;
	4271	next_pb_pos += current_blockcount_y;
	4272	type += current_blockcount_x * current_blockcount_y;
	4273	}// end j
	4274	indicator_pos += num_y;
	4275	double * tmp;
	4276	tmp = cur_pb_buf;
	4277	cur_pb_buf = next_pb_buf;
	4278	next_pb_buf = tmp;
	4279	}// end i
	4280	}
	4281	free(prediction_buffer_1);
	4282	free(prediction_buffer_2);
	4283
	4284	int stateNum = 2*quantization_intervals;
	4285	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	4286
	4287	size_t nodeCount = 0;
	4288	size_t i = 0;
	4289	init(huffmanTree, result_type, num_elements);
	4290	for (i = 0; i < stateNum; i++)
	4291	if (huffmanTree->code[i]) nodeCount++;
	4292	nodeCount = nodeCount*2-1;
	4293
	4294	unsigned char *treeBytes;
	4295	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	4296
	4297	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
	4298	// total size metadata # elements real precision intervals nodeCount huffman block index unpredicatable count mean unpred size elements
	4299	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
	4300	unsigned char * result_pos = result;
	4301	initRandomAccessBytes(result_pos);
	4302	result_pos += meta_data_offset;
	4303
	4304	sizeToBytes(result_pos, num_elements);
	4305	result_pos += exe_params->SZ_SIZE_TYPE;
	4306
	4307	intToBytes_bigEndian(result_pos, block_size);
	4308	result_pos += sizeof(int);
	4309	doubleToBytes(result_pos, realPrecision);
	4310	result_pos += sizeof(double);
	4311	intToBytes_bigEndian(result_pos, quantization_intervals);
	4312	result_pos += sizeof(int);
	4313	intToBytes_bigEndian(result_pos, treeByteSize);
	4314	result_pos += sizeof(int);
	4315	intToBytes_bigEndian(result_pos, nodeCount);
	4316	result_pos += sizeof(int);
	4317	memcpy(result_pos, treeBytes, treeByteSize);
	4318	result_pos += treeByteSize;
	4319	free(treeBytes);
	4320
	4321	memcpy(result_pos, &use_mean, sizeof(unsigned char));
	4322	result_pos += sizeof(unsigned char);
	4323	memcpy(result_pos, &mean, sizeof(double));
	4324	result_pos += sizeof(double);
	4325
	4326	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
	4327	result_pos += indicator_size;
	4328
	4329	//convert the lead/mid/resi to byte stream
	4330	if(reg_count>0){
	4331	for(int e=0; e<3; e++){
	4332	int stateNum = 2*coeff_intvCapacity_sz;
	4333	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	4334	size_t nodeCount = 0;
	4335	init(huffmanTree, coeff_type[e], reg_count);
	4336	size_t i = 0;
	4337	for (i = 0; i < huffmanTree->stateNum; i++)
	4338	if (huffmanTree->code[i]) nodeCount++;
	4339	nodeCount = nodeCount*2-1;
	4340	unsigned char *treeBytes;
	4341	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	4342	doubleToBytes(result_pos, precision[e]);
	4343	result_pos += sizeof(double);
	4344	intToBytes_bigEndian(result_pos, coeff_intvRadius);
	4345	result_pos += sizeof(int);
	4346	intToBytes_bigEndian(result_pos, treeByteSize);
	4347	result_pos += sizeof(int);
	4348	intToBytes_bigEndian(result_pos, nodeCount);
	4349	result_pos += sizeof(int);
	4350	memcpy(result_pos, treeBytes, treeByteSize);
	4351	result_pos += treeByteSize;
	4352	free(treeBytes);
	4353	size_t typeArray_size = 0;
	4354	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
	4355	sizeToBytes(result_pos, typeArray_size);
	4356	result_pos += sizeof(size_t) + typeArray_size;
	4357	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
	4358	result_pos += sizeof(int);
	4359	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
	4360	result_pos += coeff_unpredictable_count[e]*sizeof(double);
	4361	SZ_ReleaseHuffman(huffmanTree);
	4362	}
	4363	}
	4364	free(coeff_result_type);
	4365	free(coeff_unpredictable_data);
	4366
	4367	//record the number of unpredictable data and also store them
	4368	memcpy(result_pos, &total_unpred, sizeof(size_t));
	4369	result_pos += sizeof(size_t);
	4370	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
	4371	result_pos += total_unpred * sizeof(double);
	4372	size_t typeArray_size = 0;
	4373	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
	4374	result_pos += typeArray_size;
	4375
	4376	size_t totalEncodeSize = result_pos - result;
	4377	free(indicator);
	4378	free(result_unpredictable_data);
	4379	free(result_type);
	4380	free(reg_params);
	4381
	4382	SZ_ReleaseHuffman(huffmanTree);
	4383	*comp_size = totalEncodeSize;
	4384
	4385	return result;
	4386	}
	4387
	4388	unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t comp_size){
	4389
	4390	unsigned int quantization_intervals;
	4391	double sz_sample_correct_freq = -1;//0.5; //-1
	4392	double dense_pos;
	4393	double mean_flush_freq;
	4394	unsigned char use_mean = 0;
	4395
	4396	// calculate block dims
	4397	size_t num_x, num_y, num_z;
	4398	size_t block_size = 6;
	4399	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r1, num_x, block_size);
	4400	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r2, num_y, block_size);
	4401	SZ_COMPUTE_3D_NUMBER_OF_BLOCKS(r3, num_z, block_size);
	4402
	4403	size_t split_index_x, split_index_y, split_index_z;
	4404	size_t early_blockcount_x, early_blockcount_y, early_blockcount_z;
	4405	size_t late_blockcount_x, late_blockcount_y, late_blockcount_z;
	4406	SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x);
	4407	SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y);
	4408	SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z);
	4409
	4410	size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z;
	4411	size_t num_blocks = num_x * num_y * num_z;
	4412	size_t num_elements = r1 * r2 * r3;
	4413
	4414	size_t dim0_offset = r2 * r3;
	4415	size_t dim1_offset = r3;
	4416
	4417	int * result_type = (int ) malloc(num_elements sizeof(int));
	4418	size_t unpred_data_max_size = max_num_block_elements;
	4419	double * result_unpredictable_data = (double ) malloc(unpred_data_max_size sizeof(double) * num_blocks);
	4420	size_t total_unpred = 0;
	4421	size_t unpredictable_count;
	4422	size_t max_unpred_count = 0;
	4423	double * data_pos = oriData;
	4424	int * type = result_type;
	4425	size_t type_offset;
	4426	size_t offset_x, offset_y, offset_z;
	4427	size_t current_blockcount_x, current_blockcount_y, current_blockcount_z;
	4428
	4429	double * reg_params = (double ) malloc(num_blocks 4 * sizeof(double));
	4430	double * reg_params_pos = reg_params;
	4431	// move regression part out
	4432	size_t params_offset_b = num_blocks;
	4433	size_t params_offset_c = 2*num_blocks;
	4434	size_t params_offset_d = 3*num_blocks;
	4435	for(size_t i=0; i<num_x; i++){
	4436	for(size_t j=0; j<num_y; j++){
	4437	for(size_t k=0; k<num_z; k++){
	4438	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4439	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4440	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	4441	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4442	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4443	offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
	4444
	4445	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
	4446	/*Calculate regression coefficients*/
	4447	{
	4448	double * cur_data_pos = data_pos;
	4449	double fx = 0.0;
	4450	double fy = 0.0;
	4451	double fz = 0.0;
	4452	double f = 0;
	4453	double sum_x, sum_y;
	4454	double curData;
	4455	for(size_t i=0; i<current_blockcount_x; i++){
	4456	sum_x = 0;
	4457	for(size_t j=0; j<current_blockcount_y; j++){
	4458	sum_y = 0;
	4459	for(size_t k=0; k<current_blockcount_z; k++){
	4460	curData = *cur_data_pos;
	4461	// f += curData;
	4462	// fx += curData * i;
	4463	// fy += curData * j;
	4464	// fz += curData * k;
	4465	sum_y += curData;
	4466	fz += curData * k;
	4467	cur_data_pos ++;
	4468	}
	4469	fy += sum_y * j;
	4470	sum_x += sum_y;
	4471	cur_data_pos += dim1_offset - current_blockcount_z;
	4472	}
	4473	fx += sum_x * i;
	4474	f += sum_x;
	4475	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	4476	}
	4477	double coeff = 1.0 / (current_blockcount_x * current_blockcount_y * current_blockcount_z);
	4478	reg_params_pos[0] = (2 * fx / (current_blockcount_x - 1) - f) * 6 * coeff / (current_blockcount_x + 1);
	4479	reg_params_pos[params_offset_b] = (2 * fy / (current_blockcount_y - 1) - f) * 6 * coeff / (current_blockcount_y + 1);
	4480	reg_params_pos[params_offset_c] = (2 * fz / (current_blockcount_z - 1) - f) * 6 * coeff / (current_blockcount_z + 1);
	4481	reg_params_pos[params_offset_d] = f * coeff - ((current_blockcount_x - 1) * reg_params_pos[0] / 2 + (current_blockcount_y - 1) * reg_params_pos[params_offset_b] / 2 + (current_blockcount_z - 1) * reg_params_pos[params_offset_c] / 2);
	4482	}
	4483	reg_params_pos ++;
	4484	}
	4485	}
	4486	}
	4487
	4488	//Compress coefficient arrays
	4489	double precision_a, precision_b, precision_c, precision_d;
	4490	double rel_param_err = 0.025;
	4491	precision_a = rel_param_err * realPrecision / late_blockcount_x;
	4492	precision_b = rel_param_err * realPrecision / late_blockcount_y;
	4493	precision_c = rel_param_err * realPrecision / late_blockcount_z;
	4494	precision_d = rel_param_err * realPrecision;
	4495
	4496	if(exe_params->optQuantMode==1)
	4497	{
	4498	quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
	4499	if(mean_flush_freq > 0.5 \|\| mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
	4500	updateQuantizationInfo(quantization_intervals);
	4501	}
	4502	else{
	4503	quantization_intervals = exe_params->intvCapacity;
	4504	}
	4505
	4506	double mean = 0;
	4507	if(use_mean){
	4508	// compute mean
	4509	double sum = 0.0;
	4510	size_t mean_count = 0;
	4511	for(size_t i=0; i<num_elements; i++){
	4512	if(fabs(oriData[i] - dense_pos) < realPrecision){
	4513	sum += oriData[i];
	4514	mean_count ++;
	4515	}
	4516	}
	4517	if(mean_count > 0) mean = sum / mean_count;
	4518	}
	4519
	4520	double tmp_realPrecision = realPrecision;
	4521
	4522	// use two prediction buffers for higher performance
	4523	double * unpredictable_data = result_unpredictable_data;
	4524	unsigned char * indicator = (unsigned char ) malloc(num_blocks sizeof(unsigned char));
	4525	memset(indicator, 0, num_blocks * sizeof(unsigned char));
	4526	size_t reg_count = 0;
	4527	size_t strip_dim_0 = early_blockcount_x + 1;
	4528	size_t strip_dim_1 = r2 + 1;
	4529	size_t strip_dim_2 = r3 + 1;
	4530	size_t strip_dim0_offset = strip_dim_1 * strip_dim_2;
	4531	size_t strip_dim1_offset = strip_dim_2;
	4532	unsigned char * indicator_pos = indicator;
	4533
	4534	size_t prediction_buffer_size = strip_dim_0 * strip_dim0_offset * sizeof(double);
	4535	double * prediction_buffer_1 = (double *) malloc(prediction_buffer_size);
	4536	memset(prediction_buffer_1, 0, prediction_buffer_size);
	4537	double * prediction_buffer_2 = (double *) malloc(prediction_buffer_size);
	4538	memset(prediction_buffer_2, 0, prediction_buffer_size);
	4539	double * cur_pb_buf = prediction_buffer_1;
	4540	double * next_pb_buf = prediction_buffer_2;
	4541	double * cur_pb_buf_pos;
	4542	double * next_pb_buf_pos;
	4543	int intvCapacity = exe_params->intvCapacity;
	4544	int intvRadius = exe_params->intvRadius;
	4545	int use_reg = 0;
	4546	double noise = realPrecision * 1.22;
	4547
	4548	reg_params_pos = reg_params;
	4549	// compress the regression coefficients on the fly
	4550	double last_coeffcients[4] = {0.0};
	4551	int coeff_intvCapacity_sz = 65536;
	4552	int coeff_intvRadius = coeff_intvCapacity_sz / 2;
	4553	int * coeff_type[4];
	4554	int * coeff_result_type = (int ) malloc(num_blocks4*sizeof(int));
	4555	double * coeff_unpred_data[4];
	4556	double * coeff_unpredictable_data = (double ) malloc(num_blocks4*sizeof(double));
	4557	double precision[4];
	4558	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
	4559	for(int i=0; i<4; i++){
	4560	coeff_type[i] = coeff_result_type + i * num_blocks;
	4561	coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
	4562	}
	4563	int coeff_index = 0;
	4564	unsigned int coeff_unpredictable_count[4] = {0};
	4565
	4566	if(use_mean){
	4567	int intvCapacity_sz = intvCapacity - 2;
	4568	for(size_t i=0; i<num_x; i++){
	4569	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4570	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4571	for(size_t j=0; j<num_y; j++){
	4572	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4573	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4574	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
	4575	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
	4576	type = result_type + type_offset;
	4577
	4578	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
	4579	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
	4580	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
	4581
	4582	size_t current_blockcount_z;
	4583	double * pb_pos = cur_pb_buf_pos;
	4584	double * next_pb_pos = next_pb_buf_pos;
	4585	size_t strip_unpredictable_count = 0;
	4586	for(size_t k=0; k<num_z; k++){
	4587	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	4588
	4589	/*sampling and decide which predictor*/
	4590	{
	4591	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	4592	double * cur_data_pos;
	4593	double curData;
	4594	double pred_reg, pred_sz;
	4595	double err_sz = 0.0, err_reg = 0.0;
	4596	int bmi = 0;
	4597	if(i>0 && j>0 && k>0){
	4598	for(int i=0; i<block_size; i++){
	4599	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4600	curData = *cur_data_pos;
	4601	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4602	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4603	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4604	err_reg += fabs(pred_reg - curData);
	4605
	4606	bmi = block_size - i;
	4607	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4608	curData = *cur_data_pos;
	4609	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4610	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4611	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4612	err_reg += fabs(pred_reg - curData);
	4613
	4614	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4615	curData = *cur_data_pos;
	4616	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4617	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4618	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4619	err_reg += fabs(pred_reg - curData);
	4620
	4621	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4622	curData = *cur_data_pos;
	4623	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4624	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4625	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4626	err_reg += fabs(pred_reg - curData);
	4627	}
	4628	}
	4629	else{
	4630	for(int i=1; i<block_size; i++){
	4631	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4632	curData = *cur_data_pos;
	4633	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4634	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4635	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4636	err_reg += fabs(pred_reg - curData);
	4637
	4638	bmi = block_size - i;
	4639	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4640	curData = *cur_data_pos;
	4641	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4642	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4643	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4644	err_reg += fabs(pred_reg - curData);
	4645
	4646	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4647	curData = *cur_data_pos;
	4648	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4649	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4650	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4651	err_reg += fabs(pred_reg - curData);
	4652
	4653	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4654	curData = *cur_data_pos;
	4655	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4656	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4657	err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
	4658	err_reg += fabs(pred_reg - curData);
	4659
	4660	}
	4661	}
	4662	use_reg = (err_reg < err_sz);
	4663	}
	4664	if(use_reg){
	4665	{
	4666	/*predict coefficients in current block via previous reg_block*/
	4667	double cur_coeff;
	4668	double diff, itvNum;
	4669	for(int e=0; e<4; e++){
	4670	cur_coeff = reg_params_pos[e*num_blocks];
	4671	diff = cur_coeff - last_coeffcients[e];
	4672	itvNum = fabs(diff)/precision[e] + 1;
	4673	if (itvNum < coeff_intvCapacity_sz){
	4674	if (diff < 0) itvNum = -itvNum;
	4675	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	4676	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	4677	//guarantee compression error against the case of machine-epsilon
	4678	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	4679	coeff_type[e][coeff_index] = 0;
	4680	last_coeffcients[e] = cur_coeff;
	4681	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4682	}
	4683	}
	4684	else{
	4685	coeff_type[e][coeff_index] = 0;
	4686	last_coeffcients[e] = cur_coeff;
	4687	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	4688	}
	4689	}
	4690	coeff_index ++;
	4691	}
	4692	double curData;
	4693	double pred;
	4694	double itvNum;
	4695	double diff;
	4696	size_t index = 0;
	4697	size_t block_unpredictable_count = 0;
	4698	double * cur_data_pos = data_pos;
	4699	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4700	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4701	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4702	curData = *cur_data_pos;
	4703	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	4704	diff = curData - pred;
	4705	itvNum = fabs(diff)/tmp_realPrecision + 1;
	4706	if (itvNum < intvCapacity){
	4707	if (diff < 0) itvNum = -itvNum;
	4708	type[index] = (int) (itvNum/2) + intvRadius;
	4709	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	4710	//guarantee compression error against the case of machine-epsilon
	4711	if(fabs(curData - pred)>tmp_realPrecision){
	4712	type[index] = 0;
	4713	pred = curData;
	4714	unpredictable_data[block_unpredictable_count ++] = curData;
	4715	}
	4716	}
	4717	else{
	4718	type[index] = 0;
	4719	pred = curData;
	4720	unpredictable_data[block_unpredictable_count ++] = curData;
	4721	}
	4722	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	4723	// assign value to block surfaces
	4724	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	4725	}
	4726	index ++;
	4727	cur_data_pos ++;
	4728	}
	4729	cur_data_pos += dim1_offset - current_blockcount_z;
	4730	}
	4731	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	4732	}
	4733	/*dealing with the last ii (boundary)*/
	4734	{
	4735	// ii == current_blockcount_x - 1
	4736	size_t ii = current_blockcount_x - 1;
	4737	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4738	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4739	curData = *cur_data_pos;
	4740	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	4741	diff = curData - pred;
	4742	itvNum = fabs(diff)/tmp_realPrecision + 1;
	4743	if (itvNum < intvCapacity){
	4744	if (diff < 0) itvNum = -itvNum;
	4745	type[index] = (int) (itvNum/2) + intvRadius;
	4746	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	4747	//guarantee compression error against the case of machine-epsilon
	4748	if(fabs(curData - pred)>tmp_realPrecision){
	4749	type[index] = 0;
	4750	pred = curData;
	4751	unpredictable_data[block_unpredictable_count ++] = curData;
	4752	}
	4753	}
	4754	else{
	4755	type[index] = 0;
	4756	pred = curData;
	4757	unpredictable_data[block_unpredictable_count ++] = curData;
	4758	}
	4759
	4760	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	4761	// assign value to block surfaces
	4762	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	4763	}
	4764	// assign value to next prediction buffer
	4765	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
	4766	index ++;
	4767	cur_data_pos ++;
	4768	}
	4769	cur_data_pos += dim1_offset - current_blockcount_z;
	4770	}
	4771	}
	4772	unpredictable_count = block_unpredictable_count;
	4773	strip_unpredictable_count += unpredictable_count;
	4774	unpredictable_data += unpredictable_count;
	4775
	4776	reg_count ++;
	4777	}
	4778	else{
	4779	// use SZ
	4780	// SZ prediction
	4781	unpredictable_count = 0;
	4782	double * cur_pb_pos = pb_pos;
	4783	double * cur_data_pos = data_pos;
	4784	double curData;
	4785	double pred3D;
	4786	double itvNum, diff;
	4787	size_t index = 0;
	4788	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	4789	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4790	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4791
	4792	curData = *cur_data_pos;
	4793	if(fabs(curData - mean) <= realPrecision){
	4794	// adjust type[index] to intvRadius for coherence with freq in reg
	4795	type[index] = intvRadius;
	4796	*cur_pb_pos = mean;
	4797	}
	4798	else
	4799	{
	4800	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	4801	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	4802	diff = curData - pred3D;
	4803	itvNum = fabs(diff)/realPrecision + 1;
	4804	if (itvNum < intvCapacity_sz){
	4805	if (diff < 0) itvNum = -itvNum;
	4806	type[index] = (int) (itvNum/2) + intvRadius;
	4807	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4808	if(type[index] <= intvRadius) type[index] -= 1;
	4809	//guarantee compression error against the case of machine-epsilon
	4810	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4811	type[index] = 0;
	4812	*cur_pb_pos = curData;
	4813	unpredictable_data[unpredictable_count ++] = curData;
	4814	}
	4815	}
	4816	else{
	4817	type[index] = 0;
	4818	*cur_pb_pos = curData;
	4819	unpredictable_data[unpredictable_count ++] = curData;
	4820	}
	4821	}
	4822	index ++;
	4823	cur_pb_pos ++;
	4824	cur_data_pos ++;
	4825	}
	4826	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	4827	cur_data_pos += dim1_offset - current_blockcount_z;
	4828	}
	4829	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
	4830	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	4831	}
	4832	/*dealing with the last ii (boundary)*/
	4833	{
	4834	// ii == current_blockcount_x - 1
	4835	for(size_t jj=0; jj<current_blockcount_y; jj++){
	4836	for(size_t kk=0; kk<current_blockcount_z; kk++){
	4837
	4838	curData = *cur_data_pos;
	4839	if(fabs(curData - mean) <= realPrecision){
	4840	// adjust type[index] to intvRadius for coherence with freq in reg
	4841	type[index] = intvRadius;
	4842	*cur_pb_pos = mean;
	4843	}
	4844	else
	4845	{
	4846	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	4847	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	4848	diff = curData - pred3D;
	4849	itvNum = fabs(diff)/realPrecision + 1;
	4850	if (itvNum < intvCapacity_sz){
	4851	if (diff < 0) itvNum = -itvNum;
	4852	type[index] = (int) (itvNum/2) + intvRadius;
	4853	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	4854	if(type[index] <= intvRadius) type[index] -= 1;
	4855	//guarantee compression error against the case of machine-epsilon
	4856	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	4857	type[index] = 0;
	4858	*cur_pb_pos = curData;
	4859	unpredictable_data[unpredictable_count ++] = curData;
	4860	}
	4861	}
	4862	else{
	4863	type[index] = 0;
	4864	*cur_pb_pos = curData;
	4865	unpredictable_data[unpredictable_count ++] = curData;
	4866	}
	4867	}
	4868	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
	4869	index ++;
	4870	cur_pb_pos ++;
	4871	cur_data_pos ++;
	4872	}
	4873	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	4874	cur_data_pos += dim1_offset - current_blockcount_z;
	4875	}
	4876	}
	4877	strip_unpredictable_count += unpredictable_count;
	4878	unpredictable_data += unpredictable_count;
	4879	// change indicator
	4880	indicator_pos[k] = 1;
	4881	}// end SZ
	4882
	4883	reg_params_pos ++;
	4884	data_pos += current_blockcount_z;
	4885	pb_pos += current_blockcount_z;
	4886	next_pb_pos += current_blockcount_z;
	4887	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
	4888
	4889	} // end k
	4890
	4891	if(strip_unpredictable_count > max_unpred_count){
	4892	max_unpred_count = strip_unpredictable_count;
	4893	}
	4894	total_unpred += strip_unpredictable_count;
	4895	indicator_pos += num_z;
	4896	}// end j
	4897	double * tmp;
	4898	tmp = cur_pb_buf;
	4899	cur_pb_buf = next_pb_buf;
	4900	next_pb_buf = tmp;
	4901	}// end i
	4902	}
	4903	else{
	4904	int intvCapacity_sz = intvCapacity - 2;
	4905	for(size_t i=0; i<num_x; i++){
	4906	current_blockcount_x = (i < split_index_x) ? early_blockcount_x : late_blockcount_x;
	4907	offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
	4908
	4909	for(size_t j=0; j<num_y; j++){
	4910	offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
	4911	current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
	4912	data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset;
	4913	// copy bottom plane from plane buffer
	4914	// memcpy(prediction_buffer, bottom_buffer + offset_y * strip_dim1_offset, (current_blockcount_y + 1) * strip_dim1_offset * sizeof(double));
	4915	type_offset = offset_x * dim0_offset + offset_y * current_blockcount_x * dim1_offset;
	4916	type = result_type + type_offset;
	4917
	4918	// prediction buffer is (current_block_count_x + 1) * (current_block_count_y + 1) * (current_block_count_z + 1)
	4919	cur_pb_buf_pos = cur_pb_buf + offset_y * strip_dim1_offset + strip_dim0_offset + strip_dim1_offset + 1;
	4920	next_pb_buf_pos = next_pb_buf + offset_y * strip_dim1_offset + strip_dim1_offset + 1;
	4921
	4922	size_t current_blockcount_z;
	4923	double * pb_pos = cur_pb_buf_pos;
	4924	double * next_pb_pos = next_pb_buf_pos;
	4925	size_t strip_unpredictable_count = 0;
	4926	for(size_t k=0; k<num_z; k++){
	4927	current_blockcount_z = (k < split_index_z) ? early_blockcount_z : late_blockcount_z;
	4928	/*sampling*/
	4929	{
	4930	// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
	4931	double * cur_data_pos;
	4932	double curData;
	4933	double pred_reg, pred_sz;
	4934	double err_sz = 0.0, err_reg = 0.0;
	4935	int bmi;
	4936	if(i>0 && j>0 && k>0){
	4937	for(int i=0; i<block_size; i++){
	4938	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4939	curData = *cur_data_pos;
	4940	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4941	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4942	err_sz += fabs(pred_sz - curData) + noise;
	4943	err_reg += fabs(pred_reg - curData);
	4944
	4945	bmi = block_size - i;
	4946	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4947	curData = *cur_data_pos;
	4948	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4949	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4950	err_sz += fabs(pred_sz - curData) + noise;
	4951	err_reg += fabs(pred_reg - curData);
	4952
	4953	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4954	curData = *cur_data_pos;
	4955	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4956	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4957	err_sz += fabs(pred_sz - curData) + noise;
	4958	err_reg += fabs(pred_reg - curData);
	4959
	4960	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4961	curData = *cur_data_pos;
	4962	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4963	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4964	err_sz += fabs(pred_sz - curData) + noise;
	4965	err_reg += fabs(pred_reg - curData);
	4966	}
	4967	}
	4968	else{
	4969	for(int i=1; i<block_size; i++){
	4970	cur_data_pos = data_pos + idim0_offset + idim1_offset + i;
	4971	curData = *cur_data_pos;
	4972	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4973	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4974	err_sz += fabs(pred_sz - curData) + noise;
	4975	err_reg += fabs(pred_reg - curData);
	4976
	4977	bmi = block_size - i;
	4978	cur_data_pos = data_pos + idim0_offset + idim1_offset + bmi;
	4979	curData = *cur_data_pos;
	4980	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4981	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4982	err_sz += fabs(pred_sz - curData) + noise;
	4983	err_reg += fabs(pred_reg - curData);
	4984
	4985	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + i;
	4986	curData = *cur_data_pos;
	4987	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4988	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
	4989	err_sz += fabs(pred_sz - curData) + noise;
	4990	err_reg += fabs(pred_reg - curData);
	4991
	4992	cur_data_pos = data_pos + idim0_offset + bmidim1_offset + bmi;
	4993	curData = *cur_data_pos;
	4994	pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
	4995	pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
	4996	err_sz += fabs(pred_sz - curData) + noise;
	4997	err_reg += fabs(pred_reg - curData);
	4998	}
	4999	}
	5000	use_reg = (err_reg < err_sz);
	5001
	5002	}
	5003	if(use_reg)
	5004	{
	5005	{
	5006	/*predict coefficients in current block via previous reg_block*/
	5007	double cur_coeff;
	5008	double diff, itvNum;
	5009	for(int e=0; e<4; e++){
	5010	cur_coeff = reg_params_pos[e*num_blocks];
	5011	diff = cur_coeff - last_coeffcients[e];
	5012	itvNum = fabs(diff)/precision[e] + 1;
	5013	if (itvNum < coeff_intvCapacity_sz){
	5014	if (diff < 0) itvNum = -itvNum;
	5015	coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
	5016	last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
	5017	//guarantee compression error against the case of machine-epsilon
	5018	if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
	5019	coeff_type[e][coeff_index] = 0;
	5020	last_coeffcients[e] = cur_coeff;
	5021	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5022	}
	5023	}
	5024	else{
	5025	coeff_type[e][coeff_index] = 0;
	5026	last_coeffcients[e] = cur_coeff;
	5027	coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
	5028	}
	5029	}
	5030	coeff_index ++;
	5031	}
	5032	double curData;
	5033	double pred;
	5034	double itvNum;
	5035	double diff;
	5036	size_t index = 0;
	5037	size_t block_unpredictable_count = 0;
	5038	double * cur_data_pos = data_pos;
	5039	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5040	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5041	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5042
	5043	curData = *cur_data_pos;
	5044	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5045	diff = curData - pred;
	5046	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5047	if (itvNum < intvCapacity){
	5048	if (diff < 0) itvNum = -itvNum;
	5049	type[index] = (int) (itvNum/2) + intvRadius;
	5050	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5051	//guarantee compression error against the case of machine-epsilon
	5052	if(fabs(curData - pred)>tmp_realPrecision){
	5053	type[index] = 0;
	5054	pred = curData;
	5055	unpredictable_data[block_unpredictable_count ++] = curData;
	5056	}
	5057	}
	5058	else{
	5059	type[index] = 0;
	5060	pred = curData;
	5061	unpredictable_data[block_unpredictable_count ++] = curData;
	5062	}
	5063
	5064	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5065	// assign value to block surfaces
	5066	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5067	}
	5068	index ++;
	5069	cur_data_pos ++;
	5070	}
	5071	cur_data_pos += dim1_offset - current_blockcount_z;
	5072	}
	5073	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5074	}
	5075	/* dealing with the last ii (boundary) */
	5076	{
	5077	// ii == current_blockcount_x - 1
	5078	size_t ii = current_blockcount_x - 1;
	5079	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5080	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5081	curData = *cur_data_pos;
	5082	pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
	5083	diff = curData - pred;
	5084	itvNum = fabs(diff)/tmp_realPrecision + 1;
	5085	if (itvNum < intvCapacity){
	5086	if (diff < 0) itvNum = -itvNum;
	5087	type[index] = (int) (itvNum/2) + intvRadius;
	5088	pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
	5089	//guarantee compression error against the case of machine-epsilon
	5090	if(fabs(curData - pred)>tmp_realPrecision){
	5091	type[index] = 0;
	5092	pred = curData;
	5093	unpredictable_data[block_unpredictable_count ++] = curData;
	5094	}
	5095	}
	5096	else{
	5097	type[index] = 0;
	5098	pred = curData;
	5099	unpredictable_data[block_unpredictable_count ++] = curData;
	5100	}
	5101
	5102	if((jj == current_blockcount_y - 1) \|\| (kk == current_blockcount_z - 1)){
	5103	// assign value to block surfaces
	5104	pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
	5105	}
	5106	// assign value to next prediction buffer
	5107	next_pb_pos[jj * strip_dim1_offset + kk] = pred;
	5108	index ++;
	5109	cur_data_pos ++;
	5110	}
	5111	cur_data_pos += dim1_offset - current_blockcount_z;
	5112	}
	5113	}
	5114	unpredictable_count = block_unpredictable_count;
	5115	strip_unpredictable_count += unpredictable_count;
	5116	unpredictable_data += unpredictable_count;
	5117	reg_count ++;
	5118	}
	5119	else{
	5120	// use SZ
	5121	// SZ prediction
	5122	unpredictable_count = 0;
	5123	double * cur_pb_pos = pb_pos;
	5124	double * cur_data_pos = data_pos;
	5125	double curData;
	5126	double pred3D;
	5127	double itvNum, diff;
	5128	size_t index = 0;
	5129	for(size_t ii=0; ii<current_blockcount_x - 1; ii++){
	5130	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5131	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5132
	5133	curData = *cur_data_pos;
	5134	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	5135	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	5136	diff = curData - pred3D;
	5137	itvNum = fabs(diff)/realPrecision + 1;
	5138	if (itvNum < intvCapacity_sz){
	5139	if (diff < 0) itvNum = -itvNum;
	5140	type[index] = (int) (itvNum/2) + intvRadius;
	5141	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	5142	//guarantee compression error against the case of machine-epsilon
	5143	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	5144	type[index] = 0;
	5145	*cur_pb_pos = curData;
	5146	unpredictable_data[unpredictable_count ++] = curData;
	5147	}
	5148	}
	5149	else{
	5150	type[index] = 0;
	5151	*cur_pb_pos = curData;
	5152	unpredictable_data[unpredictable_count ++] = curData;
	5153	}
	5154	index ++;
	5155	cur_pb_pos ++;
	5156	cur_data_pos ++;
	5157	}
	5158	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	5159	cur_data_pos += dim1_offset - current_blockcount_z;
	5160	}
	5161	cur_pb_pos += strip_dim0_offset - current_blockcount_y * strip_dim1_offset;
	5162	cur_data_pos += dim0_offset - current_blockcount_y * dim1_offset;
	5163	}
	5164	/* dealing with the last ii (boundary) */
	5165	{
	5166	// ii == current_blockcount_x - 1
	5167	for(size_t jj=0; jj<current_blockcount_y; jj++){
	5168	for(size_t kk=0; kk<current_blockcount_z; kk++){
	5169
	5170	curData = *cur_data_pos;
	5171	pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1]
	5172	- cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1];
	5173	diff = curData - pred3D;
	5174	itvNum = fabs(diff)/realPrecision + 1;
	5175	if (itvNum < intvCapacity_sz){
	5176	if (diff < 0) itvNum = -itvNum;
	5177	type[index] = (int) (itvNum/2) + intvRadius;
	5178	cur_pb_pos = pred3D + 2 (type[index] - intvRadius) * tmp_realPrecision;
	5179	//guarantee compression error against the case of machine-epsilon
	5180	if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){
	5181	type[index] = 0;
	5182	*cur_pb_pos = curData;
	5183	unpredictable_data[unpredictable_count ++] = curData;
	5184	}
	5185	}
	5186	else{
	5187	type[index] = 0;
	5188	*cur_pb_pos = curData;
	5189	unpredictable_data[unpredictable_count ++] = curData;
	5190	}
	5191	// assign value to next prediction buffer
	5192	next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
	5193	index ++;
	5194	cur_pb_pos ++;
	5195	cur_data_pos ++;
	5196	}
	5197	cur_pb_pos += strip_dim1_offset - current_blockcount_z;
	5198	cur_data_pos += dim1_offset - current_blockcount_z;
	5199	}
	5200	}
	5201	strip_unpredictable_count += unpredictable_count;
	5202	unpredictable_data += unpredictable_count;
	5203	// change indicator
	5204	indicator_pos[k] = 1;
	5205	}// end SZ
	5206
	5207	reg_params_pos ++;
	5208	data_pos += current_blockcount_z;
	5209	pb_pos += current_blockcount_z;
	5210	next_pb_pos += current_blockcount_z;
	5211	type += current_blockcount_x * current_blockcount_y * current_blockcount_z;
	5212
	5213	}
	5214
	5215	if(strip_unpredictable_count > max_unpred_count){
	5216	max_unpred_count = strip_unpredictable_count;
	5217	}
	5218	total_unpred += strip_unpredictable_count;
	5219	indicator_pos += num_z;
	5220	}
	5221	double * tmp;
	5222	tmp = cur_pb_buf;
	5223	cur_pb_buf = next_pb_buf;
	5224	next_pb_buf = tmp;
	5225	}
	5226	}
	5227
	5228	free(prediction_buffer_1);
	5229	free(prediction_buffer_2);
	5230
	5231	int stateNum = 2*quantization_intervals;
	5232	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	5233
	5234	size_t nodeCount = 0;
	5235	init(huffmanTree, result_type, num_elements);
	5236	size_t i = 0;
	5237	for (i = 0; i < huffmanTree->stateNum; i++)
	5238	if (huffmanTree->code[i]) nodeCount++;
	5239	nodeCount = nodeCount*2-1;
	5240
	5241	unsigned char *treeBytes;
	5242	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	5243
	5244	unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength;
	5245	// total size metadata # elements real precision intervals nodeCount huffman block index unpredictable count mean unpred size elements
	5246	unsigned char * result = (unsigned char ) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + treeByteSize + num_blocks sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
	5247	unsigned char * result_pos = result;
	5248	initRandomAccessBytes(result_pos);
	5249
	5250	result_pos += meta_data_offset;
	5251
	5252	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
	5253	result_pos += exe_params->SZ_SIZE_TYPE;
	5254
	5255	intToBytes_bigEndian(result_pos, block_size);
	5256	result_pos += sizeof(int);
	5257	doubleToBytes(result_pos, realPrecision);
	5258	result_pos += sizeof(double);
	5259	intToBytes_bigEndian(result_pos, quantization_intervals);
	5260	result_pos += sizeof(int);
	5261	intToBytes_bigEndian(result_pos, treeByteSize);
	5262	result_pos += sizeof(int);
	5263	intToBytes_bigEndian(result_pos, nodeCount);
	5264	result_pos += sizeof(int);
	5265	memcpy(result_pos, treeBytes, treeByteSize);
	5266	result_pos += treeByteSize;
	5267	free(treeBytes);
	5268
	5269	memcpy(result_pos, &use_mean, sizeof(unsigned char));
	5270	result_pos += sizeof(unsigned char);
	5271	memcpy(result_pos, &mean, sizeof(double));
	5272	result_pos += sizeof(double);
	5273	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
	5274	result_pos += indicator_size;
	5275
	5276	//convert the lead/mid/resi to byte stream
	5277	if(reg_count > 0){
	5278	for(int e=0; e<4; e++){
	5279	int stateNum = 2*coeff_intvCapacity_sz;
	5280	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
	5281	size_t nodeCount = 0;
	5282	init(huffmanTree, coeff_type[e], reg_count);
	5283	size_t i = 0;
	5284	for (i = 0; i < huffmanTree->stateNum; i++)
	5285	if (huffmanTree->code[i]) nodeCount++;
	5286	nodeCount = nodeCount*2-1;
	5287	unsigned char *treeBytes;
	5288	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
	5289	doubleToBytes(result_pos, precision[e]);
	5290	result_pos += sizeof(double);
	5291	intToBytes_bigEndian(result_pos, coeff_intvRadius);
	5292	result_pos += sizeof(int);
	5293	intToBytes_bigEndian(result_pos, treeByteSize);
	5294	result_pos += sizeof(int);
	5295	intToBytes_bigEndian(result_pos, nodeCount);
	5296	result_pos += sizeof(int);
	5297	memcpy(result_pos, treeBytes, treeByteSize);
	5298	result_pos += treeByteSize;
	5299	free(treeBytes);
	5300	size_t typeArray_size = 0;
	5301	encode(huffmanTree, coeff_type[e], reg_count, result_pos + sizeof(size_t), &typeArray_size);
	5302	sizeToBytes(result_pos, typeArray_size);
	5303	result_pos += sizeof(size_t) + typeArray_size;
	5304	intToBytes_bigEndian(result_pos, coeff_unpredictable_count[e]);
	5305	result_pos += sizeof(int);
	5306	memcpy(result_pos, coeff_unpred_data[e], coeff_unpredictable_count[e]*sizeof(double));
	5307	result_pos += coeff_unpredictable_count[e]*sizeof(double);
	5308	SZ_ReleaseHuffman(huffmanTree);
	5309	}
	5310	}
	5311	free(coeff_result_type);
	5312	free(coeff_unpredictable_data);
	5313
	5314	//record the number of unpredictable data and also store them
	5315	memcpy(result_pos, &total_unpred, sizeof(size_t));
	5316	result_pos += sizeof(size_t);
	5317	memcpy(result_pos, result_unpredictable_data, total_unpred * sizeof(double));
	5318	result_pos += total_unpred * sizeof(double);
	5319	size_t typeArray_size = 0;
	5320	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
	5321	result_pos += typeArray_size;
	5322	size_t totalEncodeSize = result_pos - result;
	5323	free(indicator);
	5324	free(result_unpredictable_data);
	5325	free(result_type);
	5326	free(reg_params);
	5327
	5328
	5329	SZ_ReleaseHuffman(huffmanTree);
	5330	*comp_size = totalEncodeSize;
	5331	return result;
	5332	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: